[test]Add the string testcase of index and query (#16884)

Signed-off-by: jingkl <jingjing.jia@zilliz.com>
pull/16927/head
jingkl 2022-05-11 21:55:53 +08:00 committed by GitHub
parent 03309a7ef3
commit 3579ae8240
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 309 additions and 19 deletions

View File

@ -14,7 +14,7 @@ from pymilvus.orm.types import CONSISTENCY_STRONG
from common.common_func import param_info
TIMEOUT = 20
INDEX_NAME = "_default_idx"
# keep small timeout for stability tests
# TIMEOUT = 5
@ -218,9 +218,11 @@ class ApiCollectionWrapper:
return res, check_result
@trace()
def create_index(self, field_name, index_params, check_task=None, check_items=None, **kwargs):
def create_index(self, field_name, index_params, index_name=None, check_task=None, check_items=None, **kwargs):
timeout = kwargs.get("timeout", TIMEOUT * 2)
kwargs.update({"timeout": timeout})
index_name = INDEX_NAME if index_name is None else index_name
index_name = kwargs.get("index_name", index_name)
kwargs.update({"timeout": timeout, "index_name": index_name})
func_name = sys._getframe().f_code.co_name
res, check = api_request([self.collection.create_index, field_name, index_params], **kwargs)
@ -229,16 +231,22 @@ class ApiCollectionWrapper:
return res, check_result
@trace()
def has_index(self, index_name=None, check_task=None, check_items=None, **kwargs):
    """
    Call ``has_index`` on the wrapped collection and run the response checker.

    :param index_name: name of the index to look up; ``INDEX_NAME`` is used when None
    :param check_task: forwarded to ``ResponseChecker``
    :param check_items: forwarded to ``ResponseChecker``
    :param kwargs: extra keyword arguments forwarded to both ``collection.has_index``
                   and ``ResponseChecker``
    :return: ``(res, check_result)`` - the ``api_request`` result and the checker outcome
    """
    # ``index_name`` is a named parameter, so it can never arrive through **kwargs;
    # resolving the default here is the only step needed before forwarding it.
    index_name = INDEX_NAME if index_name is None else index_name
    kwargs.update({"index_name": index_name})

    # Name of this method, handed to the checker so it knows which API was called.
    func_name = sys._getframe().f_code.co_name
    res, check = api_request([self.collection.has_index], **kwargs)
    check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
    return res, check_result
@trace()
def drop_index(self, check_task=None, check_items=None, **kwargs):
def drop_index(self, index_name=None, check_task=None, check_items=None, **kwargs):
timeout = kwargs.get("timeout", TIMEOUT)
kwargs.update({"timeout": timeout})
index_name = INDEX_NAME if index_name is None else index_name
index_name = kwargs.get("index_name", index_name)
kwargs.update({"timeout": timeout, "index_name": index_name})
func_name = sys._getframe().f_code.co_name
res, check = api_request([self.collection.drop_index], **kwargs)

View File

@ -6,15 +6,19 @@ from check.func_check import ResponseChecker
from utils.api_request import api_request
TIMEOUT = 20
TIMEOUT = 20
INDEX_NAME = "_default_idx"
class ApiIndexWrapper:
index = None
def init_index(self, collection, field_name, index_params, check_task=None, check_items=None, **kwargs):
def init_index(self, collection, field_name, index_params, index_name=None, check_task=None, check_items=None, **kwargs):
timeout = kwargs.get("timeout", TIMEOUT * 2)
kwargs.update({"timeout": timeout})
index_name = INDEX_NAME if index_name is None else index_name
index_name = kwargs.get("index_name", index_name)
kwargs.update({"timeout": timeout, "index_name": index_name})
""" In order to distinguish the same name of index """
func_name = sys._getframe().f_code.co_name
res, is_succ = api_request([Index, collection, field_name, index_params], **kwargs)
@ -24,9 +28,11 @@ class ApiIndexWrapper:
index_params=index_params, **kwargs).run()
return res, check_result
def drop(self, check_task=None, check_items=None, **kwargs):
def drop(self, index_name=None ,check_task=None, check_items=None, **kwargs):
timeout = kwargs.get("timeout", TIMEOUT)
kwargs.update({"timeout": timeout})
index_name = INDEX_NAME if index_name is None else index_name
index_name = kwargs.get("index_name", index_name)
kwargs.update({"timeout": timeout, "index_name": index_name})
func_name = sys._getframe().f_code.co_name
res, is_succ = api_request([self.index.drop], **kwargs)

View File

@ -9,7 +9,7 @@ allure-pytest==2.7.0
pytest-print==0.2.1
pytest-level==0.1.1
pytest-xdist==2.2.1
pymilvus==2.1.0.dev50
pymilvus==2.1.0.dev56
pytest-rerunfailures==9.1.1
git+https://github.com/Projectplace/pytest-tags
ndg-httpsclient

View File

@ -23,6 +23,13 @@ uid = "test_index"
# BUILD_TIMEOUT = 300
field_name = default_float_vec_field_name
binary_field_name = default_binary_vec_field_name
# Name of the string field, taken from the shared constants module.
default_string_field_name = ct.default_string_field_name
# Index params passed by the string-index tests (an empty dict).
default_string_index_params = dict()
# Unique index names used by the tests below, generated in this order.
# NOTE(review): "varhar" looks like a typo for "varchar"; kept unchanged because
# it only seeds a generated name - confirm before renaming.
index_name1, index_name2, index_name3 = (
    cf.gen_unique_str(tag) for tag in ("float", "varhar", "binary")
)
default_binary_schema = cf.gen_default_binary_collection_schema()
default_binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}}
# query = gen_search_vectors_params(field_name, default_entities, default_top_k, 1)
default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"}
@ -615,8 +622,6 @@ class TestIndexBase:
index = connect.describe_index(collection, "")
assert not index # FLAT is the last index_type, drop all indexes in server
@pytest.mark.tags(CaseLabel.L2)
# @pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_different_index_repeatedly_B(self, connect, collection):
"""
target: check if index can be created repeatedly, with the different create_index params
@ -1310,3 +1315,188 @@ class TestIndexAsync:
res = future.result()
# TODO:
log.info(res)
class TestIndexString(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test create index about string
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L1)
    def test_create_index_with_string_field(self):
        """
        target: test create index on a string field that is not the primary key
        method: 1.create collection and insert data
                2.create an index only on the non-primary string field
        expected: create index successfully
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data()
        collection_w.insert(data=data)
        index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
                                              default_string_index_params)
        # Read the indexes from the underlying collection, like every other case in this class.
        cf.assert_equal_index(index, collection_w.collection.indexes[0])

    @pytest.mark.tags(CaseLabel.L1)
    def test_create_index_with_string_before_load(self):
        """
        target: test create index on a string field before load
        method: 1.create collection and insert data
                2.create an index on the string field
                3.load the collection
        expected: create index successfully and the entity count matches the inserted data
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data(ct.default_nb)
        collection_w.insert(data=data)
        index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
                                              default_string_index_params)
        cf.assert_equal_index(index, collection_w.collection.indexes[0])
        collection_w.load()
        # Compare against the same constant that sized the inserted data.
        assert collection_w.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L1)
    def test_load_after_create_index_with_string(self):
        """
        target: test create index on a string field of an already loaded collection
        method: 1.create collection and insert data
                2.load the collection
                3.create an index on the string field
        expected: create index successfully and the entity count matches the inserted data
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data(ct.default_nb)
        collection_w.insert(data=data)
        collection_w.load()
        index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
                                              default_string_index_params)
        cf.assert_equal_index(index, collection_w.collection.indexes[0])
        # Compare against the same constant that sized the inserted data.
        assert collection_w.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L1)
    def test_create_index_with_string_field_is_primary(self):
        """
        target: test create index on a string field that is the primary key
        method: 1.create collection with a string primary key schema
                2.insert data
                3.create an index only on the primary string field
        expected: create index successfully
        """
        c_name = cf.gen_unique_str(prefix)
        schema = cf.gen_string_pk_default_collection_schema()
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
        data = cf.gen_default_list_data()
        collection_w.insert(data=data)
        index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
                                              default_string_index_params)
        cf.assert_equal_index(index, collection_w.collection.indexes[0])

    @pytest.mark.tags(CaseLabel.L1)
    def test_create_index_or_not_with_string_field(self):
        """
        target: test create index when only some of the string fields are indexed
        method: 1.create collection with an extra string field "test_string"
                2.insert data
                3.create an index on the default string field only
        expected: create index successfully
        """
        c_name = cf.gen_unique_str(prefix)
        string_fields = [cf.gen_string_field(name="test_string")]
        schema = cf.gen_schema_multi_string_fields(string_fields)
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
        df = cf.gen_dataframe_multi_string_fields(string_fields=string_fields)
        collection_w.insert(df)
        # NOTE(review): success is only verified through the wrapper's default check here;
        # no explicit assertion on the resulting indexes is made.
        self.index_wrap.init_index(collection_w.collection, default_string_field_name,
                                   default_string_index_params)

    @pytest.mark.tags(CaseLabel.L1)
    def test_create_index_with_same_index_name(self):
        """
        target: test create index on different fields using the same index name
        method: 1.create collection
                2.insert data
                3.create an index on the string field, then create an index on the
                  float vector field reusing the same index name
        expected: the second create_index raises an exception ("CreateIndex failed")
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data()
        collection_w.insert(data=data)
        collection_w.create_index(default_string_field_name, default_string_index_params,
                                  index_name=index_name2)
        collection_w.create_index(default_float_vec_field_name, default_index_params,
                                  index_name=index_name2,
                                  check_task=CheckTasks.err_res,
                                  check_items={ct.err_code: 1, ct.err_msg: "CreateIndex failed"})

    @pytest.mark.tags(CaseLabel.L1)
    def test_create_different_index_fields(self):
        """
        target: test create index on different fields
        method: 1.create collection
                2.insert data
                3.create differently named indexes on the float vector and string fields
        expected: create index successfully, the collection reports two indexes
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data()
        collection_w.insert(data=data)
        collection_w.create_index(default_float_vec_field_name, default_index_params,
                                  index_name=index_name1)
        # ``is True`` keeps the check strict: only a real boolean True passes.
        assert collection_w.has_index(index_name=index_name1)[0] is True
        collection_w.create_index(default_string_field_name, default_string_index_params,
                                  index_name=index_name2)
        assert collection_w.has_index(index_name=index_name2)[0] is True
        assert len(collection_w.collection.indexes) == 2

    @pytest.mark.tags(CaseLabel.L1)
    def test_create_different_index_binary_fields(self):
        """
        target: test create index on string and binary vector fields
        method: 1.create collection with the default binary schema
                2.insert data
                3.create differently named indexes on the string and binary vector fields
        expected: create index successfully, the collection reports two indexes
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
        df, _ = cf.gen_default_binary_dataframe_data()
        collection_w.insert(data=df)
        collection_w.create_index(default_string_field_name, default_string_index_params,
                                  index_name=index_name2)
        # ``is True`` keeps the check strict: only a real boolean True passes.
        assert collection_w.has_index(index_name=index_name2)[0] is True
        collection_w.create_index(default_binary_vec_field_name, default_binary_index_params,
                                  index_name=index_name3)
        assert collection_w.has_index(index_name=index_name3)[0] is True
        assert len(collection_w.collection.indexes) == 2

    @pytest.mark.tags(CaseLabel.L1)
    def test_drop_index_with_string_field(self):
        """
        target: test drop index on a string field
        method: 1.create collection and insert data
                2.create an index and drop it with index.drop()
        expected: drop index successfully, no index is left on the collection
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data()
        collection_w.insert(data=data)
        index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
                                              default_string_index_params)
        cf.assert_equal_index(index, collection_w.collection.indexes[0])
        self.index_wrap.drop()
        assert len(collection_w.collection.indexes) == 0

    @pytest.mark.tags(CaseLabel.L1)
    def test_collection_drop_index_with_string(self):
        """
        target: test drop index on a string field
        method: 1.create collection and insert data
                2.create an index and drop it with collection.drop_index()
        expected: drop index successfully, no index is left on the collection
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data()
        collection_w.insert(data=data)
        collection_w.create_index(default_string_field_name, default_string_index_params,
                                  index_name=index_name2)
        collection_w.drop_index(index_name=index_name2)
        assert len(collection_w.collection.indexes) == 0

View File

@ -16,6 +16,9 @@ import utils.util_pymilvus as ut
prefix = "query"
exp_res = "exp_res"
default_term_expr = f'{ct.default_int64_field_name} in [0, 1]'
# Filter combining an int64 clause and a varchar clause.
default_mix_expr = 'int64 >= 0 && varchar >= "0"'
# NOTE(review): presumably invalid because the varchar field is compared with an
# integer literal - confirm against the server's expression rules.
default_invaild_expr = 'varchar >= 0'
# Term expression selecting the string values "0" and "1".
default_string_term_expr = f'{ct.default_string_field_name} in ["0", "1"]'
default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}}
@ -23,6 +26,7 @@ default_entities = ut.gen_entities(ut.default_nb, is_normal=True)
default_pos = 5
default_int_field_name = "int64"
default_float_field_name = "float"
default_string_field_name = "varchar"
class TestQueryParams(TestcaseBase):
@ -237,8 +241,9 @@ class TestQueryParams(TestcaseBase):
ct.default_int64_field_name: pd.Series(data=[i for i in range(ct.default_nb)]),
ct.default_int32_field_name: pd.Series(data=[np.int32(i) for i in range(ct.default_nb)], dtype="int32"),
ct.default_int16_field_name: pd.Series(data=[np.int16(i) for i in range(ct.default_nb)], dtype="int16"),
ct.default_float_field_name: pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
ct.default_float_field_name: pd.Series(data=[np.float32(i) for i in range(ct.default_nb)], dtype="float32"),
ct.default_double_field_name: pd.Series(data=[np.double(i) for i in range(ct.default_nb)], dtype="double"),
ct.default_string_field_name: pd.Series(data=[str(i) for i in range(ct.default_nb)], dtype="string"),
ct.default_float_vec_field_name: cf.gen_vectors(ct.default_nb, ct.default_dim)
})
self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
@ -248,7 +253,7 @@ class TestQueryParams(TestcaseBase):
# query by non_primary non_vector scalar field
non_primary_field = [ct.default_int32_field_name, ct.default_int16_field_name,
ct.default_float_field_name, ct.default_double_field_name]
ct.default_float_field_name, ct.default_double_field_name, ct.default_string_field_name]
# exp res: first two rows and all fields expect last vec field
res = df.iloc[:2, :-1].to_dict('records')
@ -1190,3 +1195,84 @@ class TestQueryOperation(TestcaseBase):
collection_w.query(f'{ct.default_int64_field_name} in [1]',
check_task=CheckTasks.check_query_results, check_items={exp_res: res})
class TestqueryString(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test query with string
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L1)
    def test_query_string_is_not_primary(self):
        """
        target: query by a string field that is not the primary key
        method: init a collection with inserted data, then query with a
                string term expression on the non-primary string field
        expected: query successfully, results match the first two inserted rows
        """
        collection_w, inserted = self.init_collection_general(prefix, insert_data=True)[:2]
        # Expected rows: first two records, first three columns of the inserted frame.
        expected_rows = inserted[0].iloc[:2, :3].to_dict('records')
        wanted_fields = [default_float_field_name, default_string_field_name]
        collection_w.query(
            default_string_term_expr,
            output_fields=wanted_fields,
            check_task=CheckTasks.check_query_results,
            check_items={exp_res: expected_rows},
        )

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(default_string_field_name))
    def test_query_string_is_primary(self, expression):
        """
        target: query with only the string primary field as output field
        method: init a collection whose primary field is the string field and
                query it with each generated string expression
        expected: the first returned row contains only the string primary field
        """
        pk_field = ct.default_string_field_name
        collection_w, inserted = self.init_collection_general(
            prefix, insert_data=True, primary_field=pk_field)[:2]
        rows, _ = collection_w.query(expression, output_fields=[pk_field])
        returned_keys = list(rows[0].keys())
        assert returned_keys == [pk_field]

    @pytest.mark.tags(CaseLabel.L1)
    def test_query_string_with_mix_expr(self):
        """
        target: query with an expression mixing a string field and an int field
        method: init a collection with a string primary field and inserted data,
                then query with the mixed expression
        expected: query successfully, results match the inserted rows
        """
        collection_w, inserted = self.init_collection_general(
            prefix, insert_data=True, primary_field=ct.default_string_field_name)[:2]
        # Expected rows: every record, columns 1 and 2 of the inserted frame.
        expected_rows = inserted[0].iloc[:, 1:3].to_dict('records')
        wanted_fields = [default_float_field_name, default_string_field_name]
        collection_w.query(
            default_mix_expr,
            output_fields=wanted_fields,
            check_task=CheckTasks.check_query_results,
            check_items={exp_res: expected_rows},
        )

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("expression", cf.gen_invaild_string_expressions())
    def test_query_with_invalid_string_expr(self, expression):
        """
        target: query with an invalid string expression
        method: init a collection with inserted data, then query with each
                generated invalid expression
        expected: raise exception ("type mismatch")
        """
        collection_w = self.init_collection_general(prefix, insert_data=True)[0]
        expected_error = {ct.err_code: 1, ct.err_msg: "type mismatch"}
        collection_w.query(expression, check_task=CheckTasks.err_res, check_items=expected_error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_query_string_expr_with_binary(self):
        """
        target: query by a string expression on a binary-vector collection
        method: init a binary collection, create the binary index, load,
                then query with the string term expression
        expected: the index exists and the query returns two rows
        """
        collection_w, inserted = self.init_collection_general(
            prefix, insert_data=True, is_binary=True, is_index=True)[:2]
        binary_field = ct.default_binary_vec_field_name
        collection_w.create_index(binary_field, binary_index_params)
        collection_w.load()
        assert collection_w.has_index()[0]
        rows, _ = collection_w.query(default_string_term_expr, output_fields=[binary_field])
        assert len(rows) == 2