Add cases for term not in and query non-primary field (#7554)

Signed-off-by: ThreadDao <yufen.zong@zilliz.com>
ThreadDao 2021-09-08 12:10:00 +08:00 committed by GitHub
parent a560aefc36
commit 416bfeafc9
8 changed files with 254 additions and 61 deletions


@@ -116,14 +116,38 @@ Collections:
     testcase:
       name: test_etcd_podkill
      chaos: chaos_etcd_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_minio_podkill
       chaos: chaos_minio_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_pulsar_podkill
-      chaos: chaos_minio_podkill.yaml
+      chaos: chaos_pulsar_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_querynode_cpu100p
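Since every testcase now carries an `expectation` block, a chaos run can look up the expected outcome of each operation after a pod kill. A minimal sketch of reading that block, assuming PyYAML and the layout above; `load_expectations` and the file name are hypothetical, not part of the test suite:

```python
import yaml  # PyYAML, assumed available in the test environment

def load_expectations(path):
    """Map testcase name -> expectation dict (hypothetical helper)."""
    with open(path) as f:
        doc = yaml.safe_load(f)
    return {item["testcase"]["name"]: item["testcase"].get("expectation", {})
            for item in doc["Collections"]}

# expectations = load_expectations("chaos_testcases.yaml")  # file name assumed
# expectations["test_etcd_podkill"]["cluster_1_node"]["search"]  # -> "fail"
```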


@@ -14,7 +14,9 @@ import threading
 import traceback
 
 """" Methods of processing data """
 
-#l2 = lambda x, y: np.linalg.norm(np.array(x) - np.array(y))
+# l2 = lambda x, y: np.linalg.norm(np.array(x) - np.array(y))
 
+
+
 def gen_unique_str(str_value=None):
@@ -187,6 +189,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
     bool_values = pd.Series(data=[np.bool(i) for i in range(start, start + nb)], dtype="bool")
     float_values = pd.Series(data=[float(i) for i in range(start, start + nb)], dtype="float32")
     double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
+    # string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int64_values,
@@ -195,6 +198,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
         ct.default_int8_field_name: int8_values,
         ct.default_bool_field_name: bool_values,
         ct.default_float_field_name: float_values,
+        # ct.default_string_field_name: string_values,
         ct.default_double_field_name: double_values,
         ct.default_float_vec_field_name: float_vec_values
     })
@@ -297,6 +301,7 @@ def gen_invaild_search_params_type():
         search_params.append(annoy_search_param)
     return search_params
 
+
 def gen_search_param(index_type, metric_type="L2"):
     search_params = []
     if index_type in ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_SQ8H", "IVF_PQ"] \
@@ -321,6 +326,7 @@ def gen_search_param(index_type, metric_type="L2"):
         raise Exception("Invalid index_type.")
     return search_params
 
+
 def gen_all_type_fields():
     fields = []
     for k, v in DataType.__members__.items():
@@ -385,11 +391,13 @@ def tanimoto(x, y):
     y = np.asarray(y, np.bool)
     return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()))
 
+
 def tanimoto_calc(x, y):
     x = np.asarray(x, np.bool)
     y = np.asarray(y, np.bool)
     return np.double((len(x) - np.bitwise_xor(x, y).sum())) / (len(y) + np.bitwise_xor(x, y).sum())
 
+
 def substructure(x, y):
     x = np.asarray(x, np.bool)
     y = np.asarray(y, np.bool)
@@ -401,6 +409,7 @@ def superstructure(x, y):
     y = np.asarray(y, np.bool)
     return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x)
 
+
 def compare_distance_2d_vector(x, y, distance, metric, sqrt):
     for i in range(len(x)):
         for j in range(len(y)):
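As a quick sanity check of the two Tanimoto helpers above, a sketch with hand-picked bit vectors (spelling the deprecated `np.bool` alias as plain `bool`; the values are made up):

```python
import numpy as np

x = np.asarray([1, 1, 0, 1], dtype=bool)
y = np.asarray([1, 0, 0, 1], dtype=bool)

# tanimoto: -log2(|x AND y| / |x OR y|) = -log2(2 / 3) ~ 0.585
print(-np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum())))

# tanimoto_calc: (len(x) - |x XOR y|) / (len(y) + |x XOR y|) = (4 - 1) / (4 + 1) = 0.6
print(np.double(len(x) - np.bitwise_xor(x, y).sum()) / (len(y) + np.bitwise_xor(x, y).sum()))
```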


@@ -26,6 +26,7 @@ default_int32_field_name = "int32"
 default_int64_field_name = "int64"
 default_float_field_name = "float"
 default_double_field_name = "double"
+default_string_field_name = "string"
 default_float_vec_field_name = "float_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"


@@ -11,7 +11,6 @@ from common.common_type import CaseLabel, CheckTasks
 from utils.utils import *
 from common.constants import *
 
-
 prefix = "collection"
 exp_name = "name"
 exp_schema = "schema"
@@ -39,7 +38,6 @@ default_single_query = {
 }
 
-
 class TestCollectionParams(TestcaseBase):
     """ Test case of collection interface """
@@ -601,8 +599,9 @@ class TestCollectionParams(TestcaseBase):
         int_field_one = cf.gen_int64_field(is_primary=True)
         int_field_two = cf.gen_int64_field(name="int2", is_primary=True)
         error = {ct.err_code: 0, ct.err_msg: "Primary key field can only be one."}
-        self.collection_schema_wrap.init_collection_schema(fields=[int_field_one, int_field_two, cf.gen_float_vec_field()],
-                                                           check_task=CheckTasks.err_res, check_items=error)
+        self.collection_schema_wrap.init_collection_schema(
+            fields=[int_field_one, int_field_two, cf.gen_float_vec_field()],
+            check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_collection_primary_inconsistent(self):
@@ -821,7 +820,8 @@ class TestCollectionParams(TestcaseBase):
         int_field, _ = self.field_schema_wrap.init_field_schema(name=ct.default_int64_field_name, dtype=DataType.INT64,
                                                                 dim=ct.default_dim)
         float_vec_field = cf.gen_float_vec_field()
-        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field], primary_field=ct.default_int64_field_name)
+        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field],
+                                          primary_field=ct.default_int64_field_name)
         self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property,
                                              check_items={exp_name: c_name, exp_schema: schema})
@@ -962,6 +962,26 @@ class TestCollectionOperation(TestcaseBase):
                                              check_items={exp_name: c_name, exp_schema: default_schema})
         assert self.utility_wrap.has_collection(c_name)[0]
 
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_collection_all_datatype_fields(self):
+        """
+        target: test create collection with all dataType fields
+        method: create collection with all dataType schema
+        expected: create successfully
+        """
+        self._connect()
+        fields = []
+        for k, v in DataType.__members__.items():
+            if v and v != DataType.UNKNOWN and v != DataType.FLOAT_VECTOR and v != DataType.BINARY_VECTOR:
+                field, _ = self.field_schema_wrap.init_field_schema(name=k.lower(), dtype=v)
+                fields.append(field)
+        fields.append(cf.gen_float_vec_field())
+        schema, _ = self.collection_schema_wrap.init_collection_schema(fields,
+                                                                       primary_field=ct.default_int64_field_name)
+        c_name = cf.gen_unique_str(prefix)
+        self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property,
+                                             check_items={exp_name: c_name, exp_schema: schema})
+
 class TestCollectionDataframe(TestcaseBase):
     """
@@ -1519,7 +1539,7 @@ class TestCollectionMultiCollections:
             stats = connect.get_collection_stats(collection_list[i])
             assert stats[row_count] == default_nb
             connect.drop_collection(collection_list[i])
-
+
 class TestGetCollectionStats:
     """
@@ -1788,7 +1808,7 @@ class TestGetCollectionStats:
         connect.insert(collection, entities, partition_name=default_tag)
         connect.flush([collection])
         stats = connect.get_collection_stats(collection)
-        assert stats[row_count] == insert_count*2
+        assert stats[row_count] == insert_count * 2
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_get_collection_stats_partitions_D(self, connect, collection, insert_count):
@@ -1806,7 +1826,7 @@ class TestGetCollectionStats:
         connect.insert(collection, entities, partition_name=new_tag)
         connect.flush([collection])
         stats = connect.get_collection_stats(collection)
-        assert stats[row_count] == insert_count*2
+        assert stats[row_count] == insert_count * 2
 
     # TODO: assert metric type in stats response
     @pytest.mark.tags(CaseLabel.L0)
@@ -1902,7 +1922,7 @@ class TestGetCollectionStats:
             assert index == index_2
             # break
         connect.drop_collection(collection_list[i])
-
+
 class TestCreateCollection:
     """
@@ -2090,7 +2110,7 @@ class TestCreateCollectionInvalid(object):
         assert code == 1
         message = getattr(e, 'message', "The exception does not contain the field of message.")
         assert message == "maximum field's number should be limited to 64"
-
+
 class TestDescribeCollection:
@@ -2124,6 +2144,7 @@ class TestDescribeCollection:
     The following cases are used to test `describe_collection` function, no data in collection
     ******************************************************************
     """
+
     @pytest.mark.tags(CaseLabel.L0)
     def test_collection_fields(self, connect, get_filter_field, get_vector_field):
         '''
@@ -2216,6 +2237,7 @@ class TestDescribeCollection:
     The following cases are used to test `describe_collection` function, and insert data in collection
     ******************************************************************
     """
+
     @pytest.mark.tags(CaseLabel.L0)
     def test_describe_collection_fields_after_insert(self, connect, get_filter_field, get_vector_field):
         '''
@@ -2243,12 +2265,13 @@ class TestDescribeCollection:
             elif field["type"] == vector_field:
                 assert field["name"] == vector_field["name"]
                 assert field["params"] == vector_field["params"]
-
+
 class TestDescribeCollectionInvalid(object):
     """
     Test describe collection with invalid params
     """
+
     @pytest.fixture(
         scope="function",
         params=gen_invalid_strs()
@@ -2367,8 +2390,8 @@ class TestDropCollectionInvalid(object):
     def test_drop_collection_with_empty_or_None_collection_name(self, connect, collection_name):
         with pytest.raises(Exception) as e:
             connect.has_collection(collection_name)
-
+
 class TestHasCollection:
     """
     ******************************************************************
@@ -2415,6 +2438,7 @@ class TestHasCollection:
         def has():
             assert connect.has_collection(collection_name)
             # assert not assert_collection(connect, collection_name)
+
         for i in range(threads_num):
             t = MyThread(target=has, args=())
             threads.append(t)
@@ -2428,6 +2452,7 @@ class TestHasCollectionInvalid(object):
     """
     Test has collection with invalid params
     """
+
     @pytest.fixture(
         scope="function",
         params=gen_invalid_strs()
@@ -2452,7 +2477,7 @@ class TestHasCollectionInvalid(object):
         collection_name = None
         with pytest.raises(Exception) as e:
             connect.has_collection(collection_name)
-
+
 class TestListCollections:
     """
@@ -2785,7 +2810,7 @@ class TestLoadCollection:
         with pytest.raises(Exception):
             connect.search(collection, default_single_query)
         # assert len(res[0]) == 0
-
+
 class TestReleaseAdvanced:
@@ -2917,7 +2942,7 @@ class TestReleaseAdvanced:
         expected:
         """
         pass
-
+
 class TestLoadCollectionInvalid(object):
     """
@@ -2942,7 +2967,7 @@ class TestLoadCollectionInvalid(object):
         collection_name = get_collection_name
         with pytest.raises(Exception) as e:
             connect.release_collection(collection_name)
-
+
 class TestLoadPartition:
     """
@@ -3140,8 +3165,3 @@ class TestLoadPartitionInvalid(object):
         partition_name = get_partition_name
         with pytest.raises(Exception) as e:
             connect.load_partitions(collection, [partition_name])
-
-
-
-
-
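Stripped of the test wrappers, the schema that `test_collection_all_datatype_fields` builds looks roughly like this in plain pymilvus (a sketch; the vector dim and picking INT64 as the primary field are assumptions that mirror the test defaults):

```python
from pymilvus import CollectionSchema, DataType, FieldSchema

# One scalar field per supported DataType member, skipping the vector types,
# then a single float-vector field appended at the end.
fields = [FieldSchema(name=k.lower(), dtype=v, is_primary=(v == DataType.INT64))
          for k, v in DataType.__members__.items()
          if v and v not in (DataType.UNKNOWN, DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR)]
fields.append(FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128))  # dim assumed
schema = CollectionSchema(fields=fields)
```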


@@ -691,6 +691,22 @@ class TestInsertOperation(TestcaseBase):
         assert collection_w.num_entities == ct.default_nb
 
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue #7513")
+    def test_insert_all_datatype_collection(self):
+        """
+        target: test insert into collection that contains all datatype fields
+        method: 1.create all datatype collection 2.insert data
+        expected: verify num entities
+        """
+        self._connect()
+        # need to add string field
+        df = cf.gen_dataframe_all_data_type()
+        log.debug(df.head(3))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+
 class TestInsertAsync(TestcaseBase):
     """


@@ -1,5 +1,7 @@
 import pytest
 import random
+import numpy as np
+import pandas as pd
 
 from pymilvus import DefaultConfig
 from base.client_base import TestcaseBase
@@ -82,7 +84,7 @@ class TestQueryBase(TestcaseBase):
                            check_items={exp_res: res[:1]})
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_auto_id_not_existed_primary_key(self):
+    def test_query_auto_id_not_existed_primary_values(self):
         """
         target: test query on auto_id true collection
         method: 1.create auto_id true collection 2.query with not existed primary keys
@@ -111,7 +113,7 @@ class TestQueryBase(TestcaseBase):
         collection_w.query(None, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_non_string(self):
+    def test_query_non_string_expr(self):
         """
         target: test query with non-string expr
         method: query with non-string expr, eg 1, [] ..
@@ -161,34 +163,87 @@ class TestQueryBase(TestcaseBase):
         collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_unsupported_field(self):
+    def test_query_expr_non_primary_fields(self):
         """
-        target: test query on unsupported field
-        method: query on float field
-        expected: raise exception
+        target: test query on non-primary non-vector fields
+        method: query on non-primary non-vector fields
+        expected: verify query result
         """
-        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
-        term_expr = f'{ct.default_float_field_name} in [1., 2.]'
-        error = {ct.err_code: 1, ct.err_msg: "column is not int64"}
-        collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
+        self._connect()
+        # construct dataframe and insert data
+        df = pd.DataFrame({
+            ct.default_int64_field_name: pd.Series(data=[i for i in range(ct.default_nb)]),
+            ct.default_int32_field_name: pd.Series(data=[np.int32(i) for i in range(ct.default_nb)], dtype="int32"),
+            ct.default_int16_field_name: pd.Series(data=[np.int16(i) for i in range(ct.default_nb)], dtype="int16"),
+            ct.default_float_field_name: pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
+            ct.default_double_field_name: pd.Series(data=[np.double(i) for i in range(ct.default_nb)], dtype="double"),
+            ct.default_float_vec_field_name: cf.gen_vectors(ct.default_nb, ct.default_dim)
+        })
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
 
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_non_primary_field(self):
-        """
-        target: test query on non-primary field
-        method: query on non-primary int field
-        expected: raise exception
-        """
-        fields = [cf.gen_int64_field(), cf.gen_int64_field(name='int2', is_primary=True), cf.gen_float_vec_field()]
-        schema = cf.gen_collection_schema(fields)
-        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema)
-        nb = 100
-        data = [[i for i in range(nb)], [i for i in range(nb)], cf.gen_vectors(nb, ct.default_dim)]
-        collection_w.insert(data)
-        assert collection_w.num_entities == nb
-        assert collection_w.primary_field.name == 'int2'
-        error = {ct.err_code: 1, ct.err_msg: "column is not primary key"}
-        collection_w.query(default_term_expr, check_task=CheckTasks.err_res, check_items=error)
+        # query by non_primary non_vector scalar field
+        non_primary_field = [ct.default_int32_field_name, ct.default_int16_field_name,
+                             ct.default_float_field_name, ct.default_double_field_name]
+        # exp res: first two rows and all fields except last vec field
+        res = df.iloc[:2, :-1].to_dict('records')
+        for field in non_primary_field:
+            filter_values = df[field].tolist()[:2]
+            term_expr = f'{field} in {filter_values}'
+            self.collection_wrap.query(term_expr, output_fields=["*"],
+                                       check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue #7521 #7522")
+    def test_query_expr_by_bool_field(self):
+        """
+        target: test query by bool field and output binary field
+        method: 1.create and insert with [int64, float, bool, float_vec] fields
+                2.query by bool field, and output all int64, bool fields
+        expected: verify query result and output fields
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        bool_values = pd.Series(data=[True if i % 2 == 0 else False for i in range(ct.default_nb)], dtype="bool")
+        df.insert(2, ct.default_bool_field_name, bool_values)
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        term_expr = f'{ct.default_bool_field_name} in [True]'
+        res, _ = self.collection_wrap.query(term_expr, output_fields=[ct.default_bool_field_name])
+        assert len(res) == ct.default_nb / 2
+        assert set(res[0].keys()) == {ct.default_int64_field_name, ct.default_bool_field_name}
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_query_expr_by_int8_field(self):
+        """
+        target: test query by int8 field
+        method: 1.create and insert with [int64, float, int8, float_vec] fields
+                2.query by int8 field, and output all scalar fields
+        expected: verify query result
+        """
+        self._connect()
+        # construct collection from dataFrame according to [int64, float, int8, float_vec]
+        df = cf.gen_default_dataframe_data()
+        int8_values = pd.Series(data=[np.int8(i) for i in range(ct.default_nb)], dtype="int8")
+        df.insert(2, ct.default_int8_field_name, int8_values)
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        # query expression
+        term_expr = f'{ct.default_int8_field_name} in {[0]}'
+        # expected query result
+        res = []
+        # int8 range [-128, 127] so when nb=1200, there are many repeated int8 values equal to 0
+        for i in range(0, ct.default_nb, 256):
+            res.extend(df.iloc[i:i + 1, :-1].to_dict('records'))
+        self.collection_wrap.load()
+        self.collection_wrap.query(term_expr, output_fields=["*"],
+                                   check_task=CheckTasks.check_query_results, check_items={exp_res: res})
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_expr_wrong_term_keyword(self):
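The stepping by 256 in `test_query_expr_by_int8_field` comes from int8 wraparound: casting the row index to int8 reduces it modulo 256, so with the default nb of 1200 the value 0 recurs five times. A quick check of that arithmetic:

```python
import numpy as np

vals = np.arange(1200).astype(np.int8)  # the C-style cast wraps modulo 256
print(np.flatnonzero(vals == 0).tolist())  # [0, 256, 512, 768, 1024]
```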
@@ -202,15 +257,84 @@ class TestQueryBase(TestcaseBase):
         error_1 = {ct.err_code: 1, ct.err_msg: f'unexpected token Identifier("inn")'}
         collection_w.query(expr_1, check_task=CheckTasks.err_res, check_items=error_1)
 
+        # TODO(yukun): "not in" is supported now
+        # expr_2 = f'{ct.default_int64_field_name} not in [1, 2]'
+        # error_2 = {ct.err_code: 1, ct.err_msg: 'not top level term'}
+        # collection_w.query(expr_2, check_task=CheckTasks.err_res, check_items=error_2)
+
         expr_3 = f'{ct.default_int64_field_name} in not [1, 2]'
         error_3 = {ct.err_code: 1, ct.err_msg: 'right operand of the InExpr must be array'}
         collection_w.query(expr_3, check_task=CheckTasks.err_res, check_items=error_3)
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("field", [ct.default_int64_field_name, ct.default_float_field_name])
+    def test_query_expr_not_in_term(self, field):
+        """
+        target: test query with `not in` expr
+        method: query with not in expr
+        expected: verify query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        values = df[field].tolist()
+        pos = 100
+        term_expr = f'{field} not in {values[pos:]}'
+        res = df.iloc[:pos, :2].to_dict('records')
+        self.collection_wrap.query(term_expr, output_fields=["*"],
+                                   check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("pos", [0, ct.default_nb])
+    def test_query_expr_not_in_empty_and_all(self, pos):
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        int64_values = df[ct.default_int64_field_name].tolist()
+        term_expr = f'{ct.default_int64_field_name} not in {int64_values[pos:]}'
+        res = df.iloc[:pos, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #7544")
+    def test_query_expr_random_values(self):
+        """
+        target: test query with random filter values
+        method: query with random filter values, like [0, 2, 4, 3]
+        expected: correct query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data(nb=100)
+        log.debug(df.head(5))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == 100
+        self.collection_wrap.load()
+        # random_values = [random.randint(0, ct.default_nb) for _ in range(4)]
+        random_values = [0, 2, 4, 0]
+        term_expr = f'{ct.default_int64_field_name} in {random_values}'
+        res = df.iloc[random_values, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.xfail(reason="issue #7553")
+    def test_query_expr_not_in_random(self):
+        self._connect()
+        df = cf.gen_default_dataframe_data(nb=50)
+        log.debug(df.head(5))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == 50
+        self.collection_wrap.load()
+        random_values = [i for i in range(10, 50)]
+        log.debug(f'random values: {random_values}')
+        random.shuffle(random_values)
+        term_expr = f'{ct.default_int64_field_name} not in {random_values}'
+        res = df.iloc[:10, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_expr_non_array_term(self):
         """
@@ -640,7 +764,7 @@ class TestQueryOperation(TestcaseBase):
     @pytest.mark.tags(CaseLabel.L1)
     # @pytest.mark.parametrize("collection_name, data",
     #                          [(cf.gen_unique_str(prefix), cf.gen_default_list_data(ct.default_nb))])
-
+
     def test_query_without_loading(self):
         """
         target: test query without loading
@@ -730,13 +854,12 @@ class TestQueryOperation(TestcaseBase):
         res, _ = collection_w.query(term_expr)
         assert len(res) == len(int_values)
 
-    @pytest.mark.xfail(reason="fail")
     @pytest.mark.tags(CaseLabel.L2)
     def test_query_expr_repeated_term_array(self):
         """
         target: test query with repeated term array on primary field with unique value
         method: query with repeated array value
-        expected: todo
+        expected: return hit entities, no repeated
         """
         collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, insert_data=True)[0:3]
         int_values = [0, 0, 0, 0]
@@ -746,7 +869,6 @@ class TestQueryOperation(TestcaseBase):
         assert res[0][ct.default_int64_field_name] == int_values[0]
 
     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.xfail(reason="issue #6624")
     def test_query_dup_ids_dup_term_array(self):
         """
         target: test query on duplicate primary keys with dup term array
@@ -755,14 +877,15 @@ class TestQueryOperation(TestcaseBase):
         expected: todo
         """
         collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
-        df = cf.gen_default_dataframe_data(nb=ct.default_nb)
+        df = cf.gen_default_dataframe_data(nb=100)
         df[ct.default_int64_field_name] = 0
         mutation_res, _ = collection_w.insert(df)
         assert mutation_res.primary_keys == df[ct.default_int64_field_name].tolist()
         collection_w.load()
         term_expr = f'{ct.default_int64_field_name} in {[0, 0, 0]}'
-        res, _ = collection_w.query(term_expr)
-        log.debug(res)
+        res = df.iloc[:, :2].to_dict('records')
+        collection_w.query(term_expr, output_fields=["*"], check_task=CheckTasks.check_query_results,
+                           check_items={exp_res: res})
 
     @pytest.mark.tags(CaseLabel.L0)
     def test_query_after_index(self):