From 416bfeafc9722c2d3f05bc9c9d27b870e0eba589 Mon Sep 17 00:00:00 2001
From: ThreadDao
Date: Wed, 8 Sep 2021 12:10:00 +0800
Subject: [PATCH] Add cases for term not in and query non-primary field
 (#7554)

Signed-off-by: ThreadDao
---
 ...d_podkill.yaml => chaos_etcd_podkill.yaml} |   0
 ...podkill.yaml => chaos_pulsar_podkill.yaml} |   0
 .../chaos/chaos_objects/testcases.yaml        |  26 ++-
 tests/python_client/common/common_func.py     |  11 +-
 tests/python_client/common/common_type.py     |   1 +
 .../testcases/test_collection.py              |  64 ++++--
 .../python_client/testcases/test_insert_20.py |  16 ++
 .../python_client/testcases/test_query_20.py  | 197 ++++++++++++++----
 8 files changed, 254 insertions(+), 61 deletions(-)
 rename tests/python_client/chaos/chaos_objects/{skip_chaos_etcd_podkill.yaml => chaos_etcd_podkill.yaml} (100%)
 rename tests/python_client/chaos/chaos_objects/{skip_chaos_pulsar_podkill.yaml => chaos_pulsar_podkill.yaml} (100%)

diff --git a/tests/python_client/chaos/chaos_objects/skip_chaos_etcd_podkill.yaml b/tests/python_client/chaos/chaos_objects/chaos_etcd_podkill.yaml
similarity index 100%
rename from tests/python_client/chaos/chaos_objects/skip_chaos_etcd_podkill.yaml
rename to tests/python_client/chaos/chaos_objects/chaos_etcd_podkill.yaml
diff --git a/tests/python_client/chaos/chaos_objects/skip_chaos_pulsar_podkill.yaml b/tests/python_client/chaos/chaos_objects/chaos_pulsar_podkill.yaml
similarity index 100%
rename from tests/python_client/chaos/chaos_objects/skip_chaos_pulsar_podkill.yaml
rename to tests/python_client/chaos/chaos_objects/chaos_pulsar_podkill.yaml
diff --git a/tests/python_client/chaos/chaos_objects/testcases.yaml b/tests/python_client/chaos/chaos_objects/testcases.yaml
index cb4bdf742d..ebedfd442d 100644
--- a/tests/python_client/chaos/chaos_objects/testcases.yaml
+++ b/tests/python_client/chaos/chaos_objects/testcases.yaml
@@ -116,14 +116,38 @@ Collections:
     testcase:
       name: test_etcd_podkill
       chaos: chaos_etcd_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_minio_podkill
       chaos: chaos_minio_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_pulsar_podkill
-      chaos: chaos_minio_podkill.yaml
+      chaos: chaos_pulsar_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_querynode_cpu100p
diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index ae80bfdbaf..fe78c4150f 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -14,7 +14,9 @@ import threading
 import traceback
 
 """" Methods of processing data """
-#l2 = lambda x, y: np.linalg.norm(np.array(x) - np.array(y))
+
+
+# l2 = lambda x, y: np.linalg.norm(np.array(x) - np.array(y))
 
 
 def gen_unique_str(str_value=None):
@@ -187,6 +189,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
     bool_values = pd.Series(data=[np.bool(i) for i in range(start, start + nb)], dtype="bool")
     float_values = pd.Series(data=[float(i) for i in range(start, start + nb)], dtype="float32")
     double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
+    # string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int64_values,
@@ -195,6 +198,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
         ct.default_int8_field_name: int8_values,
         ct.default_bool_field_name: bool_values,
         ct.default_float_field_name: float_values,
+        # ct.default_string_field_name: string_values,
         ct.default_double_field_name: double_values,
         ct.default_float_vec_field_name: float_vec_values
     })
@@ -297,6 +301,7 @@ def gen_invaild_search_params_type():
         search_params.append(annoy_search_param)
     return search_params
 
+
 def gen_search_param(index_type, metric_type="L2"):
     search_params = []
     if index_type in ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_SQ8H", "IVF_PQ"] \
@@ -321,6 +326,7 @@ def gen_search_param(index_type, metric_type="L2"):
         raise Exception("Invalid index_type.")
     return search_params
 
+
 def gen_all_type_fields():
     fields = []
     for k, v in DataType.__members__.items():
@@ -385,11 +391,13 @@ def tanimoto(x, y):
     y = np.asarray(y, np.bool)
     return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()))
 
+
 def tanimoto_calc(x, y):
     x = np.asarray(x, np.bool)
     y = np.asarray(y, np.bool)
     return np.double((len(x) - np.bitwise_xor(x, y).sum())) / (len(y) + np.bitwise_xor(x, y).sum())
 
+
 def substructure(x, y):
     x = np.asarray(x, np.bool)
     y = np.asarray(y, np.bool)
@@ -401,6 +409,7 @@ def superstructure(x, y):
     y = np.asarray(y, np.bool)
     return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x)
 
+
 def compare_distance_2d_vector(x, y, distance, metric, sqrt):
     for i in range(len(x)):
         for j in range(len(y)):
diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py
index 2aad841e64..ed6077c8a9 100644
--- a/tests/python_client/common/common_type.py
+++ b/tests/python_client/common/common_type.py
@@ -26,6 +26,7 @@ default_int32_field_name = "int32"
 default_int64_field_name = "int64"
 default_float_field_name = "float"
 default_double_field_name = "double"
+default_string_field_name = "string"
 default_float_vec_field_name = "float_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"
diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py
index 385f3cb99d..13fca47c50 100644
--- a/tests/python_client/testcases/test_collection.py
+++ b/tests/python_client/testcases/test_collection.py
@@ -11,7 +11,6 @@ from common.common_type import CaseLabel, CheckTasks
 from utils.utils import *
 from common.constants import *
 
-
 prefix = "collection"
 exp_name = "name"
 exp_schema = "schema"
@@ -39,7 +38,6 @@ default_single_query = {
 }
 
-
 class TestCollectionParams(TestcaseBase):
     """ Test case of collection interface """
@@ -601,8 +599,9 @@ class TestCollectionParams(TestcaseBase):
         int_field_one = cf.gen_int64_field(is_primary=True)
         int_field_two = cf.gen_int64_field(name="int2", is_primary=True)
         error = {ct.err_code: 0, ct.err_msg: "Primary key field can only be one."}
-        self.collection_schema_wrap.init_collection_schema(fields=[int_field_one, int_field_two, cf.gen_float_vec_field()],
-                                                           check_task=CheckTasks.err_res, check_items=error)
+        self.collection_schema_wrap.init_collection_schema(
+            fields=[int_field_one, int_field_two, cf.gen_float_vec_field()],
+            check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_collection_primary_inconsistent(self):
@@ -821,7 +820,8 @@ class TestCollectionParams(TestcaseBase):
         int_field, _ = self.field_schema_wrap.init_field_schema(name=ct.default_int64_field_name, dtype=DataType.INT64, dim=ct.default_dim)
         float_vec_field = cf.gen_float_vec_field()
-        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field], primary_field=ct.default_int64_field_name)
+        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field],
+                                          primary_field=ct.default_int64_field_name)
         self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property,
                                              check_items={exp_name: c_name, exp_schema: schema})
@@ -962,6 +962,26 @@ class TestCollectionOperation(TestcaseBase):
                                              check_items={exp_name: c_name, exp_schema: default_schema})
         assert self.utility_wrap.has_collection(c_name)[0]
 
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_collection_all_datatype_fields(self):
+        """
+        target: test create collection with fields of all dataTypes
+        method: create collection with a schema containing all dataType fields
+        expected: create successfully
+        """
+        self._connect()
+        fields = []
+        for k, v in DataType.__members__.items():
+            if v and v != DataType.UNKNOWN and v != DataType.FLOAT_VECTOR and v != DataType.BINARY_VECTOR:
+                field, _ = self.field_schema_wrap.init_field_schema(name=k.lower(), dtype=v)
+                fields.append(field)
+        fields.append(cf.gen_float_vec_field())
+        schema, _ = self.collection_schema_wrap.init_collection_schema(fields,
+                                                                       primary_field=ct.default_int64_field_name)
+        c_name = cf.gen_unique_str(prefix)
+        self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property,
+                                             check_items={exp_name: c_name, exp_schema: schema})
+
 
 class TestCollectionDataframe(TestcaseBase):
     """
@@ -1519,7 +1539,7 @@ class TestCollectionMultiCollections:
             stats = connect.get_collection_stats(collection_list[i])
             assert stats[row_count] == default_nb
             connect.drop_collection(collection_list[i])
-    
+
 
 class TestGetCollectionStats:
     """
@@ -1788,7 +1808,7 @@ class TestGetCollectionStats:
         connect.insert(collection, entities, partition_name=default_tag)
         connect.flush([collection])
         stats = connect.get_collection_stats(collection)
-        assert stats[row_count] == insert_count*2
+        assert stats[row_count] == insert_count * 2
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_get_collection_stats_partitions_D(self, connect, collection, insert_count):
@@ -1806,7 +1826,7 @@ class TestGetCollectionStats:
         connect.insert(collection, entities, partition_name=new_tag)
         connect.flush([collection])
         stats = connect.get_collection_stats(collection)
-        assert stats[row_count] == insert_count*2
+        assert stats[row_count] == insert_count * 2
 
     # TODO: assert metric type in stats response
     @pytest.mark.tags(CaseLabel.L0)
@@ -1902,7 +1922,7 @@ class TestGetCollectionStats:
                     assert index == index_2
                 # break
             connect.drop_collection(collection_list[i])
-    
+
 
 class TestCreateCollection:
     """
@@ -2090,7 +2110,7 @@ class TestCreateCollectionInvalid(object):
         assert code == 1
         message = getattr(e, 'message', "The exception does not contain the field of message.")
         assert message == "maximum field's number should be limited to 64"
-    
+
 
 class TestDescribeCollection:
@@ -2124,6 +2144,7 @@ class TestDescribeCollection:
     The following cases are used to test `describe_collection` function, no data in collection
     ******************************************************************
     """
+
     @pytest.mark.tags(CaseLabel.L0)
     def test_collection_fields(self, connect, get_filter_field, get_vector_field):
         '''
@@ -2216,6 +2237,7 @@ class TestDescribeCollection:
     The following cases are used to test `describe_collection` function, and insert data in collection
     ******************************************************************
     """
+
     @pytest.mark.tags(CaseLabel.L0)
     def test_describe_collection_fields_after_insert(self, connect, get_filter_field, get_vector_field):
         '''
@@ -2243,12 +2265,13 @@ class TestDescribeCollection:
             elif field["type"] == vector_field:
                 assert field["name"] == vector_field["name"]
                 assert field["params"] == vector_field["params"]
-    
+
 
 class TestDescribeCollectionInvalid(object):
     """
     Test describe collection with invalid params
     """
+
     @pytest.fixture(
         scope="function",
         params=gen_invalid_strs()
@@ -2367,8 +2390,8 @@ class TestDropCollectionInvalid(object):
     def test_drop_collection_with_empty_or_None_collection_name(self, connect, collection_name):
         with pytest.raises(Exception) as e:
             connect.has_collection(collection_name)
-    
-    
+
+
 class TestHasCollection:
     """
     ******************************************************************
@@ -2415,6 +2438,7 @@ class TestHasCollection:
         def has():
             assert connect.has_collection(collection_name)
             # assert not assert_collection(connect, collection_name)
+
         for i in range(threads_num):
             t = MyThread(target=has, args=())
             threads.append(t)
@@ -2428,6 +2452,7 @@ class TestHasCollectionInvalid(object):
     """
     Test has collection with invalid params
     """
+
     @pytest.fixture(
         scope="function",
         params=gen_invalid_strs()
@@ -2452,7 +2477,7 @@ class TestHasCollectionInvalid(object):
         collection_name = None
         with pytest.raises(Exception) as e:
             connect.has_collection(collection_name)
-    
+
 
 class TestListCollections:
     """
@@ -2785,7 +2810,7 @@ class TestLoadCollection:
         with pytest.raises(Exception):
             connect.search(collection, default_single_query)
         # assert len(res[0]) == 0
-    
+
 
 class TestReleaseAdvanced:
@@ -2917,7 +2942,7 @@ class TestReleaseAdvanced:
         expected:
         """
         pass
-    
+
 
 class TestLoadCollectionInvalid(object):
     """
@@ -2942,7 +2967,7 @@ class TestLoadCollectionInvalid(object):
         collection_name = get_collection_name
         with pytest.raises(Exception) as e:
             connect.release_collection(collection_name)
-    
+
 
 class TestLoadPartition:
     """
@@ -3140,8 +3165,3 @@ class TestLoadPartitionInvalid(object):
         partition_name = get_partition_name
         with pytest.raises(Exception) as e:
             connect.load_partitions(collection, [partition_name])
-
-
-
-
-
diff --git a/tests/python_client/testcases/test_insert_20.py b/tests/python_client/testcases/test_insert_20.py
index f5e9703a94..c845827967 100644
--- a/tests/python_client/testcases/test_insert_20.py
+++ b/tests/python_client/testcases/test_insert_20.py
@@ -691,6 +691,22 @@ class TestInsertOperation(TestcaseBase):
 
         assert collection_w.num_entities == ct.default_nb
 
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue #7513")
+    def test_insert_all_datatype_collection(self):
+        """
+        target: test insert into collection that contains all datatype fields
+        method: 1.create all datatype collection 2.insert data
+        expected: verify num entities
+        """
+        self._connect()
+        # need to add string field
+        df = cf.gen_dataframe_all_data_type()
+        log.debug(df.head(3))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+
 
 class TestInsertAsync(TestcaseBase):
     """
diff --git a/tests/python_client/testcases/test_query_20.py b/tests/python_client/testcases/test_query_20.py
index a0fafd1254..49e0e57cba 100644
--- a/tests/python_client/testcases/test_query_20.py
+++ b/tests/python_client/testcases/test_query_20.py
@@ -1,5 +1,7 @@
 import pytest
 import random
+import numpy as np
+import pandas as pd
 
 from pymilvus import DefaultConfig
 
 from base.client_base import TestcaseBase
@@ -82,7 +84,7 @@ class TestQueryBase(TestcaseBase):
                            check_items={exp_res: res[:1]})
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_auto_id_not_existed_primary_key(self):
+    def test_query_auto_id_not_existed_primary_values(self):
         """
         target: test query on auto_id true collection
         method: 1.create auto_id true collection 2.query with not existed primary keys
@@ -111,7 +113,7 @@ class TestQueryBase(TestcaseBase):
         collection_w.query(None, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_non_string(self):
+    def test_query_non_string_expr(self):
         """
         target: test query with non-string expr
         method: query with non-string expr, eg 1, [] ..
@@ -161,34 +163,87 @@ class TestQueryBase(TestcaseBase):
         collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_unsupported_field(self):
+    def test_query_expr_non_primary_fields(self):
         """
-        target: test query on unsupported field
-        method: query on float field
-        expected: raise exception
+        target: test query on non-primary, non-vector fields
+        method: query on non-primary, non-vector scalar fields
+        expected: verify query result
         """
-        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
-        term_expr = f'{ct.default_float_field_name} in [1., 2.]'
-        error = {ct.err_code: 1, ct.err_msg: "column is not int64"}
-        collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
+        self._connect()
+        # construct dataframe and insert data
+        df = pd.DataFrame({
+            ct.default_int64_field_name: pd.Series(data=[i for i in range(ct.default_nb)]),
+            ct.default_int32_field_name: pd.Series(data=[np.int32(i) for i in range(ct.default_nb)], dtype="int32"),
+            ct.default_int16_field_name: pd.Series(data=[np.int16(i) for i in range(ct.default_nb)], dtype="int16"),
+            ct.default_float_field_name: pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
+            ct.default_double_field_name: pd.Series(data=[np.double(i) for i in range(ct.default_nb)], dtype="double"),
+            ct.default_float_vec_field_name: cf.gen_vectors(ct.default_nb, ct.default_dim)
+        })
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
 
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_non_primary_field(self):
+        # query by each non-primary, non-vector scalar field
+        non_primary_field = [ct.default_int32_field_name, ct.default_int16_field_name,
+                             ct.default_float_field_name, ct.default_double_field_name]
+
+        # expected res: first two rows and all fields except the last vec field
+        res = df.iloc[:2, :-1].to_dict('records')
+        for field in non_primary_field:
+            filter_values = df[field].tolist()[:2]
+            term_expr = f'{field} in {filter_values}'
+            self.collection_wrap.query(term_expr, output_fields=["*"],
+                                       check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue #7521 #7522")
+    def test_query_expr_by_bool_field(self):
         """
-        target: test query on non-primary field
-        method: query on non-primary int field
-        expected: raise exception
+        target: test query by bool field and output bool field
+        method: 1.create and insert with [int64, float, bool, float_vec] fields
+                2.query by bool field, and output all int64, bool fields
+        expected: verify query result and output fields
         """
-        fields = [cf.gen_int64_field(), cf.gen_int64_field(name='int2', is_primary=True), cf.gen_float_vec_field()]
-        schema = cf.gen_collection_schema(fields)
-        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema)
-        nb = 100
-        data = [[i for i in range(nb)], [i for i in range(nb)], cf.gen_vectors(nb, ct.default_dim)]
-        collection_w.insert(data)
-        assert collection_w.num_entities == nb
-        assert collection_w.primary_field.name == 'int2'
-        error = {ct.err_code: 1, ct.err_msg: "column is not primary key"}
-        collection_w.query(default_term_expr, check_task=CheckTasks.err_res, check_items=error)
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        bool_values = pd.Series(data=[True if i % 2 == 0 else False for i in range(ct.default_nb)], dtype="bool")
+        df.insert(2, ct.default_bool_field_name, bool_values)
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        term_expr = f'{ct.default_bool_field_name} in [True]'
+        res, _ = self.collection_wrap.query(term_expr, output_fields=[ct.default_bool_field_name])
+        assert len(res) == ct.default_nb / 2
+        assert set(res[0].keys()) == {ct.default_int64_field_name, ct.default_bool_field_name}
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_query_expr_by_int8_field(self):
+        """
+        target: test query by int8 field
+        method: 1.create and insert with [int64, float, int8, float_vec] fields
+                2.query by int8 field, and output all scalar fields
+        expected: verify query result
+        """
+        self._connect()
+        # construct collection from dataFrame according to [int64, float, int8, float_vec]
+        df = cf.gen_default_dataframe_data()
+        int8_values = pd.Series(data=[np.int8(i) for i in range(ct.default_nb)], dtype="int8")
+        df.insert(2, ct.default_int8_field_name, int8_values)
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        # query expression
+        term_expr = f'{ct.default_int8_field_name} in {[0]}'
+        # expected query result: int8 range is [-128, 127], so np.int8(i) wraps around
+        # every 256 and rows 0, 256, 512, ... all have an int8 value equal to 0
+        res = []
+        for i in range(0, ct.default_nb, 256):
+            res.extend(df.iloc[i:i + 1, :-1].to_dict('records'))
+        self.collection_wrap.load()
+        self.collection_wrap.query(term_expr, output_fields=["*"],
+                                   check_task=CheckTasks.check_query_results, check_items={exp_res: res})
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_expr_wrong_term_keyword(self):
@@ -202,15 +257,84 @@ class TestQueryBase(TestcaseBase):
         error_1 = {ct.err_code: 1, ct.err_msg: f'unexpected token Identifier("inn")'}
         collection_w.query(expr_1, check_task=CheckTasks.err_res, check_items=error_1)
 
-        # TODO(yukun): "not in" is supported now
-        # expr_2 = f'{ct.default_int64_field_name} not in [1, 2]'
-        # error_2 = {ct.err_code: 1, ct.err_msg: 'not top level term'}
-        # collection_w.query(expr_2, check_task=CheckTasks.err_res, check_items=error_2)
-
         expr_3 = f'{ct.default_int64_field_name} in not [1, 2]'
         error_3 = {ct.err_code: 1, ct.err_msg: 'right operand of the InExpr must be array'}
         collection_w.query(expr_3, check_task=CheckTasks.err_res, check_items=error_3)
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("field", [ct.default_int64_field_name, ct.default_float_field_name])
+    def test_query_expr_not_in_term(self, field):
+        """
+        target: test query with `not in` expr
+        method: query with `not in` expr
+        expected: verify query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        values = df[field].tolist()
+        pos = 100
+        term_expr = f'{field} not in {values[pos:]}'
+        res = df.iloc[:pos, :2].to_dict('records')
+        self.collection_wrap.query(term_expr, output_fields=["*"],
+                                   check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("pos", [0, ct.default_nb])
+    def test_query_expr_not_in_empty_and_all(self, pos):
+        """
+        target: test query with `not in` expr that hits none or all of the entities
+        method: query with `not in` all values (pos=0) or `not in` empty values (pos=nb)
+        expected: verify query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        int64_values = df[ct.default_int64_field_name].tolist()
+        term_expr = f'{ct.default_int64_field_name} not in {int64_values[pos:]}'
+        res = df.iloc[:pos, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #7544")
+    def test_query_expr_random_values(self):
+        """
+        target: test query with random filter values
+        method: query with random filter values, like [0, 2, 4, 3]
+        expected: correct query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data(nb=100)
+        log.debug(df.head(5))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == 100
+        self.collection_wrap.load()
+
+        # random_values = [random.randint(0, ct.default_nb) for _ in range(4)]
+        random_values = [0, 2, 4, 0]
+        term_expr = f'{ct.default_int64_field_name} in {random_values}'
+        res = df.iloc[random_values, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue #7553")
+    def test_query_expr_not_in_random(self):
+        """
+        target: test query with `not in` shuffled filter values
+        method: query with `not in` expr whose filter values are shuffled
+        expected: verify query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data(nb=50)
+        log.debug(df.head(5))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == 50
+        self.collection_wrap.load()
+
+        random_values = [i for i in range(10, 50)]
+        random.shuffle(random_values)
+        log.debug(f'random values: {random_values}')
+        term_expr = f'{ct.default_int64_field_name} not in {random_values}'
+        res = df.iloc[:10, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_expr_non_array_term(self):
         """
@@ -640,7 +764,7 @@ class TestQueryOperation(TestcaseBase):
 
     @pytest.mark.tags(CaseLabel.L1)
     # @pytest.mark.parametrize("collection_name, data",
-    #                         [(cf.gen_unique_str(prefix), cf.gen_default_list_data(ct.default_nb))])
+    #                          [(cf.gen_unique_str(prefix), cf.gen_default_list_data(ct.default_nb))])
     def test_query_without_loading(self):
         """
         target: test query without loading
@@ -730,13 +854,12 @@ class TestQueryOperation(TestcaseBase):
         res, _ = collection_w.query(term_expr)
         assert len(res) == len(int_values)
 
-    @pytest.mark.xfail(reason="fail")
     @pytest.mark.tags(CaseLabel.L2)
     def test_query_expr_repeated_term_array(self):
         """
         target: test query with repeated term array on primary field with unique value
         method: query with repeated array value
-        expected: todo
+        expected: return hit entities, with no repeats
         """
         collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, insert_data=True)[0:3]
         int_values = [0, 0, 0, 0]
         term_expr = f'{ct.default_int64_field_name} in {int_values}'
         res, _ = collection_w.query(term_expr)
         assert len(res) == 1
         assert res[0][ct.default_int64_field_name] == int_values[0]
 
     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.xfail(reason="issue #6624")
     def test_query_dup_ids_dup_term_array(self):
         """
         target: test query on duplicate primary keys with dup term array
         method: 1.create collection and insert duplicate primary keys
                 2.query with dup term array
         expected: todo
         """
         collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
-        df = cf.gen_default_dataframe_data(nb=ct.default_nb)
+        df = cf.gen_default_dataframe_data(nb=100)
         df[ct.default_int64_field_name] = 0
         mutation_res, _ = collection_w.insert(df)
         assert mutation_res.primary_keys == df[ct.default_int64_field_name].tolist()
         collection_w.load()
         term_expr = f'{ct.default_int64_field_name} in {[0, 0, 0]}'
-        res, _ = collection_w.query(term_expr)
-        log.debug(res)
+        res = df.iloc[:, :2].to_dict('records')
+        collection_w.query(term_expr, output_fields=["*"], check_task=CheckTasks.check_query_results,
+                           check_items={exp_res: res})
 
     @pytest.mark.tags(CaseLabel.L0)
     def test_query_after_index(self):