From 416bfeafc9722c2d3f05bc9c9d27b870e0eba589 Mon Sep 17 00:00:00 2001
From: ThreadDao
Date: Wed, 8 Sep 2021 12:10:00 +0800
Subject: [PATCH] Add cases for term not in and query non-primary field
 (#7554)

Signed-off-by: ThreadDao
---
 ...d_podkill.yaml => chaos_etcd_podkill.yaml} |   0
 ...podkill.yaml => chaos_pulsar_podkill.yaml} |   0
 .../chaos/chaos_objects/testcases.yaml        |  26 ++-
 tests/python_client/common/common_func.py     |  11 +-
 tests/python_client/common/common_type.py     |   1 +
 .../testcases/test_collection.py              |  64 ++++--
 .../python_client/testcases/test_insert_20.py |  16 ++
 .../python_client/testcases/test_query_20.py  | 197 ++++++++++++++----
 8 files changed, 254 insertions(+), 61 deletions(-)
 rename tests/python_client/chaos/chaos_objects/{skip_chaos_etcd_podkill.yaml => chaos_etcd_podkill.yaml} (100%)
 rename tests/python_client/chaos/chaos_objects/{skip_chaos_pulsar_podkill.yaml => chaos_pulsar_podkill.yaml} (100%)

diff --git a/tests/python_client/chaos/chaos_objects/skip_chaos_etcd_podkill.yaml b/tests/python_client/chaos/chaos_objects/chaos_etcd_podkill.yaml
similarity index 100%
rename from tests/python_client/chaos/chaos_objects/skip_chaos_etcd_podkill.yaml
rename to tests/python_client/chaos/chaos_objects/chaos_etcd_podkill.yaml
diff --git a/tests/python_client/chaos/chaos_objects/skip_chaos_pulsar_podkill.yaml b/tests/python_client/chaos/chaos_objects/chaos_pulsar_podkill.yaml
similarity index 100%
rename from tests/python_client/chaos/chaos_objects/skip_chaos_pulsar_podkill.yaml
rename to tests/python_client/chaos/chaos_objects/chaos_pulsar_podkill.yaml
diff --git a/tests/python_client/chaos/chaos_objects/testcases.yaml b/tests/python_client/chaos/chaos_objects/testcases.yaml
index cb4bdf742d..ebedfd442d 100644
--- a/tests/python_client/chaos/chaos_objects/testcases.yaml
+++ b/tests/python_client/chaos/chaos_objects/testcases.yaml
@@ -116,14 +116,38 @@ Collections:
     testcase:
       name: test_etcd_podkill
       chaos: chaos_etcd_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_minio_podkill
       chaos: chaos_minio_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_pulsar_podkill
-      chaos: chaos_minio_podkill.yaml
+      chaos: chaos_pulsar_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
   -
     testcase:
       name: test_querynode_cpu100p
diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index ae80bfdbaf..fe78c4150f 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -14,7 +14,9 @@ import threading
 import traceback
 
 """" Methods of processing data """
-#l2 = lambda x, y: np.linalg.norm(np.array(x) - np.array(y))
+
+
+# l2 = lambda x, y: np.linalg.norm(np.array(x) - np.array(y))
 
 
 def gen_unique_str(str_value=None):
@@ -187,6 +189,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
     bool_values = pd.Series(data=[np.bool(i) for i in range(start, start + nb)], dtype="bool")
     float_values = pd.Series(data=[float(i) for i in range(start, start + nb)], dtype="float32")
     double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
+    # string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int64_values,
@@ -195,6 +198,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
         ct.default_int8_field_name: int8_values,
         ct.default_bool_field_name: bool_values,
         ct.default_float_field_name: float_values,
+        # ct.default_string_field_name: string_values,
         ct.default_double_field_name: double_values,
         ct.default_float_vec_field_name: float_vec_values
     })
@@ -297,6 +301,7 @@ def gen_invaild_search_params_type():
         search_params.append(annoy_search_param)
     return search_params
 
+
 def gen_search_param(index_type, metric_type="L2"):
     search_params = []
     if index_type in ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_SQ8H", "IVF_PQ"] \
@@ -321,6 +326,7 @@ def gen_search_param(index_type, metric_type="L2"):
         raise Exception("Invalid index_type.")
     return search_params
 
+
 def gen_all_type_fields():
     fields = []
     for k, v in DataType.__members__.items():
@@ -385,11 +391,13 @@ def tanimoto(x, y):
     y = np.asarray(y, np.bool)
     return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()))
 
+
 def tanimoto_calc(x, y):
     x = np.asarray(x, np.bool)
     y = np.asarray(y, np.bool)
     return np.double((len(x) - np.bitwise_xor(x, y).sum())) / (len(y) + np.bitwise_xor(x, y).sum())
 
+
 def substructure(x, y):
     x = np.asarray(x, np.bool)
     y = np.asarray(y, np.bool)
@@ -401,6 +409,7 @@ def superstructure(x, y):
     y = np.asarray(y, np.bool)
     return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x)
 
+
 def compare_distance_2d_vector(x, y, distance, metric, sqrt):
     for i in range(len(x)):
         for j in range(len(y)):
diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py
index 2aad841e64..ed6077c8a9 100644
--- a/tests/python_client/common/common_type.py
+++ b/tests/python_client/common/common_type.py
@@ -26,6 +26,7 @@ default_int32_field_name = "int32"
 default_int64_field_name = "int64"
 default_float_field_name = "float"
 default_double_field_name = "double"
+default_string_field_name = "string"
 default_float_vec_field_name = "float_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"
diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py
index 385f3cb99d..13fca47c50 100644
--- a/tests/python_client/testcases/test_collection.py
+++ b/tests/python_client/testcases/test_collection.py
@@ -11,7 +11,6 @@ from common.common_type import CaseLabel, CheckTasks
 from utils.utils import *
 from common.constants import *
 
-
 prefix = "collection"
 exp_name = "name"
 exp_schema = "schema"
@@ -39,7 +38,6 @@ default_single_query = {
 }
 
-
 class TestCollectionParams(TestcaseBase):
     """ Test case of collection interface """
@@ -601,8 +599,9 @@ class TestCollectionParams(TestcaseBase):
         int_field_one = cf.gen_int64_field(is_primary=True)
         int_field_two = cf.gen_int64_field(name="int2", is_primary=True)
         error = {ct.err_code: 0, ct.err_msg: "Primary key field can only be one."}
-        self.collection_schema_wrap.init_collection_schema(fields=[int_field_one, int_field_two, cf.gen_float_vec_field()],
-                                                           check_task=CheckTasks.err_res, check_items=error)
+        self.collection_schema_wrap.init_collection_schema(
+            fields=[int_field_one, int_field_two, cf.gen_float_vec_field()],
+            check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_collection_primary_inconsistent(self):
@@ -821,7 +820,8 @@ class TestCollectionParams(TestcaseBase):
         int_field, _ = self.field_schema_wrap.init_field_schema(name=ct.default_int64_field_name, dtype=DataType.INT64, dim=ct.default_dim)
         float_vec_field = cf.gen_float_vec_field()
-        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field], primary_field=ct.default_int64_field_name)
+        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field],
+                                          primary_field=ct.default_int64_field_name)
         self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property,
                                              check_items={exp_name: c_name, exp_schema: schema})
@@ -962,6 +962,26 @@ class TestCollectionOperation(TestcaseBase):
                                              check_items={exp_name: c_name, exp_schema: default_schema})
         assert self.utility_wrap.has_collection(c_name)[0]
 
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_collection_all_datatype_fields(self):
+        """
+        target: test create collection with fields of all dataTypes
+        method: create collection with a schema containing all dataType fields
+        expected: create successfully
+        """
+        self._connect()
+        fields = []
+        for k, v in DataType.__members__.items():
+            if v and v != DataType.UNKNOWN and v != DataType.FLOAT_VECTOR and v != DataType.BINARY_VECTOR:
+                field, _ = self.field_schema_wrap.init_field_schema(name=k.lower(), dtype=v)
+                fields.append(field)
+        fields.append(cf.gen_float_vec_field())
+        schema, _ = self.collection_schema_wrap.init_collection_schema(fields,
+                                                                       primary_field=ct.default_int64_field_name)
+        c_name = cf.gen_unique_str(prefix)
+        self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property,
+                                             check_items={exp_name: c_name, exp_schema: schema})
+
 
 class TestCollectionDataframe(TestcaseBase):
     """
@@ -1519,7 +1539,7 @@ class TestCollectionMultiCollections:
             stats = connect.get_collection_stats(collection_list[i])
             assert stats[row_count] == default_nb
             connect.drop_collection(collection_list[i])
-    
+
 
 class TestGetCollectionStats:
     """
@@ -1788,7 +1808,7 @@ class TestGetCollectionStats:
         connect.insert(collection, entities, partition_name=default_tag)
         connect.flush([collection])
         stats = connect.get_collection_stats(collection)
-        assert stats[row_count] == insert_count*2
+        assert stats[row_count] == insert_count * 2
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_get_collection_stats_partitions_D(self, connect, collection, insert_count):
@@ -1806,7 +1826,7 @@ class TestGetCollectionStats:
         connect.insert(collection, entities, partition_name=new_tag)
         connect.flush([collection])
         stats = connect.get_collection_stats(collection)
-        assert stats[row_count] == insert_count*2
+        assert stats[row_count] == insert_count * 2
 
     # TODO: assert metric type in stats response
     @pytest.mark.tags(CaseLabel.L0)
@@ -1902,7 +1922,7 @@ class TestGetCollectionStats:
                     assert index == index_2
                 # break
             connect.drop_collection(collection_list[i])
-    
+
 
 class TestCreateCollection:
     """
@@ -2090,7 +2110,7 @@ class TestCreateCollectionInvalid(object):
         assert code == 1
         message = getattr(e, 'message', "The exception does not contain the field of message.")
         assert message == "maximum field's number should be limited to 64"
-    
+
 
 class TestDescribeCollection:
@@ -2124,6 +2144,7 @@ class TestDescribeCollection:
     The following cases are used to test `describe_collection` function, no data in collection
     ******************************************************************
     """
+
     @pytest.mark.tags(CaseLabel.L0)
     def test_collection_fields(self, connect, get_filter_field, get_vector_field):
         '''
@@ -2216,6 +2237,7 @@ class TestDescribeCollection:
     The following cases are used to test `describe_collection` function, and insert data in collection
     ******************************************************************
     """
+
     @pytest.mark.tags(CaseLabel.L0)
     def test_describe_collection_fields_after_insert(self, connect, get_filter_field, get_vector_field):
         '''
@@ -2243,12 +2265,13 @@ class TestDescribeCollection:
             elif field["type"] == vector_field:
                 assert field["name"] == vector_field["name"]
                 assert field["params"] == vector_field["params"]
-    
+
 
 class TestDescribeCollectionInvalid(object):
     """
     Test describe collection with invalid params
     """
+
     @pytest.fixture(
         scope="function",
         params=gen_invalid_strs()
@@ -2367,8 +2390,8 @@ class TestDropCollectionInvalid(object):
     def test_drop_collection_with_empty_or_None_collection_name(self, connect, collection_name):
         with pytest.raises(Exception) as e:
             connect.has_collection(collection_name)
-    
-    
+
+
 class TestHasCollection:
     """
     ******************************************************************
@@ -2415,6 +2438,7 @@ class TestHasCollection:
         def has():
             assert connect.has_collection(collection_name)
             # assert not assert_collection(connect, collection_name)
+
         for i in range(threads_num):
             t = MyThread(target=has, args=())
             threads.append(t)
@@ -2428,6 +2452,7 @@ class TestHasCollectionInvalid(object):
     """
     Test has collection with invalid params
     """
+
     @pytest.fixture(
         scope="function",
         params=gen_invalid_strs()
@@ -2452,7 +2477,7 @@ class TestHasCollectionInvalid(object):
         collection_name = None
         with pytest.raises(Exception) as e:
             connect.has_collection(collection_name)
-    
+
 
 class TestListCollections:
     """
@@ -2785,7 +2810,7 @@ class TestLoadCollection:
         with pytest.raises(Exception):
             connect.search(collection, default_single_query)
         # assert len(res[0]) == 0
-    
+
 
 class TestReleaseAdvanced:
@@ -2917,7 +2942,7 @@ class TestReleaseAdvanced:
         expected:
         """
         pass
-    
+
 
 class TestLoadCollectionInvalid(object):
     """
@@ -2942,7 +2967,7 @@ class TestLoadCollectionInvalid(object):
         collection_name = get_collection_name
         with pytest.raises(Exception) as e:
             connect.release_collection(collection_name)
-    
+
 
 class TestLoadPartition:
     """
@@ -3140,8 +3165,3 @@ class TestLoadPartitionInvalid(object):
         partition_name = get_partition_name
         with pytest.raises(Exception) as e:
             connect.load_partitions(collection, [partition_name])
-
-
-
-
-
diff --git a/tests/python_client/testcases/test_insert_20.py b/tests/python_client/testcases/test_insert_20.py
index f5e9703a94..c845827967 100644
--- a/tests/python_client/testcases/test_insert_20.py
+++ b/tests/python_client/testcases/test_insert_20.py
@@ -691,6 +691,22 @@ class TestInsertOperation(TestcaseBase):
 
         assert collection_w.num_entities == ct.default_nb
 
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue #7513")
+    def test_insert_all_datatype_collection(self):
+        """
+        target: test insert into collection that contains all datatype fields
+        method: 1.create all datatype collection 2.insert data
+        expected: verify num entities
+        """
+        self._connect()
+        # need to add string field
+        df = cf.gen_dataframe_all_data_type()
+        log.debug(df.head(3))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+
 
 class TestInsertAsync(TestcaseBase):
     """
diff --git a/tests/python_client/testcases/test_query_20.py b/tests/python_client/testcases/test_query_20.py
index a0fafd1254..49e0e57cba 100644
--- a/tests/python_client/testcases/test_query_20.py
+++ b/tests/python_client/testcases/test_query_20.py
@@ -1,5 +1,7 @@
 import pytest
 import random
+import numpy as np
+import pandas as pd
 
 from pymilvus import DefaultConfig
 
 from base.client_base import TestcaseBase
@@ -82,7 +84,7 @@ class TestQueryBase(TestcaseBase):
                            check_items={exp_res: res[:1]})
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_auto_id_not_existed_primary_key(self):
+    def test_query_auto_id_not_existed_primary_values(self):
         """
         target: test query on auto_id true collection
         method: 1.create auto_id true collection 2.query with not existed primary keys
@@ -111,7 +113,7 @@ class TestQueryBase(TestcaseBase):
         collection_w.query(None, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_non_string(self):
+    def test_query_non_string_expr(self):
         """
         target: test query with non-string expr
         method: query with non-string expr, eg 1, [] ..
@@ -161,34 +163,87 @@ class TestQueryBase(TestcaseBase):
         collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_unsupported_field(self):
+    def test_query_expr_non_primary_fields(self):
         """
-        target: test query on unsupported field
-        method: query on float field
-        expected: raise exception
+        target: test query on non-primary, non-vector fields
+        method: query on non-primary, non-vector scalar fields
+        expected: verify query result
         """
-        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
-        term_expr = f'{ct.default_float_field_name} in [1., 2.]'
-        error = {ct.err_code: 1, ct.err_msg: "column is not int64"}
-        collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
+        self._connect()
+        # construct dataframe and insert data
+        df = pd.DataFrame({
+            ct.default_int64_field_name: pd.Series(data=[i for i in range(ct.default_nb)]),
+            ct.default_int32_field_name: pd.Series(data=[np.int32(i) for i in range(ct.default_nb)], dtype="int32"),
+            ct.default_int16_field_name: pd.Series(data=[np.int16(i) for i in range(ct.default_nb)], dtype="int16"),
+            ct.default_float_field_name: pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
+            ct.default_double_field_name: pd.Series(data=[np.double(i) for i in range(ct.default_nb)], dtype="double"),
+            ct.default_float_vec_field_name: cf.gen_vectors(ct.default_nb, ct.default_dim)
+        })
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
 
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_query_expr_non_primary_field(self):
+        # query by each non-primary, non-vector scalar field
+        non_primary_field = [ct.default_int32_field_name, ct.default_int16_field_name,
+                             ct.default_float_field_name, ct.default_double_field_name]
+
+        # expected res: first two rows and all fields except the last vec field
+        res = df.iloc[:2, :-1].to_dict('records')
+        for field in non_primary_field:
+            filter_values = df[field].tolist()[:2]
+            term_expr = f'{field} in {filter_values}'
+            self.collection_wrap.query(term_expr, output_fields=["*"],
+                                       check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue #7521 #7522")
+    def test_query_expr_by_bool_field(self):
         """
-        target: test query on non-primary field
-        method: query on non-primary int field
-        expected: raise exception
+        target: test query by bool field and output bool field
+        method: 1.create and insert with [int64, float, bool, float_vec] fields
+                2.query by bool field, and output all int64, bool fields
+        expected: verify query result and output fields
         """
-        fields = [cf.gen_int64_field(), cf.gen_int64_field(name='int2', is_primary=True), cf.gen_float_vec_field()]
-        schema = cf.gen_collection_schema(fields)
-        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema)
-        nb = 100
-        data = [[i for i in range(nb)], [i for i in range(nb)], cf.gen_vectors(nb, ct.default_dim)]
-        collection_w.insert(data)
-        assert collection_w.num_entities == nb
-        assert collection_w.primary_field.name == 'int2'
-        error = {ct.err_code: 1, ct.err_msg: "column is not primary key"}
-        collection_w.query(default_term_expr, check_task=CheckTasks.err_res, check_items=error)
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        bool_values = pd.Series(data=[True if i % 2 == 0 else False for i in range(ct.default_nb)], dtype="bool")
+        df.insert(2, ct.default_bool_field_name, bool_values)
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        term_expr = f'{ct.default_bool_field_name} in [True]'
+        res, _ = self.collection_wrap.query(term_expr, output_fields=[ct.default_bool_field_name])
+        assert len(res) == ct.default_nb / 2
+        assert set(res[0].keys()) == {ct.default_int64_field_name, ct.default_bool_field_name}
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_query_expr_by_int8_field(self):
+        """
+        target: test query by int8 field
+        method: 1.create and insert with [int64, float, int8, float_vec] fields
+                2.query by int8 field, and output all scalar fields
+        expected: verify query result
+        """
+        self._connect()
+        # construct collection from dataFrame according to [int64, float, int8, float_vec]
+        df = cf.gen_default_dataframe_data()
+        int8_values = pd.Series(data=[np.int8(i) for i in range(ct.default_nb)], dtype="int8")
+        df.insert(2, ct.default_int8_field_name, int8_values)
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        # query expression
+        term_expr = f'{ct.default_int8_field_name} in {[0]}'
+        # expected query result: int8 range is [-128, 127], so np.int8(i) wraps around
+        # every 256 and rows 0, 256, 512, ... all have an int8 value equal to 0
+        res = []
+        for i in range(0, ct.default_nb, 256):
+            res.extend(df.iloc[i:i + 1, :-1].to_dict('records'))
+        self.collection_wrap.load()
+        self.collection_wrap.query(term_expr, output_fields=["*"],
+                                   check_task=CheckTasks.check_query_results, check_items={exp_res: res})
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_expr_wrong_term_keyword(self):
@@ -202,15 +257,84 @@ class TestQueryBase(TestcaseBase):
         error_1 = {ct.err_code: 1, ct.err_msg: f'unexpected token Identifier("inn")'}
         collection_w.query(expr_1, check_task=CheckTasks.err_res, check_items=error_1)
 
-        # TODO(yukun): "not in" is supported now
-        # expr_2 = f'{ct.default_int64_field_name} not in [1, 2]'
-        # error_2 = {ct.err_code: 1, ct.err_msg: 'not top level term'}
-        # collection_w.query(expr_2, check_task=CheckTasks.err_res, check_items=error_2)
-
         expr_3 = f'{ct.default_int64_field_name} in not [1, 2]'
         error_3 = {ct.err_code: 1, ct.err_msg: 'right operand of the InExpr must be array'}
         collection_w.query(expr_3, check_task=CheckTasks.err_res, check_items=error_3)
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("field", [ct.default_int64_field_name, ct.default_float_field_name])
+    def test_query_expr_not_in_term(self, field):
+        """
+        target: test query with `not in` expr
+        method: query with `not in` expr
+        expected: verify query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        values = df[field].tolist()
+        pos = 100
+        term_expr = f'{field} not in {values[pos:]}'
+        res = df.iloc[:pos, :2].to_dict('records')
+        self.collection_wrap.query(term_expr, output_fields=["*"],
+                                   check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("pos", [0, ct.default_nb])
+    def test_query_expr_not_in_empty_and_all(self, pos):
+        """
+        target: test query with `not in` expr that hits none or all of the entities
+        method: query with `not in` all values (pos=0) or `not in` empty values (pos=nb)
+        expected: verify query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data()
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == ct.default_nb
+        self.collection_wrap.load()
+        int64_values = df[ct.default_int64_field_name].tolist()
+        term_expr = f'{ct.default_int64_field_name} not in {int64_values[pos:]}'
+        res = df.iloc[:pos, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #7544")
+    def test_query_expr_random_values(self):
+        """
+        target: test query with random filter values
+        method: query with random filter values, like [0, 2, 4, 3]
+        expected: correct query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data(nb=100)
+        log.debug(df.head(5))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == 100
+        self.collection_wrap.load()
+
+        # random_values = [random.randint(0, ct.default_nb) for _ in range(4)]
+        random_values = [0, 2, 4, 0]
+        term_expr = f'{ct.default_int64_field_name} in {random_values}'
+        res = df.iloc[random_values, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue #7553")
+    def test_query_expr_not_in_random(self):
+        """
+        target: test query with `not in` shuffled filter values
+        method: query with `not in` expr whose filter values are shuffled
+        expected: verify query result
+        """
+        self._connect()
+        df = cf.gen_default_dataframe_data(nb=50)
+        log.debug(df.head(5))
+        self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df,
+                                                      primary_field=ct.default_int64_field_name)
+        assert self.collection_wrap.num_entities == 50
+        self.collection_wrap.load()
+
+        random_values = [i for i in range(10, 50)]
+        random.shuffle(random_values)
+        log.debug(f'random values: {random_values}')
+        term_expr = f'{ct.default_int64_field_name} not in {random_values}'
+        res = df.iloc[:10, :1].to_dict('records')
+        self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_expr_non_array_term(self):
         """
@@ -640,7 +764,7 @@ class TestQueryOperation(TestcaseBase):
 
     @pytest.mark.tags(CaseLabel.L1)
     # @pytest.mark.parametrize("collection_name, data",
-    #                         [(cf.gen_unique_str(prefix), cf.gen_default_list_data(ct.default_nb))])
+    #                          [(cf.gen_unique_str(prefix), cf.gen_default_list_data(ct.default_nb))])
     def test_query_without_loading(self):
         """
         target: test query without loading
@@ -730,13 +854,12 @@ class TestQueryOperation(TestcaseBase):
         res, _ = collection_w.query(term_expr)
         assert len(res) == len(int_values)
 
-    @pytest.mark.xfail(reason="fail")
     @pytest.mark.tags(CaseLabel.L2)
     def test_query_expr_repeated_term_array(self):
         """
         target: test query with repeated term array on primary field with unique value
         method: query with repeated array value
-        expected: todo
+        expected: return hit entities, with no repeats
         """
         collection_w, vectors, binary_raw_vectors = self.init_collection_general(prefix, insert_data=True)[0:3]
         int_values = [0, 0, 0, 0]
         term_expr = f'{ct.default_int64_field_name} in {int_values}'
         res, _ = collection_w.query(term_expr)
         assert len(res) == 1
         assert res[0][ct.default_int64_field_name] == int_values[0]
 
     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.xfail(reason="issue #6624")
     def test_query_dup_ids_dup_term_array(self):
         """
         target: test query on duplicate primary keys with dup term array
         method: 1.create collection and insert duplicate primary keys
                 2.query with dup term array
         expected: todo
         """
         collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
-        df = cf.gen_default_dataframe_data(nb=ct.default_nb)
+        df = cf.gen_default_dataframe_data(nb=100)
         df[ct.default_int64_field_name] = 0
         mutation_res, _ = collection_w.insert(df)
         assert mutation_res.primary_keys == df[ct.default_int64_field_name].tolist()
         collection_w.load()
         term_expr = f'{ct.default_int64_field_name} in {[0, 0, 0]}'
-        res, _ = collection_w.query(term_expr)
-        log.debug(res)
+        res = df.iloc[:, :2].to_dict('records')
+        collection_w.query(term_expr, output_fields=["*"], check_task=CheckTasks.check_query_results,
+                           check_items={exp_res: res})
 
     @pytest.mark.tags(CaseLabel.L0)
     def test_query_after_index(self):