From c9997a27038751b2e001399839361cf08d47d93a Mon Sep 17 00:00:00 2001 From: binbin <83755740+binbinlv@users.noreply.github.com> Date: Tue, 6 Jun 2023 12:02:34 +0800 Subject: [PATCH] Add json and dynamic support cases (#24525) Signed-off-by: Binbin Lv --- tests/python_client/base/client_base.py | 16 +- tests/python_client/check/func_check.py | 3 +- tests/python_client/common/common_func.py | 145 +++- tests/python_client/common/common_type.py | 2 + .../testcases/test_collection.py | 111 +++- tests/python_client/testcases/test_delete.py | 18 +- tests/python_client/testcases/test_index.py | 14 + tests/python_client/testcases/test_insert.py | 4 +- tests/python_client/testcases/test_query.py | 67 +- tests/python_client/testcases/test_search.py | 617 +++++++++++++----- 10 files changed, 770 insertions(+), 227 deletions(-) diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py index fdf5d7b38f..b460c58ca0 100644 --- a/tests/python_client/base/client_base.py +++ b/tests/python_client/base/client_base.py @@ -169,7 +169,7 @@ class TestcaseBase(Base): if is_all_data_type: default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim, primary_field=primary_field) - log.info("init_collection_general: collection creation") + log.info("insert_data_general: collection creation") collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs) pre_entities = collection_w.num_entities if insert_data: @@ -184,7 +184,8 @@ class TestcaseBase(Base): def init_collection_general(self, prefix="test", insert_data=False, nb=ct.default_nb, partition_num=0, is_binary=False, is_all_data_type=False, auto_id=False, dim=ct.default_dim, is_index=True, - primary_field=ct.default_int64_field_name, is_flush=True, name=None, **kwargs): + primary_field=ct.default_int64_field_name, is_flush=True, name=None, + enable_dynamic_field=False, with_json=True, **kwargs): """ target: create specified collections method: 1. 
create collections (binary/non-binary, default/all data type, auto_id or not) @@ -204,13 +205,17 @@ class TestcaseBase(Base): insert_ids = [] time_stamp = 0 # 1 create collection - default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field) + default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field, + enable_dynamic_field=enable_dynamic_field, + with_json=with_json) if is_binary: default_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field) if is_all_data_type: default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim, - primary_field=primary_field) + primary_field=primary_field, + enable_dynamic_field=enable_dynamic_field, + with_json=with_json) log.info("init_collection_general: collection creation") collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs) # 2 add extra partitions if specified (default is 1 partition named "_default") @@ -219,7 +224,8 @@ class TestcaseBase(Base): # 3 insert data if specified if insert_data: collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \ - cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id, dim=dim) + cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id, + dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json) if is_flush: assert collection_w.is_empty is False assert collection_w.num_entities == nb diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py index ea2c571eab..c09ef23b55 100644 --- a/tests/python_client/check/func_check.py +++ b/tests/python_client/check/func_check.py @@ -296,7 +296,8 @@ class ResponseChecker: primary_field = check_items.get("primary_field", None) if exp_res is not None: if isinstance(query_res, list): - assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=primary_field, with_vec=with_vec) + assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=primary_field, + with_vec=with_vec) return True else: log.error(f"Query result {query_res} is not list") diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index 613202193f..285c175263 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -81,6 +81,12 @@ def gen_string_field(name=ct.default_string_field_name, description=ct.default_d return string_field +def gen_json_field(name=ct.default_json_field_name, description=ct.default_desc, is_primary=False, **kwargs): + json_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.JSON, description=description, + is_primary=is_primary, **kwargs) + return json_field + + def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs): int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description, is_primary=is_primary, **kwargs) @@ -134,10 +140,24 @@ def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name, - auto_id=False, dim=ct.default_dim): - fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field(dim=dim)] + auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True): + if 
enable_dynamic_field:
+        if primary_field == ct.default_int64_field_name:
+            fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
+        elif primary_field == ct.default_string_field_name:
+            fields = [gen_string_field(), gen_float_vec_field(dim=dim)]
+        else:
+            log.error("Primary key only supports int64 and varchar")
+            assert False
+    else:
+        fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(),
+                  gen_float_vec_field(dim=dim)]
+        if with_json is False:
+            fields.remove(gen_json_field())
+
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
-                                                                    primary_field=primary_field, auto_id=auto_id)
+                                                                    primary_field=primary_field, auto_id=auto_id,
+                                                                    enable_dynamic_field=enable_dynamic_field)
     return schema
 
 
@@ -154,7 +174,24 @@ def gen_general_collection_schema(description=ct.default_desc, primary_field=ct.
 
 def gen_string_pk_default_collection_schema(description=ct.default_desc, primary_field=ct.default_string_field_name,
                                             auto_id=False, dim=ct.default_dim):
-    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
+    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(), gen_float_vec_field(dim=dim)]
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                     primary_field=primary_field, auto_id=auto_id)
     return schema
+
+
+def gen_json_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
+                                       auto_id=False, dim=ct.default_dim, **kwargs):
+    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(**kwargs),
+              gen_float_vec_field(dim=dim)]
+    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
+                                                                    primary_field=primary_field, auto_id=auto_id)
+    return schema
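+
+# A hedged sketch of how the new schema flags compose (field names are the
+# defaults from common_type.py; the trailing comments are illustrative):
+#
+#   gen_default_collection_schema()                           # int64, float, varchar, json_field, float_vector
+#   gen_default_collection_schema(with_json=False)            # int64, float, varchar, float_vector
+#   gen_default_collection_schema(enable_dynamic_field=True)  # int64, float_vector; undeclared keys go to the dynamic field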
+
+
+def gen_multiple_json_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
+                                                auto_id=False, dim=ct.default_dim):
+    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(name="json1"),
+              gen_json_field(name="json2"), gen_float_vec_field(dim=dim)]
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                     primary_field=primary_field, auto_id=auto_id)
     return schema
 
 
@@ -162,11 +199,19 @@
 def gen_collection_schema_all_datatype(description=ct.default_desc, primary_field=ct.default_int64_field_name,
-                                       auto_id=False, dim=ct.default_dim):
-    fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
-              gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
+                                       auto_id=False, dim=ct.default_dim,
+                                       enable_dynamic_field=False, with_json=True):
+    if enable_dynamic_field:
+        fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
+    else:
+        fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
+                  gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(),
+                  gen_json_field(), gen_float_vec_field(dim=dim)]
+        if with_json is False:
+            fields.remove(gen_json_field())
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
-                                                                    primary_field=primary_field, auto_id=auto_id)
+                                                                    primary_field=primary_field, auto_id=auto_id,
+                                                                    enable_dynamic_field=enable_dynamic_field)
     return schema
 
 
@@ -227,29 +272,55 @@ def gen_binary_vectors(num, dim):
     return raw_vectors, binary_vectors
 
 
-def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
+def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
     int_values = pd.Series(data=[i for i in range(start, start + nb)])
     float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
     string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
+    json_values = [{"number": i, "float": i*1.0, "string": str(i), "bool": bool(i),
+                    "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int_values,
         ct.default_float_field_name: float_values,
         ct.default_string_field_name: string_values,
+        ct.default_json_field_name: json_values,
         ct.default_float_vec_field_name: float_vec_values
     })
+    if with_json is False:
+        df.drop(ct.default_json_field_name, axis=1, inplace=True)
+
     return df
 
 
+def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
+    array = []
+    for i in range(start, start + nb):
+        row = {ct.default_int64_field_name: i,
+               ct.default_float_field_name: i*1.0,
+               ct.default_string_field_name: str(i),
+               ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i),
+                                            "list": [j for j in range(0, i)]},
+               ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
+               }
+        if with_json is False:
+            row.pop(ct.default_json_field_name, None)
+        array.append(row)
+
+    return array
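+
+# A hedged example of a single row from gen_default_rows_data(nb=1, start=5);
+# the vector values are random, shown here with dim=2 purely for illustration:
+#
+#   {"int64": 5, "float": 5.0, "varchar": "5",
+#    "json_field": {"number": 5, "string": "5", "bool": True, "list": [0, 1, 2, 3, 4]},
+#    "float_vector": [0.31, 0.77]}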
+
+
 def gen_default_data_for_upsert(nb=ct.default_nb, dim=ct.default_dim, start=0, size=10000):
     int_values = pd.Series(data=[i for i in range(start, start + nb)])
     float_values = pd.Series(data=[np.float32(i + size) for i in range(start, start + nb)], dtype="float32")
     string_values = pd.Series(data=[str(i + size) for i in range(start, start + nb)], dtype="string")
+    json_values = [{"number": i, "string": str(i), "bool": bool(i),
+                    "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int_values,
         ct.default_float_field_name: float_values,
         ct.default_string_field_name: string_values,
+        ct.default_json_field_name: json_values,
         ct.default_float_vec_field_name: float_vec_values
     })
     return df, float_values
@@ -304,7 +375,7 @@ def gen_dataframe_multi_string_fields(string_fields, nb=ct.default_nb):
     return df
 
 
-def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
+def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
     int64_values = pd.Series(data=[i for i in range(start, start + nb)])
     int32_values = pd.Series(data=[np.int32(i) for i in range(start, start + nb)], dtype="int32")
     int16_values = pd.Series(data=[np.int16(i) for i in range(start, start + nb)], dtype="int16")
@@ -313,6 +384,8 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
     float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
     double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
     string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
+    json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
+                   for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int64_values,
@@ -323,11 +396,38 @@
         ct.default_float_field_name: float_values,
         ct.default_double_field_name: double_values,
         ct.default_string_field_name: string_values,
+        ct.default_json_field_name: json_values,
         ct.default_float_vec_field_name: float_vec_values
     })
+    if with_json is False:
+        df.drop(ct.default_json_field_name, axis=1, inplace=True)
+
     return df
 
 
+def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
+    array = []
+    for i in range(start, start + nb):
+        row = {ct.default_int64_field_name: i,
+               ct.default_int32_field_name: i,
+               ct.default_int16_field_name: i,
+               ct.default_int8_field_name: i,
+               ct.default_bool_field_name: bool(i),
+               ct.default_float_field_name: i*1.0,
+               ct.default_double_field_name: i*1.0,
+               ct.default_string_field_name: str(i),
+               ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i),
+                                            "list": [j for j in range(0, i)]},
+               ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
+               }
+        if with_json is False:
+            row.pop(ct.default_json_field_name, None)
+        array.append(row)
+
+    return array
+
+
 def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
     int_values = pd.Series(data=[i for i in range(start, start + nb)])
     float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
@@ -346,8 +446,10 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
     int_values = [i for i in range(start, start + nb)]
     float_values = [np.float32(i) for i in range(start, start + nb)]
     string_values = [str(i) for i in range(start, start + nb)]
+    json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
+                   for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
-    data = [int_values, float_values, string_values, float_vec_values]
+    data = [int_values, float_values, string_values, json_values, float_vec_values]
 
     return data
@@ -421,8 +523,10 @@ def gen_numpy_data(nb=ct.default_nb, dim=ct.default_dim):
     int_values = np.arange(nb, dtype='int64')
     float_values = np.arange(nb, dtype='float32')
     string_values = [np.str_(i) for i in range(nb)]
+    json_values = [{"number": i, "string": str(i), "bool": bool(i),
+                    "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(nb)]
     float_vec_values = gen_vectors(nb, dim)
-    data = [int_values, float_values, string_values, float_vec_values]
+    data = [int_values, float_values, string_values, json_values, float_vec_values]
 
     return data
@@ -768,7 +872,7 @@ def gen_partitions(collection_w, partition_num=1):
 
 
 def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False,
-                auto_id=False, dim=ct.default_dim, insert_offset=0):
+                auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True):
     """
     target: insert non-binary/binary data
     method: insert non-binary/binary data into partitions if any
@@ -782,14 +886,23 @@ def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False,
     start = insert_offset
     log.info(f"inserted {nb} data into collection {collection_w.name}")
     for i in range(num):
-        default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start)
+        log.debug("Dynamic field is enabled: %s" % enable_dynamic_field)
+        default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json)
+        if enable_dynamic_field:
+ default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json) if is_binary: default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start) binary_raw_vectors.extend(binary_raw_data) if is_all_data_type: - default_data = gen_dataframe_all_data_type(nb // num, dim=dim, start=start) + default_data = gen_dataframe_all_data_type(nb // num, dim=dim, start=start, with_json=with_json) + if enable_dynamic_field: + default_data = gen_default_rows_data_all_data_type(nb // num, dim=dim, start=start, with_json=with_json) if auto_id: - default_data.drop(ct.default_int64_field_name, axis=1, inplace=True) + if enable_dynamic_field: + for data in default_data: + data.pop(ct.default_int64_field_name, None) + else: + default_data.drop(ct.default_int64_field_name, axis=1, inplace=True) insert_res = collection_w.insert(default_data, par[i].name)[0] time_stamp = insert_res.timestamp insert_ids.extend(insert_res.primary_keys) diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py index 7ee88fd2f9..0ae372fb60 100644 --- a/tests/python_client/common/common_type.py +++ b/tests/python_client/common/common_type.py @@ -33,6 +33,7 @@ default_int64_field_name = "int64" default_float_field_name = "float" default_double_field_name = "double" default_string_field_name = "varchar" +default_json_field_name = "json_field" default_float_vec_field_name = "float_vector" another_float_vec_field_name = "float_vector1" default_binary_vec_field_name = "binary_vector" @@ -42,6 +43,7 @@ default_resource_group_capacity = 1000000 default_tag = "1970_01_01" row_count = "row_count" default_length = 65535 +default_json_list_length = 3 default_desc = "" default_collection_desc = "default collection" default_index_name = "default_index_name" diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py index 2192aff452..0147642353 100644 --- a/tests/python_client/testcases/test_collection.py +++ b/tests/python_client/testcases/test_collection.py @@ -327,7 +327,7 @@ class TestCollectionParams(TestcaseBase): expected: raise exception """ self._connect() - error = {ct.err_code: 0, ct.err_msg: "Field dtype must be of DataType"} + error = {ct.err_code: 1, ct.err_msg: "Field dtype must be of DataType"} self.field_schema_wrap.init_field_schema(name="unknown", dtype=DataType.UNKNOWN, check_task=CheckTasks.err_res, check_items=error) @@ -560,7 +560,7 @@ class TestCollectionParams(TestcaseBase): """ self._connect() fields = [cf.gen_int64_field(), cf.gen_float_vec_field()] - error = {ct.err_code: 0, ct.err_msg: "Primary field must in dataframe."} + error = {ct.err_code: 1, ct.err_msg: "Param primary_field must be str type."} self.collection_schema_wrap.init_collection_schema(fields, primary_field=primary_field, check_task=CheckTasks.err_res, check_items=error) @@ -646,7 +646,7 @@ class TestCollectionParams(TestcaseBase): int_field_one = cf.gen_int64_field(is_primary=True) int_field_two = cf.gen_int64_field(name="int2") fields = [int_field_one, int_field_two, cf.gen_float_vec_field()] - error = {ct.err_code: 0, ct.err_msg: "Expected only one primary key field"} + error = {ct.err_code: 1, ct.err_msg: "Expected only one primary key field"} self.collection_schema_wrap.init_collection_schema(fields, primary_field=int_field_two.name, check_task=CheckTasks.err_res, check_items=error) @@ -726,6 +726,7 @@ class TestCollectionParams(TestcaseBase): assert not schema.auto_id 
@pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue 24578")
     def test_collection_auto_id_inconsistent(self):
         """
         target: test collection auto_id with both collection schema and field schema
@@ -768,6 +769,7 @@
                                                  auto_id=None, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue 24578")
     @pytest.mark.parametrize("auto_id", ct.get_invalid_strs)
     def test_collection_invalid_auto_id(self, auto_id):
         """
@@ -2909,6 +2911,7 @@ class TestDescribeCollection(TestcaseBase):
     """
 
     @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue 24493")
     def test_collection_describe(self):
         """
         target: test describe collection
@@ -2921,14 +2924,22 @@
         collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
         description = {'collection_name': c_name, 'auto_id': False, 'num_shards': ct.default_shards_num,
                        'description': '', 'fields': [{'field_id': 100, 'name': 'int64', 'description': '', 'type': 5,
-                                                      'params': {}, 'is_primary': True, 'auto_id': False},
+                                                      'params': {}, 'is_primary': True, 'auto_id': False,
+                                                      'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
                                                      {'field_id': 101, 'name': 'float', 'description': '', 'type': 10,
-                                                      'params': {}, 'is_primary': False, 'auto_id': False},
+                                                      'params': {}, 'is_primary': False, 'auto_id': False,
+                                                      'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
                                                      {'field_id': 102, 'name': 'varchar', 'description': '', 'type': 21,
-                                                      'params': {'max_length': 65535}, 'is_primary': False, 'auto_id': False},
-                                                     {'field_id': 103, 'name': 'float_vector', 'description': '', 'type': 101,
-                                                      'params': {'dim': 128}, 'is_primary': False, 'auto_id': False}],
-                       'aliases': [], 'consistency_level': 0, 'properties': []}
+                                                      'params': {'max_length': 65535}, 'is_primary': False, 'auto_id': False,
+                                                      'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
+                                                     {'field_id': 103, 'name': 'json_field', 'description': '', 'type': 23,
+                                                      'params': {}, 'is_primary': False, 'auto_id': False,
+                                                      'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
+                                                     {'field_id': 104, 'name': 'float_vector', 'description': '', 'type': 101,
+                                                      'params': {'dim': 128}, 'is_primary': False, 'auto_id': False,
+                                                      'is_partition_key': False, 'default_value': None, 'is_dynamic': False}],
+                       'aliases': [], 'consistency_level': 2, 'properties': [], 'num_partitions': 0,
+                       'enable_dynamic_field': False}
         res = collection_w.describe()[0]
         del res['collection_id']
         log.info(res)
@@ -3670,3 +3681,85 @@ class TestCollectionString(TestcaseBase):
         error = {ct.err_code: 0, ct.err_msg: "autoID is not supported when the VarChar field is the primary key"}
         self.collection_wrap.init_collection(name=cf.gen_unique_str(prefix), schema=schema,
                                              check_task=CheckTasks.err_res, check_items=error)
+
+
+class TestCollectionJSON(TestcaseBase):
+    """
+    ******************************************************************
+    The following cases are used to test about json
+    ******************************************************************
+    """
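+    # A hedged sketch of the basic json flow these cases build on (only helpers
+    # added in this patch are used; nb and the sample values are illustrative):
+    #
+    #   schema = cf.gen_json_default_collection_schema()
+    #   collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema)
+    #   collection_w.insert(cf.gen_default_dataframe_data(nb=10))
+    #   # each json_field cell is a dict such as
+    #   # {"number": 0, "float": 0.0, "string": "0", "bool": False, "list": [0, 1, 2]}
+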
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("auto_id", [True, False])
+    def test_collection_json_field_as_primary_key(self, auto_id):
+        """
+        target: test create collection with JSON field as primary key
+        method: 1. create collection with one JSON field and one vector field
+                2. set json field is_primary=true
+                3. set auto_id as true or false
+        expected: Raise exception (not supported)
+        """
+        self._connect()
+        int_field = cf.gen_int64_field()
+        vec_field = cf.gen_float_vec_field()
+        string_field = cf.gen_string_field()
+        # 1. create json field as primary key through field schema api
+        error = {ct.err_code: 1, ct.err_msg: "Primary key type must be DataType.INT64 or DataType.VARCHAR"}
+        json_field = cf.gen_json_field(is_primary=True, auto_id=auto_id)
+        fields = [int_field, string_field, json_field, vec_field]
+        self.collection_schema_wrap.init_collection_schema(fields=fields,
+                                                            check_task=CheckTasks.err_res, check_items=error)
+        # 2. create json field as primary key through collection schema api
+        json_field = cf.gen_json_field()
+        fields = [int_field, string_field, json_field, vec_field]
+        self.collection_schema_wrap.init_collection_schema(fields=fields, primary_field=ct.default_json_field_name,
+                                                            check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_collection_json_field_partition_key(self, primary_field):
+        """
+        target: test create collection with JSON field as partition key
+        method: 1. create collection with a JSON field, primary key field and vector field
+                2. set the json field as partition key
+        expected: Raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_json_default_collection_schema(primary_field=primary_field, is_partition_key=True)
+        error = {ct.err_code: 1, ct.err_msg: "Partition key field type must be DataType.INT64 or DataType.VARCHAR."}
+        self.collection_wrap.init_collection(name=c_name, schema=schema, partition_key_field=ct.default_json_field_name,
+                                             check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_collection_json_field_supported_primary_key(self, primary_field):
+        """
+        target: test create collection with one JSON field
+        method: 1. create collection with one JSON field, primary key field and vector field
+                2. set json field is_primary=false
+        expected: Create collection successfully
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_json_default_collection_schema(primary_field=primary_field)
+        self.collection_wrap.init_collection(name=c_name, schema=schema,
+                                             check_task=CheckTasks.check_collection_property,
+                                             check_items={exp_name: c_name, exp_schema: schema})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_collection_multiple_json_fields_supported_primary_key(self, primary_field):
+        """
+        target: test create collection with multiple JSON fields
+        method: 1. create collection with multiple JSON fields, primary key field and vector field
+                2. 
set json field is_primary=false + expected: Create collection successfully + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + schema = cf.gen_multiple_json_default_collection_schema(primary_field=primary_field) + self.collection_wrap.init_collection(name=c_name, schema=schema, + check_task=CheckTasks.check_collection_property, + check_items={exp_name: c_name, exp_schema: schema}) + + diff --git a/tests/python_client/testcases/test_delete.py b/tests/python_client/testcases/test_delete.py index ed6fd8e2c3..93789d7799 100644 --- a/tests/python_client/testcases/test_delete.py +++ b/tests/python_client/testcases/test_delete.py @@ -478,7 +478,7 @@ class TestDeleteOperation(TestcaseBase): """ # init collection with nb default data collection_w, _, _, ids = self.init_collection_general(prefix, insert_data=True)[0:4] - entity, _ = collection_w.query(tmp_expr, output_fields=["%"]) + entity, _ = collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name]) search_res, _ = collection_w.search([entity[0][ct.default_float_vec_field_name]], ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) @@ -994,7 +994,7 @@ class TestDeleteOperation(TestcaseBase): log.debug(collection_w.num_entities) collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, - check_items={'exp_res': df_new.iloc[[0], [0, 3]].to_dict('records'), 'with_vec': True}) + check_items={'exp_res': df_new.iloc[[0], [0, 4]].to_dict('records'), 'with_vec': True}) collection_w.delete(tmp_expr) if to_flush_delete: @@ -1635,7 +1635,7 @@ class TestDeleteString(TestcaseBase): log.debug(collection_w.num_entities) collection_w.query(default_string_expr, output_fields=[ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, - check_items={'exp_res': df_new.iloc[[0], [2, 3]].to_dict('records'), + check_items={'exp_res': df_new.iloc[[0], [2, 4]].to_dict('records'), 'primary_field': ct.default_string_field_name, 'with_vec': True}) collection_w.delete(default_string_expr) @@ -1800,6 +1800,7 @@ class TestDeleteString(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("to_query", [True, False]) + # @pytest.mark.parametrize("enable_dynamic_field", [True, False]) def test_delete_insert_same_id_sealed_string(self, to_query): """ target: test insert same id entity after delete from sealed data @@ -1840,10 +1841,15 @@ class TestDeleteString(TestcaseBase): log.debug(collection_w.num_entities) # re-query - res = df_new.iloc[[0], [2, 3]].to_dict('records') + res = df_new.iloc[[0], [2, 4]].to_dict('records') + log.info(res) collection_w.query(default_string_expr, output_fields=[ct.default_float_vec_field_name], - check_task=CheckTasks.check_query_results, check_items={'exp_res': res, 'primary_field': ct.default_string_field_name, 'with_vec': True}) - collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]], anns_field=ct.default_float_vec_field_name, + check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, + 'primary_field': ct.default_string_field_name, + 'with_vec': True}) + collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]], + anns_field=ct.default_float_vec_field_name, param=default_search_params, limit=1) @pytest.mark.tags(CaseLabel.L1) diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index e6ddc2a42e..d9bc7e47ba 100644 --- a/tests/python_client/testcases/test_index.py +++ 
b/tests/python_client/testcases/test_index.py
@@ -1861,3 +1861,17 @@ class TestIndexDiskann(TestcaseBase):
                                  check_task=CheckTasks.err_res,
                                  check_items={ct.err_code: 1,
                                               ct.err_msg: "invalid index params"})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_index_json(self):
+        """
+        target: test create index on json field
+        method: 1. create collection with a json field, and create index on the json field
+        expected: raise an exception
+        """
+        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
+                                                                      dim=ct.default_dim, is_index=False)[0:4]
+        collection_w.create_index(ct.default_json_field_name, index_params=ct.default_flat_index,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={ct.err_code: 1,
+                                               ct.err_msg: "create index on json field is not supported"})
diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py
index 01f35a05a0..8ed6e60275 100644
--- a/tests/python_client/testcases/test_insert.py
+++ b/tests/python_client/testcases/test_insert.py
@@ -98,7 +98,7 @@ class TestInsertParams(TestcaseBase):
         collection_w = self.init_collection_wrap(name=c_name)
         error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
                                              "expected: ['int64', 'float', 'varchar', 'float_vector'], got %s" % data}
-        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
+        collection_w.insert(data=data)
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_dataframe_only_columns(self):
         """
@@ -1379,7 +1379,7 @@ class TestUpsertValid(TestcaseBase):
                 [str(i) for i in range(nb)], binary_vectors]
         collection_w.upsert(data)
         res = collection_w.query("int64 >= 0", [ct.default_binary_vec_field_name])[0]
-        assert binary_vectors[0] == res[0][ct. default_binary_vec_field_name]
+        assert binary_vectors[0] == res[0][ct. 
default_binary_vec_field_name][0] @pytest.mark.tags(CaseLabel.L1) def test_upsert_same_with_inserted_data(self): diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py index e480276382..1db55aef54 100644 --- a/tests/python_client/testcases/test_query.py +++ b/tests/python_client/testcases/test_query.py @@ -42,6 +42,10 @@ class TestQueryParams(TestcaseBase): query(collection_name, expr, output_fields=None, partition_names=None, timeout=None) """ + @pytest.fixture(scope="function", params=[True, False]) + def enable_dynamic_field(self, request): + yield request.param + @pytest.mark.tags(CaseLabel.L2) def test_query_invalid(self): """ @@ -55,18 +59,27 @@ class TestQueryParams(TestcaseBase): collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) - def test_query(self): + def test_query(self, enable_dynamic_field): """ target: test query method: query with term expr expected: verify query result """ # create collection, insert default_nb, load collection - collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2] - int_values = vectors[0][ct.default_int64_field_name].values.tolist() + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=enable_dynamic_field)[0:2] pos = 5 + if enable_dynamic_field: + int_values = [] + for vector in vectors[0]: + vector = vector[ct.default_int64_field_name] + int_values.append(vector) + res = [{ct.default_int64_field_name: int_values[i]} for i in range(pos)] + else: + int_values = vectors[0][ct.default_int64_field_name].values.tolist() + res = vectors[0].iloc[0:pos, :1].to_dict('records') + term_expr = f'{ct.default_int64_field_name} in {int_values[:pos]}' - res = vectors[0].iloc[0:pos, :1].to_dict('records') collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) @pytest.mark.tags(CaseLabel.L1) @@ -337,7 +350,7 @@ class TestQueryParams(TestcaseBase): res = [] # int8 range [-128, 127] so when nb=1200, there are many repeated int8 values equal to 0 for i in range(0, ct.default_nb, 256): - res.extend(df.iloc[i:i + 1, :-1].to_dict('records')) + res.extend(df.iloc[i:i + 1, :-2].to_dict('records')) self.collection_wrap.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index) self.collection_wrap.load() self.collection_wrap.query(term_expr, output_fields=["float", "int64", "int8", "varchar"], @@ -350,7 +363,7 @@ class TestQueryParams(TestcaseBase): yield request.param @pytest.mark.tags(CaseLabel.L1) - def test_query_with_expression(self, get_normal_expr): + def test_query_with_expression(self, get_normal_expr, enable_dynamic_field): """ target: test query with different expr method: query with different boolean expr @@ -358,7 +371,9 @@ class TestQueryParams(TestcaseBase): """ # 1. 
initialize with data nb = 1000 - collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb)[0:4] + collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, + enable_dynamic_field= + enable_dynamic_field)[0:4] # filter result with expression in collection _vectors = _vectors[0] @@ -366,8 +381,12 @@ class TestQueryParams(TestcaseBase): expression = expr.replace("&&", "and").replace("||", "or") filter_ids = [] for i, _id in enumerate(insert_ids): - int64 = _vectors.int64[i] - float = _vectors.float[i] + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] if not expression or eval(expression): filter_ids.append(_id) @@ -536,30 +555,34 @@ class TestQueryParams(TestcaseBase): collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) - def test_query_output_field_none_or_empty(self): + def test_query_output_field_none_or_empty(self, enable_dynamic_field): """ target: test query with none and empty output field method: query with output field=None, field=[] expected: return primary field """ - collection_w = self.init_collection_general(prefix, insert_data=True)[0] + collection_w = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=enable_dynamic_field)[0] for fields in [None, []]: res, _ = collection_w.query(default_term_expr, output_fields=fields) assert res[0].keys() == {ct.default_int64_field_name} @pytest.mark.tags(CaseLabel.L0) - def test_query_output_one_field(self): + def test_query_output_one_field(self, enable_dynamic_field): """ target: test query with output one field method: query with output one field expected: return one field """ - collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2] + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field= + enable_dynamic_field)[0:2] res, _ = collection_w.query(default_term_expr, output_fields=[ct.default_float_field_name]) assert set(res[0].keys()) == {ct.default_int64_field_name, ct.default_float_field_name} @pytest.mark.tags(CaseLabel.L1) - def test_query_output_all_fields(self): + @pytest.mark.xfail(reason="issue 24637") + def test_query_output_all_fields(self, enable_dynamic_field): """ target: test query with none output field method: query with output field=None @@ -567,11 +590,18 @@ class TestQueryParams(TestcaseBase): """ # 1. 
initialize with data collection_w, df, _, insert_ids = self.init_collection_general(prefix, True, nb=10, - is_all_data_type=True)[0:4] + is_all_data_type=True, + enable_dynamic_field= + enable_dynamic_field)[0:4] all_fields = [ct.default_int64_field_name, ct.default_int32_field_name, ct.default_int16_field_name, ct.default_int8_field_name, ct.default_bool_field_name, ct.default_float_field_name, - ct.default_double_field_name, ct.default_string_field_name, ct.default_float_vec_field_name] - res = df[0].iloc[:2].to_dict('records') + ct.default_double_field_name, ct.default_string_field_name, ct.default_json_field_name, + ct.default_float_vec_field_name] + if enable_dynamic_field: + res = df[0][:2] + else: + res = df[0].iloc[:2].to_dict('records') + log.info(res) collection_w.load() actual_res, _ = collection_w.query(default_term_expr, output_fields=all_fields, check_task=CheckTasks.check_query_results, @@ -736,6 +766,7 @@ class TestQueryParams(TestcaseBase): check_items=error) @pytest.mark.tags(CaseLabel.L0) + @pytest.mark.xfail(reason="issue 24637") def test_query_output_fields_simple_wildcard(self): """ target: test query output_fields with simple wildcard (* and %) @@ -754,6 +785,7 @@ class TestQueryParams(TestcaseBase): check_items={exp_res: res3, "with_vec": True}) @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue 24637") def test_query_output_fields_part_scale_wildcard(self): """ target: test query output_fields with part wildcard @@ -1681,6 +1713,7 @@ class TestQueryString(TestcaseBase): check_items={ct.err_code: 1, ct.err_msg: f' cannot parse expression:{expression}'}) @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue 24637") def test_query_after_insert_multi_threading(self): """ target: test data consistency after multi threading insert diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index 75e9333094..a9fbbf5dc3 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -33,6 +33,7 @@ default_search_exp = "int64 >= 0" default_search_string_exp = "varchar >= \"0\"" default_search_mix_exp = "int64 >= 0 && varchar >= \"0\"" default_invaild_string_exp = "varchar >= 0" +default_json_search_exp = "json_field[\"number\"] >= 0" perfix_expr = 'varchar like "0%"' default_search_field = ct.default_float_vec_field_name default_search_params = ct.default_search_params @@ -144,6 +145,9 @@ class TestCollectionSearchInvalid(TestcaseBase): pytest.skip("number is valid for range search paras") yield request.param + @pytest.fixture(scope="function", params=[True, False]) + def enable_dynamic_field(self, request): + yield request.param """ ****************************************************************** @@ -1152,6 +1156,38 @@ class TestCollectionSearchInvalid(TestcaseBase): check_items={"err_code": 1, "err_msg": f"invalid metric type"})[0] + @pytest.mark.tags(CaseLabel.L1) + def test_search_dynamic_compare_two_fields(self): + """ + target: test search compare with two fields for dynamic collection + method: 1.create collection , insert data, enable dynamic function + 2.search with two fields comparisons + expected: Raise exception + """ + # create collection, insert tmp_nb, flush and load + collection_w = self.init_collection_general(prefix, insert_data=True, + primary_field=ct.default_string_field_name, + is_index=False, + enable_dynamic_field=True)[0] + + # create index + index_params_one = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 
64}} + collection_w.create_index(ct.default_float_vec_field_name, index_params_one, index_name=index_name1) + index_params_two = {} + collection_w.create_index(ct.default_string_field_name, index_params=index_params_two, index_name=index_name2) + assert collection_w.has_index(index_name=index_name2) + collection_w.load() + # delete entity + expr = 'float >= int64' + # search with id 0 vectors + vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] + collection_w.search(vectors[:default_nq], default_search_field, + default_search_params, default_limit, + expr, + check_task=CheckTasks.err_res, + check_items={"err_code": 1, + "err_msg": f"unsupported left datatype of compare expr"}) + class TestCollectionSearch(TestcaseBase): """ Test case of search interface """ @@ -1185,6 +1221,10 @@ class TestCollectionSearch(TestcaseBase): def is_flush(self, request): yield request.param + @pytest.fixture(scope="function", params=[True, False]) + def enable_dynamic_field(self, request): + yield request.param + """ ****************************************************************** # The following are valid base cases @@ -1192,7 +1232,7 @@ class TestCollectionSearch(TestcaseBase): """ @pytest.mark.tags(CaseLabel.L0) - def test_search_normal(self, nq, dim, auto_id, is_flush): + def test_search_normal(self, nq, dim, auto_id, is_flush, enable_dynamic_field): """ target: test search normal case method: create connection, collection, insert and search @@ -1200,8 +1240,10 @@ class TestCollectionSearch(TestcaseBase): 2. search successfully with limit(topK) after travel timestamp """ # 1. initialize with data + collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush)[0:5] + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, + enable_dynamic_field=enable_dynamic_field)[0:5] # 2. 
search before insert time_stamp log.info("test_search_normal: searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -1294,19 +1336,25 @@ class TestCollectionSearch(TestcaseBase): "ids": insert_ids, "limit": default_limit}) - @pytest.mark.tags(CaseLabel.L0) - def test_search_with_hit_vectors(self, nq, dim, auto_id): + def test_search_with_hit_vectors(self, nq, dim, auto_id, enable_dynamic_field): """ target: test search with vectors in collections method: create connections,collection insert and search vectors in collections expected: search successfully with limit(topK) and can be hit at top 1 (min distance is 0) """ collection_w, _vectors, _, insert_ids = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim)[0:4] + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # get vectors that inserted into collection - vectors = np.array(_vectors[0]).tolist() - vectors = [vectors[i][-1] for i in range(nq)] + vectors = [] + if enable_dynamic_field: + for vector in _vectors[0]: + vector = vector[ct.default_float_vec_field_name] + vectors.append(vector) + else: + vectors = np.array(_vectors[0]).tolist() + vectors = [vectors[i][-1] for i in range(nq)] log.info("test_search_with_hit_vectors: searching collection %s" % collection_w.name) search_res, _ = collection_w.search(vectors[:nq], default_search_field, default_search_params, default_limit, @@ -1393,7 +1441,7 @@ class TestCollectionSearch(TestcaseBase): }) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_empty_vectors(self, dim, auto_id, _async): + def test_search_with_empty_vectors(self, dim, auto_id, _async, enable_dynamic_field): """ target: test search with empty query vector method: search using empty query vector @@ -1401,7 +1449,8 @@ class TestCollectionSearch(TestcaseBase): """ # 1. initialize without data collection_w = self.init_collection_general(prefix, True, - auto_id=auto_id, dim=dim)[0] + auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0] # 2. search collection without data log.info("test_search_with_empty_vectors: Searching collection %s " "using empty vector" % collection_w.name) @@ -1412,7 +1461,7 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - def test_search_with_ndarray(self, dim, auto_id, _async): + def test_search_with_ndarray(self, dim, auto_id, _async, enable_dynamic_field): """ target: test search with ndarray method: search using ndarray data @@ -1421,7 +1470,8 @@ class TestCollectionSearch(TestcaseBase): # 1. initialize without data collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search collection without data log.info("test_search_with_ndarray: Searching collection %s " "using ndarray" % collection_w.name) @@ -1436,7 +1486,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("search_params", [{}, {"params": {}}, {"params": {"nprobe": 10}}]) - def test_search_normal_default_params(self, dim, auto_id, search_params, _async): + def test_search_normal_default_params(self, dim, auto_id, search_params, _async, enable_dynamic_field): """ target: test search normal case method: create connection, collection, insert and search @@ -1444,11 +1494,13 @@ class TestCollectionSearch(TestcaseBase): """ # 1. 
initialize with data collection_w, _, _, insert_ids = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim)[0:4] + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. rename collection new_collection_name = cf.gen_unique_str(prefix + "new") self.utility_wrap.rename_collection(collection_w.name, new_collection_name) - collection_w = self.init_collection_general(auto_id=auto_id, dim=dim, name=new_collection_name)[0] + collection_w = self.init_collection_general(auto_id=auto_id, dim=dim, name=new_collection_name, + enable_dynamic_field=enable_dynamic_field)[0] # 3. search log.info("test_search_normal_default_params: searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] @@ -1478,8 +1530,7 @@ class TestCollectionSearch(TestcaseBase): partition_num = 1 collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, partition_num, - auto_id=auto_id, - dim=dim)[0:4] + auto_id=auto_id, dim=dim)[0:4] # 2. search all the partitions before partition deletion vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] log.info("test_search_before_after_delete: searching before deleting partitions") @@ -1513,7 +1564,7 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L1) - def test_search_collection_after_release_load(self, nb, nq, dim, auto_id, _async): + def test_search_collection_after_release_load(self, nb, nq, dim, auto_id, _async, enable_dynamic_field): """ target: search the pre-released collection after load method: 1. create collection @@ -1525,7 +1576,9 @@ class TestCollectionSearch(TestcaseBase): # 1. initialize without data collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, 1, auto_id=auto_id, - dim=dim)[0:5] + dim=dim, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. release collection log.info("test_search_collection_after_release_load: releasing collection %s" % collection_w.name) collection_w.release() @@ -1545,7 +1598,7 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L1) - def test_search_load_flush_load(self, nb, nq, dim, auto_id, _async): + def test_search_load_flush_load(self, nb, nq, dim, auto_id, _async, enable_dynamic_field): """ target: test search when load before flush method: 1. insert data and load @@ -1554,9 +1607,11 @@ class TestCollectionSearch(TestcaseBase): expected: search success with limit(topK) """ # 1. initialize with data - collection_w = self.init_collection_general(prefix, auto_id=auto_id, dim=dim)[0] + collection_w = self.init_collection_general(prefix, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0] # 2. insert data - insert_ids = cf.insert_data(collection_w, nb, auto_id=auto_id, dim=dim)[3] + insert_ids = cf.insert_data(collection_w, nb, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[3] # 3. 
load data collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index) collection_w.load() @@ -1708,7 +1763,7 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L1) - def test_search_min_dim(self, auto_id, _async): + def test_search_min_dim(self, auto_id, _async, enable_dynamic_field): """ target: test search with min configuration method: create connection, collection, insert and search with dim=1 @@ -1717,7 +1772,8 @@ class TestCollectionSearch(TestcaseBase): # 1. initialize with data collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 100, auto_id=auto_id, - dim=min_dim)[0:4] + dim=min_dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search nq = 2 log.info("test_search_min_dim: searching collection %s" % collection_w.name) @@ -1791,7 +1847,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("M", [4, 64]) @pytest.mark.parametrize("efConstruction", [8, 512]) - def test_search_HNSW_index_with_max_ef(self, M, efConstruction, auto_id, _async): + def test_search_HNSW_index_with_max_ef(self, M, efConstruction, auto_id, _async, enable_dynamic_field): """ target: test search HNSW index with max ef method: connect milvus, create collection , insert, create index, load and search @@ -1802,7 +1858,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False)[0:5] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] HNSW_index_params = {"M": M, "efConstruction": efConstruction} HNSW_index = {"index_type": "HNSW", "params": HNSW_index_params, "metric_type": "L2"} collection_w.create_index("float_vector", HNSW_index) @@ -1822,7 +1880,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("M", [4, 64]) @pytest.mark.parametrize("efConstruction", [8, 512]) - def test_search_HNSW_index_with_redundant_param(self, M, efConstruction, auto_id, _async): + def test_search_HNSW_index_with_redundant_param(self, M, efConstruction, auto_id, _async, enable_dynamic_field): """ target: test search HNSW index with redundant param method: connect milvus, create collection , insert, create index, load and search @@ -1833,7 +1891,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False)[0:5] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] HNSW_index_params = {"M": M, "efConstruction": efConstruction, "nlist": 100} # nlist is of no use HNSW_index = {"index_type": "HNSW", "params": HNSW_index_params, "metric_type": "L2"} collection_w.create_index("float_vector", HNSW_index) @@ -1854,7 +1914,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.parametrize("M", [4, 64]) @pytest.mark.parametrize("efConstruction", [8, 512]) @pytest.mark.parametrize("limit", [1, 10, 3000]) - def test_search_HNSW_index_with_min_ef(self, M, efConstruction, limit, auto_id, _async): + def test_search_HNSW_index_with_min_ef(self, M, efConstruction, limit, auto_id, _async, enable_dynamic_field): """ target: test search HNSW index with min ef method: connect milvus, create collection , insert, create index, load and search @@ -1866,7 +1926,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, 
insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False)[0:5] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] HNSW_index_params = {"M": M, "efConstruction": efConstruction} HNSW_index = {"index_type": "HNSW", "params": HNSW_index_params, "metric_type": "L2"} collection_w.create_index("float_vector", HNSW_index) @@ -1888,7 +1950,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:6], ct.default_index_params[:6])) - def test_search_after_different_index_with_params(self, dim, index, params, auto_id, _async): + def test_search_after_different_index_with_params(self, dim, index, params, auto_id, _async, enable_dynamic_field): """ target: test search after different index method: test search after different index and corresponding search params @@ -1898,7 +1960,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False)[0:5] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. create index and load if params.get("m"): if (dim % params["m"]) != 0: @@ -1928,7 +1992,8 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.parametrize("index, params", zip(ct.all_index_types[8:10], ct.default_index_params[8:10])) - def test_search_after_different_index_with_params_gpu(self, dim, index, params, auto_id, _async): + def test_search_after_different_index_with_params_gpu(self, dim, index, params, auto_id, _async, + enable_dynamic_field): """ target: test search after different index method: test search after different index and corresponding search params @@ -1938,7 +2003,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False)[0:5] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. create index and load if params.get("m"): if (dim % params["m"]) != 0: @@ -2046,7 +2113,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:6], ct.default_index_params[:6])) - def test_search_after_index_different_metric_type(self, dim, index, params, auto_id, _async): + def test_search_after_index_different_metric_type(self, dim, index, params, auto_id, _async, enable_dynamic_field): """ target: test search with different metric type method: test search with different metric type @@ -2056,7 +2123,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False)[0:5] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. 
create different index if params.get("m"): if (dim % params["m"]) != 0: @@ -2088,7 +2157,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.parametrize("index, params", zip(ct.all_index_types[8:10], ct.default_index_params[8:10])) - def test_search_after_index_different_metric_type_gpu(self, dim, index, params, auto_id, _async): + def test_search_after_index_different_metric_type_gpu(self, dim, index, params, auto_id, _async, enable_dynamic_field): """ target: test search with different metric type method: test search with different metric type @@ -2098,7 +2167,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False)[0:5] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. create different index if params.get("m"): if (dim % params["m"]) != 0: @@ -2127,7 +2198,7 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - def test_search_collection_multiple_times(self, nb, nq, dim, auto_id, _async): + def test_search_collection_multiple_times(self, nb, nq, dim, auto_id, _async, enable_dynamic_field): """ target: test search for multiple times method: search for multiple times @@ -2136,7 +2207,9 @@ class TestCollectionSearch(TestcaseBase): # 1. initialize with data collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, auto_id=auto_id, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field= + enable_dynamic_field)[0:4] # 2. search for multiple times vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] for i in range(search_num): @@ -2151,7 +2224,7 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - def test_search_sync_async_multiple_times(self, nb, nq, dim, auto_id): + def test_search_sync_async_multiple_times(self, nb, nq, dim, auto_id, enable_dynamic_field): """ target: test async search after sync search case method: create connection, collection, insert, @@ -2161,7 +2234,9 @@ class TestCollectionSearch(TestcaseBase): # 1. initialize with data collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, auto_id=auto_id, - dim=dim)[0:5] + dim=dim, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. search log.info("test_search_sync_async_multiple_times: searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -2208,7 +2283,7 @@ class TestCollectionSearch(TestcaseBase): param=search_params, limit=1) @pytest.mark.tags(CaseLabel.L1) - def test_search_index_one_partition(self, nb, auto_id, _async): + def test_search_index_one_partition(self, nb, auto_id, _async, enable_dynamic_field): """ target: test search from partition method: search from one partition @@ -2218,7 +2293,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, partition_num=1, auto_id=auto_id, - is_index=False)[0:5] + is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. 
create index default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} @@ -2278,7 +2355,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("partition_names", [["(.*)"], ["search(.*)"]]) - def test_search_index_partitions_fuzzy(self, nb, nq, dim, partition_names, auto_id, _async): + def test_search_index_partitions_fuzzy(self, nb, nq, dim, partition_names, auto_id, _async, enable_dynamic_field): """ target: test search from partitions method: search from partitions with fuzzy @@ -2290,7 +2367,9 @@ class TestCollectionSearch(TestcaseBase): partition_num=1, auto_id=auto_id, dim=dim, - is_index=False)[0:4] + is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:4] vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] # 2. create index nlist = 128 @@ -2616,7 +2695,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expression", cf.gen_normal_expressions()) - def test_search_with_expression(self, dim, expression, _async): + def test_search_with_expression(self, dim, expression, _async, enable_dynamic_field): """ target: test search with different expressions method: test search with different expressions @@ -2626,15 +2705,21 @@ class TestCollectionSearch(TestcaseBase): nb = 1000 collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, dim=dim, - is_index=False)[0:4] + is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:4] # filter result with expression in collection _vectors = _vectors[0] expression = expression.replace("&&", "and").replace("||", "or") filter_ids = [] for i, _id in enumerate(insert_ids): - int64 = _vectors.int64[i] - float = _vectors.float[i] + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] if not expression or eval(expression): filter_ids.append(_id) @@ -2665,7 +2750,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("bool_type", [True, False, "true", "false"]) - def test_search_with_expression_bool(self, dim, auto_id, _async, bool_type): + def test_search_with_expression_bool(self, dim, auto_id, _async, bool_type, enable_dynamic_field): """ target: test search with different bool expressions method: search with different bool expressions @@ -2676,7 +2761,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, is_all_data_type=True, auto_id=auto_id, - dim=dim, is_index=False)[0:4] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:4] # 2. create index index_param = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 100}} @@ -2691,8 +2778,12 @@ class TestCollectionSearch(TestcaseBase): if bool_type == "false": bool_type_cmp = False for i, _id in enumerate(insert_ids): - if _vectors[0][f"{default_bool_field_name}"][i] == bool_type_cmp: - filter_ids.append(_id) + if enable_dynamic_field: + if _vectors[0][i][f"{default_bool_field_name}"] == bool_type_cmp: + filter_ids.append(_id) + else: + if _vectors[0][f"{default_bool_field_name}"][i] == bool_type_cmp: + filter_ids.append(_id) # 4. 
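# Expected-result pattern used by the expression tests above: translate the
# expression to Python syntax, then eval it row by row. Compact sketch (pure
# Python; "rows" stands for the inserted data in row-dict form, the layout
# produced when enable_dynamic_field is on, with the default field names):
def expected_match_ids(rows, insert_ids, expression):
    expression = expression.replace("&&", "and").replace("||", "or")
    matched = []
    for row, _id in zip(rows, insert_ids):
        int64, float = row["int64"], row["float"]  # noqa: F841 - read by eval
        if not expression or eval(expression):
            matched.append(_id)
    return matched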
search with different expressions expression = f"{default_bool_field_name} == {bool_type}" @@ -2717,8 +2808,9 @@ class TestCollectionSearch(TestcaseBase): assert set(ids).issubset(filter_ids_set) @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.xfail(reason="issue 24514") @pytest.mark.parametrize("expression", cf.gen_normal_expressions_field(default_float_field_name)) - def test_search_with_expression_auto_id(self, dim, expression, _async): + def test_search_with_expression_auto_id(self, dim, expression, _async, enable_dynamic_field): """ target: test search with different expressions method: test search with different expressions with auto id @@ -2729,14 +2821,19 @@ class TestCollectionSearch(TestcaseBase): collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, auto_id=True, dim=dim, - is_index=False)[0:4] + is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:4] # filter result with expression in collection _vectors = _vectors[0] expression = expression.replace("&&", "and").replace("||", "or") filter_ids = [] for i, _id in enumerate(insert_ids): - exec(f"{default_float_field_name} = _vectors.{default_float_field_name}[i]") + if enable_dynamic_field: + exec(f"{default_float_field_name} = _vectors[i][f'{default_float_field_name}']") + else: + exec(f"{default_float_field_name} = _vectors.{default_float_field_name}[i]") if not expression or eval(expression): filter_ids.append(_id) @@ -2766,7 +2863,7 @@ class TestCollectionSearch(TestcaseBase): assert set(ids).issubset(filter_ids_set) @pytest.mark.tags(CaseLabel.L2) - def test_search_expression_all_data_type(self, nb, nq, dim, auto_id, _async): + def test_search_expression_all_data_type(self, nb, nq, dim, auto_id, _async, enable_dynamic_field): """ target: test search using all supported data types method: search using different supported data types @@ -2776,7 +2873,9 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, is_all_data_type=True, auto_id=auto_id, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field= + enable_dynamic_field)[0:4] # 2. search log.info("test_search_expression_all_data_type: Searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -2907,7 +3006,7 @@ class TestCollectionSearch(TestcaseBase): "output_fields": []}) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_output_field(self, auto_id, _async): + def test_search_with_output_field(self, auto_id, _async, enable_dynamic_field): """ target: test search with output fields method: search with one output_field @@ -2915,7 +3014,9 @@ class TestCollectionSearch(TestcaseBase): """ # 1. initialize with data collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - auto_id=auto_id)[0:4] + auto_id=auto_id, + enable_dynamic_field= + enable_dynamic_field)[0:4] # 2. search log.info("test_search_with_output_field: Searching collection %s" % collection_w.name) @@ -2931,7 +3032,7 @@ class TestCollectionSearch(TestcaseBase): "output_fields": [default_int64_field_name]})[0] @pytest.mark.tags(CaseLabel.L1) - def test_search_with_output_vector_field(self, auto_id, _async): + def test_search_with_output_vector_field(self, auto_id, _async, enable_dynamic_field): """ target: test search with output fields method: search with one output_field @@ -2939,7 +3040,9 @@ class TestCollectionSearch(TestcaseBase): """ # 1. 
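# Output fields come back attached to each hit. A read-back sketch using the
# same helpers as the surrounding tests (assumes a loaded collection from
# init_collection_general; limit and field names as elsewhere in this file):
res, _ = collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
                             default_search_exp,
                             output_fields=[default_int64_field_name])
for hit in res[0]:
    print(hit.id, hit.entity.get(default_int64_field_name))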
initialize with data collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - auto_id=auto_id)[0:4] + auto_id=auto_id, + enable_dynamic_field= + enable_dynamic_field)[0:4] # 2. search log.info("test_search_with_output_field: Searching collection %s" % collection_w.name) collection_w.search(vectors[:default_nq], default_search_field, @@ -3097,7 +3200,7 @@ class TestCollectionSearch(TestcaseBase): assert len(res[0][i].entity.float_vector) == len(data[field_name][res[0][i].id]) @pytest.mark.tags(CaseLabel.L2) - def test_search_output_vector_field_and_scalar_field(self): + def test_search_output_vector_field_and_scalar_field(self, enable_dynamic_field): """ target: test search with output vector field and scalar field method: 1. initialize a collection @@ -3106,7 +3209,7 @@ class TestCollectionSearch(TestcaseBase): expected: search success """ # 1. initialize a collection - collection_w = self.init_collection_general(prefix, True)[0] + collection_w = self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0] # 2. search with output field vector output_fields = [default_float_field_name, default_string_field_name, default_search_field] @@ -3119,7 +3222,7 @@ class TestCollectionSearch(TestcaseBase): "output_fields": output_fields}) @pytest.mark.tags(CaseLabel.L2) - def test_search_output_vector_field_and_pk_field(self): + def test_search_output_vector_field_and_pk_field(self, enable_dynamic_field): """ target: test search with output vector field and pk field method: 1. initialize a collection @@ -3128,7 +3231,7 @@ class TestCollectionSearch(TestcaseBase): expected: search success """ # 1. initialize a collection - collection_w = self.init_collection_general(prefix, True)[0] + collection_w = self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0] # 2. search with output field vector output_fields = [default_int64_field_name, default_string_field_name, default_search_field] @@ -3179,9 +3282,10 @@ class TestCollectionSearch(TestcaseBase): assert str(vectorInsert) == vectorRes @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("wildcard_output_fields", [["*"], ["*", default_float_field_name], ["*", default_search_field], + @pytest.mark.parametrize("wildcard_output_fields", [["*"], ["*", default_float_field_name], + ["*", default_search_field], ["%"], ["%", default_float_field_name], ["*", "%"]]) - def test_search_with_output_field_wildcard(self, wildcard_output_fields, auto_id, _async): + def test_search_with_output_field_wildcard(self, wildcard_output_fields, auto_id, _async, enable_dynamic_field): """ target: test search with output fields using wildcard method: search with one output_field (wildcard) @@ -3189,7 +3293,9 @@ class TestCollectionSearch(TestcaseBase): """ # 1. initialize with data collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - auto_id=auto_id)[0:4] + auto_id=auto_id, + enable_dynamic_field= + enable_dynamic_field)[0:4] # 2. 
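# get_wildcard_output_field_names expands the wildcards before searching; in
# this suite "*" stands for all scalar fields and "%" for all vector fields.
# Sketch of that expansion (hedged: the real helper lives in common_func.py,
# outside this hunk, and may differ in detail):
def expand_output_fields(schema_fields, requested):
    vec_types = (DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR)
    names = []
    for item in requested:
        if item == "*":
            names += [f.name for f in schema_fields if f.dtype not in vec_types]
        elif item == "%":
            names += [f.name for f in schema_fields if f.dtype in vec_types]
        else:
            names.append(item)
    return list(dict.fromkeys(names))  # de-duplicate, keep order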
search log.info("test_search_with_output_field_wildcard: Searching collection %s" % collection_w.name) output_fields = cf.get_wildcard_output_field_names(collection_w, wildcard_output_fields) @@ -3233,7 +3339,7 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - def test_search_concurrent_multi_threads(self, nb, nq, dim, auto_id, _async): + def test_search_concurrent_multi_threads(self, nb, nq, dim, auto_id, _async, enable_dynamic_field): """ target: test concurrent search with multi-processes method: search with 10 processes, each process uses dependent connection @@ -3244,7 +3350,9 @@ class TestCollectionSearch(TestcaseBase): threads = [] collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb, auto_id=auto_id, - dim=dim)[0:5] + dim=dim, + enable_dynamic_field= + enable_dynamic_field)[0:5] def search(collection_w): vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -3309,7 +3417,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("round_decimal", [0, 1, 2, 3, 4, 5, 6]) - def test_search_round_decimal(self, round_decimal): + def test_search_round_decimal(self, round_decimal, enable_dynamic_field): """ target: test search with valid round decimal method: search with valid round decimal @@ -3320,7 +3428,8 @@ class TestCollectionSearch(TestcaseBase): tmp_nq = 1 tmp_limit = 5 # 1. initialize with data - collection_w = self.init_collection_general(prefix, True, nb=tmp_nb)[0] + collection_w = self.init_collection_general(prefix, True, nb=tmp_nb, + enable_dynamic_field=enable_dynamic_field)[0] # 2. search log.info("test_search_round_decimal: Searching collection %s" % collection_w.name) res, _ = collection_w.search(vectors[:tmp_nq], default_search_field, @@ -3339,7 +3448,7 @@ class TestCollectionSearch(TestcaseBase): assert math.isclose(dis_actual, dis_expect, rel_tol=0, abs_tol=abs_tol) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_expression_large(self, dim): + def test_search_with_expression_large(self, dim, enable_dynamic_field): """ target: test search with large expression method: test search with large expression @@ -3349,8 +3458,10 @@ class TestCollectionSearch(TestcaseBase): nb = 10000 collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, dim=dim, - is_index=False)[0:4] - + is_index=False, + enable_dynamic_field= + enable_dynamic_field, + with_json=False)[0:4] # 2. create index index_param = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 100}} @@ -3373,7 +3484,7 @@ class TestCollectionSearch(TestcaseBase): }) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_expression_large_two(self, dim): + def test_search_with_expression_large_two(self, dim, enable_dynamic_field): """ target: test search with large expression method: test one of the collection ids to another collection search for it, with the large expression @@ -3383,15 +3494,15 @@ class TestCollectionSearch(TestcaseBase): nb = 10000 collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, dim=dim, - is_index=False)[0:4] - + is_index=False, + enable_dynamic_field=enable_dynamic_field, + with_json=False)[0:4] # 2. 
create index index_param = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 100}} collection_w.create_index("float_vector", index_param) collection_w.load() - nums = 5000 vectors = [[random.random() for _ in range(dim)] for _ in range(nums)] vectors_id = [random.randint(0, nums)for _ in range(nums)] @@ -3406,7 +3517,7 @@ class TestCollectionSearch(TestcaseBase): }) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_consistency_bounded(self, nq, dim, auto_id, _async): + def test_search_with_consistency_bounded(self, nq, dim, auto_id, _async, enable_dynamic_field): """ target: test search with different consistency level method: 1. create a collection @@ -3418,7 +3529,9 @@ class TestCollectionSearch(TestcaseBase): nb_old = 500 collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, auto_id=auto_id, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field= + enable_dynamic_field)[0:4] # 2. search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] collection_w.search(vectors[:nq], default_search_field, @@ -3438,7 +3551,8 @@ class TestCollectionSearch(TestcaseBase): nb_new = 400 _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new, auto_id=auto_id, dim=dim, - insert_offset=nb_old) + insert_offset=nb_old, + enable_dynamic_field=enable_dynamic_field) insert_ids.extend(insert_ids_new) collection_w.search(vectors[:nq], default_search_field, @@ -3448,7 +3562,7 @@ class TestCollectionSearch(TestcaseBase): ) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_consistency_strong(self, nq, dim, auto_id, _async): + def test_search_with_consistency_strong(self, nq, dim, auto_id, _async, enable_dynamic_field): """ target: test search with different consistency level method: 1. create a collection @@ -3460,7 +3574,8 @@ class TestCollectionSearch(TestcaseBase): nb_old = 500 collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, auto_id=auto_id, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] collection_w.search(vectors[:nq], default_search_field, @@ -3475,7 +3590,8 @@ class TestCollectionSearch(TestcaseBase): nb_new = 400 _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new, auto_id=auto_id, dim=dim, - insert_offset=nb_old) + insert_offset=nb_old, + enable_dynamic_field=enable_dynamic_field) insert_ids.extend(insert_ids_new) kwargs = {} consistency_level = kwargs.get("consistency_level", CONSISTENCY_STRONG) @@ -3492,7 +3608,7 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_consistency_eventually(self, nq, dim, auto_id, _async): + def test_search_with_consistency_eventually(self, nq, dim, auto_id, _async, enable_dynamic_field): """ target: test search with different consistency level method: 1. create a collection @@ -3504,7 +3620,8 @@ class TestCollectionSearch(TestcaseBase): nb_old = 500 collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, auto_id=auto_id, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. 
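# All four consistency tests share one shape: search, insert nb_new extra
# rows, then search again and check whether the new rows are visible. The
# level is passed straight through as a search kwarg; a sketch
# (CONSISTENCY_STRONG is the constant already used in this file):
collection_w.search(vectors[:default_nq], default_search_field,
                    default_search_params, default_limit,
                    default_search_exp,
                    consistency_level=CONSISTENCY_STRONG)
# Strong guarantees the fresh inserts are searchable; Bounded/Eventually may
# lag, so those variants only assert on the originally inserted ids.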
search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] collection_w.search(vectors[:nq], default_search_field, @@ -3518,7 +3635,8 @@ class TestCollectionSearch(TestcaseBase): nb_new = 400 _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new, auto_id=auto_id, dim=dim, - insert_offset=nb_old) + insert_offset=nb_old, + enable_dynamic_field=enable_dynamic_field) insert_ids.extend(insert_ids_new) kwargs = {} consistency_level = kwargs.get("consistency_level", CONSISTENCY_EVENTUALLY) @@ -3530,7 +3648,7 @@ class TestCollectionSearch(TestcaseBase): ) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_consistency_session(self, nq, dim, auto_id, _async): + def test_search_with_consistency_session(self, nq, dim, auto_id, _async, enable_dynamic_field): """ target: test search with different consistency level method: 1. create a collection @@ -3542,7 +3660,8 @@ class TestCollectionSearch(TestcaseBase): nb_old = 500 collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, auto_id=auto_id, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] collection_w.search(vectors[:nq], default_search_field, @@ -3561,7 +3680,8 @@ class TestCollectionSearch(TestcaseBase): nb_new = 400 _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new, auto_id=auto_id, dim=dim, - insert_offset=nb_old) + insert_offset=nb_old, + enable_dynamic_field=enable_dynamic_field) insert_ids.extend(insert_ids_new) collection_w.search(vectors[:nq], default_search_field, default_search_params, limit, @@ -3679,7 +3799,7 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("partition_name", ["_PartiTi0n", "pArt1_ti0n"]) - def test_search_partition_naming_rules_without_index(self, nq, dim, auto_id, partition_name): + def test_search_partition_naming_rules_without_index(self, nq, dim, auto_id, partition_name, enable_dynamic_field): """ target: test search collection naming rules method: 1. Connect milvus @@ -3694,9 +3814,11 @@ class TestCollectionSearch(TestcaseBase): self._connect() collection_w, _, _, insert_ids = self.init_collection_general(prefix, False, nb, auto_id=auto_id, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] collection_w.create_partition(partition_name) - insert_ids = cf.insert_data(collection_w, nb, auto_id=auto_id, dim=dim)[3] + insert_ids = cf.insert_data(collection_w, nb, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[3] collection_w.load() vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] collection_w.search(vectors[:nq], default_search_field, default_search_params, @@ -3709,7 +3831,8 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("partition_name", ["_PartiTi0n", "pArt1_ti0n"]) @pytest.mark.parametrize("index_name", ["_1ndeX", "In_0"]) - def test_search_partition_naming_rules_with_index(self, nq, dim, auto_id, partition_name, index_name): + def test_search_partition_naming_rules_with_index(self, nq, dim, auto_id, partition_name, index_name, + enable_dynamic_field): """ target: test search collection naming rules method: 1. 
Connect milvus @@ -3724,9 +3847,11 @@ class TestCollectionSearch(TestcaseBase): self._connect() collection_w, _, _, insert_ids = self.init_collection_general(prefix, False, nb, auto_id=auto_id, - dim=dim, is_index=False)[0:4] + dim=dim, is_index=False, + enable_dynamic_field=enable_dynamic_field)[0:4] collection_w.create_partition(partition_name) - insert_ids = cf.insert_data(collection_w, nb, auto_id=auto_id, dim=dim)[3] + insert_ids = cf.insert_data(collection_w, nb, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[3] collection_w.create_index(default_search_field, default_index_params, index_name=index_name) collection_w.load() vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] @@ -4190,8 +4315,12 @@ class TestSearchString(TestcaseBase): def _async(self, request): yield request.param + @pytest.fixture(scope="function", params=[True, False]) + def enable_dynamic_field(self, request): + yield request.param + @pytest.mark.tags(CaseLabel.L2) - def test_search_string_field_not_primary(self, auto_id, _async): + def test_search_string_field_not_primary(self, auto_id, _async, enable_dynamic_field): """ target: test search with string expr and string field is not primary method: create collection and insert data @@ -4201,7 +4330,8 @@ class TestSearchString(TestcaseBase): """ # 1. initialize with data collection_w, _, _, insert_ids = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim)[0:4] + self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search log.info("test_search_string_field_not_primary: searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] @@ -4219,7 +4349,7 @@ class TestSearchString(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - def test_search_string_field_is_primary_true(self, dim, _async): + def test_search_string_field_is_primary_true(self, dim, _async, enable_dynamic_field): """ target: test search with string expr and string field is primary method: create collection and insert data @@ -4229,7 +4359,8 @@ class TestSearchString(TestcaseBase): """ # 1. initialize with data collection_w, _, _, insert_ids = \ - self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name)[0:4] + self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search log.info("test_search_string_field_is_primary_true: searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] @@ -4247,7 +4378,7 @@ class TestSearchString(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - def test_range_search_string_field_is_primary_true(self, dim, _async): + def test_range_search_string_field_is_primary_true(self, dim, _async, enable_dynamic_field): """ target: test range search with string expr and string field is primary method: create collection and insert data @@ -4257,7 +4388,8 @@ class TestSearchString(TestcaseBase): """ # 1. initialize with data collection_w, _, _, insert_ids = \ - self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name)[0:4] + self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. 
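# primary_field=ct.default_string_field_name switches the generated schema to
# a VARCHAR primary key. Raw-pymilvus equivalent (a sketch; the max_length
# value is illustrative, anything up to 65535 is accepted):
from pymilvus import DataType, FieldSchema

pk = FieldSchema("varchar", DataType.VARCHAR, is_primary=True, max_length=100)
# a string PK supports the same expressions used below, e.g. varchar >= ""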
search log.info("test_search_string_field_is_primary_true: searching collection %s" % collection_w.name) range_search_params = {"metric_type": "L2", "params": {"nprobe": 10, "radius": 1000, @@ -4277,7 +4409,7 @@ class TestSearchString(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - def test_search_string_mix_expr(self, dim, auto_id, _async): + def test_search_string_mix_expr(self, dim, auto_id, _async, enable_dynamic_field): """ target: test search with mix string and int expr method: create collection and insert data @@ -4287,7 +4419,8 @@ class TestSearchString(TestcaseBase): """ # 1. initialize with data collection_w, _, _, insert_ids = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim)[0:4] + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search log.info("test_search_string_mix_expr: searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] @@ -4329,7 +4462,7 @@ class TestSearchString(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(ct.default_string_field_name)) - def test_search_with_different_string_expr(self, dim, expression, _async): + def test_search_with_different_string_expr(self, dim, expression, _async, enable_dynamic_field): """ target: test search with different string expressions method: test search with different string expressions @@ -4339,15 +4472,21 @@ class TestSearchString(TestcaseBase): nb = 1000 collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, dim=dim, - is_index=False)[0:4] + is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:4] # filter result with expression in collection _vectors = _vectors[0] filter_ids = [] expression = expression.replace("&&", "and").replace("||", "or") for i, _id in enumerate(insert_ids): - int64 = _vectors.int64[i] - varchar = _vectors.varchar[i] + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + varchar = _vectors[i][ct.default_string_field_name] + else: + int64 = _vectors.int64[i] + varchar = _vectors.varchar[i] if not expression or eval(expression): filter_ids.append(_id) @@ -4548,8 +4687,7 @@ class TestSearchString(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": default_limit, - "_async": _async} - ) + "_async": _async}) @pytest.mark.tags(CaseLabel.L2) def test_search_string_field_is_primary_insert_empty(self, _async): @@ -4571,7 +4709,6 @@ class TestSearchString(TestcaseBase): collection_w.load() - search_string_exp = "varchar >= \"\"" limit = 1 @@ -4617,7 +4754,6 @@ class TestSearchString(TestcaseBase): collection_w.create_index("float_vector", index_param) collection_w.load() - search_string_exp = "varchar >= \"\"" # 3. 
search @@ -4652,6 +4788,10 @@ class TestSearchPagination(TestcaseBase): def _async(self, request): yield request.param + @pytest.fixture(scope="function", params=[True, False]) + def enable_dynamic_field(self, request): + yield request.param + """ ****************************************************************** # The following are valid base cases @@ -4660,7 +4800,7 @@ class TestSearchPagination(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("limit", [10, 20]) - def test_search_with_pagination(self, offset, auto_id, limit, _async): + def test_search_with_pagination(self, offset, auto_id, limit, _async, enable_dynamic_field): """ target: test search with pagination method: 1. connect and create a collection @@ -4670,7 +4810,8 @@ class TestSearchPagination(TestcaseBase): expected: search successfully and ids is correct """ # 1. create a collection - collection_w = self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim)[0] + collection_w = self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim, + enable_dynamic_field=enable_dynamic_field)[0] # 2. search pagination with offset search_param = {"metric_type": "L2", "params": {"nprobe": 10}, "offset": offset} vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] @@ -4694,7 +4835,7 @@ class TestSearchPagination(TestcaseBase): assert set(search_res[0].ids) == set(res[0].ids[offset:]) @pytest.mark.tags(CaseLabel.L1) - def test_search_string_with_pagination(self, offset, auto_id, _async): + def test_search_string_with_pagination(self, offset, auto_id, _async, enable_dynamic_field): """ target: test search string with pagination method: 1. connect and create a collection @@ -4705,7 +4846,8 @@ class TestSearchPagination(TestcaseBase): """ # 1. create a collection collection_w, _, _, insert_ids = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim)[0:4] + self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search search_param = {"metric_type": "L2", "params": {"nprobe": 10}, "offset": offset} vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] @@ -4801,7 +4943,7 @@ class TestSearchPagination(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("expression", cf.gen_normal_expressions()) - def test_search_pagination_with_expression(self, offset, expression, _async): + def test_search_pagination_with_expression(self, offset, expression, _async, enable_dynamic_field): """ target: test search pagination with expression method: create connection, collection, insert and search with expression @@ -4811,14 +4953,20 @@ class TestSearchPagination(TestcaseBase): nb = 500 dim = 8 collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb=nb, - dim=dim)[0:4] + dim=dim, + enable_dynamic_field= + enable_dynamic_field)[0:4] # filter result with expression in collection _vectors = _vectors[0] expression = expression.replace("&&", "and").replace("||", "or") filter_ids = [] for i, _id in enumerate(insert_ids): - int64 = _vectors.int64[i] - float = _vectors.float[i] + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] if not expression or eval(expression): filter_ids.append(_id) # 2. 
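# Invariant the pagination tests assert: a search with offset=k and limit=n
# returns the same ids as positions [k, k+n) of one search with limit=k+n.
# Sketch (assumes a loaded collection; k=10 and n=10 are illustrative):
page_param = {"metric_type": "L2", "params": {"nprobe": 10}, "offset": 10}
full_param = {"metric_type": "L2", "params": {"nprobe": 10}}
page_res, _ = collection_w.search(vectors[:1], default_search_field,
                                  page_param, 10, default_search_exp)
full_res, _ = collection_w.search(vectors[:1], default_search_field,
                                  full_param, 20, default_search_exp)
assert set(page_res[0].ids) == set(full_res[0].ids[10:])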
search
@@ -5142,8 +5290,12 @@ class TestSearchDiskann(TestcaseBase):
     def _async(self, request):
         yield request.param
 
+    @pytest.fixture(scope="function", params=[True, False])
+    def enable_dynamic_field(self, request):
+        yield request.param
+
     @pytest.mark.tags(CaseLabel.L2)
-    def test_search_with_diskann_index(self, dim, auto_id, _async):
+    def test_search_with_diskann_index(self, dim, auto_id, _async, enable_dynamic_field):
         """
         target: test search after creating DISKANN index
         method: 1. create collection, insert data, primary_field is int field
                 2. create diskann index, then load
                 3. search
         expected: search successfully
         """
         # 1. initialize with data
         nb = 2000
         collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id,
                                                                       nb=nb, dim=dim,
-                                                                      is_index=False)[0:4]
+                                                                      is_index=False,
+                                                                      enable_dynamic_field=enable_dynamic_field)[0:4]
 
         # 2. create index
         default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
@@ -5273,7 +5426,7 @@ class TestSearchDiskann(TestcaseBase):
                             )
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_search_with_diskann_with_string_pk(self, dim):
+    def test_search_with_diskann_with_string_pk(self, dim, enable_dynamic_field):
         """
         target: test search with DISKANN index and a string primary key
         method: 1. create collection, insert data, primary_field is string field
                 2. create diskann index
                 3. search
         expected: search successfully
         """
         # 1. initialize with data
         collection_w, _, _, insert_ids = \
-            self.init_collection_general(prefix, True, auto_id=False, dim=dim, is_index=False, primary_field=ct.default_string_field_name)[0:4]
+            self.init_collection_general(prefix, True, auto_id=False, dim=dim, is_index=False,
+                                         primary_field=ct.default_string_field_name,
+                                         enable_dynamic_field=enable_dynamic_field)[0:4]
         # 2. create index
         default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
         collection_w.create_index(ct.default_float_vec_field_name, default_index)
@@ -5304,7 +5459,7 @@ class TestSearchDiskann(TestcaseBase):
                             )
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_search_with_delete_data(self, dim, auto_id, _async):
+    def test_search_with_delete_data(self, dim, auto_id, _async, enable_dynamic_field):
         """
         target: test search after deleting data
         method: 1. create collection, insert data
                 2. create index and load
                 3. delete half of the data
                 4. search
         expected: deleted ids are not in the search result
         """
         # 1. initialize with data
         collection_w, _, _, ids = \
-            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False)[0:4]
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False,
+                                         enable_dynamic_field=enable_dynamic_field)[0:4]
         # 2. create index
         default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
         collection_w.create_index(ct.default_float_vec_field_name, default_index)
@@ -5345,7 +5501,7 @@ class TestSearchDiskann(TestcaseBase):
                             )
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_search_with_diskann_and_more_index(self, dim, auto_id, _async):
+    def test_search_with_diskann_and_more_index(self, dim, auto_id, _async, enable_dynamic_field):
         """
         target: test search with DISKANN plus additional scalar indexes
         method: 1. create collection, insert data
                 2. create diskann index and more indexes
                 3. search
         expected: search successfully
         """
         # 1. initialize with data
         collection_w, _, _, ids = \
-            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False)[0:4]
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False,
+                                         enable_dynamic_field=enable_dynamic_field)[0:4]
         # 2.
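# DISKANN takes no build-time params in these cases; the query-time knob is
# search_list, which has to be at least the requested limit (compare the
# search_list_equal_to_limit case further down). The index/search pair used
# throughout this class (the value 30 is illustrative):
default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
collection_w.create_index(ct.default_float_vec_field_name, default_index)
collection_w.load()
search_params = {"metric_type": "L2", "params": {"search_list": 30}}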
create index default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}} collection_w.create_index(ct.default_float_vec_field_name, default_index, index_name=index_name1) @@ -5391,7 +5548,7 @@ class TestSearchDiskann(TestcaseBase): ) @pytest.mark.tags(CaseLabel.L1) - def test_search_with_scalar_field(self, dim, _async): + def test_search_with_scalar_field(self, dim, _async, enable_dynamic_field): """ target: test search with scalar field method: 1.create collection , insert data @@ -5402,39 +5559,35 @@ class TestSearchDiskann(TestcaseBase): # 1. initialize with data collection_w, _, _, ids = \ self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name, - is_index=False)[0:4] + is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. create index default_index = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} collection_w.create_index(ct.default_float_vec_field_name, default_index) index_params = {} - collection_w.create_index(ct.default_float_field_name, index_params=index_params) - collection_w.create_index(ct.default_int64_field_name, index_params=index_params) - + if not enable_dynamic_field: + collection_w.create_index(ct.default_float_field_name, index_params=index_params) + collection_w.create_index(ct.default_int64_field_name, index_params=index_params) + else: + collection_w.create_index(ct.default_string_field_name, index_params=index_params) collection_w.load() - default_expr = "int64 in [1, 2, 3, 4]" - limit = 4 - - default_search_params ={"metric_type": "L2", "params": {"nprobe": 64}} + default_search_params = {"metric_type": "L2", "params": {"nprobe": 64}} vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name] search_res = collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, limit, - default_expr, - output_fields=output_fields, - _async=_async, - travel_timestamp=0, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": ids, - "limit": limit, - "_async": _async} - ) + default_search_params, limit, default_expr, + output_fields=output_fields, _async=_async, + travel_timestamp=0, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": ids, + "limit": limit, + "_async": _async}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("limit", [10, 100, 1000]) - def test_search_diskann_search_list_equal_to_limit(self, dim, auto_id, limit, _async): + def test_search_diskann_search_list_equal_to_limit(self, dim, auto_id, limit, _async, enable_dynamic_field): """ target: test search diskann index when search_list equal to limit method: 1.create collection , insert data, primary_field is int field @@ -5444,7 +5597,8 @@ class TestSearchDiskann(TestcaseBase): """ # 1. initialize with data collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id, - dim=dim, is_index=False)[0:4] + dim=dim, is_index=False, + enable_dynamic_field=enable_dynamic_field)[0:4] # 2. 
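# test_search_with_scalar_field above passes index_params={} for the scalar
# columns, which asks Milvus for its default scalar index; the dynamic-field
# branch indexes the string PK instead, because members absorbed by the
# dynamic column are not schema fields and therefore cannot be indexed:
index_params = {}
collection_w.create_index(ct.default_float_field_name, index_params=index_params)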
create index
         default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
@@ -5534,6 +5688,10 @@ class TestCollectionRangeSearch(TestcaseBase):
     def is_flush(self, request):
         yield request.param
 
+    @pytest.fixture(scope="function", params=[True, False])
+    def enable_dynamic_field(self, request):
+        yield request.param
+
     """
     ******************************************************************
     #  The following are valid range search cases
     ******************************************************************
     """
     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("radius", [1000, 1000.0])
     @pytest.mark.parametrize("range_filter", [0, 0.0])
-    def test_range_search_normal(self, nq, dim, auto_id, is_flush, radius, range_filter):
+    def test_range_search_normal(self, nq, dim, auto_id, is_flush, radius, range_filter, enable_dynamic_field):
         """
         target: test range search normal case
         method: create connection, collection, insert and search
         expected: search successfully with limit(topK)
         """
         # 1. initialize with data
         collection_w, _vectors, _, insert_ids, time_stamp = \
-            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush)[0:5]
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush,
+                                         enable_dynamic_field=enable_dynamic_field)[0:5]
         # 2. get vectors that inserted into collection
-        vectors = np.array(_vectors[0]).tolist()
-        vectors = [vectors[i][-1] for i in range(nq)]
+        vectors = []
+        if enable_dynamic_field:
+            for vector in _vectors[0]:
+                vector = vector[ct.default_float_vec_field_name]
+                vectors.append(vector)
+        else:
+            vectors = np.array(_vectors[0]).tolist()
+            vectors = [vectors[i][-1] for i in range(nq)]
         # 3. range search
         range_search_params = {"metric_type": "L2", "params": {"nprobe": 10, "radius": radius,
                                                                "range_filter": range_filter}}
@@ -5817,7 +5982,7 @@ class TestCollectionRangeSearch(TestcaseBase):
                                          "_async": _async})
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_range_search_collection_after_release_load(self, auto_id, _async):
+    def test_range_search_collection_after_release_load(self, auto_id, _async, enable_dynamic_field):
         """
         target: range search the pre-released collection after load
         method: 1. create collection
                 2. release collection
                 3. load collection
                 4. range search the pre-released collection
         expected: search successfully
         """
         # 1. initialize with data
         collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, default_nb,
                                                                                   1, auto_id=auto_id,
-                                                                                  dim=default_dim)[0:5]
+                                                                                  dim=default_dim,
+                                                                                  enable_dynamic_field=
+                                                                                  enable_dynamic_field)[0:5]
         # 2. release collection
         log.info("test_range_search_collection_after_release_load: releasing collection %s" % collection_w.name)
         collection_w.release()
@@ -5851,7 +6018,7 @@ class TestCollectionRangeSearch(TestcaseBase):
                                          "_async": _async})
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_range_search_load_flush_load(self, dim, _async):
+    def test_range_search_load_flush_load(self, dim, _async, enable_dynamic_field):
         """
         target: test range search when load before flush
         method: 1. insert data and load
                 2. flush, and load
                 3. search the collection
         expected: search success with limit(topK)
         """
         # 1. initialize with data
-        collection_w = self.init_collection_general(prefix, dim=dim)[0]
+        collection_w = self.init_collection_general(prefix, dim=dim, enable_dynamic_field=enable_dynamic_field)[0]
         # 2.
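# Range search contract checked by this class (L2 metric): every returned
# distance d satisfies range_filter <= d < radius, so radius must exceed
# range_filter. Parameter block as used by test_range_search_normal:
range_search_params = {"metric_type": "L2",
                       "params": {"nprobe": 10, "radius": 1000,
                                  "range_filter": 0}}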
insert data - insert_ids = cf.insert_data(collection_w, default_nb, dim=dim)[3] + insert_ids = cf.insert_data(collection_w, default_nb, dim=dim, enable_dynamic_field=enable_dynamic_field)[3] # 3. load data collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index) collection_w.load() @@ -5883,7 +6050,7 @@ class TestCollectionRangeSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - def test_range_search_new_data(self, nq, dim): + def test_range_search_new_data(self, nq, dim, enable_dynamic_field): """ target: test search new inserted data without load method: 1. search the collection @@ -5896,7 +6063,9 @@ class TestCollectionRangeSearch(TestcaseBase): limit = 1000 nb_old = 500 collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb_old, - dim=dim)[0:5] + dim=dim, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. search for original data after load vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] range_search_params = {"metric_type": "L2", "params": {"nprobe": 10, "radius": 1000, @@ -6002,7 +6171,7 @@ class TestCollectionRangeSearch(TestcaseBase): @pytest.mark.parametrize("index, params", zip(range_search_supported_index, range_search_supported_index_params)) - def test_range_search_after_different_index_with_params(self, dim, index, params): + def test_range_search_after_different_index_with_params(self, dim, index, params, enable_dynamic_field): """ target: test range search after different index method: test range search after different index and corresponding search params @@ -6011,7 +6180,9 @@ class TestCollectionRangeSearch(TestcaseBase): # 1. initialize with data collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, partition_num=1, - dim=dim, is_index=False)[0:5] + dim=dim, is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:5] # 2. 
create index and load if params.get("m"): if (dim % params["m"]) != 0: @@ -6376,7 +6547,7 @@ class TestCollectionRangeSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expression", cf.gen_normal_expressions()) - def test_range_search_with_expression(self, dim, expression, _async): + def test_range_search_with_expression(self, dim, expression, _async, enable_dynamic_field): """ target: test range search with different expressions method: test range search with different expressions @@ -6386,15 +6557,21 @@ class TestCollectionRangeSearch(TestcaseBase): nb = 1000 collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb, dim=dim, - is_index=False)[0:4] + is_index=False, + enable_dynamic_field= + enable_dynamic_field)[0:4] # filter result with expression in collection _vectors = _vectors[0] expression = expression.replace("&&", "and").replace("||", "or") filter_ids = [] for i, _id in enumerate(insert_ids): - int64 = _vectors.int64[i] - float = _vectors.float[i] + if enable_dynamic_field: + int64 = _vectors[i][ct.default_int64_field_name] + float = _vectors[i][ct.default_float_field_name] + else: + int64 = _vectors.int64[i] + float = _vectors.float[i] if not expression or eval(expression): filter_ids.append(_id) @@ -6426,7 +6603,7 @@ class TestCollectionRangeSearch(TestcaseBase): assert set(ids).issubset(filter_ids_set) @pytest.mark.tags(CaseLabel.L2) - def test_range_search_with_output_field(self, auto_id, _async): + def test_range_search_with_output_field(self, auto_id, _async, enable_dynamic_field): """ target: test range search with output fields method: range search with one output_field @@ -6434,7 +6611,9 @@ class TestCollectionRangeSearch(TestcaseBase): """ # 1. initialize with data collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - auto_id=auto_id)[0:4] + auto_id=auto_id, + enable_dynamic_field= + enable_dynamic_field)[0:4] # 2. 
search log.info("test_range_search_with_output_field: Searching collection %s" % collection_w.name) range_search_params = {"metric_type": "L2", "params": {"nprobe": 10, "radius": 1000, @@ -7894,3 +8073,99 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w.search(vectors[:1], field_name, default_search_params, 200, check_task=CheckTasks.check_search_results, check_items={"nq": 1, "limit": 100}) + + +class TestCollectionSearchJSON(TestcaseBase): + """ Test case of search interface """ + + @pytest.fixture(scope="function", + params=[default_nb, default_nb_medium]) + def nb(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[2, 500]) + def nq(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[32, 128]) + def dim(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[False, True]) + def _async(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["JACCARD", "HAMMING", "TANIMOTO"]) + def metrics(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[False, True]) + def is_flush(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[True, False]) + def enable_dynamic_field(self, request): + yield request.param + + """ + ****************************************************************** + # The followings are invalid base cases + ****************************************************************** + """ + @pytest.mark.tags(CaseLabel.L1) + def test_search_json_expression_object(self): + """ + target: test search with comparisons jsonField directly + method: search with expressions using jsonField name directly + expected: Raise error + """ + # 1. initialize with data + nq = 1 + dim = 128 + collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, dim=dim)[0:5] + # 2. search before insert time_stamp + log.info("test_search_json_expression_object: searching collection %s" % collection_w.name) + vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] + # 3. search after insert time_stamp + json_search_exp = "json_field > 0" + collection_w.search(vectors[:nq], default_search_field, + default_search_params, default_limit, + json_search_exp, + check_task=CheckTasks.err_res, + check_items={ct.err_code: 1, + ct.err_msg: "can not comparisons jsonField directly"}) + + """ + ****************************************************************** + # The followings are valid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L1) + def test_search_json_expression_default(self, nq, dim, auto_id, is_flush, enable_dynamic_field): + """ + target: test search case with default json expression + method: create connection, collection, insert and search + expected: 1. search returned with 0 before travel timestamp + 2. search successfully with limit(topK) after travel timestamp + """ + # 1. initialize with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, + enable_dynamic_field=enable_dynamic_field)[0:5] + # 2. search before insert time_stamp + log.info("test_search_json_expression_default: searching collection %s" % collection_w.name) + vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] + # 3. 
search after insert time_stamp + collection_w.search(vectors[:nq], default_search_field, + default_search_params, default_limit, + default_json_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": nq, + "ids": insert_ids, + "limit": default_limit}) +
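# Recap of the JSON pieces this class exercises, in raw pymilvus (a sketch;
# assumes pymilvus >= 2.2.9, and the key name "number" is illustrative): the
# JSON column is declared like any other field, rows carry dicts, and filters
# must address keys rather than the column itself.
from pymilvus import DataType, FieldSchema

json_field = FieldSchema("json_field", DataType.JSON)
row = {"int64": 1, "float_vector": [0.1] * 128, "json_field": {"number": 7}}
valid_expr = 'json_field["number"] > 0'  # key access is filterable
invalid_expr = "json_field > 0"          # whole-column comparison raises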