diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index c6089c9d27..416fc2051a 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -333,7 +333,7 @@ def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
         dict = {ct.default_int64_field_name: i,
                 ct.default_float_field_name: i*1.0,
                 ct.default_string_field_name: str(i),
-                ct.default_json_field_name: {"number": i},
+                ct.default_json_field_name: {"number": i, "float": i*1.0},
                 ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
                 }
         if with_json is False:
@@ -968,39 +968,61 @@ def gen_normal_expressions():
     return expressions


-def gen_field_compare_expressions():
+def gen_json_field_expressions():
     expressions = [
-        "int64_1 | int64_2 == 1",
-        "int64_1 && int64_2 ==1",
-        "int64_1 + int64_2 == 10",
-        "int64_1 - int64_2 == 2",
-        "int64_1 * int64_2 == 8",
-        "int64_1 / int64_2 == 2",
-        "int64_1 ** int64_2 == 4",
-        "int64_1 % int64_2 == 0",
-        "int64_1 in int64_2",
-        "int64_1 + int64_2 >= 10"
+        "json_field['number'] > 0",
+        "0 <= json_field['number'] < 400 or 1000 > json_field['number'] >= 500",
+        "json_field['number'] not in [1, 2, 3]",
+        "json_field['number'] in [1, 2, 3] and json_field['float'] != 2",
+        "json_field['number'] == 0 || json_field['float'] == 10**2 || json_field['number'] + 1 == 3",
+        "json_field['number'] < 400 and json_field['number'] >= 100 and json_field['number'] % 100 == 0",
+        "json_field['float'] > 400 && json_field['float'] < 200",
+        "json_field['number'] in [300/2, -10*30+800, (100+200)*2] or json_field['float'] in [+3**6, 2**10/2]",
+        "json_field['float'] <= -4**5/2 && json_field['float'] > 500-1 && json_field['float'] != 500/2+260"
     ]
     return expressions


-def gen_normal_string_expressions(field):
-    expressions = [
-        f"\"0\"< {field} < \"3\"",
-        f"{field} >= \"0\"",
-        f"({field} > \"0\" && {field} < \"100\") or ({field} > \"200\" && {field} < \"300\")",
-        f"\"0\" <= {field} <= \"100\"",
-        f"{field} == \"0\"|| {field} == \"1\"|| {field} ==\"2\"",
-        f"{field} != \"0\"",
-        f"{field} not in [\"0\", \"1\", \"2\"]",
-        f"{field} in [\"0\", \"1\", \"2\"]"
-    ]
+def gen_field_compare_expressions(fields1=None, fields2=None):
+    if fields1 is None:
+        fields1 = ["int64_1"]
+        fields2 = ["int64_2"]
+    expressions = []
+    for field1, field2 in zip(fields1, fields2):
+        expression = [
+            f"{field1} | {field2} == 1",
+            f"{field1} + {field2} <= 10 || {field1} - {field2} == 2",
+            f"{field1} * {field2} >= 8 && {field1} / {field2} < 2",
+            f"{field1} ** {field2} != 4 and {field1} + {field2} > 5",
+            f"{field1} not in {field2}",
+            f"{field1} in {field2}",
+        ]
+        expressions.extend(expression)
+    return expressions
+
+
+def gen_normal_string_expressions(fields=None):
+    if fields is None:
+        fields = [ct.default_string_field_name]
+    expressions = []
+    for field in fields:
+        expression = [
+            f"\"0\"< {field} < \"3\"",
+            f"{field} >= \"0\"",
+            f"({field} > \"0\" && {field} < \"100\") or ({field} > \"200\" && {field} < \"300\")",
+            f"\"0\" <= {field} <= \"100\"",
+            f"{field} == \"0\"|| {field} == \"1\"|| {field} ==\"2\"",
+            f"{field} != \"0\"",
+            f"{field} not in [\"0\", \"1\", \"2\"]",
+            f"{field} in [\"0\", \"1\", \"2\"]"
+        ]
+        expressions.extend(expression)
     return expressions


 def gen_invalid_string_expressions():
     expressions = [
-        "varchar in [0, \"1\"]", 
+        "varchar in [0, \"1\"]",
         "varchar not in [\"0\", 1, 2]"
     ]
     return expressions
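For reference, the reworked generators take lists of field names and emit one batch of expressions per field, which is what lets a single `parametrize()` call cover scalar, JSON-path and dynamic fields. A minimal sketch of their output, not part of the patch and assuming the suite's usual `from common import common_func as cf` import:

```python
# Illustrative only -- mirrors the generators added in this patch.
from common import common_func as cf

json_exprs = cf.gen_json_field_expressions()
# e.g. "json_field['number'] > 0",
#      "json_field['float'] > 400 && json_field['float'] < 200"
assert len(json_exprs) == 9

# One pair of field names -> 6 compare expressions, e.g.
# "json_field['int'][0] | json_field['int'][1] == 1"
compare_exprs = cf.gen_field_compare_expressions(["json_field['int'][0]"],
                                                 ["json_field['int'][1]"])
assert len(compare_exprs) == 6
```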
@@ -1192,6 +1214,29 @@ def index_to_dict(index):
     }


+def assert_json_contains(expr, list_data):
+    result_ids = []
+    expr_prefix = expr.split('(', 1)[0]
+    exp_ids = eval(expr.split(', ', 1)[1].split(')', 1)[0])
+    if expr_prefix in ["json_contains", "JSON_CONTAINS"]:
+        for i in range(len(list_data)):
+            if exp_ids in list_data[i]:
+                result_ids.append(i)
+    elif expr_prefix in ["json_contains_all", "JSON_CONTAINS_ALL"]:
+        for i in range(len(list_data)):
+            set_list_data = set(tuple(element) if isinstance(element, list) else element for element in list_data[i])
+            if set(exp_ids).issubset(set_list_data):
+                result_ids.append(i)
+    elif expr_prefix in ["json_contains_any", "JSON_CONTAINS_ANY"]:
+        for i in range(len(list_data)):
+            set_list_data = set(tuple(element) if isinstance(element, list) else element for element in list_data[i])
+            if set(exp_ids) & set_list_data:
+                result_ids.append(i)
+    else:
+        log.warning("unknown expr: %s" % expr)
+    return result_ids
+
+
 def assert_equal_index(index_1, index_2):
     return index_to_dict(index_1) == index_to_dict(index_2)
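`assert_json_contains()` is the client-side oracle for the new delete tests: it parses the `json_contains*` call out of the expression, `eval()`s the candidate value list, and replays the containment check over the raw per-row data (inner lists are converted to tuples so they can live in a set). A small usage sketch with made-up values:

```python
# Illustrative only -- values are made up; cf is the common_func module patched above.
from common import common_func as cf

list_data = [[0, 2], [1, 3], [2, 4]]   # per-row content of json_field['list']
expr = "json_contains_any(json_field['list'], [2, 9])"

# The helper extracts the prefix ("json_contains_any") and the candidate list
# ([2, 9]), then returns the offsets of rows whose list intersects it.
exp_ids = cf.assert_json_contains(expr, list_data)
assert exp_ids == [0, 2]
```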
diff --git a/tests/python_client/testcases/test_delete.py b/tests/python_client/testcases/test_delete.py
index 18e2528c36..ac948f70f1 100644
--- a/tests/python_client/testcases/test_delete.py
+++ b/tests/python_client/testcases/test_delete.py
@@ -1,5 +1,6 @@
+import random
 import time
-
+import pandas as pd
 import pytest

 from base.client_base import TestcaseBase
@@ -1856,7 +1857,7 @@ class TestDeleteComplexExpr(TestcaseBase):
     Test case of delete interface with complex expr
     """

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("expression", cf.gen_normal_expressions()[1:])
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
     def test_delete_normal_expressions(self, expression, enable_dynamic_field):
@@ -1934,7 +1935,7 @@ class TestDeleteComplexExpr(TestcaseBase):
                            check_task=CheckTasks.check_query_results,
                            check_items={'count(*)': nb - len(deleted_str)})

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_delete_expr_empty_string(self):
         """
         target: test delete with expr empty
@@ -1948,7 +1949,7 @@ class TestDeleteComplexExpr(TestcaseBase):
         error = {ct.err_code: 1, ct.err_msg: "expr cannot be empty"}
         collection_w.delete(expr="", check_task=CheckTasks.err_res, check_items=error)

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_delete_complex_expr_before_load(self):
         """
         target: test delete before load
@@ -1961,3 +1962,249 @@ class TestDeleteComplexExpr(TestcaseBase):
         # delete
         error = {ct.err_code: 1, ct.err_msg: "collection not loaded: unrecoverable error"}
         collection_w.delete(expr="int64 >= 0", check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("expr_prefix", ["json_contains", "JSON_CONTAINS"])
+    @pytest.mark.parametrize("field_name", ["json_field['list']", "list"])
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_delete_expr_json_contains_base(self, expr_prefix, field_name, enable_dynamic_field):
+        """
+        target: test delete expr using json_contains
+        method: delete entities using a json_contains expression
+        expected: delete successfully
+        """
+        if field_name == "list" and enable_dynamic_field is False:
+            pytest.skip("only supported when enable_dynamic_field == True")
+        # init collection with nb default data
+        collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]
+
+        # insert
+        listMix = [[i, i + 2] for i in range(ct.default_nb)]  # only int
+        if enable_dynamic_field:
+            data = cf.gen_default_rows_data()
+            for i in range(ct.default_nb):
+                data[i][ct.default_json_field_name] = {"list": listMix[i]}
+                data[i]['list'] = listMix[i]
+        else:
+            data = cf.gen_default_dataframe_data()
+            data[ct.default_json_field_name] = [{"list": listMix[i]} for i in range(ct.default_nb)]
+        collection_w.insert(data)
+        collection_w.load()
+
+        # delete with expressions
+        delete_ids = random.randint(2, ct.default_nb - 2)
+        expression = f"{expr_prefix}({field_name}, {delete_ids})"
+        res = collection_w.delete(expression)[0]
+        exp_ids = cf.assert_json_contains(expression, listMix)
+        assert res.delete_count == len(exp_ids)
+
+        # query to check
+        collection_w.query(expression, check_task=CheckTasks.check_query_empty)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("expr_prefix", ["json_contains_all", "JSON_CONTAINS_ALL",
+                                             "json_contains_any", "JSON_CONTAINS_ANY"])
+    @pytest.mark.parametrize("field_name", ["json_field['list']", "list"])
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_delete_expr_json_contains_all(self, expr_prefix, field_name, enable_dynamic_field):
+        """
+        target: test delete expr using json_contains_all/json_contains_any
+        method: delete entities using a json_contains_all/json_contains_any expression
+        expected: delete successfully
+        """
+        if field_name == "list" and enable_dynamic_field is False:
+            pytest.skip("only supported when enable_dynamic_field == True")
+        # init collection with nb default data
+        collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]
+
+        # insert
+        listMix = [[i, i * 0.00001, bool(i % 2), [i, str(i)]] for i in range(ct.default_nb)]  # mix int, float, list, bool
+        if enable_dynamic_field:
+            data = cf.gen_default_rows_data()
+            for i in range(ct.default_nb):
+                data[i][ct.default_json_field_name] = {"list": listMix[i]}
+                data[i]['list'] = listMix[i]
+        else:
+            data = cf.gen_default_dataframe_data()
+            data[ct.default_json_field_name] = [{"list": listMix[i]} for i in range(ct.default_nb)]
+        collection_w.insert(data)
+        collection_w.load()
+
+        # delete with expressions
+        ids = random.randint(0, ct.default_nb)
+        delete_ids = [bool(ids % 2), ids]
+        expression = f"{expr_prefix}({field_name}, {delete_ids})"
+        res = collection_w.delete(expression)[0]
+        exp_ids = cf.assert_json_contains(expression, listMix)
+        assert res.delete_count == len(exp_ids)
+
+        # query to check
+        collection_w.query(expression, check_task=CheckTasks.check_query_empty)
+
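The two tests above build the delete filter by formatting a Python literal straight into the expression string; the prefix casing comes from the parametrization. A sketch of the strings that end up being sent (values are illustrative):

```python
# Illustrative only -- shows the shape of the generated delete expressions.
import random

expr_prefix = "JSON_CONTAINS_ANY"          # or json_contains / json_contains_all / ...
field_name = "json_field['list']"          # or "list" when the dynamic field is enabled
ids = random.randint(0, 2000)
delete_ids = [bool(ids % 2), ids]
expression = f"{expr_prefix}({field_name}, {delete_ids})"
# e.g. "JSON_CONTAINS_ANY(json_field['list'], [True, 1501])"
print(expression)
```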
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("expressions",
+                             cf.gen_field_compare_expressions(["int64_1", "json_field['int'][0]"],
+                                                              ["int64_2", "json_field['int'][1]"]))
+    def test_delete_expr_compare_two_variables(self, expressions):
+        """
+        target: test delete expr comparing two fields
+        method: delete with an expression that compares two fields
+        expected: raise exception (failed to create expr plan)
+        """
+        # init collection with nb default data
+        nb = 1000
+        dim = 32
+        fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
+                  cf.gen_json_field("json_field"), cf.gen_float_vec_field("float_vector", dim=dim)]
+        schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")
+        collection_w = self.init_collection_wrap(schema=schema)
+
+        # insert
+        int64_1_values = [i for i in range(nb)]
+        int64_2_values = [random.randint(0, nb) for _ in range(nb)]
+        vectors = cf.gen_vectors(nb, dim)
+        json_values = [[i, int64_2_values[i]] for i in range(nb)]
+        data = pd.DataFrame({
+            "int64_1": int64_1_values,
+            "int64_2": int64_2_values,
+            "json_field": [{"int": json_values[i]} for i in range(nb)],
+            "float_vector": vectors
+        })
+        collection_w.insert(data)
+        collection_w.create_index("float_vector")
+        collection_w.load()
+
+        # delete with expressions
+        error = {ct.err_code: 1, ct.err_msg: f"failed to create expr plan, expr = {expressions}"}
+        collection_w.delete(expressions, check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("expression", cf.gen_json_field_expressions())
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_delete_expr_json_field(self, expression, enable_dynamic_field):
+        """
+        target: test delete entities using json field expressions
+        method: delete using expressions on json fields
+        expected: delete successfully
+        """
+        # init collection with nb default data
+        collection_w, _vectors, _, insert_ids = \
+            self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4]
+
+        # filter result with expression in collection
+        _vectors = _vectors[0]
+        expression = expression.replace("&&", "and").replace("||", "or")
+        filter_ids = []
+        json_field = {}
+        for i, _id in enumerate(insert_ids):
+            if enable_dynamic_field:
+                json_field['number'] = _vectors[i][ct.default_json_field_name]['number']
+                json_field['float'] = _vectors[i][ct.default_json_field_name]['float']
+            else:
+                json_field['number'] = _vectors[ct.default_json_field_name][i]['number']
+                json_field['float'] = _vectors[ct.default_json_field_name][i]['float']
+            if not expression or eval(expression):
+                filter_ids.append(_id)
+
+        # delete with expressions
+        res = collection_w.delete(expression)[0]
+        assert res.delete_count == len(filter_ids)
+
+        # query to check
+        collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("normal_expression, json_expression", zip(cf.gen_normal_expressions()[1:4],
+                                                                       cf.gen_json_field_expressions()[6:9]))
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_delete_expr_complex_mixed(self, normal_expression, json_expression, enable_dynamic_field):
+        """
+        target: test delete entities using a mix of normal and json field expressions
+        method: delete using an expression that combines a scalar filter and a json field filter
+        expected: delete successfully
+        """
+        # init collection with nb default data
+        collection_w, _vectors, _, insert_ids = \
+            self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4]
+
+        # filter result with expression in collection
+        expression = normal_expression + ' and ' + json_expression
+        _vectors = _vectors[0]
+        expression = expression.replace("&&", "and").replace("||", "or")
+        filter_ids = []
+        json_field = {}
+        for i, _id in enumerate(insert_ids):
+            if enable_dynamic_field:
+                json_field['number'] = _vectors[i][ct.default_json_field_name]['number']
+                json_field['float'] = _vectors[i][ct.default_json_field_name]['float']
+                int64 = _vectors[i][ct.default_int64_field_name]
+                float = _vectors[i][ct.default_float_field_name]
+            else:
+                json_field['number'] = _vectors[ct.default_json_field_name][i]['number']
+                json_field['float'] = _vectors[ct.default_json_field_name][i]['float']
+                int64 = _vectors.int64[i]
+                float = _vectors.float[i]
+            if not expression or eval(expression):
+                filter_ids.append(_id)
+
+        # delete with expressions
+        res = collection_w.delete(expression)[0]
+        assert res.delete_count == len(filter_ids)
+
+        # query to check
+        collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
+
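All of the expression-based delete tests above predict the expected `delete_count` the same way: rewrite Milvus operators into Python (`&&` -> `and`, `||` -> `or`) and `eval()` the filter against local variables named after the referenced fields. A self-contained sketch of that technique with made-up rows:

```python
# Illustrative only -- mirrors the eval()-based expectation check used in the tests.
rows = [{"int64": i, "float": i * 1.0, "json_field": {"number": i, "float": i * 1.0}}
        for i in range(10)]
expression = "json_field['number'] < 5 && int64 % 2 == 0"
expression = expression.replace("&&", "and").replace("||", "or")

filter_ids = []
for i, row in enumerate(rows):
    json_field = row["json_field"]   # local names must match the fields in the expression
    int64 = row["int64"]
    float = row["float"]             # shadows the builtin, exactly as the test code does
    if eval(expression):
        filter_ids.append(i)

assert filter_ids == [0, 2, 4]       # these ids are expected to be deleted
```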
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("expression",
+                             cf.gen_normal_string_expressions(["varchar", "json_field['string']", "NewStr"]))
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_delete_string_expressions_normal(self, expression, enable_dynamic_field):
+        """
+        target: test delete with string expressions
+        method: delete using string expressions on varchar, json and dynamic fields
+        expected: delete successfully
+        """
+        if "NewStr" in expression and enable_dynamic_field is False:
+            pytest.skip("only supported when enable_dynamic_field == True")
+        # init collection with nb default data
+        nb = 1000
+        collection_w, _vectors, _, insert_ids = \
+            self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0:4]
+
+        # insert
+        if enable_dynamic_field:
+            data = cf.gen_default_rows_data(nb)
+            for i in range(nb):
+                data[i][ct.default_json_field_name] = {"string": str(i)}
+                data[i]['NewStr'] = str(i)
+        else:
+            data = cf.gen_default_dataframe_data(nb)
+            data[ct.default_json_field_name] = [{"string": str(i)} for i in range(nb)]
+        collection_w.insert(data)
+        collection_w.load()
+
+        # calculate the result
+        _vectors = data
+        expression = expression.replace("&&", "and").replace("||", "or")
+        filter_ids = []
+        json_field = {}
+        for i in range(nb):
+            if enable_dynamic_field:
+                json_field['string'] = _vectors[i][ct.default_json_field_name]['string']
+                varchar = _vectors[i][ct.default_string_field_name]
+                NewStr = _vectors[i]['NewStr']
+            else:
+                json_field['string'] = _vectors[ct.default_json_field_name][i]['string']
+                varchar = _vectors.varchar[i]
+            if not expression or eval(expression):
+                filter_ids.append(i)
+
+        # delete with expressions
+        res = collection_w.delete(expression)[0]
+        assert res.delete_count == len(filter_ids)
+
+        # query to check
+        collection_w.load()
+        collection_w.query("int64 >= 0", output_fields=['count(*)'],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={'count(*)': nb - len(filter_ids)})
+
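With three field names the string-expression parametrization above yields 8 filters per field (24 cases); the `NewStr` ones only make sense when the dynamic field is enabled, hence the `pytest.skip`. A quick sketch, illustrative only and assuming the same `cf` import as before:

```python
# Illustrative only.
from common import common_func as cf

exprs = cf.gen_normal_string_expressions(["varchar", "json_field['string']", "NewStr"])
assert len(exprs) == 24
# e.g.  varchar >= "0"
#       json_field['string'] != "0"
#       NewStr in ["0", "1", "2"]
```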
diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py
index 05f3f71c11..1ae850667d 100644
--- a/tests/python_client/testcases/test_query.py
+++ b/tests/python_client/testcases/test_query.py
@@ -2304,7 +2304,7 @@ class TestQueryString(TestcaseBase):
                            check_task=CheckTasks.check_query_results, check_items={exp_res: res})

     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(default_string_field_name))
+    @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions([default_string_field_name]))
     def test_query_string_is_primary(self, expression):
         """
         target: test query with output field only primary field
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index 85e36115da..7179b7e4e8 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -451,8 +451,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
         dim = 1
         fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
                   cf.gen_float_vec_field(dim=dim)]
-        schema = cf.gen_collection_schema(
-            fields=fields, primary_field="int64_1")
+        schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")
         collection_w = self.init_collection_wrap(schema=schema)

         # 2. insert data
@@ -462,14 +461,11 @@ class TestCollectionSearchInvalid(TestcaseBase):
         collection_w.insert(dataframe)

         # 3. search with expression
-        log.info(
-            "test_search_with_expression: searching with expression: %s" % expression)
-        collection_w.create_index(
-            ct.default_float_vec_field_name, index_params=ct.default_flat_index)
+        log.info("test_search_with_expression: searching with expression: %s" % expression)
+        collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
         collection_w.load()
         expression = expression.replace("&&", "and").replace("||", "or")
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
+        vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, nb, expression,
                             check_task=CheckTasks.err_res,
@@ -4793,7 +4789,7 @@ class TestSearchString(TestcaseBase):
         )

     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(ct.default_string_field_name))
+    @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions([ct.default_string_field_name]))
     def test_search_with_different_string_expr(self, dim, expression, _async, enable_dynamic_field):
         """
         target: test search with different string expressions
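Since `gen_normal_string_expressions()` now falls back to the default string field when called without arguments, the query and search call sites above only needed their argument wrapped in a list; both forms below are equivalent. Illustrative only, assuming the suite's usual imports:

```python
# Call-site migration sketch for the new list-based signature.
from common import common_func as cf
from common import common_type as ct

assert cf.gen_normal_string_expressions() == \
       cf.gen_normal_string_expressions([ct.default_string_field_name])
```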