mirror of https://github.com/milvus-io/milvus.git
Add test cases of delete by complex expr - part two (#27316)
Signed-off-by: nico <cheng.yuan@zilliz.com>
pull/27337/head
parent
9433a24f5d
commit
9d77c1dcda
|
@ -333,7 +333,7 @@ def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
|
|||
dict = {ct.default_int64_field_name: i,
|
||||
ct.default_float_field_name: i*1.0,
|
||||
ct.default_string_field_name: str(i),
|
||||
ct.default_json_field_name: {"number": i},
|
||||
ct.default_json_field_name: {"number": i, "float": i*1.0},
|
||||
ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
|
||||
}
|
||||
if with_json is False:
|
||||
|
@ -968,39 +968,61 @@ def gen_normal_expressions():
|
|||
return expressions
|
||||
|
||||
|
||||
def gen_field_compare_expressions():
|
||||
def gen_json_field_expressions():
    """Return the filter expressions that target the JSON field.

    Every entry refers only to ``json_field['number']`` and
    ``json_field['float']``, so callers can evaluate an entry locally with
    nothing but a ``json_field`` mapping in scope.

    Returns:
        list[str]: nine expressions, in a fixed order (callers slice it).
    """
    # Only JSON expressions belong here: the int64_1/int64_2 comparison
    # strings are produced by gen_field_compare_expressions.
    expressions = [
        "json_field['number'] > 0",
        "0 <= json_field['number'] < 400 or 1000 > json_field['number'] >= 500",
        "json_field['number'] not in [1, 2, 3]",
        "json_field['number'] in [1, 2, 3] and json_field['float'] != 2",
        "json_field['number'] == 0 || json_field['float'] == 10**2 || json_field['number'] + 1 == 3",
        "json_field['number'] < 400 and json_field['number'] >= 100 and json_field['number'] % 100 == 0",
        "json_field['float'] > 400 && json_field['float'] < 200",
        "json_field['number'] in [300/2, -10*30+800, (100+200)*2] or json_field['float'] in [+3**6, 2**10/2]",
        "json_field['float'] <= -4**5/2 && json_field['float'] > 500-1 && json_field['float'] != 500/2+260",
    ]
    return expressions
|
||||
|
||||
|
||||
def gen_normal_string_expressions(field):
|
||||
expressions = [
|
||||
f"\"0\"< {field} < \"3\"",
|
||||
f"{field} >= \"0\"",
|
||||
f"({field} > \"0\" && {field} < \"100\") or ({field} > \"200\" && {field} < \"300\")",
|
||||
f"\"0\" <= {field} <= \"100\"",
|
||||
f"{field} == \"0\"|| {field} == \"1\"|| {field} ==\"2\"",
|
||||
f"{field} != \"0\"",
|
||||
f"{field} not in [\"0\", \"1\", \"2\"]",
|
||||
f"{field} in [\"0\", \"1\", \"2\"]"
|
||||
]
|
||||
def gen_field_compare_expressions(fields1=None, fields2=None):
    """Build expressions that compare two fields with each other.

    Args:
        fields1: names used on the left-hand side; defaults to ``["int64_1"]``.
        fields2: names used on the right-hand side, paired positionally with
            ``fields1``; defaults to ``["int64_2"]``.

    Returns:
        list[str]: six expressions for every ``(field1, field2)`` pair, in
        pair order.
    """
    # Default each side independently. Tying both defaults to `fields1 is None`
    # made `fields1` alone fail in zip() and silently discarded an explicit
    # `fields2` whenever `fields1` was omitted.
    if fields1 is None:
        fields1 = ["int64_1"]
    if fields2 is None:
        fields2 = ["int64_2"]

    expressions = []
    for field1, field2 in zip(fields1, fields2):
        expressions.extend([
            f"{field1} | {field2} == 1",
            f"{field1} + {field2} <= 10 || {field1} - {field2} == 2",
            f"{field1} * {field2} >= 8 && {field1} / {field2} < 2",
            f"{field1} ** {field2} != 4 and {field1} + {field2} > 5",
            f"{field1} not in {field2}",
            f"{field1} in {field2}",
        ])
    return expressions
|
||||
|
||||
|
||||
def gen_normal_string_expressions(fields=None):
    """Build valid string-comparison expressions for each given field.

    Args:
        fields: field names to generate expressions for. A single name may be
            passed as a plain string. Defaults to the default string field.

    Returns:
        list[str]: eight expressions per field, grouped by field in the order
        the fields were given.
    """
    if fields is None:
        fields = [ct.default_string_field_name]
    elif isinstance(fields, str):
        # A bare name would otherwise be iterated character by character and
        # yield expressions for "v", "a", "r", ... instead of the field.
        fields = [fields]

    expressions = []
    for field in fields:
        expressions.extend([
            f'"0"< {field} < "3"',
            f'{field} >= "0"',
            f'({field} > "0" && {field} < "100") or ({field} > "200" && {field} < "300")',
            f'"0" <= {field} <= "100"',
            f'{field} == "0"|| {field} == "1"|| {field} =="2"',
            f'{field} != "0"',
            f'{field} not in ["0", "1", "2"]',
            f'{field} in ["0", "1", "2"]',
        ])
    return expressions
|
||||
|
||||
|
||||
def gen_invalid_string_expressions():
    """Return string-field expressions whose ``in`` lists mix ints and strings.

    Returns:
        list[str]: one ``in`` and one ``not in`` expression on ``varchar``.
    """
    # Each expression is listed once; a repeated entry would only produce a
    # duplicate parametrized case.
    expressions = [
        "varchar in [0, \"1\"]",
        "varchar not in [\"0\", 1, 2]",
    ]
    return expressions
|
||||
|
@ -1192,6 +1214,29 @@ def index_to_dict(index):
|
|||
}
|
||||
|
||||
|
||||
def assert_json_contains(expr, list_data):
    """Compute which rows of ``list_data`` a json_contains-style filter matches.

    Args:
        expr: text of the form ``<prefix>(<field>, <value>)`` where the prefix
            is ``json_contains``, ``json_contains_all`` or ``json_contains_any``
            (lower- or upper-case). ``<value>`` is a Python literal: a single
            value for ``json_contains``, a list for the other two.
        list_data: one list per row; the row index is what gets reported.

    Returns:
        list[int]: indices of the rows that satisfy the expression. An
        unrecognised prefix logs a warning and yields an empty list.
    """
    def _hashable(value):
        # Lists cannot be set members, so freeze them (recursively) to tuples.
        if isinstance(value, list):
            return tuple(_hashable(item) for item in value)
        return value

    expr_prefix = expr.split('(', 1)[0]
    # The value is everything between the first ", " and the first ")".
    # NOTE: eval() runs on text composed by the test code itself; do not feed
    # this helper expressions from an untrusted source.
    exp_ids = eval(expr.split(', ', 1)[1].split(')', 1)[0])

    if expr_prefix in ["json_contains", "JSON_CONTAINS"]:
        return [i for i, row in enumerate(list_data) if exp_ids in row]

    if expr_prefix in ["json_contains_all", "JSON_CONTAINS_ALL"]:
        # Freeze the expected values the same way as the row values, otherwise
        # an expected nested list is unhashable and can never be matched.
        wanted = {_hashable(value) for value in exp_ids}
        return [i for i, row in enumerate(list_data)
                if wanted.issubset({_hashable(element) for element in row})]

    if expr_prefix in ["json_contains_any", "JSON_CONTAINS_ANY"]:
        wanted = {_hashable(value) for value in exp_ids}
        return [i for i, row in enumerate(list_data)
                if wanted & {_hashable(element) for element in row}]

    log.warning("unknown expr: %s" % expr)
    return []
|
||||
|
||||
|
||||
def assert_equal_index(index_1, index_2):
    """Tell whether two indexes have equal ``index_to_dict`` representations."""
    first = index_to_dict(index_1)
    second = index_to_dict(index_2)
    return first == second
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import random
|
||||
import time
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from base.client_base import TestcaseBase
|
||||
|
@ -1856,7 +1857,7 @@ class TestDeleteComplexExpr(TestcaseBase):
|
|||
Test case of delete interface with complex expr
|
||||
"""
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L0)
|
||||
@pytest.mark.parametrize("expression", cf.gen_normal_expressions()[1:])
|
||||
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
|
||||
def test_delete_normal_expressions(self, expression, enable_dynamic_field):
|
||||
|
@ -1934,7 +1935,7 @@ class TestDeleteComplexExpr(TestcaseBase):
|
|||
check_task=CheckTasks.check_query_results,
|
||||
check_items={'count(*)': nb - len(deleted_str)})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_delete_expr_empty_string(self):
|
||||
"""
|
||||
target: test delete with expr empty
|
||||
|
@ -1948,7 +1949,7 @@ class TestDeleteComplexExpr(TestcaseBase):
|
|||
error = {ct.err_code: 1, ct.err_msg: "expr cannot be empty"}
|
||||
collection_w.delete(expr="", check_task=CheckTasks.err_res, check_items=error)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_delete_complex_expr_before_load(self):
|
||||
"""
|
||||
target: test delete before load
|
||||
|
@ -1961,3 +1962,249 @@ class TestDeleteComplexExpr(TestcaseBase):
|
|||
# delete
|
||||
error = {ct.err_code: 1, ct.err_msg: "collection not loaded: unrecoverable error"}
|
||||
collection_w.delete(expr="int64 >= 0", check_task=CheckTasks.err_res, check_items=error)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expr_prefix", ["json_contains", "JSON_CONTAINS"])
@pytest.mark.parametrize("field_name", ["json_field['list']", "list"])
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_expr_json_contains_base(self, expr_prefix, field_name, enable_dynamic_field):
    """
    target: test delete with a json_contains expression
    method: insert rows holding an int list, then delete with json_contains on one value
    expected: delete count equals the locally computed matches and the query comes back empty
    """
    # the bare "list" key is only inserted when the dynamic field is enabled
    needs_dynamic_field = field_name == "list"
    if needs_dynamic_field and enable_dynamic_field is False:
        pytest.skip("only support when enable_dynamic_filed == True")

    # create the collection without inserting the default data
    collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]

    # insert: every row carries the int pair [i, i + 2]
    int_lists = [[i, i + 2] for i in range(ct.default_nb)]
    if enable_dynamic_field:
        data = cf.gen_default_rows_data()
        for i, values in enumerate(int_lists):
            data[i][ct.default_json_field_name] = {"list": values}
            data[i]['list'] = values
    else:
        data = cf.gen_default_dataframe_data()
        data[ct.default_json_field_name] = [{"list": values} for values in int_lists]
    collection_w.insert(data)
    collection_w.load()

    # delete the rows whose list contains a randomly picked value
    target_value = random.randint(2, ct.default_nb - 2)
    expression = f"{expr_prefix}({field_name}, {target_value})"
    delete_result = collection_w.delete(expression)[0]
    expected_rows = cf.assert_json_contains(expression, int_lists)
    assert delete_result.delete_count == len(expected_rows)

    # the same expression must no longer match anything
    collection_w.query(expression, check_task=CheckTasks.check_query_empty)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr_prefix", ["json_contains_all", "JSON_CONTAINS_ALL",
                                         "json_contains_any", "JSON_CONTAINS_ANY"])
@pytest.mark.parametrize("field_name", ["json_field['list']", "list"])
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_expr_json_contains_all(self, expr_prefix, field_name, enable_dynamic_field):
    """
    target: test delete expr using json_contains_all / json_contains_any
    method: delete using expression using json_contains_all or json_contains_any
    expected: delete successfully
    """
    if field_name == "list" and enable_dynamic_field is False:
        pytest.skip("only support when enable_dynamic_filed == True")
    # init collection without inserting the default data
    collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]

    # insert
    # each row mixes int, float, bool and a nested list
    listMix = [[i, i * 0.00001, bool(i % 2), [i, str(i)]] for i in range(ct.default_nb)]
    if enable_dynamic_field:
        data = cf.gen_default_rows_data()
        for i in range(ct.default_nb):
            data[i][ct.default_json_field_name] = {"list": listMix[i]}
            data[i]['list'] = listMix[i]
    else:
        data = cf.gen_default_dataframe_data()
        data[ct.default_json_field_name] = [{"list": listMix[i]} for i in range(ct.default_nb)]
    collection_w.insert(data)
    collection_w.load()

    # delete with expressions
    # random.randint includes both bounds, so stop at default_nb - 1 to stay on
    # a row that was actually inserted.
    # Start at 2: for 0 and 1 the pair below is [False, 0] / [True, 1], which
    # Python treats as equal members, so the set built by
    # cf.assert_json_contains would collapse to a single value.
    # NOTE(review): presumably the server keeps JSON booleans distinct from
    # numbers, which would make the expectation wrong for 0 and 1 - confirm.
    ids = random.randint(2, ct.default_nb - 1)
    delete_ids = [bool(ids % 2), ids]
    expression = f"{expr_prefix}({field_name}, {delete_ids})"
    res = collection_w.delete(expression)[0]
    exp_ids = cf.assert_json_contains(expression, listMix)
    assert res.delete_count == len(exp_ids)

    # query to check
    collection_w.query(expression, check_task=CheckTasks.check_query_empty)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expressions",
                         cf.gen_field_compare_expressions(["int64_1", "json_field['int'][0]"],
                                                          ["int64_2", "json_field['int'][1]"]))
def test_delete_expr_compare_two_variables(self, expressions):
    """
    target: test delete with an expression that compares two fields
    method: build a collection with two int64 fields and a json pair, then delete with the expression
    expected: the delete is rejected with an expr-plan error
    """
    # collection with two int64 fields, a json field and a float vector
    nb, dim = 1000, 32
    schema = cf.gen_collection_schema(
        fields=[cf.gen_int64_field("int64_1"),
                cf.gen_int64_field("int64_2"),
                cf.gen_json_field("json_field"),
                cf.gen_float_vec_field("float_vector", dim=dim)],
        primary_field="int64_1")
    collection_w = self.init_collection_wrap(schema=schema)

    # insert: json_field['int'] mirrors the (int64_1, int64_2) pair of its row
    int64_1_values = list(range(nb))
    int64_2_values = [random.randint(0, nb) for _ in range(nb)]
    vectors = cf.gen_vectors(nb, dim)
    json_values = [[i, second] for i, second in enumerate(int64_2_values)]
    frame = pd.DataFrame({
        "int64_1": int64_1_values,
        "int64_2": int64_2_values,
        "json_field": [{"int": pair} for pair in json_values],
        "float_vector": vectors
    })
    collection_w.insert(frame)
    collection_w.create_index("float_vector")
    collection_w.load()

    # delete must fail while the expression plan is being created
    expected_error = {ct.err_code: 1, ct.err_msg: f"failed to create expr plan, expr = {expressions}"}
    collection_w.delete(expressions, check_task=CheckTasks.err_res, check_items=expected_error)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expression", cf.gen_json_field_expressions())
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_expr_json_field(self, expression, enable_dynamic_field):
    """
    target: test delete entities with expressions on the json field
    method: evaluate the expression locally per inserted row, then delete with it
    expected: delete count equals the local matches and those ids are gone
    """
    # collection pre-filled with the default data
    collection_w, _vectors, _, insert_ids = \
        self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4]
    inserted = _vectors[0]

    # rewrite the operators so the text is also valid Python for eval()
    expression = expression.replace("&&", "and").replace("||", "or")

    # `json_field` has to stay a local of this name: the evaluated
    # expression reads json_field['number'] and json_field['float']
    json_field = {}
    filter_ids = []
    for i, _id in enumerate(insert_ids):
        if enable_dynamic_field:
            # row-based data: index the row first, then the field
            json_field['number'] = inserted[i][ct.default_json_field_name]['number']
            json_field['float'] = inserted[i][ct.default_json_field_name]['float']
        else:
            # column-based data: index the field first, then the row
            json_field['number'] = inserted[ct.default_json_field_name][i]['number']
            json_field['float'] = inserted[ct.default_json_field_name][i]['float']
        if expression and not eval(expression):
            continue
        filter_ids.append(_id)

    # delete with the expression
    delete_result = collection_w.delete(expression)[0]
    assert delete_result.delete_count == len(filter_ids)

    # none of the matched ids may be returned any more
    collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("normal_expression, json_expression", zip(cf.gen_normal_expressions()[1:4],
                                                                   cf.gen_json_field_expressions()[6:9]))
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_expr_complex_mixed(self, normal_expression, json_expression, enable_dynamic_field):
    """
    target: test delete entities with a scalar expression and-ed with a json expression
    method: evaluate the combined expression locally per inserted row, then delete with it
    expected: delete count equals the local matches and those ids are gone
    """
    # collection pre-filled with the default data
    collection_w, _vectors, _, insert_ids = \
        self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4]
    inserted = _vectors[0]

    # combine both parts and rewrite the operators so eval() can parse the text
    expression = f"{normal_expression} and {json_expression}"
    expression = expression.replace("&&", "and").replace("||", "or")

    # `json_field`, `int64` and `float` are bound as locals under the field
    # names so the evaluated expression can refer to them; `float`
    # deliberately shadows the builtin inside this test
    json_field = {}
    filter_ids = []
    for i, _id in enumerate(insert_ids):
        if enable_dynamic_field:
            # row-based data: index the row first, then the field
            json_field['number'] = inserted[i][ct.default_json_field_name]['number']
            json_field['float'] = inserted[i][ct.default_json_field_name]['float']
            int64 = inserted[i][ct.default_int64_field_name]
            float = inserted[i][ct.default_float_field_name]
        else:
            # column-based data: index the field first, then the row
            json_field['number'] = inserted[ct.default_json_field_name][i]['number']
            json_field['float'] = inserted[ct.default_json_field_name][i]['float']
            int64 = inserted.int64[i]
            float = inserted.float[i]
        if expression and not eval(expression):
            continue
        filter_ids.append(_id)

    # delete with the combined expression
    delete_result = collection_w.delete(expression)[0]
    assert delete_result.delete_count == len(filter_ids)

    # none of the matched ids may be returned any more
    collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(["varchar", "json_field['string']", "NewStr"]))
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_string_expressions_normal(self, expression, enable_dynamic_field):
    """
    target: test delete with string comparison expressions
    method: insert rows with string values in varchar, json and dynamic fields, then delete with the expression
    expected: delete count equals the local matches and count(*) shrinks by that amount
    """
    # the NewStr key is only inserted when the dynamic field is enabled
    uses_dynamic_key = "NewStr" in expression
    if uses_dynamic_key and enable_dynamic_field is False:
        pytest.skip("only support when enable_dynamic_filed == True")

    # create the collection without inserting the default data
    nb = 1000
    collection_w, _init_vectors, _, _init_ids = \
        self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0:4]

    # insert: every string-valued field of row i holds str(i)
    if enable_dynamic_field:
        data = cf.gen_default_rows_data(nb)
        for i in range(nb):
            data[i][ct.default_json_field_name] = {"string": str(i)}
            data[i]['NewStr'] = str(i)
    else:
        data = cf.gen_default_dataframe_data(nb)
        data[ct.default_json_field_name] = [{"string": str(i)} for i in range(nb)]
    collection_w.insert(data)
    collection_w.load()

    # rewrite the operators so the text is also valid Python for eval()
    expression = expression.replace("&&", "and").replace("||", "or")

    # `json_field`, `varchar` and `NewStr` are bound as locals under the
    # field names so the evaluated expression can refer to them
    json_field = {}
    filter_ids = []
    for i in range(nb):
        if enable_dynamic_field:
            # row-based data: index the row first, then the field
            json_field['string'] = data[i][ct.default_json_field_name]['string']
            varchar = data[i][ct.default_string_field_name]
            NewStr = data[i]['NewStr']
        else:
            # column-based data: index the field first, then the row
            json_field['string'] = data[ct.default_json_field_name][i]['string']
            varchar = data.varchar[i]
        if expression and not eval(expression):
            continue
        filter_ids.append(i)

    # delete with the expression
    delete_result = collection_w.delete(expression)[0]
    assert delete_result.delete_count == len(filter_ids)

    # the remaining row count must have dropped by the number of matches
    collection_w.load()
    remaining = nb - len(filter_ids)
    collection_w.query("int64 >= 0", output_fields=['count(*)'],
                       check_task=CheckTasks.check_query_results,
                       check_items={'count(*)': remaining})
|
||||
|
||||
|
|
|
@ -2304,7 +2304,7 @@ class TestQueryString(TestcaseBase):
|
|||
check_task=CheckTasks.check_query_results, check_items={exp_res: res})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(default_string_field_name))
|
||||
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions([default_string_field_name]))
|
||||
def test_query_string_is_primary(self, expression):
|
||||
"""
|
||||
target: test query with output field only primary field
|
||||
|
|
|
@ -451,8 +451,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
|
|||
dim = 1
|
||||
fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
|
||||
cf.gen_float_vec_field(dim=dim)]
|
||||
schema = cf.gen_collection_schema(
|
||||
fields=fields, primary_field="int64_1")
|
||||
schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")
|
||||
collection_w = self.init_collection_wrap(schema=schema)
|
||||
|
||||
# 2. insert data
|
||||
|
@ -462,14 +461,11 @@ class TestCollectionSearchInvalid(TestcaseBase):
|
|||
collection_w.insert(dataframe)
|
||||
|
||||
# 3. search with expression
|
||||
log.info(
|
||||
"test_search_with_expression: searching with expression: %s" % expression)
|
||||
collection_w.create_index(
|
||||
ct.default_float_vec_field_name, index_params=ct.default_flat_index)
|
||||
log.info("test_search_with_expression: searching with expression: %s" % expression)
|
||||
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
|
||||
collection_w.load()
|
||||
expression = expression.replace("&&", "and").replace("||", "or")
|
||||
vectors = [[random.random() for _ in range(dim)]
|
||||
for _ in range(default_nq)]
|
||||
vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
|
||||
collection_w.search(vectors[:default_nq], default_search_field,
|
||||
default_search_params, nb, expression,
|
||||
check_task=CheckTasks.err_res,
|
||||
|
@ -4793,7 +4789,7 @@ class TestSearchString(TestcaseBase):
|
|||
)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(ct.default_string_field_name))
|
||||
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions([ct.default_string_field_name]))
|
||||
def test_search_with_different_string_expr(self, dim, expression, _async, enable_dynamic_field):
|
||||
"""
|
||||
target: test search with different string expressions
|
||||
|
|
Loading…
Reference in New Issue