Add test cases of delete by complex expr - part two (#27316)

Signed-off-by: nico <cheng.yuan@zilliz.com>
pull/27337/head
nico 2023-09-25 09:01:26 +08:00 committed by GitHub
parent 9433a24f5d
commit 9d77c1dcda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 326 additions and 38 deletions

View File

@ -333,7 +333,7 @@ def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
dict = {ct.default_int64_field_name: i,
ct.default_float_field_name: i*1.0,
ct.default_string_field_name: str(i),
ct.default_json_field_name: {"number": i},
ct.default_json_field_name: {"number": i, "float": i*1.0},
ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
}
if with_json is False:
@ -968,39 +968,61 @@ def gen_normal_expressions():
return expressions
def gen_json_field_expressions():
    """Return filter expressions exercising the JSON field.

    Every expression references the ``number`` and/or ``float`` keys inside
    ``json_field`` and mixes comparison, arithmetic, membership and logical
    operators in both python (``and``/``or``) and C (``&&``/``||``) spellings,
    so callers can parametrize query/delete tests over them.
    """
    expressions = [
        "json_field['number'] > 0",
        "0 <= json_field['number'] < 400 or 1000 > json_field['number'] >= 500",
        "json_field['number'] not in [1, 2, 3]",
        "json_field['number'] in [1, 2, 3] and json_field['float'] != 2",
        "json_field['number'] == 0 || json_field['float'] == 10**2 || json_field['number'] + 1 == 3",
        "json_field['number'] < 400 and json_field['number'] >= 100 and json_field['number'] % 100 == 0",
        "json_field['float'] > 400 && json_field['float'] < 200",
        "json_field['number'] in [300/2, -10*30+800, (100+200)*2] or json_field['float'] in [+3**6, 2**10/2]",
        "json_field['float'] <= -4**5/2 && json_field['float'] > 500-1 && json_field['float'] != 500/2+260"
    ]
    return expressions
def gen_field_compare_expressions(fields1=None, fields2=None):
    """Return expressions that compare two fields against each other.

    :param fields1: left-hand field names (default ``["int64_1"]``)
    :param fields2: right-hand field names, paired positionally with
                    ``fields1`` (default ``["int64_2"]``)
    :return: six expressions per (field1, field2) pair, covering bitwise,
             arithmetic, power and membership comparisons
    """
    # Default each side independently so a caller may override only one of
    # them without zip() failing on None (previously a provided fields2 was
    # also silently discarded whenever fields1 was omitted).
    if fields1 is None:
        fields1 = ["int64_1"]
    if fields2 is None:
        fields2 = ["int64_2"]
    expressions = []
    # zip truncates to the shorter list when the lengths differ
    for field1, field2 in zip(fields1, fields2):
        expression = [
            f"{field1} | {field2} == 1",
            f"{field1} + {field2} <= 10 || {field1} - {field2} == 2",
            f"{field1} * {field2} >= 8 && {field1} / {field2} < 2",
            f"{field1} ** {field2} != 4 and {field1} + {field2} > 5",
            f"{field1} not in {field2}",
            f"{field1} in {field2}",
        ]
        expressions.extend(expression)
    return expressions
def gen_normal_string_expressions(fields=None):
    """Build string-comparison filter expressions for each given field.

    :param fields: field names to substitute into the expression templates;
                   defaults to ``[ct.default_string_field_name]``
    :return: eight expressions per field, in template order
    """
    if fields is None:
        fields = [ct.default_string_field_name]
    # One template per operator family; {0} is the field name placeholder.
    templates = [
        '"0"< {0} < "3"',
        '{0} >= "0"',
        '({0} > "0" && {0} < "100") or ({0} > "200" && {0} < "300")',
        '"0" <= {0} <= "100"',
        '{0} == "0"|| {0} == "1"|| {0} =="2"',
        '{0} != "0"',
        '{0} not in ["0", "1", "2"]',
        '{0} in ["0", "1", "2"]',
    ]
    return [template.format(name) for name in fields for template in templates]
def gen_invalid_string_expressions():
    """Return string-field expressions that are intentionally invalid.

    Each expression mixes string and non-string literals inside an
    ``in`` / ``not in`` list, which the server must reject.
    """
    # The same expression was listed twice (diff residue); keep one copy.
    expressions = [
        "varchar in [0, \"1\"]",
        "varchar not in [\"0\", 1, 2]"
    ]
    return expressions
@ -1192,6 +1214,29 @@ def index_to_dict(index):
}
def assert_json_contains(expr, list_data):
    """Emulate the server-side ``json_contains*`` operators over local data.

    Parses an expression of the form ``<prefix>(<field>, <literal>)`` and
    returns the indexes of the rows in ``list_data`` that the expression
    would match, so tests can compare the server's delete/query counts
    against a locally computed expectation.

    :param expr: e.g. ``"json_contains(json_field['list'], [1, 2])"``
    :param list_data: per-row list values held by the JSON field
    :return: list of matching row indexes (empty for an unknown prefix)
    """
    from ast import literal_eval

    result_ids = []
    expr_prefix = expr.split('(', 1)[0]
    # The literal argument is everything between the first ", " and the
    # closing ")".  literal_eval (instead of eval) restricts parsing to
    # python literals so the helper can never execute arbitrary code.
    exp_ids = literal_eval(expr.split(', ', 1)[1].split(')', 1)[0])
    if expr_prefix in ["json_contains", "JSON_CONTAINS"]:
        # single-element containment
        for i in range(len(list_data)):
            if exp_ids in list_data[i]:
                result_ids.append(i)
    elif expr_prefix in ["json_contains_all", "JSON_CONTAINS_ALL"]:
        # every element of exp_ids must be present; nested lists are
        # converted to tuples so they are hashable for set operations
        for i in range(len(list_data)):
            set_list_data = set(tuple(element) if isinstance(element, list) else element for element in list_data[i])
            if set(exp_ids).issubset(set_list_data):
                result_ids.append(i)
    elif expr_prefix in ["json_contains_any", "JSON_CONTAINS_ANY"]:
        # at least one element of exp_ids must be present
        for i in range(len(list_data)):
            set_list_data = set(tuple(element) if isinstance(element, list) else element for element in list_data[i])
            if set(exp_ids) & set_list_data:
                result_ids.append(i)
    else:
        log.warning("unknown expr: %s" % expr)
    return result_ids
def assert_equal_index(index_1, index_2):
    """Two indexes are considered equal when their dict forms match."""
    dict_1 = index_to_dict(index_1)
    dict_2 = index_to_dict(index_2)
    return dict_1 == dict_2

View File

@ -1,5 +1,6 @@
import random
import time
import pandas as pd
import pytest
from base.client_base import TestcaseBase
@ -1856,7 +1857,7 @@ class TestDeleteComplexExpr(TestcaseBase):
Test case of delete interface with complex expr
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("expression", cf.gen_normal_expressions()[1:])
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_normal_expressions(self, expression, enable_dynamic_field):
@ -1934,7 +1935,7 @@ class TestDeleteComplexExpr(TestcaseBase):
check_task=CheckTasks.check_query_results,
check_items={'count(*)': nb - len(deleted_str)})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
def test_delete_expr_empty_string(self):
"""
target: test delete with expr empty
@ -1948,7 +1949,7 @@ class TestDeleteComplexExpr(TestcaseBase):
error = {ct.err_code: 1, ct.err_msg: "expr cannot be empty"}
collection_w.delete(expr="", check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
def test_delete_complex_expr_before_load(self):
"""
target: test delete before load
@ -1961,3 +1962,249 @@ class TestDeleteComplexExpr(TestcaseBase):
# delete
error = {ct.err_code: 1, ct.err_msg: "collection not loaded: unrecoverable error"}
collection_w.delete(expr="int64 >= 0", check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expr_prefix", ["json_contains", "JSON_CONTAINS"])
@pytest.mark.parametrize("field_name", ["json_field['list']", "list"])
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_expr_json_contains_base(self, expr_prefix, field_name, enable_dynamic_field):
    """
    target: test delete expr using json_contains
    method: insert rows whose json/dynamic field holds a list of ints, then
            delete with <expr_prefix>(<field>, <random int>)
    expected: delete count matches the locally computed expectation and a
              follow-up query with the same expression returns nothing
    """
    # a bare dynamic field named "list" only exists with dynamic fields on
    if field_name == "list" and enable_dynamic_field is False:
        pytest.skip("only support when enable_dynamic_field == True")
    # init collection with nb default data
    collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]
    # insert rows where every list holds two ints: [i, i + 2]
    list_values = [[i, i + 2] for i in range(ct.default_nb)]  # only int
    if enable_dynamic_field:
        data = cf.gen_default_rows_data()
        for i in range(ct.default_nb):
            data[i][ct.default_json_field_name] = {"list": list_values[i]}
            data[i]['list'] = list_values[i]
    else:
        data = cf.gen_default_dataframe_data()
        data[ct.default_json_field_name] = [{"list": list_values[i]} for i in range(ct.default_nb)]
    collection_w.insert(data)
    collection_w.load()
    # delete with a json_contains expression on a random value
    delete_ids = random.randint(2, ct.default_nb - 2)
    expression = f"{expr_prefix}({field_name}, {delete_ids})"
    res = collection_w.delete(expression)[0]
    # compute the expected matches locally and compare the delete count
    exp_ids = cf.assert_json_contains(expression, list_values)
    assert res.delete_count == len(exp_ids)
    # query to check nothing matching the expression remains
    collection_w.query(expression, check_task=CheckTasks.check_query_empty)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr_prefix", ["json_contains_all", "JSON_CONTAINS_ALL",
                                         "json_contains_any", "JSON_CONTAINS_ANY"])
@pytest.mark.parametrize("field_name", ["json_field['list']", "list"])
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_expr_json_contains_all(self, expr_prefix, field_name, enable_dynamic_field):
    """
    target: test delete expr using json_contains_all / json_contains_any
    method: insert rows whose json/dynamic field holds a mixed-type list,
            then delete with <expr_prefix>(<field>, [<bool>, <int>])
    expected: delete count matches the locally computed expectation and a
              follow-up query with the same expression returns nothing
    """
    # a bare dynamic field named "list" only exists with dynamic fields on
    if field_name == "list" and enable_dynamic_field is False:
        pytest.skip("only support when enable_dynamic_field == True")
    # init collection with nb default data
    collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]
    # insert rows with a list mixing int, float, bool and a nested list
    list_values = [[i, i * 0.00001, bool(i % 2), [i, str(i)]] for i in range(ct.default_nb)]
    if enable_dynamic_field:
        data = cf.gen_default_rows_data()
        for i in range(ct.default_nb):
            data[i][ct.default_json_field_name] = {"list": list_values[i]}
            data[i]['list'] = list_values[i]
    else:
        data = cf.gen_default_dataframe_data()
        data[ct.default_json_field_name] = [{"list": list_values[i]} for i in range(ct.default_nb)]
    collection_w.insert(data)
    collection_w.load()
    # delete with expressions; randint's upper bound is inclusive, so
    # ids == default_nb may match no row, which is a valid empty deletion
    ids = random.randint(0, ct.default_nb)
    delete_ids = [bool(ids % 2), ids]
    expression = f"{expr_prefix}({field_name}, {delete_ids})"
    res = collection_w.delete(expression)[0]
    # compute the expected matches locally and compare the delete count
    exp_ids = cf.assert_json_contains(expression, list_values)
    assert res.delete_count == len(exp_ids)
    # query to check nothing matching the expression remains
    collection_w.query(expression, check_task=CheckTasks.check_query_empty)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expressions",
                         cf.gen_field_compare_expressions(["int64_1", "json_field['int'][0]"],
                                                          ["int64_2", "json_field['int'][1]"]))
def test_delete_expr_compare_two_variables(self, expressions):
    """
    target: test delete expr comparing two fields against each other
    method: build a collection with two int64 fields plus a json field,
            then delete with an expression comparing them
    expected: the delete is rejected with a "failed to create expr plan"
              error (field-to-field comparison is not a supported plan)
    """
    # init collection with two scalar int fields and a json field
    nb = 1000
    dim = 32
    fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
              cf.gen_json_field("json_field"), cf.gen_float_vec_field("float_vector", dim=dim)]
    schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")
    collection_w = self.init_collection_wrap(schema=schema)
    # insert: int64_1 is the sequential pk, int64_2 random, and the json
    # field mirrors both values so json-path comparisons are meaningful
    int64_1_values = [i for i in range(nb)]
    int64_2_values = [random.randint(0, nb) for _ in range(nb)]
    vectors = cf.gen_vectors(nb, dim)
    json_values = [[i, int64_2_values[i]] for i in range(nb)]
    data = pd.DataFrame({
        "int64_1": int64_1_values,
        "int64_2": int64_2_values,
        "json_field": [{"int": json_values[i]} for i in range(nb)],
        "float_vector": vectors
    })
    collection_w.insert(data)
    collection_w.create_index("float_vector")
    collection_w.load()
    # delete with the compare expression: expect an expr-plan error
    error = {ct.err_code: 1, ct.err_msg: f"failed to create expr plan, expr = {expressions}"}
    collection_w.delete(expressions, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expression", cf.gen_json_field_expressions())
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_expr_json_field(self, expression, enable_dynamic_field):
    """
    target: test delete entities using expressions over the json field
    method: compute the expected matches locally with eval(), delete with
            the expression, then verify the matched ids are gone
    expected: delete successfully
    """
    # init collection with nb default data
    collection_w, _vectors, _, insert_ids = \
        self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4]
    # filter result with expression in collection
    _vectors = _vectors[0]
    # normalize C-style operators so the expression is valid python for eval()
    expression = expression.replace("&&", "and").replace("||", "or")
    filter_ids = []
    # json_field is read by name inside eval(expression); its keys are
    # rebound on every iteration to the current row's values
    json_field = {}
    for i, _id in enumerate(insert_ids):
        # rows-style access with dynamic fields, column-style otherwise
        if enable_dynamic_field:
            json_field['number'] = _vectors[i][ct.default_json_field_name]['number']
            json_field['float'] = _vectors[i][ct.default_json_field_name]['float']
        else:
            json_field['number'] = _vectors[ct.default_json_field_name][i]['number']
            json_field['float'] = _vectors[ct.default_json_field_name][i]['float']
        if not expression or eval(expression):
            filter_ids.append(_id)
    # delete with the expression and compare against the local expectation
    res = collection_w.delete(expression)[0]
    assert res.delete_count == len(filter_ids)
    # query to check the deleted ids are gone
    collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("normal_expression, json_expression", zip(cf.gen_normal_expressions()[1:4],
                                                                   cf.gen_json_field_expressions()[6:9]))
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_expr_complex_mixed(self, normal_expression, json_expression, enable_dynamic_field):
    """
    target: test delete entities with a scalar expression AND a json
            field expression combined
    method: compute the expected matches locally with eval(), delete with
            the combined expression, then verify the matched ids are gone
    expected: delete successfully
    """
    # init collection with nb default data
    collection_w, _vectors, _, insert_ids = \
        self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[0:4]
    # combine a scalar expression with a json expression
    expression = normal_expression + ' and ' + json_expression
    _vectors = _vectors[0]
    # normalize C-style operators so the expression is valid python for eval()
    expression = expression.replace("&&", "and").replace("||", "or")
    filter_ids = []
    # json_field / int64 / float are read by name inside eval(expression);
    # `float` deliberately shadows the builtin because the generated
    # expressions reference a field literally named "float"
    json_field = {}
    for i, _id in enumerate(insert_ids):
        # rows-style access with dynamic fields, column-style otherwise
        if enable_dynamic_field:
            json_field['number'] = _vectors[i][ct.default_json_field_name]['number']
            json_field['float'] = _vectors[i][ct.default_json_field_name]['float']
            int64 = _vectors[i][ct.default_int64_field_name]
            float = _vectors[i][ct.default_float_field_name]
        else:
            json_field['number'] = _vectors[ct.default_json_field_name][i]['number']
            json_field['float'] = _vectors[ct.default_json_field_name][i]['float']
            int64 = _vectors.int64[i]
            float = _vectors.float[i]
        if not expression or eval(expression):
            filter_ids.append(_id)
    # delete with the expression and compare against the local expectation
    res = collection_w.delete(expression)[0]
    assert res.delete_count == len(filter_ids)
    # query to check the deleted ids are gone
    collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(["varchar", "json_field['string']", "NewStr"]))
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_string_expressions_normal(self, expression, enable_dynamic_field):
    """
    target: test delete with string expressions over the varchar field,
            a json string key and a dynamic field
    method: compute the expected matches locally with eval(), delete with
            the expression, then verify the remaining row count
    expected: delete successfully
    """
    # a bare dynamic field named "NewStr" only exists with dynamic fields on
    if "NewStr" in expression and enable_dynamic_field is False:
        pytest.skip("only support when enable_dynamic_field == True")
    # init an empty collection; data is inserted below so the exact string
    # values of every field are known locally
    nb = 1000
    collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]
    # insert, mirroring str(i) into the json key and the dynamic field
    if enable_dynamic_field:
        data = cf.gen_default_rows_data(nb)
        for i in range(nb):
            data[i][ct.default_json_field_name] = {"string": str(i)}
            data[i]['NewStr'] = str(i)
    else:
        data = cf.gen_default_dataframe_data(nb)
        data[ct.default_json_field_name] = [{"string": str(i)} for i in range(nb)]
    collection_w.insert(data)
    collection_w.load()
    # calculate the expected matches locally; json_field / varchar / NewStr
    # are read by name inside eval(expression).  NewStr is only bound in
    # the dynamic branch, which is fine because expressions referencing it
    # are skipped when dynamic fields are off.
    expression = expression.replace("&&", "and").replace("||", "or")
    filter_ids = []
    json_field = {}
    for i in range(nb):
        if enable_dynamic_field:
            json_field['string'] = data[i][ct.default_json_field_name]['string']
            varchar = data[i][ct.default_string_field_name]
            NewStr = data[i]['NewStr']
        else:
            json_field['string'] = data[ct.default_json_field_name][i]['string']
            varchar = data.varchar[i]
        if not expression or eval(expression):
            filter_ids.append(i)
    # delete with the expression and compare against the local expectation
    res = collection_w.delete(expression)[0]
    assert res.delete_count == len(filter_ids)
    # query to check the remaining row count
    collection_w.load()
    collection_w.query("int64 >= 0", output_fields=['count(*)'],
                       check_task=CheckTasks.check_query_results,
                       check_items={'count(*)': nb - len(filter_ids)})

View File

@ -2304,7 +2304,7 @@ class TestQueryString(TestcaseBase):
check_task=CheckTasks.check_query_results, check_items={exp_res: res})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(default_string_field_name))
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions([default_string_field_name]))
def test_query_string_is_primary(self, expression):
"""
target: test query with output field only primary field

View File

@ -451,8 +451,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
dim = 1
fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
cf.gen_float_vec_field(dim=dim)]
schema = cf.gen_collection_schema(
fields=fields, primary_field="int64_1")
schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")
collection_w = self.init_collection_wrap(schema=schema)
# 2. insert data
@ -462,14 +461,11 @@ class TestCollectionSearchInvalid(TestcaseBase):
collection_w.insert(dataframe)
# 3. search with expression
log.info(
"test_search_with_expression: searching with expression: %s" % expression)
collection_w.create_index(
ct.default_float_vec_field_name, index_params=ct.default_flat_index)
log.info("test_search_with_expression: searching with expression: %s" % expression)
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
collection_w.load()
expression = expression.replace("&&", "and").replace("||", "or")
vectors = [[random.random() for _ in range(dim)]
for _ in range(default_nq)]
vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, nb, expression,
check_task=CheckTasks.err_res,
@ -4793,7 +4789,7 @@ class TestSearchString(TestcaseBase):
)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(ct.default_string_field_name))
@pytest.mark.parametrize("expression", cf.gen_normal_string_expressions([ct.default_string_field_name]))
def test_search_with_different_string_expr(self, dim, expression, _async, enable_dynamic_field):
"""
target: test search with different string expressions