Add test cases for `json_contains` expressions and binary indexes (#25808)

Signed-off-by: nico <cheng.yuan@zilliz.com>
pull/25850/head
nico 2023-07-21 18:38:59 +08:00 committed by GitHub
parent 4cb6351f87
commit eecf229b59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 260 additions and 8 deletions

View File

@ -1156,6 +1156,26 @@ class TestNewIndexBinary(TestcaseBase):
check_items={ct.err_code: 1,
ct.err_msg: "Invalid metric_type: L2, which does not match the index type: BIN_IVF_FLAT"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE", "JACCARD", "HAMMING", "TANIMOTO"])
def test_create_binary_index_HNSW(self, metric_type):
    """
    target: test create binary index hnsw
    method: create binary index hnsw with each candidate metric type
    expected: index creation succeeds, except TANIMOTO which is rejected
    """
    coll_name = cf.gen_unique_str(prefix)
    collection_w = self.init_collection_wrap(name=coll_name, schema=default_binary_schema)
    index_params = {'index_type': 'HNSW', "M": '18', "efConstruction": '240', 'metric_type': metric_type}
    if metric_type != "TANIMOTO":
        # supported metric: index builds and the stored params round-trip
        collection_w.create_index(default_binary_vec_field_name, index_params)
        assert collection_w.index()[0].params == index_params
    else:
        # TANIMOTO is expected to be rejected for this index type
        collection_w.create_index(default_binary_vec_field_name, index_params,
                                  check_task=CheckTasks.err_res,
                                  check_items={ct.err_code: 1,
                                               ct.err_msg: "metric type not found or not supported"})
"""
******************************************************************
The following cases are used to test `drop_index` function

View File

@ -556,6 +556,131 @@ class TestQueryParams(TestcaseBase):
term_expr = f'{ct.default_int64_field_name} in [{constant}]'
collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_query_expr_json_contains(self, enable_dynamic_field):
    """
    target: test query with expression using json_contains
    method: query with expression using json_contains
    expected: succeed
    """
    # 1. initialize with data
    collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
    # 2. insert data: row i carries json "list" = [i, i + limit), so any probe
    #    value v is contained in exactly `limit` consecutive rows
    limit = 99
    array = []
    for i in range(ct.default_nb):
        data = {
            ct.default_int64_field_name: i,
            ct.default_float_field_name: i * 1.0,
            ct.default_string_field_name: str(i),
            # list(range(...)) is the idiomatic form of [m for m in range(...)]
            ct.default_json_field_name: {"number": i, "list": list(range(i, i + limit))},
            ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
        }
        array.append(data)
    collection_w.insert(array)
    # 3. query with both accepted spellings of the operator
    collection_w.load()
    expressions = ["json_contains(json_field['list'], 1000)", "JSON_CONTAINS(json_field['list'], 1000)"]
    for expression in expressions:
        res = collection_w.query(expression)[0]
        assert len(res) == limit
@pytest.mark.tags(CaseLabel.L2)
def test_query_expr_list_json_contains(self):
    """
    target: test query with expression using json_contains
    method: query with expression using json_contains
    expected: succeed
    """
    # initialize a collection with the dynamic field enabled
    collection_w = self.init_collection_general(prefix, enable_dynamic_field=True)[0]
    # insert rows whose json field is a list of stringified ints [i, i + limit)
    limit = ct.default_nb // 4
    rows = [
        {
            ct.default_int64_field_name: idx,
            ct.default_json_field_name: [str(value) for value in range(idx, idx + limit)],
            ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
        }
        for idx in range(ct.default_nb)
    ]
    collection_w.insert(rows)
    # query: '1000' occurs in exactly `limit` rows; check via count(*)
    collection_w.load()
    for expression in ("json_contains(json_field, '1000')", "JSON_CONTAINS(json_field, '1000')"):
        res = collection_w.query(expression, output_fields=["count(*)"])[0]
        assert res[0]["count(*)"] == limit
@pytest.mark.tags(CaseLabel.L2)
def test_query_expr_json_contains_combined_with_normal(self, enable_dynamic_field):
    """
    target: test query with expression using json_contains
    method: query combining json_contains with a scalar predicate
    expected: succeed
    """
    # initialize with data
    collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
    # insert rows: json "list" = [i, i + limit)
    limit = ct.default_nb // 3
    rows = [
        {
            ct.default_int64_field_name: idx,
            ct.default_float_field_name: idx * 1.0,
            ct.default_string_field_name: str(idx),
            ct.default_json_field_name: {"number": idx, "list": [m for m in range(idx, idx + limit)]},
            ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
        }
        for idx in range(ct.default_nb)
    ]
    collection_w.insert(rows)
    # query: json_contains matches `limit` rows, the float predicate halves them
    collection_w.load()
    tar = 1000
    for func_name in ("json_contains", "JSON_CONTAINS"):
        expression = f"{func_name}(json_field['list'], {tar}) && float > {tar - limit // 2}"
        res = collection_w.query(expression)[0]
        assert len(res) == limit // 2
@pytest.mark.tags(CaseLabel.L2)
def test_query_expr_json_contains_pagination(self, enable_dynamic_field):
    """
    target: test query with expression using json_contains
    method: query with json_contains plus limit/offset pagination
    expected: succeed
    """
    # initialize with data
    collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
    # insert rows: json "list" = [i, i + limit), so 1000 matches `limit` rows
    limit = ct.default_nb // 3
    rows = [
        {
            ct.default_int64_field_name: idx,
            ct.default_float_field_name: idx * 1.0,
            ct.default_string_field_name: str(idx),
            ct.default_json_field_name: {"number": idx, "list": [m for m in range(idx, idx + limit)]},
            ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
        }
        for idx in range(ct.default_nb)
    ]
    collection_w.insert(rows)
    # paginated query: skipping `offset` of the `limit` matches leaves limit - offset
    collection_w.load()
    expressions = ("json_contains(json_field['list'], 1000)", "JSON_CONTAINS(json_field['list'], 1000)")
    offset = random.randint(1, limit)
    for expression in expressions:
        res = collection_w.query(expression, limit=limit, offset=offset)[0]
        assert len(res) == limit - offset
@pytest.mark.tags(CaseLabel.L1)
def test_query_output_field_none_or_empty(self, enable_dynamic_field):
"""

View File

@ -40,6 +40,7 @@ default_int64_field_name = ct.default_int64_field_name
# Short local aliases for the field-name constants shared via the `ct` module.
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_json_field_name = ct.default_json_field_name
# Default index spec used across the search cases (IVF_SQ8, COSINE, nlist=64).
default_index_params = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}}
# Pre-generated random query vectors shared by the search tests.
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
# First six entries of ct.all_index_types; per the name, the index types these
# tests treat as range-search-capable — verify against ct if the list changes.
range_search_supported_index = ct.all_index_types[:6]
@ -3028,6 +3029,112 @@ class TestCollectionSearch(TestcaseBase):
ids = hits.ids
assert set(ids).issubset(filter_ids_set)
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_expression_json_contains(self, enable_dynamic_field):
    """
    target: test search with expression using json_contains
    method: search with expression (json_contains)
    expected: search successfully
    """
    # 1. initialize with data
    collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
    # 2. insert data: row i carries list [i, i+1, i+2], so the probe value 100
    #    appears in exactly 3 rows (i = 98, 99, 100)
    array = []
    for i in range(default_nb):
        data = {
            default_int64_field_name: i,
            default_float_field_name: i * 1.0,
            default_string_field_name: str(i),
            default_json_field_name: {"number": i, "list": [i, i + 1, i + 2]},
            default_float_vec_field_name: gen_vectors(1, default_dim)[0]
        }
        array.append(data)
    collection_w.insert(array)
    # 3. search with the json_contains filter (both spellings are accepted)
    collection_w.load()
    # fix: log the actual test name (was copy-pasted from another case)
    log.info("test_search_with_expression_json_contains: Searching collection %s" % collection_w.name)
    expressions = ["json_contains(json_field['list'], 100)", "JSON_CONTAINS(json_field['list'], 100)"]
    for expression in expressions:
        collection_w.search(vectors[:default_nq], default_search_field,
                            default_search_params, default_limit, expression,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": 3})
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_expression_json_contains_list(self, auto_id):
    """
    target: test search with expression using json_contains
    method: search with expression (json_contains) on a list-valued json field
    expected: search successfully
    """
    # 1. initialize with data
    collection_w = self.init_collection_general(prefix, auto_id=auto_id, enable_dynamic_field=True)[0]
    # 2. insert data: row i holds the list [i, i + limit), so the probe value
    #    100 is contained in exactly `limit` rows
    limit = 100
    array = []
    for i in range(default_nb):
        data = {
            default_int64_field_name: i,
            # list(range(...)) is the idiomatic form of [j for j in range(...)]
            default_json_field_name: list(range(i, i + limit)),
            default_float_vec_field_name: gen_vectors(1, default_dim)[0]
        }
        if auto_id:
            # the primary key is server-generated when auto_id is on
            data.pop(default_int64_field_name, None)
        array.append(data)
    collection_w.insert(array)
    # 3. search
    collection_w.load()
    # fix: log the actual test name (was copy-pasted from another case)
    log.info("test_search_with_expression_json_contains_list: Searching collection %s" % collection_w.name)
    expressions = ["json_contains(json_field, 100)", "JSON_CONTAINS(json_field, 100)"]
    for expression in expressions:
        collection_w.search(vectors[:default_nq], default_search_field,
                            default_search_params, limit, expression,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": limit})
@pytest.mark.tags(CaseLabel.L2)
def test_search_expression_json_contains_combined_with_normal(self, enable_dynamic_field):
    """
    target: test search with expression using json_contains
    method: search combining json_contains with a scalar predicate
    expected: search successfully
    """
    # 1. initialize with data
    collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
    # 2. insert data: json "list" holds stringified values [i, i + limit), so
    #    '1000' matches `limit` rows and the int64 predicate halves them
    limit = 100
    array = []
    for i in range(default_nb):
        data = {
            default_int64_field_name: i,
            default_float_field_name: i * 1.0,
            default_string_field_name: str(i),
            default_json_field_name: {"number": i, "list": [str(j) for j in range(i, i + limit)]},
            default_float_vec_field_name: gen_vectors(1, default_dim)[0]
        }
        array.append(data)
    collection_w.insert(array)
    # 3. search
    collection_w.load()
    # fix: log the actual test name (was copy-pasted from another case)
    log.info("test_search_expression_json_contains_combined_with_normal: Searching collection %s" % collection_w.name)
    tar = 1000
    expressions = [f"json_contains(json_field['list'], '{tar}') && int64 > {tar - limit // 2}",
                   f"JSON_CONTAINS(json_field['list'], '{tar}') && int64 > {tar - limit // 2}"]
    for expression in expressions:
        collection_w.search(vectors[:default_nq], default_search_field,
                            default_search_params, limit, expression,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": limit // 2})
@pytest.mark.tags(CaseLabel.L2)
def test_search_expression_all_data_type(self, nb, nq, dim, auto_id, _async, enable_dynamic_field):
"""
@ -3289,8 +3396,7 @@ class TestCollectionSearch(TestcaseBase):
"output_fields": [field_name]})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="issue #23661")
@pytest.mark.parametrize("index", ct.all_index_types[6:8])
@pytest.mark.parametrize("index", ["HNSW", "BIN_FLAT", "BIN_IVF_FLAT"])
def test_search_output_field_vector_after_binary_index(self, index):
"""
target: test search with output vector field after binary index
@ -3306,19 +3412,20 @@ class TestCollectionSearch(TestcaseBase):
collection_w.insert(data)
# 2. create index and load
default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "JACCARD"}
default_index = {"index_type": index, "metric_type": "JACCARD",
"params": {"nlist": 128, "efConstruction": 64, "M": 10}}
collection_w.create_index(binary_field_name, default_index)
collection_w.load()
# 3. search with output field vector
search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
search_params = {"metric_type": "JACCARD"}
binary_vectors = cf.gen_binary_vectors(1, default_dim)[1]
res = collection_w.search(binary_vectors, binary_field_name,
ct.default_search_binary_params, 2, default_search_exp,
search_params, 2, default_search_exp,
output_fields=[binary_field_name])[0]
# 4. check the result vectors should be equal to the inserted
assert res[0][0].entity.binary_vector == data[binary_field_name][res[0][0].id]
assert res[0][0].entity.binary_vector == [data[binary_field_name][res[0][0].id]]
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("dim", [32, 128, 768])
@ -5037,8 +5144,8 @@ class TestSearchPagination(TestcaseBase):
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": limit,
"_async": _async})[0]
"limit": limit,
"_async": _async})[0]
# 3. search with offset+limit
res = collection_w.search(vectors[:default_nq], default_search_field, default_search_params,
limit+offset, default_search_exp, _async=_async)[0]