mirror of https://github.com/milvus-io/milvus.git
Add test cases of json contain and binary index (#25808)
Signed-off-by: nico <cheng.yuan@zilliz.com>
pull/25850/head
parent 4cb6351f87
commit eecf229b59
@@ -1156,6 +1156,26 @@ class TestNewIndexBinary(TestcaseBase):
                                  check_items={ct.err_code: 1,
                                               ct.err_msg: "Invalid metric_type: L2, which does not match the index type: BIN_IVF_FLAT"})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE", "JACCARD", "HAMMING", "TANIMOTO"])
    def test_create_binary_index_HNSW(self, metric_type):
        """
        target: test create binary index hnsw
        method: create binary index hnsw
        expected: succeed
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
        binary_index_params = {'index_type': 'HNSW', "M": '18', "efConstruction": '240', 'metric_type': metric_type}
        if metric_type == "TANIMOTO":
            collection_w.create_index(default_binary_vec_field_name, binary_index_params,
                                      check_task=CheckTasks.err_res,
                                      check_items={ct.err_code: 1,
                                                   ct.err_msg: "metric type not found or not supported"})
        else:
            collection_w.create_index(default_binary_vec_field_name, binary_index_params)
            assert collection_w.index()[0].params == binary_index_params

    """
    ******************************************************************
      The following cases are used to test `drop_index` function
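
For context (a hedged, stand-alone sketch rather than part of the diff): the new case above exercises HNSW index creation on a BINARY_VECTOR field across several metric types, with TANIMOTO expected to be rejected. In raw pymilvus this boils down to a create_index call on the binary field; the collection name, schema, and the nested "params" layout below are illustrative assumptions, and whether a given index/metric combination is accepted depends on the server version, which is exactly what the parametrized test probes.

# Minimal sketch of building an HNSW index on a binary vector field (all names are hypothetical).
from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType

connections.connect(host="localhost", port="19530")              # assumed local Milvus instance
fields = [
    FieldSchema("id", DataType.INT64, is_primary=True),
    FieldSchema("binary_vector", DataType.BINARY_VECTOR, dim=128),
]
coll = Collection("binary_hnsw_demo", CollectionSchema(fields))
index_params = {"index_type": "HNSW", "metric_type": "JACCARD",  # the test also tries L2/IP/COSINE/HAMMING/TANIMOTO
                "params": {"M": 18, "efConstruction": 240}}
coll.create_index("binary_vector", index_params)
print(coll.indexes[0].params)
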
@@ -556,6 +556,131 @@ class TestQueryParams(TestcaseBase):
        term_expr = f'{ct.default_int64_field_name} in [{constant}]'
        collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_query_expr_json_contains(self, enable_dynamic_field):
        """
        target: test query with expression using json_contains
        method: query with expression using json_contains
        expected: succeed
        """
        # 1. initialize with data
        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]

        # 2. insert data
        limit = 99
        array = []
        for i in range(ct.default_nb):
            data = {
                ct.default_int64_field_name: i,
                ct.default_float_field_name: i * 1.0,
                ct.default_string_field_name: str(i),
                ct.default_json_field_name: {"number": i, "list": [m for m in range(i, i + limit)]},
                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
            }
            array.append(data)
        collection_w.insert(array)

        # 3. query
        collection_w.load()
        expressions = ["json_contains(json_field['list'], 1000)", "JSON_CONTAINS(json_field['list'], 1000)"]
        for expression in expressions:
            res = collection_w.query(expression)[0]
            assert len(res) == limit
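
As a sanity check on the assertion above (a reading of the test, not text from the diff): row i stores list = [i, ..., i + 98], so json_contains(json_field['list'], 1000) matches exactly the rows whose window covers 1000, i.e. i from 902 through 1000, which is limit = 99 rows. A few lines of plain Python reproduce the count:

# Plain-Python check of the expected match count (nb stands in for ct.default_nb; any nb > 1000 works).
nb, limit, target = 2000, 99, 1000
matching = [i for i in range(nb) if target in range(i, i + limit)]
assert len(matching) == limit      # rows 902..1000 inclusive
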
    @pytest.mark.tags(CaseLabel.L2)
    def test_query_expr_list_json_contains(self):
        """
        target: test query with expression using json_contains
        method: query with expression using json_contains
        expected: succeed
        """
        # 1. initialize with data
        collection_w = self.init_collection_general(prefix, enable_dynamic_field=True)[0]

        # 2. insert data
        limit = ct.default_nb // 4
        array = []
        for i in range(ct.default_nb):
            data = {
                ct.default_int64_field_name: i,
                ct.default_json_field_name: [str(m) for m in range(i, i + limit)],
                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
            }
            array.append(data)
        collection_w.insert(array)

        # 3. query
        collection_w.load()
        expressions = ["json_contains(json_field, '1000')", "JSON_CONTAINS(json_field, '1000')"]
        for expression in expressions:
            res = collection_w.query(expression, output_fields=["count(*)"])[0]
            assert res[0]["count(*)"] == limit
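
For orientation (a hedged, stand-alone sketch rather than part of the diff): here the JSON field itself is a top-level array of strings, so JSON_CONTAINS is applied to the field directly instead of to a nested path, and the candidate element is quoted so it matches as a string; the count(*) output field asks the server for the row count instead of returning the rows themselves. Outside the test harness, and with hypothetical names, the same kind of query looks roughly like this:

# Hedged sketch of a json_contains query with a server-side count (collection/field names are illustrative).
from pymilvus import connections, Collection

connections.connect(host="localhost", port="19530")      # assumed local Milvus instance
coll = Collection("json_contains_demo")                   # hypothetical, already-built collection
coll.load()
res = coll.query(expr="JSON_CONTAINS(json_field, '1000')",
                 output_fields=["count(*)"])              # returns [{'count(*)': <n>}]
print(res[0]["count(*)"])
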
    @pytest.mark.tags(CaseLabel.L2)
    def test_query_expr_json_contains_combined_with_normal(self, enable_dynamic_field):
        """
        target: test query with expression using json_contains
        method: query with expression using json_contains
        expected: succeed
        """
        # 1. initialize with data
        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]

        # 2. insert data
        limit = ct.default_nb // 3
        array = []
        for i in range(ct.default_nb):
            data = {
                ct.default_int64_field_name: i,
                ct.default_float_field_name: i * 1.0,
                ct.default_string_field_name: str(i),
                ct.default_json_field_name: {"number": i, "list": [m for m in range(i, i + limit)]},
                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
            }
            array.append(data)
        collection_w.insert(array)

        # 3. query
        collection_w.load()
        tar = 1000
        expressions = [f"json_contains(json_field['list'], {tar}) && float > {tar - limit // 2}",
                       f"JSON_CONTAINS(json_field['list'], {tar}) && float > {tar - limit // 2}"]
        for expression in expressions:
            res = collection_w.query(expression)[0]
            assert len(res) == limit // 2

    @pytest.mark.tags(CaseLabel.L2)
    def test_query_expr_json_contains_pagination(self, enable_dynamic_field):
        """
        target: test query with expression using json_contains
        method: query with expression using json_contains
        expected: succeed
        """
        # 1. initialize with data
        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]

        # 2. insert data
        limit = ct.default_nb // 3
        array = []
        for i in range(ct.default_nb):
            data = {
                ct.default_int64_field_name: i,
                ct.default_float_field_name: i * 1.0,
                ct.default_string_field_name: str(i),
                ct.default_json_field_name: {"number": i, "list": [m for m in range(i, i + limit)]},
                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
            }
            array.append(data)
        collection_w.insert(array)

        # 3. query
        collection_w.load()
        expressions = ["json_contains(json_field['list'], 1000)", "JSON_CONTAINS(json_field['list'], 1000)"]
        offset = random.randint(1, limit)
        for expression in expressions:
            res = collection_w.query(expression, limit=limit, offset=offset)[0]
            assert len(res) == limit - offset

    @pytest.mark.tags(CaseLabel.L1)
    def test_query_output_field_none_or_empty(self, enable_dynamic_field):
        """
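
A brief reading of the pagination case in this hunk (an inference from the test, not a statement in the diff): the json_contains filter matches exactly limit rows in total, so after skipping offset of them at most limit - offset can come back, which is what the assertion checks. In raw pymilvus the same pagination is expressed through the limit and offset keyword arguments of query; the sketch below uses hypothetical names.

# Hedged sketch of a paginated json_contains query (collection/field names are illustrative).
from pymilvus import connections, Collection

connections.connect(host="localhost", port="19530")           # assumed local Milvus instance
coll = Collection("json_contains_demo")                        # hypothetical, already-built collection
coll.load()
page = coll.query(expr="json_contains(json_field['list'], 1000)",
                  offset=10, limit=50,                         # skip 10 matching rows, return at most 50
                  output_fields=["json_field"])
print(len(page))
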
@@ -40,6 +40,7 @@ default_int64_field_name = ct.default_int64_field_name
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_json_field_name = ct.default_json_field_name
default_index_params = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}}
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
range_search_supported_index = ct.all_index_types[:6]
@@ -3028,6 +3029,112 @@ class TestCollectionSearch(TestcaseBase):
            ids = hits.ids
            assert set(ids).issubset(filter_ids_set)

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_expression_json_contains(self, enable_dynamic_field):
        """
        target: test search with expression using json_contains
        method: search with expression (json_contains)
        expected: search successfully
        """
        # 1. initialize with data
        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]

        # 2. insert data
        array = []
        for i in range(default_nb):
            data = {
                default_int64_field_name: i,
                default_float_field_name: i*1.0,
                default_string_field_name: str(i),
                default_json_field_name: {"number": i, "list": [i, i+1, i+2]},
                default_float_vec_field_name: gen_vectors(1, default_dim)[0]
            }
            array.append(data)
        collection_w.insert(array)

        # 2. search
        collection_w.load()
        log.info("test_search_with_output_field_json_contains: Searching collection %s" % collection_w.name)
        expressions = ["json_contains(json_field['list'], 100)", "JSON_CONTAINS(json_field['list'], 100)"]
        for expression in expressions:
            collection_w.search(vectors[:default_nq], default_search_field,
                                default_search_params, default_limit, expression,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": default_nq,
                                             "limit": 3})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_with_expression_json_contains_list(self, auto_id):
        """
        target: test search with expression using json_contains
        method: search with expression (json_contains)
        expected: search successfully
        """
        # 1. initialize with data
        collection_w = self.init_collection_general(prefix, auto_id=auto_id, enable_dynamic_field=True)[0]

        # 2. insert data
        limit = 100
        array = []
        for i in range(default_nb):
            data = {
                default_int64_field_name: i,
                default_json_field_name: [j for j in range(i, i + limit)],
                default_float_vec_field_name: gen_vectors(1, default_dim)[0]
            }
            if auto_id:
                data.pop(default_int64_field_name, None)
            array.append(data)
        collection_w.insert(array)

        # 2. search
        collection_w.load()
        log.info("test_search_with_output_field_json_contains: Searching collection %s" % collection_w.name)
        expressions = ["json_contains(json_field, 100)", "JSON_CONTAINS(json_field, 100)"]
        for expression in expressions:
            collection_w.search(vectors[:default_nq], default_search_field,
                                default_search_params, limit, expression,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": default_nq,
                                             "limit": limit})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_expression_json_contains_combined_with_normal(self, enable_dynamic_field):
        """
        target: test search with expression using json_contains
        method: search with expression (json_contains)
        expected: search successfully
        """
        # 1. initialize with data
        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]

        # 2. insert data
        limit = 100
        array = []
        for i in range(default_nb):
            data = {
                default_int64_field_name: i,
                default_float_field_name: i * 1.0,
                default_string_field_name: str(i),
                default_json_field_name: {"number": i, "list": [str(j) for j in range(i, i + limit)]},
                default_float_vec_field_name: gen_vectors(1, default_dim)[0]
            }
            array.append(data)
        collection_w.insert(array)

        # 2. search
        collection_w.load()
        log.info("test_search_with_output_field_json_contains: Searching collection %s" % collection_w.name)
        tar = 1000
        expressions = [f"json_contains(json_field['list'], '{tar}') && int64 > {tar - limit // 2}",
                       f"JSON_CONTAINS(json_field['list'], '{tar}') && int64 > {tar - limit // 2}"]
        for expression in expressions:
            collection_w.search(vectors[:default_nq], default_search_field,
                                default_search_params, limit, expression,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": default_nq,
                                             "limit": limit // 2})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_expression_all_data_type(self, nb, nq, dim, auto_id, _async, enable_dynamic_field):
        """
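
For orientation (a hedged, stand-alone sketch, not part of the diff): the search cases above pass the json_contains predicate as the boolean filter of a vector search, which in raw pymilvus is the expr argument of Collection.search; the collection, field names, and search parameters below are illustrative assumptions.

# Hedged sketch of a vector search filtered by json_contains (names and parameters are illustrative).
import random
from pymilvus import connections, Collection

connections.connect(host="localhost", port="19530")            # assumed local Milvus instance
coll = Collection("json_contains_demo")                         # hypothetical, already-built collection
coll.load()
query_vectors = [[random.random() for _ in range(128)]]         # assumes a 128-dim FLOAT_VECTOR field
res = coll.search(data=query_vectors,
                  anns_field="float_vector",                    # hypothetical vector field name
                  param={"metric_type": "COSINE", "params": {"nprobe": 10}},
                  limit=10,
                  expr="json_contains(json_field['list'], 100)")
for hits in res:
    print(hits.ids)
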
@@ -3289,8 +3396,7 @@ class TestCollectionSearch(TestcaseBase):
                                          "output_fields": [field_name]})
 
     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip(reason="issue #23661")
-    @pytest.mark.parametrize("index", ct.all_index_types[6:8])
+    @pytest.mark.parametrize("index", ["HNSW", "BIN_FLAT", "BIN_IVF_FLAT"])
     def test_search_output_field_vector_after_binary_index(self, index):
         """
         target: test search with output vector field after binary index
@@ -3306,19 +3412,20 @@ class TestCollectionSearch(TestcaseBase):
         collection_w.insert(data)
 
         # 2. create index and load
-        default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "JACCARD"}
+        default_index = {"index_type": index, "metric_type": "JACCARD",
+                         "params": {"nlist": 128, "efConstruction": 64, "M": 10}}
         collection_w.create_index(binary_field_name, default_index)
         collection_w.load()
 
         # 3. search with output field vector
-        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
+        search_params = {"metric_type": "JACCARD"}
         binary_vectors = cf.gen_binary_vectors(1, default_dim)[1]
         res = collection_w.search(binary_vectors, binary_field_name,
-                                  ct.default_search_binary_params, 2, default_search_exp,
+                                  search_params, 2, default_search_exp,
                                   output_fields=[binary_field_name])[0]
 
         # 4. check the result vectors should be equal to the inserted
-        assert res[0][0].entity.binary_vector == data[binary_field_name][res[0][0].id]
+        assert res[0][0].entity.binary_vector == [data[binary_field_name][res[0][0].id]]
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("dim", [32, 128, 768])
@@ -5037,8 +5144,8 @@ class TestSearchPagination(TestcaseBase):
                                         default_search_exp, _async=_async,
                                         check_task=CheckTasks.check_search_results,
                                         check_items={"nq": default_nq,
-                                                     "limit": limit,
-                                                     "_async": _async})[0]
+                                                      "limit": limit,
+                                                      "_async": _async})[0]
         # 3. search with offset+limit
         res = collection_w.search(vectors[:default_nq], default_search_field, default_search_params,
                                   limit+offset, default_search_exp, _async=_async)[0]