mirror of https://github.com/milvus-io/milvus.git

test: Add group search test on sparse vectors (#36562)

related issue: #36295; also adds one test for alter alias.
Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
pull/36469/head

parent 7c2cb8c5d4
commit e6c7fd6605
@@ -22,15 +22,10 @@ default_search_field = ct.default_float_vec_field_name
 default_search_params = ct.default_search_params


 class TestAliasParams(TestcaseBase):
     """ Test cases of alias interface parameters"""
     pass


 class TestAliasParamsInvalid(TestcaseBase):
     """ Negative test cases of alias interface parameters"""

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("alias_name", ["12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256))])
     def test_alias_create_alias_with_invalid_name(self, alias_name):
         """
@@ -52,6 +47,59 @@ class TestAliasParamsInvalid(TestcaseBase):
 class TestAliasOperation(TestcaseBase):
     """ Test cases of alias interface operations"""

+    @pytest.mark.tags(CaseLabel.L0)
+    def test_alias_alter_operation_default(self):
+        """
+        target: test collection altering alias
+        method:
+                1. create collection_1, bind alias to collection_1 and insert 2000 entities
+                2. create collection_2 with 1500 entities
+                3. search on alias
+                   verify num_entities=2000
+                4. alter alias to collection_2 and search on alias
+                   verify num_entities=1500
+        """
+        c_name1 = cf.gen_unique_str("collection1")
+        collection_w1 = self.init_collection_wrap(name=c_name1, schema=default_schema,
+                                                  check_task=CheckTasks.check_collection_property,
+                                                  check_items={exp_name: c_name1, exp_schema: default_schema})
+        alias_name = cf.gen_unique_str(prefix)
+        # create a collection alias and bind it to collection1
+        self.utility_wrap.create_alias(collection_w1.name, alias_name)
+        collection_alias = self.init_collection_wrap(name=alias_name)
+
+        nb1 = 2000
+        data1 = cf.gen_default_dataframe_data(nb=nb1)
+        import pandas as pd
+        string_values = pd.Series(data=[str(i) for i in range(nb1)], dtype="string")
+        data1[ct.default_string_field_name] = string_values
+        collection_alias.insert(data1)
+        collection_alias.create_index(ct.default_float_vec_field_name, ct.default_index)
+        collection_alias.load()
+
+        assert collection_alias.num_entities == nb1 == collection_w1.num_entities
+        res1 = collection_alias.query(expr="", output_fields=["count(*)"])[0]
+        assert res1[0].get("count(*)") == nb1
+
+        # create collection2
+        c_name2 = cf.gen_unique_str("collection2")
+        collection_w2 = self.init_collection_wrap(name=c_name2, schema=default_schema,
+                                                  check_task=CheckTasks.check_collection_property,
+                                                  check_items={exp_name: c_name2, exp_schema: default_schema})
+        nb2 = 1500
+        data2 = cf.gen_default_dataframe_data(nb=nb2)
+        string_values = pd.Series(data=[str(i) for i in range(nb2)], dtype="string")
+        data2[ct.default_string_field_name] = string_values
+        collection_w2.insert(data2)
+        collection_w2.create_index(ct.default_float_vec_field_name, ct.default_index)
+        collection_w2.load()
+
+        # alter the collection alias to collection2
+        self.utility_wrap.alter_alias(collection_w2.name, alias_name)
+        assert collection_alias.num_entities == nb2 == collection_w2.num_entities
+        res1 = collection_alias.query(expr="", output_fields=["count(*)"])[0]
+        assert res1[0].get("count(*)") == nb2
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_alias_create_operation_default(self):
         """
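To make the behavior this new test pins down concrete: a pymilvus client can open a Collection handle by alias name, and after alter_alias the same handle transparently serves the new target collection. A minimal sketch, assuming a running Milvus server at localhost:19530 and two pre-existing collections (all names here are illustrative, not the test's own fixtures):

    from pymilvus import connections, utility, Collection

    connections.connect(host="localhost", port="19530")

    utility.create_alias("collection_1", "my_alias")  # alias -> collection_1
    alias_handle = Collection("my_alias")             # handles accept alias names
    print(alias_handle.num_entities)                  # reflects collection_1

    utility.alter_alias("collection_2", "my_alias")   # repoint alias -> collection_2
    print(alias_handle.num_entities)                  # now reflects collection_2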
@@ -81,73 +129,6 @@ class TestAliasOperation(TestcaseBase):
         assert [p.name for p in collection_w.partitions] == [
             p.name for p in collection_alias.partitions]

-    @pytest.mark.tags(CaseLabel.L1)
-    def test_alias_alter_operation_default(self):
-        """
-        target: test collection altering alias
-        method:
-                1. create collection_1 with 10 partitions and its alias alias_a
-                2. create collection_2 with 5 partitions and its alias alias_b
-                3. collection_1 alter alias to alias_b
-        expected:
-                in step 1, collection_1 is equal to alias_a
-                in step 2, collection_2 is equal to alias_b
-                in step 3, collection_1 is equal to alias_a and alias_b, and collection_2 is not equal to alias_b
-        """
-        self._connect()
-
-        # create collection_1 with 10 partitions and its alias alias_a
-        c_1_name = cf.gen_unique_str("collection")
-        collection_1 = self.init_collection_wrap(name=c_1_name, schema=default_schema,
-                                                 check_task=CheckTasks.check_collection_property,
-                                                 check_items={exp_name: c_1_name, exp_schema: default_schema})
-        for _ in range(10):
-            partition_name = cf.gen_unique_str("partition")
-            # create partitions with different names and check that each partition exists
-            self.init_partition_wrap(collection_1, partition_name)
-            assert collection_1.has_partition(partition_name)[0]
-
-        alias_a_name = cf.gen_unique_str(prefix)
-        self.utility_wrap.create_alias(collection_1.name, alias_a_name)
-        collection_alias_a = self.init_collection_wrap(name=alias_a_name,
-                                                       check_task=CheckTasks.check_collection_property,
-                                                       check_items={exp_name: alias_a_name, exp_schema: default_schema})
-        # assert the collection is equal to its alias according to partitions
-        assert [p.name for p in collection_1.partitions] == [
-            p.name for p in collection_alias_a.partitions]
-
-        # create collection_2 with 5 partitions and its alias alias_b
-        c_2_name = cf.gen_unique_str("collection")
-        collection_2 = self.init_collection_wrap(name=c_2_name, schema=default_schema,
-                                                 check_task=CheckTasks.check_collection_property,
-                                                 check_items={exp_name: c_2_name, exp_schema: default_schema})
-
-        for _ in range(5):
-            partition_name = cf.gen_unique_str("partition")
-            # create partitions with different names and check that each partition exists
-            self.init_partition_wrap(collection_2, partition_name)
-            assert collection_2.has_partition(partition_name)[0]
-
-        alias_b_name = cf.gen_unique_str(prefix)
-        self.utility_wrap.create_alias(collection_2.name, alias_b_name)
-        collection_alias_b = self.init_collection_wrap(name=alias_b_name,
-                                                       check_task=CheckTasks.check_collection_property,
-                                                       check_items={exp_name: alias_b_name, exp_schema: default_schema})
-        # assert the collection is equal to its alias according to partitions
-        assert [p.name for p in collection_2.partitions] == [
-            p.name for p in collection_alias_b.partitions]
-
-        # alter alias_b so that it points to collection_1
-        self.utility_wrap.alter_alias(collection_1.name, alias_b_name)
-
-        # collection_1 now has two aliases, alias_a and alias_b, while collection_2 has no alias any more
-        assert [p.name for p in collection_1.partitions] == [
-            p.name for p in collection_alias_b.partitions]
-        assert [p.name for p in collection_1.partitions] == [
-            p.name for p in collection_alias_a.partitions]
-        assert [p.name for p in collection_2.partitions] != [
-            p.name for p in collection_alias_b.partitions]

     @pytest.mark.tags(CaseLabel.L1)
     def test_alias_drop_operation_default(self):
         """
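A note on the alias model that both the deleted test and its replacement above rely on: a collection may own many aliases, but each alias resolves to exactly one collection, so alter_alias implicitly detaches the alias from its previous owner. A sketch of inspecting that state with plain pymilvus (collection names are illustrative):

    from pymilvus import connections, utility

    connections.connect(host="localhost", port="19530")
    utility.create_alias("collection_1", "alias_a")
    utility.create_alias("collection_2", "alias_b")

    utility.alter_alias("collection_1", "alias_b")  # take alias_b away from collection_2

    print(utility.list_aliases("collection_1"))     # expected: ['alias_a', 'alias_b']
    print(utility.list_aliases("collection_2"))     # expected: []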
@@ -187,128 +168,7 @@ class TestAliasOperation(TestcaseBase):
                                          check_task=CheckTasks.err_res,
                                          check_items=error)

-    @pytest.mark.tags(CaseLabel.L1)
-    def test_alias_exec_operations_as_collection(self):
-        """
-        target: test alias for
-                1. creating a partition,
-                2. inserting data,
-                3. creating an index,
-                4. loading the collection,
-                5. searching,
-                6. releasing the collection
-        method: follow the steps in target
-        expected: all steps operated by alias can work
-        """
-        create_partition_flag = True
-        insert_data_flag = True
-        create_index_flag = True
-        load_collection_flag = True
-        search_flag = True
-        release_collection_flag = True
-
-        self._connect()
-        c_name = cf.gen_unique_str("collection")
-        collection_w = self.init_collection_wrap(name=c_name, schema=default_schema,
-                                                 check_task=CheckTasks.check_collection_property,
-                                                 check_items={exp_name: c_name, exp_schema: default_schema})
-        alias_name = cf.gen_unique_str(prefix)
-        self.utility_wrap.create_alias(collection_w.name, alias_name)
-        # collection_w.create_alias(alias_name)
-        collection_alias, _ = self.collection_wrap.init_collection(name=alias_name,
-                                                                   check_task=CheckTasks.check_collection_property,
-                                                                   check_items={exp_name: alias_name,
-                                                                                exp_schema: default_schema})
-
-        # create partition by alias
-        partition_name = cf.gen_unique_str("partition")
-        try:
-            collection_alias.create_partition(partition_name)
-        except Exception as e:
-            log.info(f"alias create partition failed with exception {e}")
-            create_partition_flag = False
-            collection_w.create_partition(partition_name)
-
-        # assert partition
-        pytest.assume(create_partition_flag is True and
-                      [p.name for p in collection_alias.partitions] == [p.name for p in collection_w.partitions])
-
-        # insert data by alias
-        df = cf.gen_default_dataframe_data(ct.default_nb)
-        try:
-            collection_alias.insert(data=df)
-        except Exception as e:
-            log.info(f"alias insert data failed with exception {e}")
-            insert_data_flag = False
-            collection_w.insert(data=df)
-
-        # assert insert data
-        pytest.assume(insert_data_flag is True and
-                      collection_w.num_entities == ct.default_nb and
-                      collection_alias.num_entities == ct.default_nb)
-
-        # create index by alias
-        default_index = {"index_type": "IVF_FLAT",
-                         "params": {"nlist": 128}, "metric_type": "L2"}
-        try:
-            collection_alias.create_index(
-                field_name="float_vector", index_params=default_index)
-        except Exception as e:
-            log.info(f"alias create index failed with exception {e}")
-            create_index_flag = False
-            collection_w.create_index(
-                field_name="float_vector", index_params=default_index)
-
-        # assert create index
-        pytest.assume(create_index_flag is True and
-                      collection_alias.has_index() is True and
-                      collection_w.has_index()[0] is True)
-
-        # load by alias
-        try:
-            collection_alias.load()
-        except Exception as e:
-            log.info(f"alias load collection failed with exception {e}")
-            load_collection_flag = False
-            collection_w.load()
-        # assert load
-        pytest.assume(load_collection_flag is True)
-
-        # search by alias
-        topK = 5
-        search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
-
-        query = [[random.random() for _ in range(ct.default_dim)]
-                 for _ in range(1)]
-        alias_res = None
-        try:
-            alias_res = collection_alias.search(
-                query, "float_vector", search_params, topK,
-                "int64 >= 0", output_fields=["int64"]
-            )
-        except Exception as e:
-            log.info(f"alias search failed with exception {e}")
-            search_flag = False
-
-        collection_res, _ = collection_w.search(
-            query, "float_vector", search_params, topK,
-            "int64 >= 0", output_fields=["int64"]
-        )
-        # assert search
-        pytest.assume(
-            search_flag is True and alias_res[0].ids == collection_res[0].ids)
-
-        # release by alias
-        try:
-            collection_alias.release()
-        except Exception as e:
-            log.info(f"alias release failed with exception {e}")
-            release_collection_flag = False
-            collection_w.release()
-        # assert release
-        pytest.assume(release_collection_flag is True)

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_alias_called_by_utility_has_collection(self):
         """
         target: test utility has collection by alias
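The removed test above leans on pytest.assume (from the pytest-assume plugin) rather than a bare assert, so one failed check records a failure without aborting the remaining alias operations. A tiny illustration of the difference, with made-up values:

    import pytest

    def test_soft_assertions():
        # with pytest-assume, both checks run and both failures are reported
        pytest.assume(1 + 1 == 3)   # recorded as a failure, execution continues
        pytest.assume(2 + 2 == 4)   # still evaluated
        # a bare `assert 1 + 1 == 3` would have stopped the test at the first line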
@@ -334,7 +194,7 @@ class TestAliasOperation(TestcaseBase):

         assert res is True

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_alias_called_by_utility_drop_collection(self):
         """
         target: test utility drop collection by alias
@@ -365,7 +225,7 @@ class TestAliasOperation(TestcaseBase):
         self.utility_wrap.drop_collection(c_name)
         assert not self.utility_wrap.has_collection(c_name)[0]

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_alias_called_by_utility_has_partition(self):
         """
         target: test utility has partition by alias
@@ -482,7 +342,7 @@ class TestAliasOperationInvalid(TestcaseBase):
         #                          check_task=CheckTasks.err_res,
         #                          check_items=error)

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_alias_drop_not_exist_alias(self):
         """
         target: test dropping an alias which does not exist
@@ -547,7 +407,7 @@ class TestAliasOperationInvalid(TestcaseBase):
         #                          check_task=CheckTasks.err_res,
         #                          check_items=error)

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_alias_create_dup_name_collection(self):
         """
         target: test creating a collection with the same name as an alias, but a different schema
@@ -571,7 +431,7 @@ class TestAliasOperationInvalid(TestcaseBase):
                                          check_task=CheckTasks.err_res,
                                          check_items=error)

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_alias_drop_collection_by_alias(self):
         """
         target: test dropping a collection by alias
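The negative cases above reflect a server-side rule these tests exercise: an alias can stand in for a collection in data-plane calls (has_collection resolves it), but collection management through an alias is rejected; for example, dropping a collection by its alias is expected to fail. A hedged sketch of that expected behavior (names illustrative):

    from pymilvus import connections, utility

    connections.connect(host="localhost", port="19530")
    utility.create_alias("my_collection", "my_alias")

    # has_collection resolves aliases, so this is expected to be True
    print(utility.has_collection("my_alias"))

    # dropping by alias is expected to be rejected; drop by the real name instead
    try:
        utility.drop_collection("my_alias")
    except Exception as e:
        print(f"drop by alias rejected as expected: {e}")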
@@ -1,13 +1,16 @@
-import random
 import re
 import math  # do not remove `math`
 import pytest
+import random
 import numpy as np
 import pandas as pd
 from pymilvus import DataType, AnnSearchRequest, RRFRanker, WeightedRanker

 from common.common_type import CaseLabel, CheckTasks
 from common import common_type as ct
 from common import common_func as cf
+from common import common_params as cp
 from common.code_mapping import QueryErrorMessage as qem
 from common.common_params import (
     FieldParams, MetricType, DefaultVectorIndexParams, DefaultScalarIndexParams, Expr, AlterIndexParams
@@ -1889,7 +1892,10 @@ class TestMixScenes(TestcaseBase):
 class TestGroupSearch(TestCaseClassBase):
     """
     Testing group search scenarios
-    1. collection schema: int64_pk(auto_id), float16_vector, float_vector, bfloat16_vector, varchar
+    1. collection schema:
+       int64_pk(auto_id), varchar,
+       float16_vector, float_vector, bfloat16_vector, sparse_vector,
+       inverted_varchar
     2. varchar field is inserted with dup values for group by
     3. index for each vector field with different index types, dims and metric types
     Author: Yanliang567
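As a concrete reference for the schema this class describes, here is a minimal sketch of an equivalent collection built directly with pymilvus. Field names and dims mirror the docstring; treat it as illustrative, not the test suite's own helper code:

    from pymilvus import CollectionSchema, FieldSchema, DataType, Collection

    fields = [
        FieldSchema("int64_pk", DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema("varchar", DataType.VARCHAR, max_length=256),
        FieldSchema("float16_vector", DataType.FLOAT16_VECTOR, dim=31),
        FieldSchema("float_vector", DataType.FLOAT_VECTOR, dim=64),
        FieldSchema("bfloat16_vector", DataType.BFLOAT16_VECTOR, dim=24),
        # sparse vectors carry their dimensions per row, so no dim parameter here
        FieldSchema("sparse_vector", DataType.SPARSE_FLOAT_VECTOR),
        FieldSchema("varchar_inverted", DataType.VARCHAR, max_length=256),
    ]
    collection = Collection("group_search_demo", CollectionSchema(fields))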
@@ -1902,34 +1908,34 @@ class TestGroupSearch(TestCaseClassBase):

         # init params
         self.primary_field = "int64_pk"
-        self.indexed_string_field = "varchar_with_index"
+        self.inverted_string_field = "varchar_inverted"

         # create a collection with fields
         self.collection_wrap.init_collection(
-            name=cf.gen_unique_str("test_group_search"),
+            name=cf.gen_unique_str("TestGroupSearch"),
             schema=cf.set_collection_schema(
-                fields=[DataType.VARCHAR.name, self.primary_field, DataType.FLOAT16_VECTOR.name,
-                        DataType.FLOAT_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
+                fields=[self.primary_field, DataType.VARCHAR.name, DataType.FLOAT16_VECTOR.name,
+                        DataType.FLOAT_VECTOR.name, DataType.BFLOAT16_VECTOR.name, DataType.SPARSE_FLOAT_VECTOR.name,
                         DataType.INT8.name, DataType.INT64.name, DataType.BOOL.name,
-                        self.indexed_string_field],
+                        self.inverted_string_field],
                 field_params={
                     self.primary_field: FieldParams(is_primary=True).to_dict,
                     DataType.FLOAT16_VECTOR.name: FieldParams(dim=31).to_dict,
                     DataType.FLOAT_VECTOR.name: FieldParams(dim=64).to_dict,
-                    DataType.BFLOAT16_VECTOR.name: FieldParams(dim=24).to_dict,
+                    DataType.BFLOAT16_VECTOR.name: FieldParams(dim=24).to_dict
                 },
                 auto_id=True
             )
         )

-        self.vector_fields = [DataType.FLOAT16_VECTOR.name, DataType.FLOAT_VECTOR.name, DataType.BFLOAT16_VECTOR.name]
-        self.dims = [31, 64, 24]
-        self.index_types = ["IVF_SQ8", "HNSW", "IVF_FLAT"]
+        self.vector_fields = [DataType.FLOAT16_VECTOR.name, DataType.FLOAT_VECTOR.name,
+                              DataType.BFLOAT16_VECTOR.name, DataType.SPARSE_FLOAT_VECTOR.name]
+        self.dims = [31, 64, 24, 99]
+        self.index_types = [cp.IndexName.IVF_SQ8, cp.IndexName.HNSW, cp.IndexName.IVF_FLAT, cp.IndexName.SPARSE_WAND]

     @pytest.fixture(scope="class", autouse=True)
     def prepare_data(self):
         # prepare data (> 1024 rows, enough to trigger index building)
         import pandas as pd
         nb = 100
         for _ in range(100):
             string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string")
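Sparse rows in pymilvus are typically passed as dicts mapping a dimension index to a float weight (scipy.sparse matrices are also accepted). A small sketch of generating such rows, under the assumption that this is roughly what the test helpers do for the sparse field; the function and parameters are hypothetical:

    import random

    def gen_sparse_rows(nb: int, max_dim: int = 1000, nnz: int = 8):
        """Generate nb sparse vectors as {dim_index: weight} dicts."""
        rows = []
        for _ in range(nb):
            dims = random.sample(range(max_dim), nnz)  # distinct dimension indices
            rows.append({d: random.random() for d in dims})
        return rows

    rows = gen_sparse_rows(100)
    print(rows[0])  # e.g. {17: 0.42, 256: 0.91, ...}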
@@ -1950,8 +1956,9 @@ class TestGroupSearch(TestCaseClassBase):
             **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name, metric_type=MetricType.L2),
             **DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name, metric_type=MetricType.IP),
             **DefaultVectorIndexParams.IVF_FLAT(DataType.BFLOAT16_VECTOR.name, metric_type=MetricType.COSINE),
+            **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name, metric_type=MetricType.IP),
             # index params for varchar field
-            **DefaultScalarIndexParams.INVERTED(self.indexed_string_field)
+            **DefaultScalarIndexParams.INVERTED(self.inverted_string_field)
         }

         self.build_multi_index(index_params=index_params)
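Outside the test helpers, the equivalent raw index request looks roughly like this. Note that Milvus scores sparse vectors with the IP metric, and drop_ratio_build (the fraction of smallest values discarded at build time) is the main tuning knob. A sketch, assuming an existing Collection handle named collection:

    sparse_index = {
        "index_type": "SPARSE_WAND",          # or "SPARSE_INVERTED_INDEX"
        "metric_type": "IP",                  # sparse vectors are scored by inner product
        "params": {"drop_ratio_build": 0.2},  # drop the 20% smallest values when building
    }
    collection.create_index("sparse_vector", sparse_index)
    collection.load()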
@@ -1961,11 +1968,11 @@ class TestGroupSearch(TestCaseClassBase):
         self.collection_wrap.load()

     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.parametrize("group_by_field", [DataType.VARCHAR.name, "varchar_with_index"])
+    @pytest.mark.parametrize("group_by_field", [DataType.VARCHAR.name, "varchar_inverted"])
     def test_search_group_size(self, group_by_field):
         """
         target:
-            1. search on 3 different float vector fields with group by varchar field with group size
+            1. search on 4 different float vector fields with group by varchar field with group size
         verify results entity = limit * group_size and group size is full if group_strict_size is True
         verify results group counts = limit if group_strict_size is False
         """
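For context on what these parameters do in a plain search call: group_by_field deduplicates results by a scalar field, group_size asks for several hits per group, and a strict-size flag controls whether short groups are tolerated. A hedged sketch against a loaded collection; parameter names follow the test, but exact keyword names can differ across pymilvus versions:

    res = collection.search(
        data=[query_vector],                # one query vector
        anns_field="float_vector",
        param={"metric_type": "IP", "params": {"ef": 64}},
        limit=10,                           # number of groups to return
        group_by_field="varchar",           # one group per distinct varchar value
        group_size=3,                       # up to 3 hits per group
        output_fields=["varchar"],
    )
    # with strict group sizes, each query returns limit * group_size entities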
@@ -2005,7 +2012,7 @@ class TestGroupSearch(TestCaseClassBase):
     @pytest.mark.xfail()
     def test_hybrid_search_group_size(self):
         """
-        hybrid search group by on 3 different float vector fields with group by varchar field with group size
+        hybrid search group by on 4 different float vector fields with group by varchar field with group size
         verify results return de-duplicated group values and group distances are ordered as rank_group_scorer specifies
         """
         nq = 2
@@ -2024,7 +2031,8 @@ class TestGroupSearch(TestCaseClassBase):
         # 4. hybrid search group by
         rank_scorers = ["max", "avg", "sum"]
         for scorer in rank_scorers:
-            res = self.collection_wrap.hybrid_search(req_list, WeightedRanker(0.3, 0.3, 0.3), limit=limit,
+            res = self.collection_wrap.hybrid_search(req_list, WeightedRanker(0.1, 0.3, 0.9, 0.6),
+                                                     limit=limit,
                                                      group_by_field=DataType.VARCHAR.name,
                                                      group_size=group_size, rank_group_scorer=scorer,
                                                      output_fields=[DataType.VARCHAR.name])[0]
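The hybrid-search pattern used here takes one AnnSearchRequest per vector field and a ranker that fuses the per-field result lists. WeightedRanker needs exactly one weight per request, which is why moving from three to four vector fields also changes the ranker arity throughout this diff. A minimal sketch (query vectors and params are placeholders):

    from pymilvus import AnnSearchRequest, WeightedRanker

    reqs = [
        AnnSearchRequest(data=[q_f16], anns_field="float16_vector",
                         param={"metric_type": "L2"}, limit=20),
        AnnSearchRequest(data=[q_f32], anns_field="float_vector",
                         param={"metric_type": "IP"}, limit=20),
        AnnSearchRequest(data=[q_bf16], anns_field="bfloat16_vector",
                         param={"metric_type": "COSINE"}, limit=20),
        AnnSearchRequest(data=[q_sparse], anns_field="sparse_vector",
                         param={"metric_type": "IP"}, limit=20),
    ]
    # one weight per request: 4 requests -> 4 weights
    res = collection.hybrid_search(reqs, WeightedRanker(0.1, 0.3, 0.9, 0.6), limit=10)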
@@ -2044,16 +2052,16 @@ class TestGroupSearch(TestCaseClassBase):
                         group_distances.append(res[i][l + 1].distance)
                     else:
                         if scorer == 'sum':
-                            assert np.sum(group_distances) < np.sum(tmp_distances)
+                            assert np.sum(group_distances) <= np.sum(tmp_distances)
                         elif scorer == 'avg':
-                            assert np.mean(group_distances) < np.mean(tmp_distances)
+                            assert np.mean(group_distances) <= np.mean(tmp_distances)
                         else:  # default max
-                            assert np.max(group_distances) < np.max(tmp_distances)
+                            assert np.max(group_distances) <= np.max(tmp_distances)

                         tmp_distances = group_distances
                         group_distances = [res[i][l + 1].distance]

-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     def test_hybrid_search_group_by(self):
         """
         verify hybrid search group by works with different Rankers
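The relaxation from < to <= in the hunk above accounts for ties: two adjacent groups can legitimately aggregate to the same score, so a strict inequality would make the ordering check flaky. A toy illustration with made-up distances:

    import numpy as np

    group_a = [0.9, 0.5]   # hypothetical distances of the higher-ranked group
    group_b = [0.7, 0.7]   # next group; same sum and mean as group_a

    assert np.sum(group_b) <= np.sum(group_a)    # holds: 1.4 <= 1.4
    assert np.mean(group_b) <= np.mean(group_a)  # holds: 0.7 <= 0.7
    # with a strict `<`, both checks would fail even though the ordering is valid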
@@ -2070,7 +2078,7 @@ class TestGroupSearch(TestCaseClassBase):
             req = AnnSearchRequest(**search_param)
             req_list.append(req)
         # 4. hybrid search group by
-        res = self.collection_wrap.hybrid_search(req_list, WeightedRanker(0.1, 0.9, 0.2), ct.default_limit,
+        res = self.collection_wrap.hybrid_search(req_list, WeightedRanker(0.1, 0.9, 0.2, 0.3), ct.default_limit,
                                                  group_by_field=DataType.VARCHAR.name,
                                                  output_fields=[DataType.VARCHAR.name],
                                                  check_task=CheckTasks.check_search_results,
@@ -2094,7 +2102,7 @@ class TestGroupSearch(TestCaseClassBase):
             req = AnnSearchRequest(**search_param)
             req_list.append(req)
         self.collection_wrap.hybrid_search(req_list, RRFRanker(), ct.default_limit,
-                                           group_by_field=self.indexed_string_field,
+                                           group_by_field=self.inverted_string_field,
                                            check_task=CheckTasks.check_search_results,
                                            check_items={"nq": ct.default_nq, "limit": ct.default_limit})
@@ -2148,7 +2156,7 @@ class TestGroupSearch(TestCaseClassBase):
         page_rounds = 3
         search_param = {}
         default_search_exp = f"{self.primary_field} >= 0"
-        grpby_field = self.indexed_string_field
+        grpby_field = self.inverted_string_field
         default_search_field = self.vector_fields[1]
         search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
         all_pages_ids = []
@@ -2191,7 +2199,7 @@ class TestGroupSearch(TestCaseClassBase):
         page_rounds = 3
         search_param = {}
         default_search_exp = f"{self.primary_field} >= 0"
-        grpby_field = self.indexed_string_field
+        grpby_field = self.inverted_string_field
         default_search_field = self.vector_fields[1]
         search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
         all_pages_ids = []
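These two hunks update paginated grouping searches, which walk the grouped result list page by page via an offset. A hedged sketch of the pattern; in pymilvus the offset is commonly passed inside the search params, though its exact placement may vary by client version:

    limit = 20          # page size (number of groups per page)
    page_rounds = 3
    all_pages_ids = []

    for page in range(page_rounds):
        res = collection.search(
            data=search_vectors,
            anns_field="float_vector",
            param={"metric_type": "IP", "offset": page * limit},  # skip earlier pages
            limit=limit,
            expr="int64_pk >= 0",
            group_by_field="varchar_inverted",
        )
        all_pages_ids.extend(res[0].ids)

    # across pages, grouped results should not repeat
    assert len(all_pages_ids) == len(set(all_pages_ids))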
@@ -10558,52 +10558,6 @@ class TestSearchGroupBy(TestcaseBase):
                                   check_task=CheckTasks.err_res,
                                   check_items={"err_code": err_code, "err_msg": err_msg})

-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("index", ct.all_index_types[9:11])
-    def test_sparse_vectors_group_by(self, index):
-        """
-        target: test search group by works on a collection with sparse vector
-        method: 1. create a collection
-                2. create index
-                3. grouping search
-        verify: search successfully
-        """
-        self._connect()
-        c_name = cf.gen_unique_str(prefix)
-        schema = cf.gen_default_sparse_schema()
-        collection_w = self.init_collection_wrap(c_name, schema=schema)
-        nb = 5000
-        data = cf.gen_default_list_sparse_data(nb=nb)
-        # overwrite the string field with duplicated values for grouping
-        _data = [random.randint(1, 100) for _ in range(nb)]
-        str_data = [str(i) for i in _data]
-        data[2] = str_data
-        collection_w.insert(data)
-        params = cf.get_index_params_params(index)
-        index_params = {"index_type": index, "metric_type": "IP", "params": params}
-        collection_w.create_index(ct.default_sparse_vec_field_name, index_params, index_name=index)
-        collection_w.load()
-
-        nq = 2
-        limit = 20
-        search_params = ct.default_sparse_search_params
-        search_vectors = cf.gen_default_list_sparse_data(nb=nq)[-1][0:nq]
-        # verify the result of group by
-        res = collection_w.search(data=search_vectors, anns_field=ct.default_sparse_vec_field_name,
-                                  param=search_params, limit=limit,
-                                  group_by_field=ct.default_string_field_name,
-                                  output_fields=[ct.default_string_field_name],
-                                  check_task=CheckTasks.check_search_results,
-                                  check_items={"nq": nq, "limit": limit})[0]
-
-        hit = res[0]
-        set_varchar = set()
-        for item in hit:
-            a = list(item.fields.values())
-            set_varchar.add(a[0])
-        # group by is in effect, so there are no duplicate varchar values
-        assert len(hit) == len(set_varchar)


 class TestCollectionHybridSearchValid(TestcaseBase):
     """ Test case of search interface """
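The removed test's final check captures the essential property of grouping search: each returned hit carries a distinct group value. A compact way to express the same verification, assuming res is a pymilvus search result whose hits expose a fields dict (as the removed test's item.fields suggests):

    hits = res[0]                                          # hits for the first query
    group_values = [hit.fields["varchar"] for hit in hits]
    # grouping is in effect iff no group value repeats
    assert len(group_values) == len(set(group_values))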