enhance: ban groupby on binary vector(#31134) (#31735)

Cherry-pick from master
pr: https://github.com/milvus-io/milvus/pull/31659
See also: https://github.com/milvus-io/milvus/issues/31134

Currently, the brute-force search iterator is not supported for binary_vector,
so group_by fails in such cases. To avoid inconsistent behavior,
we ban group_by on binary vectors for the time being.

Signed-off-by: MrPresent-Han <chun.han@zilliz.com>
pull/31787/head
Chun Han 2024-04-01 14:13:12 +08:00 committed by GitHub
parent 68a2e1b40a
commit f3216bfe18
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 20 additions and 9 deletions

View File

@ -157,9 +157,8 @@ PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
"group_by operation will be terminated",
e.what());
throw std::runtime_error(
"Failed to groupBy, please check the index type, trying to "
"groupBy on unsupported index type will fail, currently only "
"support ivf-flat, ivf_cc and HNSW");
"Failed to groupBy, current index:" + index.GetIndexType() +
" doesn't support search_group_by");
}
return true;
}

View File

@ -11,6 +11,7 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/parser/planparserv2"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/funcutil"
@ -61,8 +62,8 @@ func initSearchRequest(ctx context.Context, t *searchTask) error {
t.SearchRequest.OutputFieldsId = outputFieldIDs
if t.request.GetDslType() == commonpb.DslType_BoolExprV1 {
annsField, err := funcutil.GetAttrByKeyFromRepeatedKV(AnnsFieldKey, t.request.GetSearchParams())
if err != nil || len(annsField) == 0 {
annsFieldName, err := funcutil.GetAttrByKeyFromRepeatedKV(AnnsFieldKey, t.request.GetSearchParams())
if err != nil || len(annsFieldName) == 0 {
vecFields := typeutil.GetVectorFieldSchemas(t.schema.CollectionSchema)
if len(vecFields) == 0 {
return errors.New(AnnsFieldKey + " not found in schema")
@ -72,24 +73,29 @@ func initSearchRequest(ctx context.Context, t *searchTask) error {
return errors.New("multiple anns_fields exist, please specify a anns_field in search_params")
}
annsField = vecFields[0].Name
annsFieldName = vecFields[0].Name
}
queryInfo, offset, err := parseSearchInfo(t.request.GetSearchParams(), t.schema.CollectionSchema)
annField := typeutil.GetFieldByName(t.schema.CollectionSchema, annsFieldName)
if queryInfo.GetGroupByFieldId() != -1 && annField.GetDataType() == schemapb.DataType_BinaryVector {
return errors.New("not support search_group_by operation based on binary vector column")
}
if err != nil {
return err
}
t.offset = offset
plan, err := planparserv2.CreateSearchPlan(t.schema.schemaHelper, t.request.Dsl, annsField, queryInfo)
plan, err := planparserv2.CreateSearchPlan(t.schema.schemaHelper, t.request.Dsl, annsFieldName, queryInfo)
if err != nil {
log.Warn("failed to create query plan", zap.Error(err),
zap.String("dsl", t.request.Dsl), // may be very large if large term passed.
zap.String("anns field", annsField), zap.Any("query info", queryInfo))
zap.String("anns field", annsFieldName), zap.Any("query info", queryInfo))
return merr.WrapErrParameterInvalidMsg("failed to create query plan: %v", err)
}
log.Debug("create query plan",
zap.String("dsl", t.request.Dsl), // may be very large if large term passed.
zap.String("anns field", annsField), zap.Any("query info", queryInfo))
zap.String("anns field", annsFieldName), zap.Any("query info", queryInfo))
if t.partitionKeyMode {
expr, err := ParseExprFromPlan(plan)

View File

@ -1075,6 +1075,12 @@ func GetField(schema *schemapb.CollectionSchema, fieldID int64) *schemapb.FieldS
})
}
// GetFieldByName returns the first field of schema whose name equals
// fieldName, or nil when no field carries that name.
func GetFieldByName(schema *schemapb.CollectionSchema, fieldName string) *schemapb.FieldSchema {
	for _, candidate := range schema.GetFields() {
		if candidate.GetName() == fieldName {
			return candidate
		}
	}
	// No match: same nil fallback the callers already expect.
	return nil
}
func IsPrimaryFieldDataExist(datas []*schemapb.FieldData, primaryFieldSchema *schemapb.FieldSchema) bool {
primaryFieldID := primaryFieldSchema.FieldID
primaryFieldName := primaryFieldSchema.Name