enhance: ban groupby on binary vector(#31134) (#31659)

related: #31134

Signed-off-by: MrPresent-Han <chun.han@zilliz.com>
pull/31691/head
Chun Han 2024-03-28 15:19:10 +08:00 committed by GitHub
parent e33dba8afe
commit b99c46246c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 20 additions and 9 deletions

View File

@ -157,9 +157,8 @@ PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
"group_by operation will be terminated",
e.what());
throw std::runtime_error(
"Failed to groupBy, please check the index type, trying to "
"groupBy on unsupported index type will fail, currently only "
"support ivf-flat, ivf_cc and HNSW");
"Failed to groupBy, current index:" + index.GetIndexType() +
" doesn't support search_group_by");
}
return true;
}

View File

@ -11,6 +11,7 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/parser/planparserv2"
"github.com/milvus-io/milvus/internal/util/exprutil"
"github.com/milvus-io/milvus/pkg/log"
@ -62,8 +63,8 @@ func initSearchRequest(ctx context.Context, t *searchTask) error {
t.SearchRequest.OutputFieldsId = outputFieldIDs
if t.request.GetDslType() == commonpb.DslType_BoolExprV1 {
annsField, err := funcutil.GetAttrByKeyFromRepeatedKV(AnnsFieldKey, t.request.GetSearchParams())
if err != nil || len(annsField) == 0 {
annsFieldName, err := funcutil.GetAttrByKeyFromRepeatedKV(AnnsFieldKey, t.request.GetSearchParams())
if err != nil || len(annsFieldName) == 0 {
vecFields := typeutil.GetVectorFieldSchemas(t.schema.CollectionSchema)
if len(vecFields) == 0 {
return errors.New(AnnsFieldKey + " not found in schema")
@ -73,24 +74,29 @@ func initSearchRequest(ctx context.Context, t *searchTask) error {
return errors.New("multiple anns_fields exist, please specify a anns_field in search_params")
}
annsField = vecFields[0].Name
annsFieldName = vecFields[0].Name
}
queryInfo, offset, err := parseSearchInfo(t.request.GetSearchParams(), t.schema.CollectionSchema)
annField := typeutil.GetFieldByName(t.schema.CollectionSchema, annsFieldName)
if queryInfo.GetGroupByFieldId() != -1 && annField.GetDataType() == schemapb.DataType_BinaryVector {
return errors.New("not support search_group_by operation based on binary vector column")
}
if err != nil {
return err
}
t.offset = offset
plan, err := planparserv2.CreateSearchPlan(t.schema.schemaHelper, t.request.Dsl, annsField, queryInfo)
plan, err := planparserv2.CreateSearchPlan(t.schema.schemaHelper, t.request.Dsl, annsFieldName, queryInfo)
if err != nil {
log.Warn("failed to create query plan", zap.Error(err),
zap.String("dsl", t.request.Dsl), // may be very large if large term passed.
zap.String("anns field", annsField), zap.Any("query info", queryInfo))
zap.String("anns field", annsFieldName), zap.Any("query info", queryInfo))
return merr.WrapErrParameterInvalidMsg("failed to create query plan: %v", err)
}
log.Debug("create query plan",
zap.String("dsl", t.request.Dsl), // may be very large if large term passed.
zap.String("anns field", annsField), zap.Any("query info", queryInfo))
zap.String("anns field", annsFieldName), zap.Any("query info", queryInfo))
if t.partitionKeyMode {
expr, err := exprutil.ParseExprFromPlan(plan)

View File

@ -1101,6 +1101,12 @@ func GetField(schema *schemapb.CollectionSchema, fieldID int64) *schemapb.FieldS
})
}
// GetFieldByName looks up a field schema by its name.
//
// It walks the fields reported by schema.GetFields() in order and returns the
// first one whose GetName() equals fieldName. When no field matches, it
// returns nil, so callers should be prepared for a nil result.
func GetFieldByName(schema *schemapb.CollectionSchema, fieldName string) *schemapb.FieldSchema {
	for _, candidate := range schema.GetFields() {
		if candidate.GetName() == fieldName {
			return candidate
		}
	}
	// No field carries the requested name.
	return nil
}
func IsPrimaryFieldDataExist(datas []*schemapb.FieldData, primaryFieldSchema *schemapb.FieldSchema) bool {
primaryFieldID := primaryFieldSchema.FieldID
primaryFieldName := primaryFieldSchema.Name