From f6cd84161c91ef8add56ab2ffcb31e097485170b Mon Sep 17 00:00:00 2001 From: Gao Date: Mon, 8 Jul 2024 19:48:14 +0800 Subject: [PATCH] enhance: ensure autoindex default metric type compatibility (#34479) issue: #34304 pr: #34261 Signed-off-by: chasingegg --- internal/datacoord/index_meta.go | 21 ++++++++++++- internal/datacoord/index_meta_test.go | 44 +++++++++++++++++++++++++-- internal/proto/index_coord.proto | 1 + internal/proxy/task_index.go | 26 +++++++++------- internal/proxy/task_index_test.go | 7 +++++ pkg/common/common.go | 4 ++- 6 files changed, 88 insertions(+), 15 deletions(-) diff --git a/internal/datacoord/index_meta.go b/internal/datacoord/index_meta.go index 0a2f978b80..7122f87a83 100644 --- a/internal/datacoord/index_meta.go +++ b/internal/datacoord/index_meta.go @@ -187,11 +187,15 @@ func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool return false } + useAutoIndex := false userIndexParamsWithoutMmapKey := make([]*commonpb.KeyValuePair, 0) for _, param := range fieldIndex.UserIndexParams { if param.Key == common.MmapEnabledKey { continue } + if param.Key == common.IndexTypeKey && param.Value == common.AutoIndexName { + useAutoIndex = true + } userIndexParamsWithoutMmapKey = append(userIndexParamsWithoutMmapKey, param) } @@ -200,9 +204,24 @@ func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool } for _, param1 := range userIndexParamsWithoutMmapKey { exist := false - for _, param2 := range req.GetUserIndexParams() { + for i, param2 := range req.GetUserIndexParams() { if param2.Key == param1.Key && param2.Value == param1.Value { exist = true + } else if param1.Key == common.MetricTypeKey && param2.Key == param1.Key && useAutoIndex && !req.GetUserAutoindexMetricTypeSpecified() { + // when users use autoindex, metric type is the only thing they can specify + // if they do not specify metric type, will use autoindex default metric type + // when autoindex default config upgraded, remain the old metric type at the very first time for compatibility + // warn! replace request metric type + log.Warn("user not specify autoindex metric type, autoindex config has changed, use old metric for compatibility", + zap.String("old metric type", param1.Value), zap.String("new metric type", param2.Value)) + req.GetUserIndexParams()[i].Value = param1.Value + for j, param := range req.GetIndexParams() { + if param.Key == common.MetricTypeKey { + req.GetIndexParams()[j].Value = param1.Value + break + } + } + exist = true } } if !exist { diff --git a/internal/datacoord/index_meta_test.go b/internal/datacoord/index_meta_test.go index 6165a55cd6..71b8486973 100644 --- a/internal/datacoord/index_meta_test.go +++ b/internal/datacoord/index_meta_test.go @@ -95,6 +95,20 @@ func TestMeta_CanCreateIndex(t *testing.T) { Key: common.IndexTypeKey, Value: "FLAT", }, + { + Key: common.MetricTypeKey, + Value: "L2", + }, + } + userIndexParams = []*commonpb.KeyValuePair{ + { + Key: common.IndexTypeKey, + Value: common.AutoIndexName, + }, + { + Key: common.MetricTypeKey, + Value: "L2", + }, } ) @@ -114,7 +128,7 @@ func TestMeta_CanCreateIndex(t *testing.T) { IndexParams: indexParams, Timestamp: 0, IsAutoIndex: false, - UserIndexParams: indexParams, + UserIndexParams: userIndexParams, } t.Run("can create index", func(t *testing.T) { @@ -132,7 +146,7 @@ func TestMeta_CanCreateIndex(t *testing.T) { TypeParams: typeParams, IndexParams: indexParams, IsAutoIndex: false, - UserIndexParams: indexParams, + UserIndexParams: userIndexParams, } err = m.CreateIndex(index) @@ -166,6 +180,32 @@ func TestMeta_CanCreateIndex(t *testing.T) { assert.Error(t, err) assert.Equal(t, int64(0), tmpIndexID) + req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserIndexParams = req.IndexParams + tmpIndexID, err = m.CanCreateIndex(req) + assert.Error(t, err) + assert.Equal(t, int64(0), tmpIndexID) + + // when we use autoindex, it is possible autoindex changes default metric type + // if user does not specify metric type, we should follow the very first autoindex config + req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserAutoindexMetricTypeSpecified = false + tmpIndexID, err = m.CanCreateIndex(req) + assert.NoError(t, err) + assert.Equal(t, indexID, tmpIndexID) + // req should follow the meta + assert.Equal(t, "L2", req.GetUserIndexParams()[1].Value) + assert.Equal(t, "L2", req.GetIndexParams()[1].Value) + + // if autoindex specify metric type, so the index param change is from user, return error + req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserAutoindexMetricTypeSpecified = true + tmpIndexID, err = m.CanCreateIndex(req) + assert.Error(t, err) + assert.Equal(t, int64(0), tmpIndexID) + req.IndexParams = indexParams req.UserIndexParams = indexParams req.FieldID++ diff --git a/internal/proto/index_coord.proto b/internal/proto/index_coord.proto index dc789d3054..0c331f8bba 100644 --- a/internal/proto/index_coord.proto +++ b/internal/proto/index_coord.proto @@ -163,6 +163,7 @@ message CreateIndexRequest { uint64 timestamp = 6; bool is_auto_index = 7; repeated common.KeyValuePair user_index_params = 8; + bool user_autoindex_metric_type_specified = 9; } message AlterIndexRequest { diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index fe61dba536..df6da1a5f4 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -49,7 +49,7 @@ const ( GetIndexStateTaskName = "GetIndexStateTask" GetIndexBuildProgressTaskName = "GetIndexBuildProgressTask" - AutoIndexName = "AUTOINDEX" + AutoIndexName = common.AutoIndexName DimKey = common.DimKey IsSparseKey = common.IsSparseKey ) @@ -70,8 +70,9 @@ type createIndexTask struct { newTypeParams []*commonpb.KeyValuePair newExtraParams []*commonpb.KeyValuePair - collectionID UniqueID - fieldSchema *schemapb.FieldSchema + collectionID UniqueID + fieldSchema *schemapb.FieldSchema + userAutoIndexMetricTypeSpecified bool } func (cit *createIndexTask) TraceCtx() context.Context { @@ -198,6 +199,7 @@ func (cit *createIndexTask) parseIndexParams() error { if metricTypeExist { // make the users' metric type first class citizen. indexParamsMap[common.MetricTypeKey] = metricType + cit.userAutoIndexMetricTypeSpecified = true } } else { // behavior change after 2.2.9, adapt autoindex logic here. useAutoIndex := func(autoIndexConfig map[string]string) { @@ -235,6 +237,7 @@ func (cit *createIndexTask) parseIndexParams() error { useAutoIndex(autoIndexConfig) // make the users' metric type first class citizen. indexParamsMap[common.MetricTypeKey] = metricType + cit.userAutoIndexMetricTypeSpecified = true } return nil @@ -445,14 +448,15 @@ func (cit *createIndexTask) Execute(ctx context.Context) error { var err error req := &indexpb.CreateIndexRequest{ - CollectionID: cit.collectionID, - FieldID: cit.fieldSchema.GetFieldID(), - IndexName: cit.req.GetIndexName(), - TypeParams: cit.newTypeParams, - IndexParams: cit.newIndexParams, - IsAutoIndex: cit.isAutoIndex, - UserIndexParams: cit.newExtraParams, - Timestamp: cit.BeginTs(), + CollectionID: cit.collectionID, + FieldID: cit.fieldSchema.GetFieldID(), + IndexName: cit.req.GetIndexName(), + TypeParams: cit.newTypeParams, + IndexParams: cit.newIndexParams, + IsAutoIndex: cit.isAutoIndex, + UserIndexParams: cit.newExtraParams, + Timestamp: cit.BeginTs(), + UserAutoindexMetricTypeSpecified: cit.userAutoIndexMetricTypeSpecified, } cit.result, err = cit.datacoord.CreateIndex(ctx, req) if err != nil { diff --git a/internal/proxy/task_index_test.go b/internal/proxy/task_index_test.go index f5e9840453..9976ffa8fb 100644 --- a/internal/proxy/task_index_test.go +++ b/internal/proxy/task_index_test.go @@ -1006,6 +1006,7 @@ func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.True(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: "HNSW"}, {Key: common.MetricTypeKey, Value: "L2"}, @@ -1026,6 +1027,7 @@ func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.True(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: "SPARSE_INVERTED_INDEX"}, {Key: common.MetricTypeKey, Value: "IP"}, @@ -1044,6 +1046,7 @@ func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.True(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: "BIN_IVF_FLAT"}, {Key: common.MetricTypeKey, Value: "JACCARD"}, @@ -1093,6 +1096,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.False(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.MetricTypeKey, Value: autoIndexConfigBinary[common.MetricTypeKey]}, @@ -1108,6 +1112,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.False(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.MetricTypeKey, Value: autoIndexConfigSparse[common.MetricTypeKey]}, @@ -1123,6 +1128,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.False(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.MetricTypeKey, Value: autoIndexConfig[common.MetricTypeKey]}, @@ -1140,6 +1146,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.True(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.MetricTypeKey, Value: "L2"}, diff --git a/pkg/common/common.go b/pkg/common/common.go index 8aae735d6e..b0fbe73043 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -119,7 +119,9 @@ const ( DropRatioBuildKey = "drop_ratio_build" - IsSparseKey = "is_sparse" + BitmapCardinalityLimitKey = "bitmap_cardinality_limit" + IsSparseKey = "is_sparse" + AutoIndexName = "AUTOINDEX" ) // Collection properties key