From 0a122533d0e615c0c5d360d873160d0cde6dc694 Mon Sep 17 00:00:00 2001 From: Gao Date: Fri, 2 Aug 2024 16:22:20 +0800 Subject: [PATCH] enhance: change autoindex default metric type (#34328) issue: #34304 pr: #34261 Signed-off-by: chasingegg --- configs/milvus.yaml | 2 +- internal/datacoord/index_meta.go | 24 ++++++++++- internal/datacoord/index_meta_test.go | 44 ++++++++++++++++++++- internal/proto/index_coord.proto | 1 + internal/proxy/task_index.go | 26 ++++++------ internal/proxy/task_index_test.go | 6 +++ pkg/common/common.go | 1 + pkg/util/indexparamcheck/constraints.go | 4 +- pkg/util/paramtable/autoindex_param.go | 2 +- tests/python_client/testcases/test_index.py | 2 +- 10 files changed, 93 insertions(+), 19 deletions(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 6567cd92aa..563ce68b6a 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -750,4 +750,4 @@ trace: autoIndex: params: - build: '{"M": 18,"efConstruction": 240,"index_type": "HNSW", "metric_type": "IP"}' + build: '{"M": 18,"efConstruction": 240,"index_type": "HNSW", "metric_type": "COSINE"}' diff --git a/internal/datacoord/index_meta.go b/internal/datacoord/index_meta.go index e8d9bcbe79..fc90c4f15a 100644 --- a/internal/datacoord/index_meta.go +++ b/internal/datacoord/index_meta.go @@ -185,14 +185,36 @@ func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool if notEq { return false } + useAutoIndex := false + for _, param := range fieldIndex.UserIndexParams { + if param.Key == common.IndexTypeKey && param.Value == common.AutoIndexName { + useAutoIndex = true + break + } + } if len(fieldIndex.UserIndexParams) != len(req.GetUserIndexParams()) { return false } for _, param1 := range fieldIndex.UserIndexParams { exist := false - for _, param2 := range req.GetUserIndexParams() { + for i, param2 := range req.GetUserIndexParams() { if param2.Key == param1.Key && param2.Value == param1.Value { exist = true + } else if param1.Key == common.MetricTypeKey && param2.Key == param1.Key && useAutoIndex && !req.GetUserAutoindexMetricTypeSpecified() { + // when users use autoindex, metric type is the only thing they can specify + // if they do not specify metric type, will use autoindex default metric type + // when autoindex default config upgraded, remain the old metric type at the very first time for compatibility + // warn! replace request metric type + log.Warn("user not specify autoindex metric type, autoindex config has changed, use old metric for compatibility", + zap.String("old metric type", param1.Value), zap.String("new metric type", param2.Value)) + req.GetUserIndexParams()[i].Value = param1.Value + for j, param := range req.GetIndexParams() { + if param.Key == common.MetricTypeKey { + req.GetIndexParams()[j].Value = param1.Value + break + } + } + exist = true } } if !exist { diff --git a/internal/datacoord/index_meta_test.go b/internal/datacoord/index_meta_test.go index 2fe0094e55..79dc93fea3 100644 --- a/internal/datacoord/index_meta_test.go +++ b/internal/datacoord/index_meta_test.go @@ -95,6 +95,20 @@ func TestMeta_CanCreateIndex(t *testing.T) { Key: common.IndexTypeKey, Value: "FLAT", }, + { + Key: common.MetricTypeKey, + Value: "L2", + }, + } + userIndexParams = []*commonpb.KeyValuePair{ + { + Key: common.IndexTypeKey, + Value: common.AutoIndexName, + }, + { + Key: common.MetricTypeKey, + Value: "L2", + }, } ) @@ -114,7 +128,7 @@ func TestMeta_CanCreateIndex(t *testing.T) { IndexParams: indexParams, Timestamp: 0, IsAutoIndex: false, - UserIndexParams: indexParams, + UserIndexParams: userIndexParams, } t.Run("can create index", func(t *testing.T) { @@ -132,7 +146,7 @@ func TestMeta_CanCreateIndex(t *testing.T) { TypeParams: typeParams, IndexParams: indexParams, IsAutoIndex: false, - UserIndexParams: indexParams, + UserIndexParams: userIndexParams, } err = m.CreateIndex(index) @@ -166,6 +180,32 @@ func TestMeta_CanCreateIndex(t *testing.T) { assert.Error(t, err) assert.Equal(t, int64(0), tmpIndexID) + req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserIndexParams = req.IndexParams + tmpIndexID, err = m.CanCreateIndex(req) + assert.Error(t, err) + assert.Equal(t, int64(0), tmpIndexID) + + // when we use autoindex, it is possible autoindex changes default metric type + // if user does not specify metric type, we should follow the very first autoindex config + req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserAutoindexMetricTypeSpecified = false + tmpIndexID, err = m.CanCreateIndex(req) + assert.NoError(t, err) + assert.Equal(t, indexID, tmpIndexID) + // req should follow the meta + assert.Equal(t, "L2", req.GetUserIndexParams()[1].Value) + assert.Equal(t, "L2", req.GetIndexParams()[1].Value) + + // if autoindex specify metric type, so the index param change is from user, return error + req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}} + req.UserAutoindexMetricTypeSpecified = true + tmpIndexID, err = m.CanCreateIndex(req) + assert.Error(t, err) + assert.Equal(t, int64(0), tmpIndexID) + req.IndexParams = indexParams req.UserIndexParams = indexParams req.FieldID++ diff --git a/internal/proto/index_coord.proto b/internal/proto/index_coord.proto index 04c89bd6c6..a2cc2baaad 100644 --- a/internal/proto/index_coord.proto +++ b/internal/proto/index_coord.proto @@ -134,6 +134,7 @@ message CreateIndexRequest { uint64 timestamp = 6; bool is_auto_index = 7; repeated common.KeyValuePair user_index_params = 8; + bool user_autoindex_metric_type_specified = 9; } message GetIndexInfoRequest { diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index 541765d0b0..868075f576 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -47,7 +47,7 @@ const ( GetIndexStateTaskName = "GetIndexStateTask" GetIndexBuildProgressTaskName = "GetIndexBuildProgressTask" - AutoIndexName = "AUTOINDEX" + AutoIndexName = common.AutoIndexName DimKey = common.DimKey ) @@ -67,8 +67,9 @@ type createIndexTask struct { newTypeParams []*commonpb.KeyValuePair newExtraParams []*commonpb.KeyValuePair - collectionID UniqueID - fieldSchema *schemapb.FieldSchema + collectionID UniqueID + fieldSchema *schemapb.FieldSchema + userAutoIndexMetricTypeSpecified bool } func (cit *createIndexTask) TraceCtx() context.Context { @@ -186,6 +187,7 @@ func (cit *createIndexTask) parseIndexParams() error { if metricTypeExist { // make the users' metric type first class citizen. indexParamsMap[common.MetricTypeKey] = metricType + cit.userAutoIndexMetricTypeSpecified = true } } else { // behavior change after 2.2.9, adapt autoindex logic here. autoIndexConfig := Params.AutoIndexConfig.IndexParams.GetAsJSONMap() @@ -225,6 +227,7 @@ func (cit *createIndexTask) parseIndexParams() error { useAutoIndex() // make the users' metric type first class citizen. indexParamsMap[common.MetricTypeKey] = metricType + cit.userAutoIndexMetricTypeSpecified = true } return nil @@ -398,14 +401,15 @@ func (cit *createIndexTask) Execute(ctx context.Context) error { var err error req := &indexpb.CreateIndexRequest{ - CollectionID: cit.collectionID, - FieldID: cit.fieldSchema.GetFieldID(), - IndexName: cit.req.GetIndexName(), - TypeParams: cit.newTypeParams, - IndexParams: cit.newIndexParams, - IsAutoIndex: cit.isAutoIndex, - UserIndexParams: cit.newExtraParams, - Timestamp: cit.BeginTs(), + CollectionID: cit.collectionID, + FieldID: cit.fieldSchema.GetFieldID(), + IndexName: cit.req.GetIndexName(), + TypeParams: cit.newTypeParams, + IndexParams: cit.newIndexParams, + IsAutoIndex: cit.isAutoIndex, + UserIndexParams: cit.newExtraParams, + Timestamp: cit.BeginTs(), + UserAutoindexMetricTypeSpecified: cit.userAutoIndexMetricTypeSpecified, } cit.result, err = cit.datacoord.CreateIndex(ctx, req) if err != nil { diff --git a/internal/proxy/task_index_test.go b/internal/proxy/task_index_test.go index 68ae999c9c..ac59ff062b 100644 --- a/internal/proxy/task_index_test.go +++ b/internal/proxy/task_index_test.go @@ -772,6 +772,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.False(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.MetricTypeKey, Value: autoIndexConfig[common.MetricTypeKey]}, @@ -789,6 +790,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.True(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.MetricTypeKey, Value: "L2"}, @@ -807,6 +809,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { } err := task.parseIndexParams() assert.NoError(t, err) + assert.True(t, task.userAutoIndexMetricTypeSpecified) assert.ElementsMatch(t, []*commonpb.KeyValuePair{ {Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.MetricTypeKey, Value: "L2"}, @@ -823,6 +826,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { }, } err := task.parseIndexParams() + assert.False(t, task.userAutoIndexMetricTypeSpecified) assert.Error(t, err) }) @@ -837,6 +841,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { }, } err := task.parseIndexParams() + assert.False(t, task.userAutoIndexMetricTypeSpecified) assert.Error(t, err) }) @@ -852,6 +857,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) { }, } err := task.parseIndexParams() + assert.False(t, task.userAutoIndexMetricTypeSpecified) assert.Error(t, err) }) } diff --git a/pkg/common/common.go b/pkg/common/common.go index 1ec3bfe3bd..84d59f1851 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -95,6 +95,7 @@ const ( DimKey = "dim" MaxLengthKey = "max_length" MaxCapacityKey = "max_capacity" + AutoIndexName = "AUTOINDEX" ) // Collection properties key diff --git a/pkg/util/indexparamcheck/constraints.go b/pkg/util/indexparamcheck/constraints.go index 44b6202181..57a0c5e5fe 100644 --- a/pkg/util/indexparamcheck/constraints.go +++ b/pkg/util/indexparamcheck/constraints.go @@ -51,6 +51,6 @@ var ( ) const ( - FloatVectorDefaultMetricType = metric.IP - BinaryVectorDefaultMetricType = metric.JACCARD + FloatVectorDefaultMetricType = metric.COSINE + BinaryVectorDefaultMetricType = metric.HAMMING ) diff --git a/pkg/util/paramtable/autoindex_param.go b/pkg/util/paramtable/autoindex_param.go index 70d355381a..98190004d5 100644 --- a/pkg/util/paramtable/autoindex_param.go +++ b/pkg/util/paramtable/autoindex_param.go @@ -51,7 +51,7 @@ func (p *autoIndexConfig) init(base *BaseTable) { p.IndexParams = ParamItem{ Key: "autoIndex.params.build", Version: "2.2.0", - DefaultValue: `{"M": 18,"efConstruction": 240,"index_type": "HNSW", "metric_type": "IP"}`, + DefaultValue: `{"M": 18,"efConstruction": 240,"index_type": "HNSW", "metric_type": "COSINE"}`, } p.IndexParams.Init(base.mgr) diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index 5b64190811..b35c807206 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -20,7 +20,7 @@ prefix = "index" default_schema = cf.gen_default_collection_schema() default_field_name = ct.default_float_vec_field_name default_index_params = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}} -default_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "IP"} +default_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "COSINE"} # copied from pymilvus uid = "test_index"