From 4bda6c33add714dc7a2f0aa90467b41742bb1e49 Mon Sep 17 00:00:00 2001 From: Xiaofan <83447078+xiaofan-luan@users.noreply.github.com> Date: Mon, 4 Mar 2024 22:21:00 -0800 Subject: [PATCH] fix: binary vector should not limit dimension to 32768 (#30676) all the vector dimension check should happen on collection creation but not index build fix #30285 Signed-off-by: xiaofanluan <xiaofan.luan@zilliz.com> --- internal/core/CMakeLists.txt | 11 ++------ internal/proxy/util.go | 18 ++++++++++--- internal/proxy/util_test.go | 18 +++++++++++++ pkg/util/indexparamcheck/base_checker.go | 25 ++++++++++++++++--- pkg/util/indexparamcheck/constraints.go | 5 ---- .../indexparamcheck/ivf_pq_checker_test.go | 4 --- .../raft_ivf_pq_checker_test.go | 4 --- tests/python_client/common/common_type.py | 2 +- .../test_milvus_client_collection.py | 2 +- .../testcases/test_collection.py | 8 +++--- tests/python_client/testcases/test_search.py | 4 +-- 11 files changed, 64 insertions(+), 37 deletions(-) diff --git a/internal/core/CMakeLists.txt b/internal/core/CMakeLists.txt index 240749495a..1c22ee5120 100644 --- a/internal/core/CMakeLists.txt +++ b/internal/core/CMakeLists.txt @@ -127,6 +127,7 @@ if (LINUX OR MSYS) "-DELPP_THREAD_SAFE" "-fopenmp" "-Wno-error" + "-Wno-all" ) if (CMAKE_BUILD_TYPE STREQUAL "Release") append_flags( CMAKE_CXX_FLAGS @@ -141,17 +142,9 @@ if ( APPLE ) "-fPIC" "-DELPP_THREAD_SAFE" "-fopenmp" - "-Wno-error" - "-Wsign-compare" - "-Wall" "-pedantic" - "-Wno-unused-command-line-argument" - "-Wextra" - "-Wno-unused-parameter" - "-Wno-deprecated" + "-Wno-all" "-DBOOST_STACKTRACE_GNU_SOURCE_NOT_REQUIRED=1" - #"-fvisibility=hidden" - #"-fvisibility-inlines-hidden" ) endif () diff --git a/internal/proxy/util.go b/internal/proxy/util.go index 8a66c6474e..b9c3ea8cc4 100644 --- a/internal/proxy/util.go +++ b/internal/proxy/util.go @@ -311,11 +311,21 @@ func validateDimension(field *schemapb.FieldSchema) error { return errors.New("dimension is not defined in field type params, check type param `dim` for vector field") } - if dim <= 0 || dim > Params.ProxyCfg.MaxDimension.GetAsInt64() { - return fmt.Errorf("invalid dimension: %d. should be in range 1 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt()) + if dim <= 1 { + return fmt.Errorf("invalid dimension: %d. should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt()) } - if field.DataType == schemapb.DataType_BinaryVector && dim%8 != 0 { - return fmt.Errorf("invalid dimension: %d. should be multiple of 8. ", dim) + + if field.DataType != schemapb.DataType_BinaryVector { + if dim > Params.ProxyCfg.MaxDimension.GetAsInt64() { + return fmt.Errorf("invalid dimension: %d. float vector dimension should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt()) + } + } else { + if dim%8 != 0 { + return fmt.Errorf("invalid dimension: %d. binary vector dimension should be multiple of 8. ", dim) + } + if dim > Params.ProxyCfg.MaxDimension.GetAsInt64()*8 { + return fmt.Errorf("invalid dimension: %d. binary vector dimension should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt()*8) + } } return nil } diff --git a/internal/proxy/util_test.go b/internal/proxy/util_test.go index 94e15f8109..66967c8922 100644 --- a/internal/proxy/util_test.go +++ b/internal/proxy/util_test.go @@ -190,6 +190,16 @@ func TestValidateDimension(t *testing.T) { }, }, } + assert.NotNil(t, validateDimension(fieldSchema)) + fieldSchema = &schemapb.FieldSchema{ + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "2", + }, + }, + } assert.Nil(t, validateDimension(fieldSchema)) fieldSchema.TypeParams = []*commonpb.KeyValuePair{ { @@ -237,6 +247,14 @@ func TestValidateDimension(t *testing.T) { }, } assert.NotNil(t, validateDimension(fieldSchema)) + + fieldSchema.TypeParams = []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "262145", + }, + } + assert.NotNil(t, validateDimension(fieldSchema)) } func TestValidateVectorFieldMetricType(t *testing.T) { diff --git a/pkg/util/indexparamcheck/base_checker.go b/pkg/util/indexparamcheck/base_checker.go index a8c27776c7..50716a9d91 100644 --- a/pkg/util/indexparamcheck/base_checker.go +++ b/pkg/util/indexparamcheck/base_checker.go @@ -1,6 +1,25 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package indexparamcheck import ( + "fmt" + "math" + "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" @@ -9,10 +28,10 @@ import ( type baseChecker struct{} func (c baseChecker) CheckTrain(params map[string]string) error { - if !CheckIntByRange(params, DIM, DefaultMinDim, DefaultMaxDim) { - return errOutOfRange(DIM, DefaultMinDim, DefaultMaxDim) + // vector dimension should be checked on collection creation. this is just some basic check + if !CheckIntByRange(params, DIM, 1, math.MaxInt) { + return fmt.Errorf("failed to check vector dimension, should be larger than 0 and smaller than math.MaxInt") } - return nil } diff --git a/pkg/util/indexparamcheck/constraints.go b/pkg/util/indexparamcheck/constraints.go index bdc27f25f0..dd4ce2a352 100644 --- a/pkg/util/indexparamcheck/constraints.go +++ b/pkg/util/indexparamcheck/constraints.go @@ -15,11 +15,6 @@ const ( // MaxNList is the upper limit of nlist that used in Index IVFxxx MaxNList = 65536 - // DefaultMinDim is the smallest dimension supported in Milvus - DefaultMinDim = 1 - // DefaultMaxDim is the largest dimension supported in Milvus - DefaultMaxDim = 32768 - HNSWMinEfConstruction = 1 HNSWMaxEfConstruction = 2147483647 HNSWMinM = 1 diff --git a/pkg/util/indexparamcheck/ivf_pq_checker_test.go b/pkg/util/indexparamcheck/ivf_pq_checker_test.go index 8c44f22c34..d9f655f874 100644 --- a/pkg/util/indexparamcheck/ivf_pq_checker_test.go +++ b/pkg/util/indexparamcheck/ivf_pq_checker_test.go @@ -57,9 +57,6 @@ func Test_ivfPQChecker_CheckTrain(t *testing.T) { invalidParamsIVF := copyParams(validParams) invalidParamsIVF[IVFM] = "NAN" - invalidParamsM := copyParams(validParams) - invalidParamsM[DIM] = strconv.Itoa(65536) - invalidParamsMzero := copyParams(validParams) invalidParamsMzero[IVFM] = "0" @@ -128,7 +125,6 @@ func Test_ivfPQChecker_CheckTrain(t *testing.T) { {invalidParamsNbits, false}, {invalidParamsWithoutIVF, false}, {invalidParamsIVF, false}, - {invalidParamsM, false}, {invalidParamsMzero, false}, {p1, true}, {p2, true}, diff --git a/pkg/util/indexparamcheck/raft_ivf_pq_checker_test.go b/pkg/util/indexparamcheck/raft_ivf_pq_checker_test.go index c727b97dd8..5d7e431135 100644 --- a/pkg/util/indexparamcheck/raft_ivf_pq_checker_test.go +++ b/pkg/util/indexparamcheck/raft_ivf_pq_checker_test.go @@ -49,9 +49,6 @@ func Test_raftIVFPQChecker_CheckTrain(t *testing.T) { invalidParamsIVF := copyParams(validParams) invalidParamsIVF[IVFM] = "NAN" - invalidParamsM := copyParams(validParams) - invalidParamsM[DIM] = strconv.Itoa(65536) - validParamsMzero := copyParams(validParams) validParamsMzero[IVFM] = "0" @@ -135,7 +132,6 @@ func Test_raftIVFPQChecker_CheckTrain(t *testing.T) { {invalidParamsNbits, false}, {invalidParamsWithoutIVF, false}, {invalidParamsIVF, false}, - {invalidParamsM, false}, {validParamsMzero, true}, {p1, true}, {p2, true}, diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py index 3374046c09..fd0f7f34f3 100644 --- a/tests/python_client/common/common_type.py +++ b/tests/python_client/common/common_type.py @@ -65,7 +65,7 @@ float_field_desc = "float type field" float_vec_field_desc = "float vector type field" binary_vec_field_desc = "binary vector type field" max_dim = 32768 -min_dim = 1 +min_dim = 2 gracefulTime = 1 default_nlist = 128 compact_segment_num_threshold = 3 diff --git a/tests/python_client/milvus_client/test_milvus_client_collection.py b/tests/python_client/milvus_client/test_milvus_client_collection.py index 19dfd11da5..ac73b9256e 100644 --- a/tests/python_client/milvus_client/test_milvus_client_collection.py +++ b/tests/python_client/milvus_client/test_milvus_client_collection.py @@ -120,7 +120,7 @@ class TestMilvusClientCollectionInvalid(TestcaseBase): client = self._connect(enable_milvus_client_api=True) collection_name = cf.gen_unique_str(prefix) # 1. create collection - error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. should be in range 1 ~ 32768"} + error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. should be in range 2 ~ 32768"} client_w.create_collection(client, collection_name, dim, check_task=CheckTasks.err_res, check_items=error) client_w.drop_collection(client, collection_name) diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py index 5473f40c68..21495eb7bf 100644 --- a/tests/python_client/testcases/test_collection.py +++ b/tests/python_client/testcases/test_collection.py @@ -1678,7 +1678,7 @@ class TestCollectionCountBinary(TestcaseBase): @pytest.fixture( scope="function", params=[ - 1, + 8, 1000, 2001 ], @@ -1711,12 +1711,12 @@ class TestCollectionCountBinary(TestcaseBase): expected: check error message successfully """ self._connect() - dim = 1 + dim = 2 c_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim) collection_w = self.init_collection_wrap(schema=c_schema, check_task=CheckTasks.err_res, check_items={"err_code": 1, - "err_msg": f"invalid dimension: {dim}. should be multiple of 8."}) + "err_msg": f"invalid dimension: {dim}. binary vector dimension should be multiple of 8."}) @pytest.mark.tags(CaseLabel.L2) def test_collection_count_no_entities(self): @@ -4336,7 +4336,7 @@ class TestCollectionMultipleVectorValid(TestcaseBase): """ self._connect() c_name = cf.gen_unique_str(prefix) - another_dim = 1 + another_dim = 2 schema = cf.gen_default_collection_schema(primary_field=primary_key, auto_id=auto_id, dim=ct.max_dim, enable_dynamic_field=enable_dynamic_field, multiple_dim_array=[another_dim]) diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index c181e23507..93389d7cd2 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -473,7 +473,7 @@ class TestCollectionSearchInvalid(TestcaseBase): """ # 1. create a collection nb = 1 - dim = 1 + dim = 2 fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"), cf.gen_float_vec_field(dim=dim)] schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1") @@ -3402,7 +3402,7 @@ class TestCollectionSearch(TestcaseBase): """ # 1. create a collection nb = 10 - dim = 1 + dim = 2 fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"), cf.gen_float_vec_field(dim=dim)] schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")