From 4bda6c33add714dc7a2f0aa90467b41742bb1e49 Mon Sep 17 00:00:00 2001
From: Xiaofan <83447078+xiaofan-luan@users.noreply.github.com>
Date: Mon, 4 Mar 2024 22:21:00 -0800
Subject: [PATCH] fix: binary vector should not limit dimension to 32768
 (#30676)

all the vector dimension check should happen on collection creation but
not index build
fix #30285

Signed-off-by: xiaofanluan <xiaofan.luan@zilliz.com>
---
 internal/core/CMakeLists.txt                  | 11 ++------
 internal/proxy/util.go                        | 18 ++++++++++---
 internal/proxy/util_test.go                   | 18 +++++++++++++
 pkg/util/indexparamcheck/base_checker.go      | 25 ++++++++++++++++---
 pkg/util/indexparamcheck/constraints.go       |  5 ----
 .../indexparamcheck/ivf_pq_checker_test.go    |  4 ---
 .../raft_ivf_pq_checker_test.go               |  4 ---
 tests/python_client/common/common_type.py     |  2 +-
 .../test_milvus_client_collection.py          |  2 +-
 .../testcases/test_collection.py              |  8 +++---
 tests/python_client/testcases/test_search.py  |  4 +--
 11 files changed, 64 insertions(+), 37 deletions(-)

diff --git a/internal/core/CMakeLists.txt b/internal/core/CMakeLists.txt
index 240749495a..1c22ee5120 100644
--- a/internal/core/CMakeLists.txt
+++ b/internal/core/CMakeLists.txt
@@ -127,6 +127,7 @@ if (LINUX OR MSYS)
                   "-DELPP_THREAD_SAFE"
                   "-fopenmp"
                   "-Wno-error"
+                  "-Wno-all"
                   )
     if (CMAKE_BUILD_TYPE STREQUAL "Release")
         append_flags( CMAKE_CXX_FLAGS
@@ -141,17 +142,9 @@ if ( APPLE )
             "-fPIC"
             "-DELPP_THREAD_SAFE"
             "-fopenmp"
-            "-Wno-error"
-            "-Wsign-compare"
-            "-Wall"
             "-pedantic"
-            "-Wno-unused-command-line-argument"
-            "-Wextra"
-            "-Wno-unused-parameter"
-            "-Wno-deprecated"
+            "-Wno-all"
             "-DBOOST_STACKTRACE_GNU_SOURCE_NOT_REQUIRED=1"
-            #"-fvisibility=hidden"
-            #"-fvisibility-inlines-hidden"
             )
 endif ()
 
diff --git a/internal/proxy/util.go b/internal/proxy/util.go
index 8a66c6474e..b9c3ea8cc4 100644
--- a/internal/proxy/util.go
+++ b/internal/proxy/util.go
@@ -311,11 +311,21 @@ func validateDimension(field *schemapb.FieldSchema) error {
 		return errors.New("dimension is not defined in field type params, check type param `dim` for vector field")
 	}
 
-	if dim <= 0 || dim > Params.ProxyCfg.MaxDimension.GetAsInt64() {
-		return fmt.Errorf("invalid dimension: %d. should be in range 1 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt())
+	if dim <= 1 {
+		return fmt.Errorf("invalid dimension: %d. should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt())
 	}
-	if field.DataType == schemapb.DataType_BinaryVector && dim%8 != 0 {
-		return fmt.Errorf("invalid dimension: %d. should be multiple of 8. ", dim)
+
+	if field.DataType != schemapb.DataType_BinaryVector {
+		if dim > Params.ProxyCfg.MaxDimension.GetAsInt64() {
+			return fmt.Errorf("invalid dimension: %d. float vector dimension should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt())
+		}
+	} else {
+		if dim%8 != 0 {
+			return fmt.Errorf("invalid dimension: %d. binary vector dimension should be multiple of 8. ", dim)
+		}
+		if dim > Params.ProxyCfg.MaxDimension.GetAsInt64()*8 {
+			return fmt.Errorf("invalid dimension: %d. binary vector dimension should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt()*8)
+		}
 	}
 	return nil
 }
diff --git a/internal/proxy/util_test.go b/internal/proxy/util_test.go
index 94e15f8109..66967c8922 100644
--- a/internal/proxy/util_test.go
+++ b/internal/proxy/util_test.go
@@ -190,6 +190,16 @@ func TestValidateDimension(t *testing.T) {
 			},
 		},
 	}
+	assert.NotNil(t, validateDimension(fieldSchema))
+	fieldSchema = &schemapb.FieldSchema{
+		DataType: schemapb.DataType_FloatVector,
+		TypeParams: []*commonpb.KeyValuePair{
+			{
+				Key:   common.DimKey,
+				Value: "2",
+			},
+		},
+	}
 	assert.Nil(t, validateDimension(fieldSchema))
 	fieldSchema.TypeParams = []*commonpb.KeyValuePair{
 		{
@@ -237,6 +247,14 @@ func TestValidateDimension(t *testing.T) {
 		},
 	}
 	assert.NotNil(t, validateDimension(fieldSchema))
+
+	fieldSchema.TypeParams = []*commonpb.KeyValuePair{
+		{
+			Key:   common.DimKey,
+			Value: "262145",
+		},
+	}
+	assert.NotNil(t, validateDimension(fieldSchema))
 }
 
 func TestValidateVectorFieldMetricType(t *testing.T) {
diff --git a/pkg/util/indexparamcheck/base_checker.go b/pkg/util/indexparamcheck/base_checker.go
index a8c27776c7..50716a9d91 100644
--- a/pkg/util/indexparamcheck/base_checker.go
+++ b/pkg/util/indexparamcheck/base_checker.go
@@ -1,6 +1,25 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 package indexparamcheck
 
 import (
+	"fmt"
+	"math"
+
 	"github.com/cockroachdb/errors"
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
@@ -9,10 +28,10 @@ import (
 type baseChecker struct{}
 
 func (c baseChecker) CheckTrain(params map[string]string) error {
-	if !CheckIntByRange(params, DIM, DefaultMinDim, DefaultMaxDim) {
-		return errOutOfRange(DIM, DefaultMinDim, DefaultMaxDim)
+	// vector dimension should be checked on collection creation. this is just some basic check
+	if !CheckIntByRange(params, DIM, 1, math.MaxInt) {
+		return fmt.Errorf("failed to check vector dimension, should be larger than 0 and smaller than math.MaxInt")
 	}
-
 	return nil
 }
 
diff --git a/pkg/util/indexparamcheck/constraints.go b/pkg/util/indexparamcheck/constraints.go
index bdc27f25f0..dd4ce2a352 100644
--- a/pkg/util/indexparamcheck/constraints.go
+++ b/pkg/util/indexparamcheck/constraints.go
@@ -15,11 +15,6 @@ const (
 	// MaxNList is the upper limit of nlist that used in Index IVFxxx
 	MaxNList = 65536
 
-	// DefaultMinDim is the smallest dimension supported in Milvus
-	DefaultMinDim = 1
-	// DefaultMaxDim is the largest dimension supported in Milvus
-	DefaultMaxDim = 32768
-
 	HNSWMinEfConstruction = 1
 	HNSWMaxEfConstruction = 2147483647
 	HNSWMinM              = 1
diff --git a/pkg/util/indexparamcheck/ivf_pq_checker_test.go b/pkg/util/indexparamcheck/ivf_pq_checker_test.go
index 8c44f22c34..d9f655f874 100644
--- a/pkg/util/indexparamcheck/ivf_pq_checker_test.go
+++ b/pkg/util/indexparamcheck/ivf_pq_checker_test.go
@@ -57,9 +57,6 @@ func Test_ivfPQChecker_CheckTrain(t *testing.T) {
 	invalidParamsIVF := copyParams(validParams)
 	invalidParamsIVF[IVFM] = "NAN"
 
-	invalidParamsM := copyParams(validParams)
-	invalidParamsM[DIM] = strconv.Itoa(65536)
-
 	invalidParamsMzero := copyParams(validParams)
 	invalidParamsMzero[IVFM] = "0"
 
@@ -128,7 +125,6 @@ func Test_ivfPQChecker_CheckTrain(t *testing.T) {
 		{invalidParamsNbits, false},
 		{invalidParamsWithoutIVF, false},
 		{invalidParamsIVF, false},
-		{invalidParamsM, false},
 		{invalidParamsMzero, false},
 		{p1, true},
 		{p2, true},
diff --git a/pkg/util/indexparamcheck/raft_ivf_pq_checker_test.go b/pkg/util/indexparamcheck/raft_ivf_pq_checker_test.go
index c727b97dd8..5d7e431135 100644
--- a/pkg/util/indexparamcheck/raft_ivf_pq_checker_test.go
+++ b/pkg/util/indexparamcheck/raft_ivf_pq_checker_test.go
@@ -49,9 +49,6 @@ func Test_raftIVFPQChecker_CheckTrain(t *testing.T) {
 	invalidParamsIVF := copyParams(validParams)
 	invalidParamsIVF[IVFM] = "NAN"
 
-	invalidParamsM := copyParams(validParams)
-	invalidParamsM[DIM] = strconv.Itoa(65536)
-
 	validParamsMzero := copyParams(validParams)
 	validParamsMzero[IVFM] = "0"
 
@@ -135,7 +132,6 @@ func Test_raftIVFPQChecker_CheckTrain(t *testing.T) {
 		{invalidParamsNbits, false},
 		{invalidParamsWithoutIVF, false},
 		{invalidParamsIVF, false},
-		{invalidParamsM, false},
 		{validParamsMzero, true},
 		{p1, true},
 		{p2, true},
diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py
index 3374046c09..fd0f7f34f3 100644
--- a/tests/python_client/common/common_type.py
+++ b/tests/python_client/common/common_type.py
@@ -65,7 +65,7 @@ float_field_desc = "float type field"
 float_vec_field_desc = "float vector type field"
 binary_vec_field_desc = "binary vector type field"
 max_dim = 32768
-min_dim = 1
+min_dim = 2
 gracefulTime = 1
 default_nlist = 128
 compact_segment_num_threshold = 3
diff --git a/tests/python_client/milvus_client/test_milvus_client_collection.py b/tests/python_client/milvus_client/test_milvus_client_collection.py
index 19dfd11da5..ac73b9256e 100644
--- a/tests/python_client/milvus_client/test_milvus_client_collection.py
+++ b/tests/python_client/milvus_client/test_milvus_client_collection.py
@@ -120,7 +120,7 @@ class TestMilvusClientCollectionInvalid(TestcaseBase):
         client = self._connect(enable_milvus_client_api=True)
         collection_name = cf.gen_unique_str(prefix)
         # 1. create collection
-        error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. should be in range 1 ~ 32768"}
+        error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. should be in range 2 ~ 32768"}
         client_w.create_collection(client, collection_name, dim,
                                    check_task=CheckTasks.err_res, check_items=error)
         client_w.drop_collection(client, collection_name)
diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py
index 5473f40c68..21495eb7bf 100644
--- a/tests/python_client/testcases/test_collection.py
+++ b/tests/python_client/testcases/test_collection.py
@@ -1678,7 +1678,7 @@ class TestCollectionCountBinary(TestcaseBase):
     @pytest.fixture(
         scope="function",
         params=[
-            1,
+            8,
             1000,
             2001
         ],
@@ -1711,12 +1711,12 @@ class TestCollectionCountBinary(TestcaseBase):
         expected: check error message successfully
         """
         self._connect()
-        dim = 1
+        dim = 2
         c_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim)
         collection_w = self.init_collection_wrap(schema=c_schema,
                                                  check_task=CheckTasks.err_res,
                                                  check_items={"err_code": 1,
-                                                              "err_msg": f"invalid dimension: {dim}. should be multiple of 8."})
+                                                              "err_msg": f"invalid dimension: {dim}. binary vector dimension should be multiple of 8."})
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_collection_count_no_entities(self):
@@ -4336,7 +4336,7 @@ class TestCollectionMultipleVectorValid(TestcaseBase):
         """
         self._connect()
         c_name = cf.gen_unique_str(prefix)
-        another_dim = 1
+        another_dim = 2
         schema = cf.gen_default_collection_schema(primary_field=primary_key, auto_id=auto_id, dim=ct.max_dim,
                                                   enable_dynamic_field=enable_dynamic_field,
                                                   multiple_dim_array=[another_dim])
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index c181e23507..93389d7cd2 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -473,7 +473,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
         """
         # 1. create a collection
         nb = 1
-        dim = 1
+        dim = 2
         fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
                   cf.gen_float_vec_field(dim=dim)]
         schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")
@@ -3402,7 +3402,7 @@ class TestCollectionSearch(TestcaseBase):
         """
         # 1. create a collection
         nb = 10
-        dim = 1
+        dim = 2
         fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
                   cf.gen_float_vec_field(dim=dim)]
         schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")