mirror of https://github.com/milvus-io/milvus.git
fix: binary vector should not limit dimension to 32768 (#30676)
All vector dimension checks should happen at collection creation, not at index build. fix #30285. Signed-off-by: xiaofanluan <xiaofan.luan@zilliz.com> pull/31000/head
parent
1936aa4caa
commit
4bda6c33ad
|
@ -127,6 +127,7 @@ if (LINUX OR MSYS)
|
|||
"-DELPP_THREAD_SAFE"
|
||||
"-fopenmp"
|
||||
"-Wno-error"
|
||||
"-Wno-all"
|
||||
)
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Release")
|
||||
append_flags( CMAKE_CXX_FLAGS
|
||||
|
@ -141,17 +142,9 @@ if ( APPLE )
|
|||
"-fPIC"
|
||||
"-DELPP_THREAD_SAFE"
|
||||
"-fopenmp"
|
||||
"-Wno-error"
|
||||
"-Wsign-compare"
|
||||
"-Wall"
|
||||
"-pedantic"
|
||||
"-Wno-unused-command-line-argument"
|
||||
"-Wextra"
|
||||
"-Wno-unused-parameter"
|
||||
"-Wno-deprecated"
|
||||
"-Wno-all"
|
||||
"-DBOOST_STACKTRACE_GNU_SOURCE_NOT_REQUIRED=1"
|
||||
#"-fvisibility=hidden"
|
||||
#"-fvisibility-inlines-hidden"
|
||||
)
|
||||
endif ()
|
||||
|
||||
|
|
|
@ -311,11 +311,21 @@ func validateDimension(field *schemapb.FieldSchema) error {
|
|||
return errors.New("dimension is not defined in field type params, check type param `dim` for vector field")
|
||||
}
|
||||
|
||||
if dim <= 0 || dim > Params.ProxyCfg.MaxDimension.GetAsInt64() {
|
||||
return fmt.Errorf("invalid dimension: %d. should be in range 1 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt())
|
||||
if dim <= 1 {
|
||||
return fmt.Errorf("invalid dimension: %d. should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt())
|
||||
}
|
||||
if field.DataType == schemapb.DataType_BinaryVector && dim%8 != 0 {
|
||||
return fmt.Errorf("invalid dimension: %d. should be multiple of 8. ", dim)
|
||||
|
||||
if field.DataType != schemapb.DataType_BinaryVector {
|
||||
if dim > Params.ProxyCfg.MaxDimension.GetAsInt64() {
|
||||
return fmt.Errorf("invalid dimension: %d. float vector dimension should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt())
|
||||
}
|
||||
} else {
|
||||
if dim%8 != 0 {
|
||||
return fmt.Errorf("invalid dimension: %d. binary vector dimension should be multiple of 8. ", dim)
|
||||
}
|
||||
if dim > Params.ProxyCfg.MaxDimension.GetAsInt64()*8 {
|
||||
return fmt.Errorf("invalid dimension: %d. binary vector dimension should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt()*8)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -190,6 +190,16 @@ func TestValidateDimension(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
assert.NotNil(t, validateDimension(fieldSchema))
|
||||
fieldSchema = &schemapb.FieldSchema{
|
||||
DataType: schemapb.DataType_FloatVector,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{
|
||||
Key: common.DimKey,
|
||||
Value: "2",
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Nil(t, validateDimension(fieldSchema))
|
||||
fieldSchema.TypeParams = []*commonpb.KeyValuePair{
|
||||
{
|
||||
|
@ -237,6 +247,14 @@ func TestValidateDimension(t *testing.T) {
|
|||
},
|
||||
}
|
||||
assert.NotNil(t, validateDimension(fieldSchema))
|
||||
|
||||
fieldSchema.TypeParams = []*commonpb.KeyValuePair{
|
||||
{
|
||||
Key: common.DimKey,
|
||||
Value: "262145",
|
||||
},
|
||||
}
|
||||
assert.NotNil(t, validateDimension(fieldSchema))
|
||||
}
|
||||
|
||||
func TestValidateVectorFieldMetricType(t *testing.T) {
|
||||
|
|
|
@ -1,6 +1,25 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package indexparamcheck
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
|
@ -9,10 +28,10 @@ import (
|
|||
type baseChecker struct{}
|
||||
|
||||
func (c baseChecker) CheckTrain(params map[string]string) error {
|
||||
if !CheckIntByRange(params, DIM, DefaultMinDim, DefaultMaxDim) {
|
||||
return errOutOfRange(DIM, DefaultMinDim, DefaultMaxDim)
|
||||
// vector dimension should be checked on collection creation. this is just some basic check
|
||||
if !CheckIntByRange(params, DIM, 1, math.MaxInt) {
|
||||
return fmt.Errorf("failed to check vector dimension, should be larger than 0 and smaller than math.MaxInt")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -15,11 +15,6 @@ const (
|
|||
// MaxNList is the upper limit of nlist that used in Index IVFxxx
|
||||
MaxNList = 65536
|
||||
|
||||
// DefaultMinDim is the smallest dimension supported in Milvus
|
||||
DefaultMinDim = 1
|
||||
// DefaultMaxDim is the largest dimension supported in Milvus
|
||||
DefaultMaxDim = 32768
|
||||
|
||||
HNSWMinEfConstruction = 1
|
||||
HNSWMaxEfConstruction = 2147483647
|
||||
HNSWMinM = 1
|
||||
|
|
|
@ -57,9 +57,6 @@ func Test_ivfPQChecker_CheckTrain(t *testing.T) {
|
|||
invalidParamsIVF := copyParams(validParams)
|
||||
invalidParamsIVF[IVFM] = "NAN"
|
||||
|
||||
invalidParamsM := copyParams(validParams)
|
||||
invalidParamsM[DIM] = strconv.Itoa(65536)
|
||||
|
||||
invalidParamsMzero := copyParams(validParams)
|
||||
invalidParamsMzero[IVFM] = "0"
|
||||
|
||||
|
@ -128,7 +125,6 @@ func Test_ivfPQChecker_CheckTrain(t *testing.T) {
|
|||
{invalidParamsNbits, false},
|
||||
{invalidParamsWithoutIVF, false},
|
||||
{invalidParamsIVF, false},
|
||||
{invalidParamsM, false},
|
||||
{invalidParamsMzero, false},
|
||||
{p1, true},
|
||||
{p2, true},
|
||||
|
|
|
@ -49,9 +49,6 @@ func Test_raftIVFPQChecker_CheckTrain(t *testing.T) {
|
|||
invalidParamsIVF := copyParams(validParams)
|
||||
invalidParamsIVF[IVFM] = "NAN"
|
||||
|
||||
invalidParamsM := copyParams(validParams)
|
||||
invalidParamsM[DIM] = strconv.Itoa(65536)
|
||||
|
||||
validParamsMzero := copyParams(validParams)
|
||||
validParamsMzero[IVFM] = "0"
|
||||
|
||||
|
@ -135,7 +132,6 @@ func Test_raftIVFPQChecker_CheckTrain(t *testing.T) {
|
|||
{invalidParamsNbits, false},
|
||||
{invalidParamsWithoutIVF, false},
|
||||
{invalidParamsIVF, false},
|
||||
{invalidParamsM, false},
|
||||
{validParamsMzero, true},
|
||||
{p1, true},
|
||||
{p2, true},
|
||||
|
|
|
@ -65,7 +65,7 @@ float_field_desc = "float type field"
|
|||
float_vec_field_desc = "float vector type field"
|
||||
binary_vec_field_desc = "binary vector type field"
|
||||
max_dim = 32768
|
||||
min_dim = 1
|
||||
min_dim = 2
|
||||
gracefulTime = 1
|
||||
default_nlist = 128
|
||||
compact_segment_num_threshold = 3
|
||||
|
|
|
@ -120,7 +120,7 @@ class TestMilvusClientCollectionInvalid(TestcaseBase):
|
|||
client = self._connect(enable_milvus_client_api=True)
|
||||
collection_name = cf.gen_unique_str(prefix)
|
||||
# 1. create collection
|
||||
error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. should be in range 1 ~ 32768"}
|
||||
error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. should be in range 2 ~ 32768"}
|
||||
client_w.create_collection(client, collection_name, dim,
|
||||
check_task=CheckTasks.err_res, check_items=error)
|
||||
client_w.drop_collection(client, collection_name)
|
||||
|
|
|
@ -1678,7 +1678,7 @@ class TestCollectionCountBinary(TestcaseBase):
|
|||
@pytest.fixture(
|
||||
scope="function",
|
||||
params=[
|
||||
1,
|
||||
8,
|
||||
1000,
|
||||
2001
|
||||
],
|
||||
|
@ -1711,12 +1711,12 @@ class TestCollectionCountBinary(TestcaseBase):
|
|||
expected: check error message successfully
|
||||
"""
|
||||
self._connect()
|
||||
dim = 1
|
||||
dim = 2
|
||||
c_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim)
|
||||
collection_w = self.init_collection_wrap(schema=c_schema,
|
||||
check_task=CheckTasks.err_res,
|
||||
check_items={"err_code": 1,
|
||||
"err_msg": f"invalid dimension: {dim}. should be multiple of 8."})
|
||||
"err_msg": f"invalid dimension: {dim}. binary vector dimension should be multiple of 8."})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_collection_count_no_entities(self):
|
||||
|
@ -4336,7 +4336,7 @@ class TestCollectionMultipleVectorValid(TestcaseBase):
|
|||
"""
|
||||
self._connect()
|
||||
c_name = cf.gen_unique_str(prefix)
|
||||
another_dim = 1
|
||||
another_dim = 2
|
||||
schema = cf.gen_default_collection_schema(primary_field=primary_key, auto_id=auto_id, dim=ct.max_dim,
|
||||
enable_dynamic_field=enable_dynamic_field,
|
||||
multiple_dim_array=[another_dim])
|
||||
|
|
|
@ -473,7 +473,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
|
|||
"""
|
||||
# 1. create a collection
|
||||
nb = 1
|
||||
dim = 1
|
||||
dim = 2
|
||||
fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
|
||||
cf.gen_float_vec_field(dim=dim)]
|
||||
schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")
|
||||
|
@ -3402,7 +3402,7 @@ class TestCollectionSearch(TestcaseBase):
|
|||
"""
|
||||
# 1. create a collection
|
||||
nb = 10
|
||||
dim = 1
|
||||
dim = 2
|
||||
fields = [cf.gen_int64_field("int64_1"), cf.gen_int64_field("int64_2"),
|
||||
cf.gen_float_vec_field(dim=dim)]
|
||||
schema = cf.gen_collection_schema(fields=fields, primary_field="int64_1")
|
||||
|
|
Loading…
Reference in New Issue