// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package importutil import ( "context" "encoding/json" "math" "testing" "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/commonpb" "github.com/milvus-io/milvus-proto/go-api/schemapb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/typeutil" "github.com/stretchr/testify/assert" ) // sampleSchema() return a schema contains all supported data types with an int64 primary key func sampleSchema() *schemapb.CollectionSchema { schema := &schemapb.CollectionSchema{ Name: "schema", Description: "schema", AutoID: true, Fields: []*schemapb.FieldSchema{ { FieldID: 102, Name: "FieldBool", IsPrimaryKey: false, Description: "bool", DataType: schemapb.DataType_Bool, }, { FieldID: 103, Name: "FieldInt8", IsPrimaryKey: false, Description: "int8", DataType: schemapb.DataType_Int8, }, { FieldID: 104, Name: "FieldInt16", IsPrimaryKey: false, Description: "int16", DataType: schemapb.DataType_Int16, }, { FieldID: 105, Name: "FieldInt32", IsPrimaryKey: false, Description: "int32", DataType: schemapb.DataType_Int32, }, { FieldID: 106, Name: "FieldInt64", IsPrimaryKey: true, AutoID: false, Description: "int64", DataType: schemapb.DataType_Int64, }, { FieldID: 107, Name: "FieldFloat", IsPrimaryKey: false, Description: "float", DataType: schemapb.DataType_Float, }, { FieldID: 108, Name: "FieldDouble", IsPrimaryKey: false, Description: "double", DataType: schemapb.DataType_Double, }, { FieldID: 109, Name: "FieldString", IsPrimaryKey: false, Description: "string", DataType: schemapb.DataType_VarChar, TypeParams: []*commonpb.KeyValuePair{ {Key: "max_length", Value: "128"}, }, }, { FieldID: 110, Name: "FieldBinaryVector", IsPrimaryKey: false, Description: "binary_vector", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{ {Key: "dim", Value: "16"}, }, }, { FieldID: 111, Name: "FieldFloatVector", IsPrimaryKey: false, Description: "float_vector", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ {Key: "dim", Value: "4"}, }, }, }, } return schema } // sampleContent/sampleRow is json structs to represent sampleSchema() for testing type sampleRow struct { FieldBool bool FieldInt8 int8 FieldInt16 int16 FieldInt32 int32 FieldInt64 int64 FieldFloat float32 FieldDouble float64 FieldString string FieldBinaryVector []int FieldFloatVector []float32 } type sampleContent struct { Rows []sampleRow } // strKeySchema() return a schema with a varchar primary key func strKeySchema() *schemapb.CollectionSchema { schema := &schemapb.CollectionSchema{ Name: "schema", Description: "schema", AutoID: true, Fields: []*schemapb.FieldSchema{ { FieldID: 101, Name: "UID", IsPrimaryKey: true, AutoID: false, Description: "uid", DataType: schemapb.DataType_VarChar, TypeParams: []*commonpb.KeyValuePair{ {Key: "max_length", Value: "1024"}, }, }, { FieldID: 102, Name: "FieldInt32", IsPrimaryKey: false, Description: "int_scalar", DataType: schemapb.DataType_Int32, }, { FieldID: 103, Name: "FieldFloat", IsPrimaryKey: false, Description: "float_scalar", DataType: schemapb.DataType_Float, }, { FieldID: 104, Name: "FieldString", IsPrimaryKey: false, Description: "string_scalar", DataType: schemapb.DataType_VarChar, TypeParams: []*commonpb.KeyValuePair{ {Key: "max_length", Value: "128"}, }, }, { FieldID: 105, Name: "FieldBool", IsPrimaryKey: false, Description: "bool_scalar", DataType: schemapb.DataType_Bool, }, { FieldID: 106, Name: "FieldFloatVector", IsPrimaryKey: false, Description: "vectors", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ {Key: "dim", Value: "4"}, }, }, }, } return schema } // strKeyContent/strKeyRow is json structs to represent strKeySchema() for testing type strKeyRow struct { UID string FieldInt32 int32 FieldFloat float32 FieldString string FieldBool bool FieldFloatVector []float32 } type strKeyContent struct { Rows []strKeyRow } func jsonNumber(value string) json.Number { return json.Number(value) } func Test_IsCanceled(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) assert.False(t, isCanceled(ctx)) cancel() assert.True(t, isCanceled(ctx)) } func Test_InitSegmentData(t *testing.T) { testFunc := func(schema *schemapb.CollectionSchema) { fields := initSegmentData(schema) assert.Equal(t, len(schema.Fields)+1, len(fields)) for _, field := range schema.Fields { data, ok := fields[field.FieldID] assert.True(t, ok) assert.NotNil(t, data) } printFieldsDataInfo(fields, "dummy", []string{}) } testFunc(sampleSchema()) testFunc(strKeySchema()) // unsupported data type schema := &schemapb.CollectionSchema{ Name: "schema", AutoID: true, Fields: []*schemapb.FieldSchema{ { FieldID: 101, Name: "uid", IsPrimaryKey: true, AutoID: true, DataType: schemapb.DataType_Int64, }, { FieldID: 102, Name: "flag", IsPrimaryKey: false, DataType: schemapb.DataType_None, }, }, } data := initSegmentData(schema) assert.Nil(t, data) } func Test_parseFloat(t *testing.T) { value, err := parseFloat("dummy", 32, "") assert.Zero(t, value) assert.Error(t, err) value, err = parseFloat("NaN", 32, "") assert.Zero(t, value) assert.Error(t, err) value, err = parseFloat("Inf", 32, "") assert.Zero(t, value) assert.Error(t, err) value, err = parseFloat("Infinity", 32, "") assert.Zero(t, value) assert.Error(t, err) value, err = parseFloat("3.5e+38", 32, "") assert.Zero(t, value) assert.Error(t, err) value, err = parseFloat("1.8e+308", 64, "") assert.Zero(t, value) assert.Error(t, err) value, err = parseFloat("3.14159", 32, "") assert.True(t, math.Abs(value-3.14159) < 0.000001) assert.Nil(t, err) value, err = parseFloat("2.718281828459045", 64, "") assert.True(t, math.Abs(value-2.718281828459045) < 0.0000000000000001) assert.Nil(t, err) } func Test_InitValidators(t *testing.T) { validators := make(map[storage.FieldID]*Validator) err := initValidators(nil, validators) assert.NotNil(t, err) schema := sampleSchema() // success case err = initValidators(schema, validators) assert.Nil(t, err) assert.Equal(t, len(schema.Fields), len(validators)) for _, field := range schema.Fields { fieldID := field.GetFieldID() assert.Equal(t, field.GetName(), validators[fieldID].fieldName) assert.Equal(t, field.GetIsPrimaryKey(), validators[fieldID].primaryKey) assert.Equal(t, field.GetAutoID(), validators[fieldID].autoID) if field.GetDataType() != schemapb.DataType_VarChar && field.GetDataType() != schemapb.DataType_String { assert.False(t, validators[fieldID].isString) } else { assert.True(t, validators[fieldID].isString) } } name2ID := make(map[string]storage.FieldID) for _, field := range schema.Fields { name2ID[field.GetName()] = field.GetFieldID() } fields := initSegmentData(schema) assert.NotNil(t, fields) checkConvertFunc := func(funcName string, validVal interface{}, invalidVal interface{}) { id := name2ID[funcName] v, ok := validators[id] assert.True(t, ok) fieldData := fields[id] preNum := fieldData.RowNum() err = v.convertFunc(validVal, fieldData) assert.Nil(t, err) postNum := fieldData.RowNum() assert.Equal(t, 1, postNum-preNum) err = v.convertFunc(invalidVal, fieldData) assert.NotNil(t, err) } t.Run("check convert functions", func(t *testing.T) { var validVal interface{} = true var invalidVal interface{} = 5 checkConvertFunc("FieldBool", validVal, invalidVal) validVal = jsonNumber("100") invalidVal = jsonNumber("128") checkConvertFunc("FieldInt8", validVal, invalidVal) invalidVal = jsonNumber("65536") checkConvertFunc("FieldInt16", validVal, invalidVal) invalidVal = jsonNumber("2147483648") checkConvertFunc("FieldInt32", validVal, invalidVal) invalidVal = jsonNumber("1.2") checkConvertFunc("FieldInt64", validVal, invalidVal) invalidVal = jsonNumber("dummy") checkConvertFunc("FieldFloat", validVal, invalidVal) checkConvertFunc("FieldDouble", validVal, invalidVal) invalidVal = "6" checkConvertFunc("FieldInt8", validVal, invalidVal) checkConvertFunc("FieldInt16", validVal, invalidVal) checkConvertFunc("FieldInt32", validVal, invalidVal) checkConvertFunc("FieldInt64", validVal, invalidVal) checkConvertFunc("FieldFloat", validVal, invalidVal) checkConvertFunc("FieldDouble", validVal, invalidVal) validVal = "aa" checkConvertFunc("FieldString", validVal, nil) // the binary vector dimension is 16, shoud input two uint8 values, each value should between 0~255 validVal = []interface{}{jsonNumber("100"), jsonNumber("101")} invalidVal = []interface{}{jsonNumber("100"), jsonNumber("1256")} checkConvertFunc("FieldBinaryVector", validVal, invalidVal) invalidVal = false checkConvertFunc("FieldBinaryVector", validVal, invalidVal) invalidVal = []interface{}{jsonNumber("100")} checkConvertFunc("FieldBinaryVector", validVal, invalidVal) invalidVal = []interface{}{jsonNumber("100"), 0} checkConvertFunc("FieldBinaryVector", validVal, invalidVal) // the float vector dimension is 4, each value should be valid float number validVal = []interface{}{jsonNumber("1"), jsonNumber("2"), jsonNumber("3"), jsonNumber("4")} invalidVal = []interface{}{jsonNumber("1"), jsonNumber("2"), jsonNumber("3"), jsonNumber("dummy")} checkConvertFunc("FieldFloatVector", validVal, invalidVal) invalidVal = false checkConvertFunc("FieldFloatVector", validVal, invalidVal) invalidVal = []interface{}{jsonNumber("1")} checkConvertFunc("FieldFloatVector", validVal, invalidVal) invalidVal = []interface{}{jsonNumber("1"), jsonNumber("2"), jsonNumber("3"), true} checkConvertFunc("FieldFloatVector", validVal, invalidVal) }) t.Run("init error cases", func(t *testing.T) { schema = &schemapb.CollectionSchema{ Name: "schema", Description: "schema", AutoID: true, Fields: make([]*schemapb.FieldSchema, 0), } schema.Fields = append(schema.Fields, &schemapb.FieldSchema{ FieldID: 111, Name: "FieldFloatVector", IsPrimaryKey: false, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ {Key: "dim", Value: "aa"}, }, }) validators = make(map[storage.FieldID]*Validator) err = initValidators(schema, validators) assert.NotNil(t, err) schema.Fields = make([]*schemapb.FieldSchema, 0) schema.Fields = append(schema.Fields, &schemapb.FieldSchema{ FieldID: 110, Name: "FieldBinaryVector", IsPrimaryKey: false, DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{ {Key: "dim", Value: "aa"}, }, }) err = initValidators(schema, validators) assert.NotNil(t, err) // unsupported data type schema.Fields = make([]*schemapb.FieldSchema, 0) schema.Fields = append(schema.Fields, &schemapb.FieldSchema{ FieldID: 110, Name: "dummy", IsPrimaryKey: false, DataType: schemapb.DataType_None, }) err = initValidators(schema, validators) assert.NotNil(t, err) }) } func Test_GetFileNameAndExt(t *testing.T) { filePath := "aaa/bbb/ccc.txt" name, ext := GetFileNameAndExt(filePath) assert.EqualValues(t, "ccc", name) assert.EqualValues(t, ".txt", ext) } func Test_GetFieldDimension(t *testing.T) { schema := &schemapb.FieldSchema{ FieldID: 111, Name: "FieldFloatVector", IsPrimaryKey: false, Description: "float_vector", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ {Key: "dim", Value: "4"}, }, } dim, err := getFieldDimension(schema) assert.Nil(t, err) assert.Equal(t, 4, dim) schema.TypeParams = []*commonpb.KeyValuePair{ {Key: "dim", Value: "abc"}, } dim, err = getFieldDimension(schema) assert.NotNil(t, err) assert.Equal(t, 0, dim) schema.TypeParams = []*commonpb.KeyValuePair{} dim, err = getFieldDimension(schema) assert.NotNil(t, err) assert.Equal(t, 0, dim) } func Test_TryFlushBlocks(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) flushCounter := 0 flushRowCount := 0 flushFunc := func(fields map[storage.FieldID]storage.FieldData, shardID int) error { flushCounter++ rowCount := 0 for _, v := range fields { rowCount = v.RowNum() break } flushRowCount += rowCount for _, v := range fields { assert.Equal(t, rowCount, v.RowNum()) } return nil } blockSize := int64(1024) maxTotalSize := int64(4096) shardNum := int32(3) // prepare flush data, 3 shards, each shard 10 rows rowCount := 10 fieldsData := createFieldsData(rowCount) // non-force flush segmentsData := createSegmentsData(fieldsData, shardNum) err := tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, false) assert.Nil(t, err) assert.Equal(t, 0, flushCounter) assert.Equal(t, 0, flushRowCount) // force flush err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, true) assert.Nil(t, err) assert.Equal(t, int(shardNum), flushCounter) assert.Equal(t, rowCount*int(shardNum), flushRowCount) // after force flush, no data left flushCounter = 0 flushRowCount = 0 err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, true) assert.Nil(t, err) assert.Equal(t, 0, flushCounter) assert.Equal(t, 0, flushRowCount) // flush when segment size exceeds blockSize segmentsData = createSegmentsData(fieldsData, shardNum) blockSize = 100 // blockSize is 100 bytes, less than the 10 rows size err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, false) assert.Nil(t, err) assert.Equal(t, int(shardNum), flushCounter) assert.Equal(t, rowCount*int(shardNum), flushRowCount) flushCounter = 0 flushRowCount = 0 err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, true) // no data left assert.Nil(t, err) assert.Equal(t, 0, flushCounter) assert.Equal(t, 0, flushRowCount) // flush when segments total size exceeds maxTotalSize segmentsData = createSegmentsData(fieldsData, shardNum) blockSize = 4096 // blockSize is 4096 bytes, larger than the 10 rows size maxTotalSize = 100 // maxTotalSize is 100 bytes, less than the 30 rows size err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, false) assert.Nil(t, err) assert.Equal(t, 1, flushCounter) // only the max segment is flushed assert.Equal(t, 10, flushRowCount) flushCounter = 0 flushRowCount = 0 err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, true) // two segments left assert.Nil(t, err) assert.Equal(t, 2, flushCounter) assert.Equal(t, 20, flushRowCount) // call flush function failed flushFunc = func(fields map[storage.FieldID]storage.FieldData, shardID int) error { return errors.New("error") } segmentsData = createSegmentsData(fieldsData, shardNum) err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, true) // failed to force flush assert.Error(t, err) err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, 1, maxTotalSize, false) // failed to flush block larger than blockSize assert.Error(t, err) err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, false) // failed to flush biggest block assert.Error(t, err) // canceled cancel() flushCounter = 0 flushRowCount = 0 segmentsData = createSegmentsData(fieldsData, shardNum) err = tryFlushBlocks(ctx, segmentsData, sampleSchema(), flushFunc, blockSize, maxTotalSize, true) assert.Error(t, err) assert.Equal(t, 0, flushCounter) assert.Equal(t, 0, flushRowCount) } func Test_GetTypeName(t *testing.T) { str := getTypeName(schemapb.DataType_Bool) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_Int8) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_Int16) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_Int32) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_Int64) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_Float) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_Double) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_VarChar) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_String) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_BinaryVector) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_FloatVector) assert.NotEmpty(t, str) str = getTypeName(schemapb.DataType_None) assert.Equal(t, "InvalidType", str) } func Test_PkToShard(t *testing.T) { a := int32(99) shard, err := pkToShard(a, 2) assert.Error(t, err) assert.Zero(t, shard) s := "abcdef" shardNum := uint32(3) shard, err = pkToShard(s, shardNum) assert.NoError(t, err) hash := typeutil.HashString2Uint32(s) assert.Equal(t, hash%shardNum, shard) pk := int64(100) shardNum = uint32(4) shard, err = pkToShard(pk, shardNum) assert.NoError(t, err) hash, _ = typeutil.Hash32Int64(pk) assert.Equal(t, hash%shardNum, shard) pk = int64(99999) shardNum = uint32(5) shard, err = pkToShard(pk, shardNum) assert.NoError(t, err) hash, _ = typeutil.Hash32Int64(pk) assert.Equal(t, hash%shardNum, shard) } func Test_UpdateKVInfo(t *testing.T) { err := UpdateKVInfo(nil, "a", "1") assert.Error(t, err) infos := make([]*commonpb.KeyValuePair, 0) err = UpdateKVInfo(&infos, "a", "1") assert.NoError(t, err) assert.Equal(t, 1, len(infos)) assert.Equal(t, "1", infos[0].Value) err = UpdateKVInfo(&infos, "a", "2") assert.NoError(t, err) assert.Equal(t, "2", infos[0].Value) err = UpdateKVInfo(&infos, "b", "5") assert.NoError(t, err) assert.Equal(t, 2, len(infos)) assert.Equal(t, "5", infos[1].Value) }