diff --git a/internal/distributed/proxy/httpserver/constant.go b/internal/distributed/proxy/httpserver/constant.go index 4653b9231d..354f155e6a 100644 --- a/internal/distributed/proxy/httpserver/constant.go +++ b/internal/distributed/proxy/httpserver/constant.go @@ -29,15 +29,16 @@ const ( HTTPReturnMessage = "message" HTTPReturnData = "data" - HTTPReturnFieldName = "name" - HTTPReturnFieldType = "type" - HTTPReturnFieldPrimaryKey = "primaryKey" - HTTPReturnFieldAutoID = "autoId" - HTTPReturnDescription = "description" + HTTPReturnFieldName = "name" + HTTPReturnFieldType = "type" + HTTPReturnFieldPrimaryKey = "primaryKey" + HTTPReturnFieldPartitionKey = "partitionKey" + HTTPReturnFieldAutoID = "autoId" + HTTPReturnDescription = "description" - HTTPReturnIndexName = "indexName" - HTTPReturnIndexField = "fieldName" - HTTPReturnIndexMetricsType = "metricType" + HTTPIndexName = "indexName" + HTTPIndexField = "fieldName" + HTTPReturnIndexMetricType = "metricType" HTTPReturnDistance = "distance" diff --git a/internal/distributed/proxy/httpserver/handler_v1.go b/internal/distributed/proxy/httpserver/handler_v1.go index feb8fdd051..5c2044a340 100644 --- a/internal/distributed/proxy/httpserver/handler_v1.go +++ b/internal/distributed/proxy/httpserver/handler_v1.go @@ -320,7 +320,7 @@ func (h *HandlersV1) getCollectionDetails(c *gin.Context) { } vectorField := "" for _, field := range coll.Schema.Fields { - if field.DataType == schemapb.DataType_BinaryVector || field.DataType == schemapb.DataType_FloatVector { + if IsVectorField(field) { vectorField = field.Name break } diff --git a/internal/distributed/proxy/httpserver/handler_v1_test.go b/internal/distributed/proxy/httpserver/handler_v1_test.go index fd6bfd1a8e..ba66c88f9e 100644 --- a/internal/distributed/proxy/httpserver/handler_v1_test.go +++ b/internal/distributed/proxy/httpserver/handler_v1_test.go @@ -53,7 +53,7 @@ var DefaultShowCollectionsResp = milvuspb.ShowCollectionsResponse{ var DefaultDescCollectionResp = milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, - Schema: generateCollectionSchema(schemapb.DataType_Int64, false), + Schema: generateCollectionSchema(schemapb.DataType_Int64), ShardsNum: ShardNumDefault, Status: &StatusSuccess, } @@ -267,7 +267,7 @@ func TestVectorCollectionsDescribe(t *testing.T) { name: "get load status fail", mp: mp2, exceptCode: http.StatusOK, - expectedBody: "{\"code\":200,\"data\":{\"collectionName\":\"" + DefaultCollectionName + "\",\"description\":\"\",\"enableDynamicField\":true,\"fields\":[{\"autoId\":false,\"description\":\"\",\"name\":\"book_id\",\"primaryKey\":true,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"word_count\",\"primaryKey\":false,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"book_intro\",\"primaryKey\":false,\"type\":\"FloatVector(2)\"}],\"indexes\":[{\"fieldName\":\"book_intro\",\"indexName\":\"" + DefaultIndexName + "\",\"metricType\":\"L2\"}],\"load\":\"\",\"shardsNum\":1}}", + expectedBody: "{\"code\":200,\"data\":{\"collectionName\":\"" + DefaultCollectionName + "\",\"description\":\"\",\"enableDynamicField\":true,\"fields\":[{\"autoId\":false,\"description\":\"\",\"name\":\"book_id\",\"partitionKey\":false,\"primaryKey\":true,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"word_count\",\"partitionKey\":false,\"primaryKey\":false,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"book_intro\",\"partitionKey\":false,\"primaryKey\":false,\"type\":\"FloatVector(2)\"}],\"indexes\":[{\"fieldName\":\"book_intro\",\"indexName\":\"" + DefaultIndexName + "\",\"metricType\":\"L2\"}],\"load\":\"\",\"shardsNum\":1}}", }) mp3 := mocks.NewMockProxy(t) @@ -278,7 +278,7 @@ func TestVectorCollectionsDescribe(t *testing.T) { name: "get indexes fail", mp: mp3, exceptCode: http.StatusOK, - expectedBody: "{\"code\":200,\"data\":{\"collectionName\":\"" + DefaultCollectionName + "\",\"description\":\"\",\"enableDynamicField\":true,\"fields\":[{\"autoId\":false,\"description\":\"\",\"name\":\"book_id\",\"primaryKey\":true,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"word_count\",\"primaryKey\":false,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"book_intro\",\"primaryKey\":false,\"type\":\"FloatVector(2)\"}],\"indexes\":[],\"load\":\"LoadStateLoaded\",\"shardsNum\":1}}", + expectedBody: "{\"code\":200,\"data\":{\"collectionName\":\"" + DefaultCollectionName + "\",\"description\":\"\",\"enableDynamicField\":true,\"fields\":[{\"autoId\":false,\"description\":\"\",\"name\":\"book_id\",\"partitionKey\":false,\"primaryKey\":true,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"word_count\",\"partitionKey\":false,\"primaryKey\":false,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"book_intro\",\"partitionKey\":false,\"primaryKey\":false,\"type\":\"FloatVector(2)\"}],\"indexes\":[],\"load\":\"LoadStateLoaded\",\"shardsNum\":1}}", }) mp4 := mocks.NewMockProxy(t) @@ -289,7 +289,7 @@ func TestVectorCollectionsDescribe(t *testing.T) { name: "show collection details success", mp: mp4, exceptCode: http.StatusOK, - expectedBody: "{\"code\":200,\"data\":{\"collectionName\":\"" + DefaultCollectionName + "\",\"description\":\"\",\"enableDynamicField\":true,\"fields\":[{\"autoId\":false,\"description\":\"\",\"name\":\"book_id\",\"primaryKey\":true,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"word_count\",\"primaryKey\":false,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"book_intro\",\"primaryKey\":false,\"type\":\"FloatVector(2)\"}],\"indexes\":[{\"fieldName\":\"book_intro\",\"indexName\":\"" + DefaultIndexName + "\",\"metricType\":\"L2\"}],\"load\":\"LoadStateLoaded\",\"shardsNum\":1}}", + expectedBody: "{\"code\":200,\"data\":{\"collectionName\":\"" + DefaultCollectionName + "\",\"description\":\"\",\"enableDynamicField\":true,\"fields\":[{\"autoId\":false,\"description\":\"\",\"name\":\"book_id\",\"partitionKey\":false,\"primaryKey\":true,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"word_count\",\"partitionKey\":false,\"primaryKey\":false,\"type\":\"Int64\"},{\"autoId\":false,\"description\":\"\",\"name\":\"book_intro\",\"partitionKey\":false,\"primaryKey\":false,\"type\":\"FloatVector(2)\"}],\"indexes\":[{\"fieldName\":\"book_intro\",\"indexName\":\"" + DefaultIndexName + "\",\"metricType\":\"L2\"}],\"load\":\"LoadStateLoaded\",\"shardsNum\":1}}", }) for _, tt := range testCases { @@ -757,10 +757,9 @@ func TestInsertForDataType(t *testing.T) { paramtable.Init() paramtable.Get().Save(proxy.Params.HTTPCfg.AcceptTypeAllowInt64.Key, "true") schemas := map[string]*schemapb.CollectionSchema{ - "[success]kinds of data type": newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false)), - "[success]use binary vector": newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, true)), - "[success]with dynamic field": withDynamicField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false))), - "[success]with array fields": withArrayField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false))), + "[success]kinds of data type": newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64)), + "[success]with dynamic field": withDynamicField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64))), + "[success]with array fields": withArrayField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64))), } for name, schema := range schemas { t.Run(name, func(t *testing.T) { @@ -831,7 +830,7 @@ func TestReturnInt64(t *testing.T) { } for _, dataType := range schemas { t.Run("[insert]httpCfg.allow: false", func(t *testing.T) { - schema := newCollectionSchema(generateCollectionSchema(dataType, false)) + schema := newCollectionSchema(generateCollectionSchema(dataType)) mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, @@ -862,7 +861,7 @@ func TestReturnInt64(t *testing.T) { for _, dataType := range schemas { t.Run("[upsert]httpCfg.allow: false", func(t *testing.T) { - schema := newCollectionSchema(generateCollectionSchema(dataType, false)) + schema := newCollectionSchema(generateCollectionSchema(dataType)) mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, @@ -893,7 +892,7 @@ func TestReturnInt64(t *testing.T) { for _, dataType := range schemas { t.Run("[insert]httpCfg.allow: false, Accept-Type-Allow-Int64: true", func(t *testing.T) { - schema := newCollectionSchema(generateCollectionSchema(dataType, false)) + schema := newCollectionSchema(generateCollectionSchema(dataType)) mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, @@ -925,7 +924,7 @@ func TestReturnInt64(t *testing.T) { for _, dataType := range schemas { t.Run("[upsert]httpCfg.allow: false, Accept-Type-Allow-Int64: true", func(t *testing.T) { - schema := newCollectionSchema(generateCollectionSchema(dataType, false)) + schema := newCollectionSchema(generateCollectionSchema(dataType)) mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, @@ -958,7 +957,7 @@ func TestReturnInt64(t *testing.T) { paramtable.Get().Save(proxy.Params.HTTPCfg.AcceptTypeAllowInt64.Key, "true") for _, dataType := range schemas { t.Run("[insert]httpCfg.allow: true", func(t *testing.T) { - schema := newCollectionSchema(generateCollectionSchema(dataType, false)) + schema := newCollectionSchema(generateCollectionSchema(dataType)) mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, @@ -989,7 +988,7 @@ func TestReturnInt64(t *testing.T) { for _, dataType := range schemas { t.Run("[upsert]httpCfg.allow: true", func(t *testing.T) { - schema := newCollectionSchema(generateCollectionSchema(dataType, false)) + schema := newCollectionSchema(generateCollectionSchema(dataType)) mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, @@ -1020,7 +1019,7 @@ func TestReturnInt64(t *testing.T) { for _, dataType := range schemas { t.Run("[insert]httpCfg.allow: true, Accept-Type-Allow-Int64: false", func(t *testing.T) { - schema := newCollectionSchema(generateCollectionSchema(dataType, false)) + schema := newCollectionSchema(generateCollectionSchema(dataType)) mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, @@ -1052,7 +1051,7 @@ func TestReturnInt64(t *testing.T) { for _, dataType := range schemas { t.Run("[upsert]httpCfg.allow: true, Accept-Type-Allow-Int64: false", func(t *testing.T) { - schema := newCollectionSchema(generateCollectionSchema(dataType, false)) + schema := newCollectionSchema(generateCollectionSchema(dataType)) mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index 8a8a330f4e..9ad2083f70 100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -10,7 +10,6 @@ import ( "strconv" "strings" - "github.com/cockroachdb/errors" "github.com/gin-gonic/gin" "github.com/golang/protobuf/proto" "github.com/spf13/cast" @@ -113,7 +112,7 @@ func convertRange(field *schemapb.FieldSchema, result gjson.Result) (string, err func checkGetPrimaryKey(coll *schemapb.CollectionSchema, idResult gjson.Result) (string, error) { primaryField, ok := getPrimaryField(coll) if !ok { - return "", errors.New("fail to find primary key from collection description") + return "", fmt.Errorf("collection: %s has no primary field", coll.Name) } resultStr, err := convertRange(primaryField, idResult) if err != nil { @@ -124,16 +123,26 @@ func checkGetPrimaryKey(coll *schemapb.CollectionSchema, idResult gjson.Result) } // --------------------- collection details --------------------- // + +func IsVectorField(field *schemapb.FieldSchema) bool { + switch field.DataType { + case schemapb.DataType_BinaryVector, schemapb.DataType_FloatVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector: + return true + } + return false +} + func printFields(fields []*schemapb.FieldSchema) []gin.H { var res []gin.H for _, field := range fields { fieldDetail := gin.H{ - HTTPReturnFieldName: field.Name, - HTTPReturnFieldPrimaryKey: field.IsPrimaryKey, - HTTPReturnFieldAutoID: field.AutoID, - HTTPReturnDescription: field.Description, + HTTPReturnFieldName: field.Name, + HTTPReturnFieldPrimaryKey: field.IsPrimaryKey, + HTTPReturnFieldPartitionKey: field.IsPartitionKey, + HTTPReturnFieldAutoID: field.AutoID, + HTTPReturnDescription: field.Description, } - if field.DataType == schemapb.DataType_BinaryVector || field.DataType == schemapb.DataType_FloatVector { + if IsVectorField(field) { dim, _ := getDim(field) fieldDetail[HTTPReturnFieldType] = field.DataType.String() + "(" + strconv.FormatInt(dim, 10) + ")" } else if field.DataType == schemapb.DataType_VarChar { @@ -162,9 +171,9 @@ func printIndexes(indexes []*milvuspb.IndexDescription) []gin.H { var res []gin.H for _, index := range indexes { res = append(res, gin.H{ - HTTPReturnIndexName: index.IndexName, - HTTPReturnIndexField: index.FieldName, - HTTPReturnIndexMetricsType: getMetricType(index.Params), + HTTPIndexName: index.IndexName, + HTTPIndexField: index.FieldName, + HTTPReturnIndexMetricType: getMetricType(index.Params), }) } return res @@ -187,8 +196,6 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, for _, data := range dataResultArray { reallyData := map[string]interface{}{} - var vectorArray []float32 - var binaryArray []byte if data.Type == gjson.JSON { for _, field := range collSchema.Fields { fieldType := field.DataType @@ -205,15 +212,48 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, switch fieldType { case schemapb.DataType_FloatVector: - for _, vector := range gjson.Get(data.Raw, fieldName).Array() { - vectorArray = append(vectorArray, cast.ToFloat32(vector.Num)) + if dataString == "" { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + } + var vectorArray []float32 + err := json.Unmarshal([]byte(dataString), &vectorArray) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = vectorArray case schemapb.DataType_BinaryVector: - for _, vector := range gjson.Get(data.Raw, fieldName).Array() { - binaryArray = append(binaryArray, cast.ToUint8(vector.Num)) + if dataString == "" { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray } - reallyData[fieldName] = binaryArray + vectorStr := gjson.Get(data.Raw, fieldName).Raw + var vectorArray []byte + err := json.Unmarshal([]byte(vectorStr), &vectorArray) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + } + reallyData[fieldName] = vectorArray + case schemapb.DataType_Float16Vector: + if dataString == "" { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + } + vectorStr := gjson.Get(data.Raw, fieldName).Raw + var vectorArray []byte + err := json.Unmarshal([]byte(vectorStr), &vectorArray) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + } + reallyData[fieldName] = vectorArray + case schemapb.DataType_BFloat16Vector: + if dataString == "" { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + } + vectorStr := gjson.Get(data.Raw, fieldName).Raw + var vectorArray []byte + err := json.Unmarshal([]byte(vectorStr), &vectorArray) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + } + reallyData[fieldName] = vectorArray case schemapb.DataType_Bool: result, err := cast.ToBoolE(dataString) if err != nil { @@ -239,7 +279,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, } reallyData[fieldName] = result case schemapb.DataType_Int64: - result, err := cast.ToInt64E(dataString) + result, err := json.Number(dataString).Int64() if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray } @@ -250,7 +290,8 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, arr := make([]bool, 0) err := json.Unmarshal([]byte(dataString), &arr) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_BoolData{ @@ -263,7 +304,8 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, arr := make([]int32, 0) err := json.Unmarshal([]byte(dataString), &arr) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -276,7 +318,8 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, arr := make([]int32, 0) err := json.Unmarshal([]byte(dataString), &arr) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -289,7 +332,8 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, arr := make([]int32, 0) err := json.Unmarshal([]byte(dataString), &arr) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -300,9 +344,18 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, } case schemapb.DataType_Int64: arr := make([]int64, 0) - err := json.Unmarshal([]byte(dataString), &arr) + numArr := make([]json.Number, 0) + err := json.Unmarshal([]byte(dataString), &numArr) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray + } + for _, num := range numArr { + intVal, err := num.Int64() + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + } + arr = append(arr, intVal) } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_LongData{ @@ -315,7 +368,8 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, arr := make([]float32, 0) err := json.Unmarshal([]byte(dataString), &arr) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_FloatData{ @@ -328,7 +382,8 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, arr := make([]float64, 0) err := json.Unmarshal([]byte(dataString), &arr) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_DoubleData{ @@ -341,7 +396,8 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, arr := make([]string, 0) err := json.Unmarshal([]byte(dataString), &arr) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_StringData{ @@ -434,7 +490,8 @@ func convertFloatVectorToArray(vector [][]float32, dim int64) ([]float32, error) floatArray := make([]float32, 0) for _, arr := range vector { if int64(len(arr)) != dim { - return nil, errors.New("vector length diff from dimension") + return nil, fmt.Errorf("[]float32 size %d doesn't equal to vector dimension %d of %s", + len(arr), dim, schemapb.DataType_name[int32(schemapb.DataType_FloatVector)]) } for i := int64(0); i < dim; i++ { floatArray = append(floatArray, arr[i]) @@ -443,12 +500,21 @@ func convertFloatVectorToArray(vector [][]float32, dim int64) ([]float32, error) return floatArray, nil } -func convertBinaryVectorToArray(vector [][]byte, dim int64) ([]byte, error) { +func convertBinaryVectorToArray(vector [][]byte, dim int64, dataType schemapb.DataType) ([]byte, error) { binaryArray := make([]byte, 0) - bytesLen := dim / 8 + var bytesLen int64 + switch dataType { + case schemapb.DataType_BinaryVector: + bytesLen = dim / 8 + case schemapb.DataType_Float16Vector: + bytesLen = dim * 2 + case schemapb.DataType_BFloat16Vector: + bytesLen = dim * 2 + } for _, arr := range vector { if int64(len(arr)) != bytesLen { - return nil, errors.New("vector length diff from dimension") + return nil, fmt.Errorf("[]byte size %d doesn't equal to vector dimension %d of %s", + len(arr), dim, schemapb.DataType_name[int32(dataType)]) } for i := int64(0); i < bytesLen; i++ { binaryArray = append(binaryArray, arr[i]) @@ -503,13 +569,13 @@ func convertToIntArray(dataType schemapb.DataType, arr interface{}) []int32 { func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) ([]*schemapb.FieldData, error) { rowsLen := len(rows) if rowsLen == 0 { - return []*schemapb.FieldData{}, errors.New("0 length column") + return []*schemapb.FieldData{}, fmt.Errorf("no row need to be convert to columns") } isDynamic := sch.EnableDynamicField - var dim int64 nameColumns := make(map[string]interface{}) + nameDims := make(map[string]int64) fieldData := make(map[string]*schemapb.FieldData) for _, field := range sch.Fields { // skip auto id pk field @@ -542,10 +608,20 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) data = make([][]byte, 0, rowsLen) case schemapb.DataType_FloatVector: data = make([][]float32, 0, rowsLen) - dim, _ = getDim(field) + dim, _ := getDim(field) + nameDims[field.Name] = dim case schemapb.DataType_BinaryVector: data = make([][]byte, 0, rowsLen) - dim, _ = getDim(field) + dim, _ := getDim(field) + nameDims[field.Name] = dim + case schemapb.DataType_Float16Vector: + data = make([][]byte, 0, rowsLen) + dim, _ := getDim(field) + nameDims[field.Name] = dim + case schemapb.DataType_BFloat16Vector: + data = make([][]byte, 0, rowsLen) + dim, _ := getDim(field) + nameDims[field.Name] = dim default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -557,8 +633,8 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) IsDynamic: field.IsDynamic, } } - if dim == 0 { - return nil, errors.New("cannot find dimension") + if len(nameDims) == 0 { + return nil, fmt.Errorf("collection: %s has no vector field", sch.Name) } dynamicCol := make([][]byte, 0, rowsLen) @@ -608,6 +684,10 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) nameColumns[field.Name] = append(nameColumns[field.Name].([][]float32), candi.v.Interface().([]float32)) case schemapb.DataType_BinaryVector: nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) + case schemapb.DataType_Float16Vector: + nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) + case schemapb.DataType_BFloat16Vector: + nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -742,6 +822,7 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) }, } case schemapb.DataType_FloatVector: + dim := nameDims[name] arr, err := convertFloatVectorToArray(column.([][]float32), dim) if err != nil { return nil, err @@ -757,7 +838,8 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) }, } case schemapb.DataType_BinaryVector: - arr, err := convertBinaryVectorToArray(column.([][]byte), dim) + dim := nameDims[name] + arr, err := convertBinaryVectorToArray(column.([][]byte), dim, colData.Type) if err != nil { return nil, err } @@ -769,6 +851,34 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) }, }, } + case schemapb.DataType_Float16Vector: + dim := nameDims[name] + arr, err := convertBinaryVectorToArray(column.([][]byte), dim, colData.Type) + if err != nil { + return nil, err + } + colData.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: dim, + Data: &schemapb.VectorField_Float16Vector{ + Float16Vector: arr, + }, + }, + } + case schemapb.DataType_BFloat16Vector: + dim := nameDims[name] + arr, err := convertBinaryVectorToArray(column.([][]byte), dim, colData.Type) + if err != nil { + return nil, err + } + colData.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: dim, + Data: &schemapb.VectorField_Bfloat16Vector{ + Bfloat16Vector: arr, + }, + }, + } default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", colData.Type, name) } @@ -876,6 +986,10 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap rowsNum = int64(len(fieldDataList[0].GetVectors().GetBinaryVector())*8) / fieldDataList[0].GetVectors().GetDim() case schemapb.DataType_FloatVector: rowsNum = int64(len(fieldDataList[0].GetVectors().GetFloatVector().Data)) / fieldDataList[0].GetVectors().GetDim() + case schemapb.DataType_Float16Vector: + rowsNum = int64(len(fieldDataList[0].GetVectors().GetFloat16Vector())/2) / fieldDataList[0].GetVectors().GetDim() + case schemapb.DataType_BFloat16Vector: + rowsNum = int64(len(fieldDataList[0].GetVectors().GetBfloat16Vector())/2) / fieldDataList[0].GetVectors().GetDim() default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", fieldDataList[0].Type, fieldDataList[0].FieldName) } @@ -927,6 +1041,10 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetBinaryVector()[i*(fieldDataList[j].GetVectors().GetDim()/8) : (i+1)*(fieldDataList[j].GetVectors().GetDim()/8)] case schemapb.DataType_FloatVector: row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetFloatVector().Data[i*fieldDataList[j].GetVectors().GetDim() : (i+1)*fieldDataList[j].GetVectors().GetDim()] + case schemapb.DataType_Float16Vector: + row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetFloat16Vector()[i*(fieldDataList[j].GetVectors().GetDim()*2) : (i+1)*(fieldDataList[j].GetVectors().GetDim()*2)] + case schemapb.DataType_BFloat16Vector: + row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetBfloat16Vector()[i*(fieldDataList[j].GetVectors().GetDim()*2) : (i+1)*(fieldDataList[j].GetVectors().GetDim()*2)] case schemapb.DataType_Array: row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetArrayData().Data[i] case schemapb.DataType_JSON: diff --git a/internal/distributed/proxy/httpserver/utils_test.go b/internal/distributed/proxy/httpserver/utils_test.go index 3333db1109..0595a8f8dc 100644 --- a/internal/distributed/proxy/httpserver/utils_test.go +++ b/internal/distributed/proxy/httpserver/utils_test.go @@ -1,6 +1,8 @@ package httpserver import ( + "encoding/json" + "math" "strconv" "strings" "testing" @@ -21,6 +23,8 @@ const ( FieldBookIntro = "book_intro" ) +var DefaultScores = []float32{0.01, 0.04, 0.09} + func generatePrimaryField(datatype schemapb.DataType) schemapb.FieldSchema { return schemapb.FieldSchema{ FieldID: common.StartOfUserFieldID, @@ -63,42 +67,29 @@ func generateIds(dataType schemapb.DataType, num int) *schemapb.IDs { return nil } -func generateVectorFieldSchema(useBinary bool) schemapb.FieldSchema { - if useBinary { - return schemapb.FieldSchema{ - FieldID: common.StartOfUserFieldID + 2, - Name: "field-binary", - IsPrimaryKey: false, - Description: "", - DataType: 100, - AutoID: false, - TypeParams: []*commonpb.KeyValuePair{ - { - Key: common.DimKey, - Value: "8", - }, - }, - } +func generateVectorFieldSchema(dataType schemapb.DataType) schemapb.FieldSchema { + dim := "2" + if dataType == schemapb.DataType_BinaryVector { + dim = "8" } return schemapb.FieldSchema{ - FieldID: common.StartOfUserFieldID + 2, - Name: FieldBookIntro, + FieldID: common.StartOfUserFieldID + int64(dataType), IsPrimaryKey: false, - Description: "", - DataType: 101, + DataType: dataType, AutoID: false, TypeParams: []*commonpb.KeyValuePair{ { Key: common.DimKey, - Value: "2", + Value: dim, }, }, } } -func generateCollectionSchema(datatype schemapb.DataType, useBinary bool) *schemapb.CollectionSchema { - primaryField := generatePrimaryField(datatype) - vectorField := generateVectorFieldSchema(useBinary) +func generateCollectionSchema(primaryDataType schemapb.DataType) *schemapb.CollectionSchema { + primaryField := generatePrimaryField(primaryDataType) + vectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector) + vectorField.Name = FieldBookIntro return &schemapb.CollectionSchema{ Name: DefaultCollectionName, Description: "", @@ -142,11 +133,12 @@ func generateIndexes() []*milvuspb.IndexDescription { } } -func generateVectorFieldData(useBinary bool) schemapb.FieldData { - if useBinary { +func generateVectorFieldData(vectorType schemapb.DataType) schemapb.FieldData { + switch vectorType { + case schemapb.DataType_BinaryVector: return schemapb.FieldData{ Type: schemapb.DataType_BinaryVector, - FieldName: "field-binary", + FieldName: FieldBookIntro, Field: &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Dim: 8, @@ -157,6 +149,34 @@ func generateVectorFieldData(useBinary bool) schemapb.FieldData { }, IsDynamic: false, } + case schemapb.DataType_Float16Vector: + return schemapb.FieldData{ + Type: schemapb.DataType_Float16Vector, + FieldName: FieldBookIntro, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: 8, + Data: &schemapb.VectorField_Float16Vector{ + Float16Vector: []byte{byte(0), byte(0), byte(1), byte(1), byte(2), byte(2)}, + }, + }, + }, + IsDynamic: false, + } + case schemapb.DataType_BFloat16Vector: + return schemapb.FieldData{ + Type: schemapb.DataType_BFloat16Vector, + FieldName: FieldBookIntro, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: 8, + Data: &schemapb.VectorField_Bfloat16Vector{ + Bfloat16Vector: []byte{byte(0), byte(0), byte(1), byte(1), byte(2), byte(2)}, + }, + }, + }, + IsDynamic: false, + } } return schemapb.FieldData{ Type: schemapb.DataType_FloatVector, @@ -206,40 +226,68 @@ func generateFieldData() []*schemapb.FieldData { IsDynamic: false, } - fieldData3 := generateVectorFieldData(false) + fieldData3 := generateVectorFieldData(schemapb.DataType_FloatVector) return []*schemapb.FieldData{&fieldData1, &fieldData2, &fieldData3} } -func generateSearchResult(dataType schemapb.DataType) []map[string]interface{} { +func wrapRequestBody(data []map[string]interface{}) ([]byte, error) { + body := map[string]interface{}{} + body["data"] = data + return json.Marshal(body) +} + +func generateRawRows(dataType schemapb.DataType) []map[string]interface{} { row1 := map[string]interface{}{ - DefaultPrimaryFieldName: int64(1), - FieldBookID: int64(1), - FieldWordCount: int64(1000), - FieldBookIntro: []float32{0.1, 0.11}, - HTTPReturnDistance: float32(0.01), + FieldBookID: int64(1), + FieldWordCount: int64(1000), + FieldBookIntro: []float32{0.1, 0.11}, } row2 := map[string]interface{}{ - DefaultPrimaryFieldName: int64(2), - FieldBookID: int64(2), - FieldWordCount: int64(2000), - FieldBookIntro: []float32{0.2, 0.22}, - HTTPReturnDistance: float32(0.04), + FieldBookID: int64(2), + FieldWordCount: int64(2000), + FieldBookIntro: []float32{0.2, 0.22}, } row3 := map[string]interface{}{ - DefaultPrimaryFieldName: int64(3), - FieldBookID: int64(3), - FieldWordCount: int64(3000), - FieldBookIntro: []float32{0.3, 0.33}, - HTTPReturnDistance: float32(0.09), + FieldBookID: int64(3), + FieldWordCount: int64(3000), + FieldBookIntro: []float32{0.3, 0.33}, } if dataType == schemapb.DataType_String { - row1[DefaultPrimaryFieldName] = "1" - row2[DefaultPrimaryFieldName] = "2" - row3[DefaultPrimaryFieldName] = "3" + row1[FieldBookID] = "1" + row2[FieldBookID] = "2" + row3[FieldBookID] = "3" } return []map[string]interface{}{row1, row2, row3} } +func generateRequestBody(dataType schemapb.DataType) ([]byte, error) { + return wrapRequestBody(generateRawRows(dataType)) +} + +func generateRequestBodyWithArray(dataType schemapb.DataType) ([]byte, error) { + rows := generateRawRows(dataType) + for _, result := range rows { + result["array-bool"] = "[true]" + result["array-int8"] = "[0]" + result["array-int16"] = "[0]" + result["array-int32"] = "[0]" + result["array-int64"] = "[0]" + result["array-float"] = "[0.0]" + result["array-double"] = "[0.0]" + result["array-varchar"] = "[\"\"]" + } + return wrapRequestBody(rows) +} + +func generateSearchResult(dataType schemapb.DataType) []map[string]interface{} { + rows := generateRawRows(dataType) + for i, row := range rows { + row[DefaultPrimaryFieldName] = row[FieldBookID] + row[HTTPReturnDistance] = DefaultScores[i] + } + return rows +} + func generateQueryResult64(withDistance bool) []map[string]interface{} { row1 := map[string]interface{}{ FieldBookID: float64(1), @@ -265,36 +313,39 @@ func generateQueryResult64(withDistance bool) []map[string]interface{} { } func TestPrintCollectionDetails(t *testing.T) { - coll := generateCollectionSchema(schemapb.DataType_Int64, false) + coll := generateCollectionSchema(schemapb.DataType_Int64) indexes := generateIndexes() assert.Equal(t, []gin.H{ { - HTTPReturnFieldName: FieldBookID, - HTTPReturnFieldType: "Int64", - HTTPReturnFieldPrimaryKey: true, - HTTPReturnFieldAutoID: false, - HTTPReturnDescription: "", + HTTPReturnFieldName: FieldBookID, + HTTPReturnFieldType: "Int64", + HTTPReturnFieldPartitionKey: false, + HTTPReturnFieldPrimaryKey: true, + HTTPReturnFieldAutoID: false, + HTTPReturnDescription: "", }, { - HTTPReturnFieldName: FieldWordCount, - HTTPReturnFieldType: "Int64", - HTTPReturnFieldPrimaryKey: false, - HTTPReturnFieldAutoID: false, - HTTPReturnDescription: "", + HTTPReturnFieldName: FieldWordCount, + HTTPReturnFieldType: "Int64", + HTTPReturnFieldPartitionKey: false, + HTTPReturnFieldPrimaryKey: false, + HTTPReturnFieldAutoID: false, + HTTPReturnDescription: "", }, { - HTTPReturnFieldName: FieldBookIntro, - HTTPReturnFieldType: "FloatVector(2)", - HTTPReturnFieldPrimaryKey: false, - HTTPReturnFieldAutoID: false, - HTTPReturnDescription: "", + HTTPReturnFieldName: FieldBookIntro, + HTTPReturnFieldType: "FloatVector(2)", + HTTPReturnFieldPartitionKey: false, + HTTPReturnFieldPrimaryKey: false, + HTTPReturnFieldAutoID: false, + HTTPReturnDescription: "", }, }, printFields(coll.Fields)) assert.Equal(t, []gin.H{ { - HTTPReturnIndexName: DefaultIndexName, - HTTPReturnIndexField: FieldBookIntro, - HTTPReturnIndexMetricsType: DefaultMetricType, + HTTPIndexName: DefaultIndexName, + HTTPIndexField: FieldBookIntro, + HTTPReturnIndexMetricType: DefaultMetricType, }, }, printIndexes(indexes)) assert.Equal(t, DefaultMetricType, getMetricType(indexes[0].Params)) @@ -307,17 +358,18 @@ func TestPrintCollectionDetails(t *testing.T) { } assert.Equal(t, []gin.H{ { - HTTPReturnFieldName: "field-varchar", - HTTPReturnFieldType: "VarChar(10)", - HTTPReturnFieldPrimaryKey: false, - HTTPReturnFieldAutoID: false, - HTTPReturnDescription: "", + HTTPReturnFieldName: "field-varchar", + HTTPReturnFieldType: "VarChar(10)", + HTTPReturnFieldPartitionKey: false, + HTTPReturnFieldPrimaryKey: false, + HTTPReturnFieldAutoID: false, + HTTPReturnDescription: "", }, }, printFields(fields)) } func TestPrimaryField(t *testing.T) { - coll := generateCollectionSchema(schemapb.DataType_Int64, false) + coll := generateCollectionSchema(schemapb.DataType_Int64) primaryField := generatePrimaryField(schemapb.DataType_Int64) field, ok := getPrimaryField(coll) assert.Equal(t, true, ok) @@ -341,7 +393,7 @@ func TestPrimaryField(t *testing.T) { rangeStr, err = convertRange(&primaryField, idStr) assert.Equal(t, nil, err) assert.Equal(t, `"1","2","3"`, rangeStr) - coll2 := generateCollectionSchema(schemapb.DataType_VarChar, false) + coll2 := generateCollectionSchema(schemapb.DataType_VarChar) filter, err = checkGetPrimaryKey(coll2, idStr) assert.Equal(t, nil, err) assert.Equal(t, `book_id in ["1","2","3"]`, filter) @@ -350,7 +402,7 @@ func TestPrimaryField(t *testing.T) { func TestInsertWithDynamicFields(t *testing.T) { body := "{\"data\": {\"id\": 0, \"book_id\": 1, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"classified\": false, \"databaseID\": null}}" req := InsertReq{} - coll := generateCollectionSchema(schemapb.DataType_Int64, false) + coll := generateCollectionSchema(schemapb.DataType_Int64) var err error err, req.Data = checkAndSetData(body, coll) assert.Equal(t, nil, err) @@ -364,6 +416,78 @@ func TestInsertWithDynamicFields(t *testing.T) { assert.Equal(t, "{\"classified\":false,\"id\":0}", string(fieldsData[len(fieldsData)-1].GetScalars().GetJsonData().GetData()[0])) } +func TestInsertWithoutVector(t *testing.T) { + body := "{\"data\": {}}" + var err error + primaryField := generatePrimaryField(schemapb.DataType_Int64) + primaryField.AutoID = true + floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector) + floatVectorField.Name = "floatVector" + binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector) + binaryVectorField.Name = "binaryVector" + float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector) + float16VectorField.Name = "float16Vector" + bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) + bfloat16VectorField.Name = "bfloat16Vector" + err, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + Name: DefaultCollectionName, + Fields: []*schemapb.FieldSchema{ + &primaryField, &floatVectorField, + }, + EnableDynamicField: true, + }) + assert.Error(t, err) + assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) + err, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + Name: DefaultCollectionName, + Fields: []*schemapb.FieldSchema{ + &primaryField, &binaryVectorField, + }, + EnableDynamicField: true, + }) + assert.Error(t, err) + assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) + err, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + Name: DefaultCollectionName, + Fields: []*schemapb.FieldSchema{ + &primaryField, &float16VectorField, + }, + EnableDynamicField: true, + }) + assert.Error(t, err) + assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) + err, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + Name: DefaultCollectionName, + Fields: []*schemapb.FieldSchema{ + &primaryField, &bfloat16VectorField, + }, + EnableDynamicField: true, + }) + assert.Error(t, err) + assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) +} + +func TestInsertWithInt64(t *testing.T) { + arrayFieldName := "array-int64" + body := "{\"data\": {\"book_id\": 9999999999999999, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]}}" + coll := generateCollectionSchema(schemapb.DataType_Int64) + coll.Fields = append(coll.Fields, &schemapb.FieldSchema{ + Name: arrayFieldName, + DataType: schemapb.DataType_Array, + ElementType: schemapb.DataType_Int64, + }) + err, data := checkAndSetData(body, coll) + assert.Equal(t, nil, err) + assert.Equal(t, 1, len(data)) + assert.Equal(t, int64(9999999999999999), data[0][FieldBookID]) + arr, _ := data[0][arrayFieldName].(*schemapb.ScalarField) + assert.Equal(t, int64(9999999999999999), arr.GetLongData().GetData()[0]) + + body = "{\"data\": {\"book_id\": 9999999999999999, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999.0]}}" + err, _ = checkAndSetData(body, coll) + assert.Error(t, err) +} + func TestSerialize(t *testing.T) { parameters := []float32{0.11111, 0.22222} // assert.Equal(t, "\ufffd\ufffd\ufffd=\ufffd\ufffdc\u003e", string(serialize(parameters))) @@ -413,7 +537,7 @@ func compareRow(m1 map[string]interface{}, m2 map[string]interface{}) bool { return false } } - } else if (key == "field-binary") || (key == "field-json") { + } else if key == "field-json" { arr1 := value.([]byte) arr2 := m2[key].([]byte) if len(arr1) != len(arr2) { @@ -424,13 +548,15 @@ func compareRow(m1 map[string]interface{}, m2 map[string]interface{}) bool { return false } } + } else if strings.HasPrefix(key, "array-") { + continue } else if value != m2[key] { return false } } for key, value := range m2 { - if (key == FieldBookIntro) || (key == "field-binary") || (key == "field-json") || (key == "field-array") { + if (key == FieldBookIntro) || (key == "field-json") || (key == "field-array") { continue } else if strings.HasPrefix(key, "array-") { continue @@ -457,7 +583,7 @@ func compareRows(row1 []map[string]interface{}, row2 []map[string]interface{}, c func TestBuildQueryResp(t *testing.T) { outputFields := []string{FieldBookID, FieldWordCount, "author", "date"} - rows, err := buildQueryResp(int64(0), outputFields, generateFieldData(), generateIds(schemapb.DataType_Int64, 3), []float32{0.01, 0.04, 0.09}, true) // []*schemapb.FieldData{&fieldData1, &fieldData2, &fieldData3} + rows, err := buildQueryResp(int64(0), outputFields, generateFieldData(), generateIds(schemapb.DataType_Int64, 3), DefaultScores, true) // []*schemapb.FieldData{&fieldData1, &fieldData2, &fieldData3} assert.Equal(t, nil, err) exceptRows := generateSearchResult(schemapb.DataType_Int64) assert.Equal(t, true, compareRows(rows, exceptRows, compareRow)) @@ -796,10 +922,16 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data case schemapb.DataType_VarChar: return []*schemapb.FieldData{&fieldData8} case schemapb.DataType_BinaryVector: - vectorField := generateVectorFieldData(true) + vectorField := generateVectorFieldData(firstFieldType) return []*schemapb.FieldData{&vectorField} case schemapb.DataType_FloatVector: - vectorField := generateVectorFieldData(false) + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_Float16Vector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_BFloat16Vector: + vectorField := generateVectorFieldData(firstFieldType) return []*schemapb.FieldData{&vectorField} case schemapb.DataType_Array: return []*schemapb.FieldData{&fieldData10} @@ -827,7 +959,6 @@ func newSearchResult(results []map[string]interface{}) []map[string]interface{} result["field-double"] = float64(i) result["field-varchar"] = strconv.Itoa(i) result["field-string"] = strconv.Itoa(i) - result["field-binary"] = []byte{byte(i)} result["field-json"] = []byte(`{"XXX": 0}`) result["field-array"] = []bool{true} result["array-bool"] = []bool{true} @@ -898,14 +1029,14 @@ func newCollectionSchemaWithArray(coll *schemapb.CollectionSchema) *schemapb.Col fieldSchema8 := schemapb.FieldSchema{ Name: "array-varchar", DataType: schemapb.DataType_Array, - ElementType: schemapb.DataType_String, + ElementType: schemapb.DataType_VarChar, } coll.Fields = append(coll.Fields, &fieldSchema8) return coll } -func newSearchResultWithArray(results []map[string]interface{}) []map[string]interface{} { +func newRowsWithArray(results []map[string]interface{}) []map[string]interface{} { for i, result := range results { result["array-bool"] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_BoolData{ @@ -968,50 +1099,138 @@ func newSearchResultWithArray(results []map[string]interface{}) []map[string]int return results } -func TestAnyToColumn(t *testing.T) { - data, err := anyToColumns(newSearchResultWithArray(generateSearchResult(schemapb.DataType_Int64)), newCollectionSchemaWithArray(generateCollectionSchema(schemapb.DataType_Int64, false))) +func TestArray(t *testing.T) { + body, _ := generateRequestBody(schemapb.DataType_Int64) + collectionSchema := generateCollectionSchema(schemapb.DataType_Int64) + err, rows := checkAndSetData(string(body), collectionSchema) assert.Equal(t, nil, err) - assert.Equal(t, 12, len(data)) + assert.Equal(t, true, compareRows(rows, generateRawRows(schemapb.DataType_Int64), compareRow)) + data, err := anyToColumns(rows, collectionSchema) + assert.Equal(t, nil, err) + assert.Equal(t, len(collectionSchema.Fields)+1, len(data)) + + body, _ = generateRequestBodyWithArray(schemapb.DataType_Int64) + collectionSchema = newCollectionSchemaWithArray(generateCollectionSchema(schemapb.DataType_Int64)) + err, rows = checkAndSetData(string(body), collectionSchema) + assert.Equal(t, nil, err) + assert.Equal(t, true, compareRows(rows, newRowsWithArray(generateRawRows(schemapb.DataType_Int64)), compareRow)) + data, err = anyToColumns(rows, collectionSchema) + assert.Equal(t, nil, err) + assert.Equal(t, len(collectionSchema.Fields)+1, len(data)) +} + +func TestVector(t *testing.T) { + floatVector := "vector-float" + binaryVector := "vector-binary" + float16Vector := "vector-float16" + bfloat16Vector := "vector-bfloat16" + row1 := map[string]interface{}{ + FieldBookID: int64(1), + floatVector: []float32{0.1, 0.11}, + binaryVector: []byte{1}, + float16Vector: []byte{1, 1, 11, 11}, + bfloat16Vector: []byte{1, 1, 11, 11}, + } + row2 := map[string]interface{}{ + FieldBookID: int64(2), + floatVector: []float32{0.2, 0.22}, + binaryVector: []byte{2}, + float16Vector: []byte{2, 2, 22, 22}, + bfloat16Vector: []byte{2, 2, 22, 22}, + } + row3 := map[string]interface{}{ + FieldBookID: int64(3), + floatVector: []float32{0.3, 0.33}, + binaryVector: []byte{3}, + float16Vector: []byte{3, 3, 33, 33}, + bfloat16Vector: []byte{3, 3, 33, 33}, + } + body, _ := wrapRequestBody([]map[string]interface{}{row1, row2, row3}) + primaryField := generatePrimaryField(schemapb.DataType_Int64) + floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector) + floatVectorField.Name = floatVector + binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector) + binaryVectorField.Name = binaryVector + float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector) + float16VectorField.Name = float16Vector + bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) + bfloat16VectorField.Name = bfloat16Vector + collectionSchema := &schemapb.CollectionSchema{ + Name: DefaultCollectionName, + Description: "", + AutoID: false, + Fields: []*schemapb.FieldSchema{ + &primaryField, &floatVectorField, &binaryVectorField, &float16VectorField, &bfloat16VectorField, + }, + EnableDynamicField: true, + } + err, rows := checkAndSetData(string(body), collectionSchema) + assert.Equal(t, nil, err) + for _, row := range rows { + assert.Equal(t, 1, len(row[binaryVector].([]byte))) + assert.Equal(t, 4, len(row[float16Vector].([]byte))) + assert.Equal(t, 4, len(row[bfloat16Vector].([]byte))) + } + data, err := anyToColumns(rows, collectionSchema) + assert.Equal(t, nil, err) + assert.Equal(t, len(collectionSchema.Fields)+1, len(data)) + + row1[bfloat16Vector] = []int64{99999999, -99999999} + body, _ = wrapRequestBody([]map[string]interface{}{row1}) + err, _ = checkAndSetData(string(body), collectionSchema) + assert.Error(t, err) + row1[float16Vector] = []int64{99999999, -99999999} + body, _ = wrapRequestBody([]map[string]interface{}{row1}) + err, _ = checkAndSetData(string(body), collectionSchema) + assert.Error(t, err) + row1[binaryVector] = []int64{99999999, -99999999} + body, _ = wrapRequestBody([]map[string]interface{}{row1}) + err, _ = checkAndSetData(string(body), collectionSchema) + assert.Error(t, err) + row1[floatVector] = []float64{math.MaxFloat64, 0} + body, _ = wrapRequestBody([]map[string]interface{}{row1}) + err, _ = checkAndSetData(string(body), collectionSchema) + assert.Error(t, err) } func TestBuildQueryResps(t *testing.T) { outputFields := []string{"XXX", "YYY"} outputFieldsList := [][]string{outputFields, {"$meta"}, {"$meta", FieldBookID, FieldBookIntro, "YYY"}} for _, theOutputFields := range outputFieldsList { - rows, err := buildQueryResp(int64(0), theOutputFields, newFieldData(generateFieldData(), schemapb.DataType_None), generateIds(schemapb.DataType_Int64, 3), []float32{0.01, 0.04, 0.09}, true) + rows, err := buildQueryResp(int64(0), theOutputFields, newFieldData(generateFieldData(), schemapb.DataType_None), generateIds(schemapb.DataType_Int64, 3), DefaultScores, true) assert.Equal(t, nil, err) exceptRows := newSearchResult(generateSearchResult(schemapb.DataType_Int64)) assert.Equal(t, true, compareRows(rows, exceptRows, compareRow)) } dataTypes := []schemapb.DataType{ - schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, + schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_Bool, schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32, schemapb.DataType_Float, schemapb.DataType_Double, schemapb.DataType_String, schemapb.DataType_VarChar, schemapb.DataType_JSON, schemapb.DataType_Array, } for _, dateType := range dataTypes { - _, err := buildQueryResp(int64(0), outputFields, newFieldData([]*schemapb.FieldData{}, dateType), generateIds(schemapb.DataType_Int64, 3), []float32{0.01, 0.04, 0.09}, true) + _, err := buildQueryResp(int64(0), outputFields, newFieldData([]*schemapb.FieldData{}, dateType), generateIds(schemapb.DataType_Int64, 3), DefaultScores, true) assert.Equal(t, nil, err) } - _, err := buildQueryResp(int64(0), outputFields, newFieldData([]*schemapb.FieldData{}, 1000), generateIds(schemapb.DataType_Int64, 3), []float32{0.01, 0.04, 0.09}, true) + _, err := buildQueryResp(int64(0), outputFields, newFieldData([]*schemapb.FieldData{}, 1000), generateIds(schemapb.DataType_Int64, 3), DefaultScores, true) assert.Equal(t, "the type(1000) of field(wrong-field-type) is not supported, use other sdk please", err.Error()) - res, err := buildQueryResp(int64(0), outputFields, []*schemapb.FieldData{}, generateIds(schemapb.DataType_Int64, 3), []float32{0.01, 0.04, 0.09}, true) + res, err := buildQueryResp(int64(0), outputFields, []*schemapb.FieldData{}, generateIds(schemapb.DataType_Int64, 3), DefaultScores, true) assert.Equal(t, 3, len(res)) assert.Equal(t, nil, err) - res, err = buildQueryResp(int64(0), outputFields, []*schemapb.FieldData{}, generateIds(schemapb.DataType_Int64, 3), []float32{0.01, 0.04, 0.09}, false) + res, err = buildQueryResp(int64(0), outputFields, []*schemapb.FieldData{}, generateIds(schemapb.DataType_Int64, 3), DefaultScores, false) assert.Equal(t, 3, len(res)) assert.Equal(t, nil, err) - res, err = buildQueryResp(int64(0), outputFields, []*schemapb.FieldData{}, generateIds(schemapb.DataType_VarChar, 3), []float32{0.01, 0.04, 0.09}, true) + res, err = buildQueryResp(int64(0), outputFields, []*schemapb.FieldData{}, generateIds(schemapb.DataType_VarChar, 3), DefaultScores, true) assert.Equal(t, 3, len(res)) assert.Equal(t, nil, err) - _, err = buildQueryResp(int64(0), outputFields, generateFieldData(), generateIds(schemapb.DataType_Int64, 3), []float32{0.01, 0.04, 0.09}, false) + _, err = buildQueryResp(int64(0), outputFields, generateFieldData(), generateIds(schemapb.DataType_Int64, 3), DefaultScores, false) assert.Equal(t, nil, err) // len(rows) != len(scores), didn't show distance