enhance: unify data generation test APIs (#32955)

Issue: #22837

Signed-off-by: Cai Yudong <yudong.cai@zilliz.com>
pull/33068/head
Cai Yudong 2024-05-14 14:33:33 +08:00 committed by GitHub
parent 96489b814d
commit 4fc7915c70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 1092 additions and 1464 deletions

View File

@ -18,13 +18,9 @@ package proxy
import (
"context"
"encoding/binary"
"math"
"math/rand"
"sync"
"time"
"github.com/x448/float16"
"google.golang.org/grpc"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
@ -36,6 +32,7 @@ import (
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/uniquegenerator"
)
@ -346,418 +343,28 @@ func newSimpleMockMsgStreamFactory() *simpleMockMsgStreamFactory {
}
func generateFieldData(dataType schemapb.DataType, fieldName string, numRows int) *schemapb.FieldData {
fieldData := &schemapb.FieldData{
Type: dataType,
FieldName: fieldName,
if dataType < 100 {
return testutils.GenerateScalarFieldData(dataType, fieldName, numRows)
}
switch dataType {
case schemapb.DataType_Bool:
fieldData.FieldName = fieldName
fieldData.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_BoolData{
BoolData: &schemapb.BoolArray{
Data: generateBoolArray(numRows),
},
},
},
}
case schemapb.DataType_Int32:
fieldData.FieldName = fieldName
fieldData.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: generateInt32Array(numRows),
},
},
},
}
case schemapb.DataType_Int64:
fieldData.FieldName = fieldName
fieldData.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
LongData: &schemapb.LongArray{
Data: generateInt64Array(numRows),
},
},
},
}
case schemapb.DataType_Float:
fieldData.FieldName = fieldName
fieldData.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_FloatData{
FloatData: &schemapb.FloatArray{
Data: generateFloat32Array(numRows),
},
},
},
}
case schemapb.DataType_Double:
fieldData.FieldName = fieldName
fieldData.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_DoubleData{
DoubleData: &schemapb.DoubleArray{
Data: generateFloat64Array(numRows),
},
},
},
}
case schemapb.DataType_VarChar:
fieldData.FieldName = fieldName
fieldData.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: generateVarCharArray(numRows, maxTestStringLen),
},
},
},
}
case schemapb.DataType_FloatVector:
fieldData.FieldName = fieldName
fieldData.Field = &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: int64(testVecDim),
Data: &schemapb.VectorField_FloatVector{
FloatVector: &schemapb.FloatArray{
Data: generateFloatVectors(numRows, testVecDim),
},
},
},
}
case schemapb.DataType_BinaryVector:
fieldData.FieldName = fieldName
fieldData.Field = &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: int64(testVecDim),
Data: &schemapb.VectorField_BinaryVector{
BinaryVector: generateBinaryVectors(numRows, testVecDim),
},
},
}
default:
// TODO::
}
return fieldData
}
// generateBoolArray returns numRows pseudo-random booleans. An element is
// true when its rand.Int() draw is even.
func generateBoolArray(numRows int) []bool {
	flags := make([]bool, numRows)
	for idx := range flags {
		// rand.Int() is non-negative, so testing the low bit equals %2 == 0.
		flags[idx] = rand.Int()&1 == 0
	}
	return flags
}
// generateInt8Array returns numRows pseudo-random int8 values, each obtained
// by truncating a rand.Int() draw to its low 8 bits.
func generateInt8Array(numRows int) []int8 {
	vals := make([]int8, numRows)
	for idx := range vals {
		vals[idx] = int8(rand.Int())
	}
	return vals
}
// generateInt16Array returns numRows pseudo-random int16 values, each obtained
// by truncating a rand.Int() draw to its low 16 bits.
func generateInt16Array(numRows int) []int16 {
	vals := make([]int16, numRows)
	for idx := range vals {
		vals[idx] = int16(rand.Int())
	}
	return vals
}
// generateInt32Array returns numRows pseudo-random int32 values, each obtained
// by truncating a rand.Int() draw to 32 bits (so results may be negative).
func generateInt32Array(numRows int) []int32 {
	vals := make([]int32, numRows)
	for idx := range vals {
		vals[idx] = int32(rand.Int())
	}
	return vals
}
// generateInt64Array returns numRows pseudo-random, non-negative int64 values
// taken from rand.Int().
func generateInt64Array(numRows int) []int64 {
	vals := make([]int64, numRows)
	for idx := range vals {
		vals[idx] = int64(rand.Int())
	}
	return vals
}
// generateUint64Array returns numRows pseudo-random uint64 values drawn with
// rand.Uint64().
func generateUint64Array(numRows int) []uint64 {
	vals := make([]uint64, numRows)
	for idx := range vals {
		vals[idx] = rand.Uint64()
	}
	return vals
}
// generateFloat32Array returns numRows pseudo-random float32 values drawn
// with rand.Float32(), i.e. in the half-open interval [0.0, 1.0).
func generateFloat32Array(numRows int) []float32 {
	vals := make([]float32, numRows)
	for idx := range vals {
		vals[idx] = rand.Float32()
	}
	return vals
}
// generateFloat64Array returns numRows pseudo-random float64 values drawn
// with rand.Float64(), i.e. in the half-open interval [0.0, 1.0).
func generateFloat64Array(numRows int) []float64 {
	vals := make([]float64, numRows)
	for idx := range vals {
		vals[idx] = rand.Float64()
	}
	return vals
}
// generateFloatVectors returns the flattened contents of numRows float
// vectors of dimension dim: numRows*dim values drawn with rand.Float32().
func generateFloatVectors(numRows, dim int) []float32 {
	flat := make([]float32, numRows*dim)
	for idx := range flat {
		flat[idx] = rand.Float32()
	}
	return flat
}
// generateBinaryVectors returns (numRows*dim)/8 pseudo-random bytes, the
// packed storage for numRows binary vectors of dim bits each. It panics if
// rand.Read reports an error.
func generateBinaryVectors(numRows, dim int) []byte {
	packed := make([]byte, (numRows*dim)/8)
	if _, err := rand.Read(packed); err != nil {
		panic(err)
	}
	return packed
}
// generateFloat16Vectors returns numRows*dim float16 values serialized as
// little-endian 2-byte words. Each value is a rand.Float32() draw converted
// with float16.Fromfloat32.
func generateFloat16Vectors(numRows, dim int) []byte {
	count := numRows * dim
	buf := make([]byte, 2*count)
	for off := 0; off < len(buf); off += 2 {
		half := float16.Fromfloat32(rand.Float32())
		binary.LittleEndian.PutUint16(buf[off:], half.Bits())
	}
	return buf
}
// generateBFloat16Vectors returns numRows*dim bfloat16 values serialized as
// little-endian 2-byte words.
//
// Each value is produced by drawing rand.Float32(), keeping the upper 16 bits
// of its IEEE-754 float32 encoding and clearing the sign bit. The bytes are
// written directly into the result, avoiding the intermediate []uint16 buffer
// and second pass of the previous implementation.
func generateBFloat16Vectors(numRows, dim int) []byte {
	total := numRows * dim
	ret := make([]byte, total*2)
	for i := 0; i < total; i++ {
		// Truncate float32 -> bfloat16 by dropping the low 16 mantissa bits.
		bits := math.Float32bits(rand.Float32()) >> 16
		bits &= 0x7FFF
		binary.LittleEndian.PutUint16(ret[i*2:], uint16(bits))
	}
	return ret
}
// generateBFloat16VectorsWithInvalidData returns numRows*dim bfloat16 values
// serialized as little-endian 2-byte words, none of which is a finite number:
// even positions hold the truncated encoding of NaN and odd positions the
// truncated encoding of +Inf.
//
// The two encodings are loop-invariant, so they are computed once and written
// straight into the result instead of being rebuilt per element and staged in
// an intermediate []uint16.
func generateBFloat16VectorsWithInvalidData(numRows, dim int) []byte {
	// Same truncation as the valid generator: upper 16 bits, sign cleared.
	toBF16 := func(f float32) uint16 {
		return uint16((math.Float32bits(f) >> 16) & 0x7FFF)
	}
	invalid := [2]uint16{
		toBF16(float32(math.NaN())),  // used at even indexes
		toBF16(float32(math.Inf(1))), // used at odd indexes
	}

	total := numRows * dim
	ret := make([]byte, total*2)
	for i := 0; i < total; i++ {
		binary.LittleEndian.PutUint16(ret[i*2:], invalid[i%2])
	}
	return ret
}
// generateFloat16VectorsWithInvalidData returns numRows*dim float16 values
// serialized as little-endian 2-byte words. Even positions hold
// float16.Inf(1) and odd positions hold float16.NaN().
func generateFloat16VectorsWithInvalidData(numRows, dim int) []byte {
	// Indexed by position parity: [0] for even slots, [1] for odd slots.
	patterns := [2]uint16{uint16(float16.Inf(1)), uint16(float16.NaN())}

	count := numRows * dim
	buf := make([]byte, 2*count)
	for idx := 0; idx < count; idx++ {
		binary.LittleEndian.PutUint16(buf[2*idx:], patterns[idx%2])
	}
	return buf
}
func generateVarCharArray(numRows int, maxLen int) []string {
ret := make([]string, numRows)
for i := 0; i < numRows; i++ {
ret[i] = funcutil.RandomString(rand.Intn(maxLen))
}
return ret
return testutils.GenerateVectorFieldData(dataType, fieldName, numRows, testVecDim)
}
func newScalarFieldData(fieldSchema *schemapb.FieldSchema, fieldName string, numRows int) *schemapb.FieldData {
ret := &schemapb.FieldData{
Type: fieldSchema.DataType,
FieldName: fieldName,
Field: nil,
}
switch fieldSchema.DataType {
case schemapb.DataType_Bool:
ret.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_BoolData{
BoolData: &schemapb.BoolArray{
Data: generateBoolArray(numRows),
},
},
},
}
case schemapb.DataType_Int8:
ret.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: generateInt32Array(numRows),
},
},
},
}
case schemapb.DataType_Int16:
ret.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: generateInt32Array(numRows),
},
},
},
}
case schemapb.DataType_Int32:
ret.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: generateInt32Array(numRows),
},
},
},
}
case schemapb.DataType_Int64:
ret.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
LongData: &schemapb.LongArray{
Data: generateInt64Array(numRows),
},
},
},
}
case schemapb.DataType_Float:
ret.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_FloatData{
FloatData: &schemapb.FloatArray{
Data: generateFloat32Array(numRows),
},
},
},
}
case schemapb.DataType_Double:
ret.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_DoubleData{
DoubleData: &schemapb.DoubleArray{
Data: generateFloat64Array(numRows),
},
},
},
}
case schemapb.DataType_VarChar:
ret.Field = &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: generateVarCharArray(numRows, testMaxVarCharLength),
},
},
},
}
}
return ret
return testutils.GenerateScalarFieldData(fieldSchema.GetDataType(), fieldName, numRows)
}
func newFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
return &schemapb.FieldData{
Type: schemapb.DataType_FloatVector,
FieldName: fieldName,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: int64(dim),
Data: &schemapb.VectorField_FloatVector{
FloatVector: &schemapb.FloatArray{
Data: generateFloatVectors(numRows, dim),
},
},
},
},
}
return testutils.NewFloatVectorFieldData(fieldName, numRows, dim)
}
func newBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
return &schemapb.FieldData{
Type: schemapb.DataType_BinaryVector,
FieldName: fieldName,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: int64(dim),
Data: &schemapb.VectorField_BinaryVector{
BinaryVector: generateBinaryVectors(numRows, dim),
},
},
},
}
return testutils.NewBinaryVectorFieldData(fieldName, numRows, dim)
}
func newFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
return &schemapb.FieldData{
Type: schemapb.DataType_Float16Vector,
FieldName: fieldName,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: int64(dim),
Data: &schemapb.VectorField_Float16Vector{
Float16Vector: generateFloat16Vectors(numRows, dim),
},
},
},
}
return testutils.NewFloat16VectorFieldData(fieldName, numRows, dim)
}
func newBFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
return &schemapb.FieldData{
Type: schemapb.DataType_BFloat16Vector,
FieldName: fieldName,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: int64(dim),
Data: &schemapb.VectorField_Bfloat16Vector{
Bfloat16Vector: generateBFloat16Vectors(numRows, dim),
},
},
},
}
}
func generateHashKeys(numRows int) []uint32 {
ret := make([]uint32, 0, numRows)
for i := 0; i < numRows; i++ {
ret = append(ret, rand.Uint32())
}
return ret
return testutils.NewBFloat16VectorFieldData(fieldName, numRows, dim)
}

View File

@ -32,11 +32,12 @@ import (
"github.com/milvus-io/milvus/pkg/mq/msgstream"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
)
func TestRepackInsertData(t *testing.T) {
nb := 10
hash := generateHashKeys(nb)
hash := testutils.GenerateHashKeys(nb)
prefix := "TestRepackInsertData"
dbName := ""
collectionName := prefix + funcutil.GenRandomStr()
@ -143,7 +144,7 @@ func TestRepackInsertData(t *testing.T) {
func TestRepackInsertDataWithPartitionKey(t *testing.T) {
nb := 10
hash := generateHashKeys(nb)
hash := testutils.GenerateHashKeys(nb)
prefix := "TestRepackInsertData"
collectionName := prefix + funcutil.GenRandomStr()

View File

@ -73,6 +73,7 @@ import (
"github.com/milvus-io/milvus/pkg/util/metric"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -599,7 +600,7 @@ func TestProxy(t *testing.T) {
constructCollectionInsertRequest := func() *milvuspb.InsertRequest {
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim)
hashKeys := generateHashKeys(rowNum)
hashKeys := testutils.GenerateHashKeys(rowNum)
return &milvuspb.InsertRequest{
Base: nil,
DbName: dbName,
@ -614,7 +615,7 @@ func TestProxy(t *testing.T) {
constructPartitionInsertRequest := func() *milvuspb.InsertRequest {
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim)
hashKeys := generateHashKeys(rowNum)
hashKeys := testutils.GenerateHashKeys(rowNum)
return &milvuspb.InsertRequest{
Base: nil,
DbName: dbName,
@ -629,7 +630,7 @@ func TestProxy(t *testing.T) {
constructCollectionUpsertRequest := func() *milvuspb.UpsertRequest {
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim)
hashKeys := generateHashKeys(rowNum)
hashKeys := testutils.GenerateHashKeys(rowNum)
return &milvuspb.UpsertRequest{
Base: nil,
DbName: dbName,
@ -1811,7 +1812,7 @@ func TestProxy(t *testing.T) {
Dim: int64(dim),
Data: &schemapb.VectorField_FloatVector{
FloatVector: &schemapb.FloatArray{
Data: generateFloatVectors(nq, dim),
Data: testutils.GenerateFloatVectors(nq, dim),
},
},
},
@ -1824,7 +1825,7 @@ func TestProxy(t *testing.T) {
Dim: int64(dim),
Data: &schemapb.VectorField_FloatVector{
FloatVector: &schemapb.FloatArray{
Data: generateFloatVectors(nq, dim),
Data: testutils.GenerateFloatVectors(nq, dim),
},
},
},
@ -3723,7 +3724,7 @@ func TestProxy(t *testing.T) {
pkFieldData := newScalarFieldData(schema.Fields[0], int64Field, rowNum)
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim)
hashKeys := generateHashKeys(rowNum)
hashKeys := testutils.GenerateHashKeys(rowNum)
return &milvuspb.UpsertRequest{
Base: nil,
DbName: dbName,
@ -3739,7 +3740,7 @@ func TestProxy(t *testing.T) {
pkFieldData := newScalarFieldData(schema.Fields[0], int64Field, rowNum)
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim)
hashKeys := generateHashKeys(rowNum)
hashKeys := testutils.GenerateHashKeys(rowNum)
return &milvuspb.UpsertRequest{
Base: nil,
DbName: dbName,
@ -3755,7 +3756,7 @@ func TestProxy(t *testing.T) {
pkFieldData := newScalarFieldData(schema.Fields[0], int64Field, rowNum)
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim)
hashKeys := generateHashKeys(rowNum)
hashKeys := testutils.GenerateHashKeys(rowNum)
return &milvuspb.UpsertRequest{
Base: nil,
DbName: dbName,

View File

@ -13,6 +13,7 @@ import (
"github.com/milvus-io/milvus/pkg/mq/msgstream"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
)
func TestInsertTask_CheckAligned(t *testing.T) {
@ -56,8 +57,8 @@ func TestInsertTask_CheckAligned(t *testing.T) {
MsgType: commonpb.MsgType_Insert,
},
Version: msgpb.InsertDataVersion_ColumnBased,
RowIDs: generateInt64Array(numRows),
Timestamps: generateUint64Array(numRows),
RowIDs: testutils.GenerateInt64Array(numRows),
Timestamps: testutils.GenerateUint64Array(numRows),
},
},
schema: &schemapb.CollectionSchema{

View File

@ -39,6 +39,7 @@ import (
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -195,7 +196,7 @@ func TestQueryTask_all(t *testing.T) {
Status: merr.Success(),
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{Data: generateInt64Array(hitNum)},
IntId: &schemapb.LongArray{Data: testutils.GenerateInt64Array(hitNum)},
},
},
}

View File

@ -49,6 +49,7 @@ import (
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metric"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/typeutil"
"github.com/milvus-io/milvus/pkg/util/uniquegenerator"
@ -1680,7 +1681,7 @@ func TestTask_Int64PrimaryKey(t *testing.T) {
defer segAllocator.Close()
t.Run("insert", func(t *testing.T) {
hash := generateHashKeys(nb)
hash := testutils.GenerateHashKeys(nb)
task := &insertTask{
insertMsg: &BaseInsertTask{
BaseMsg: msgstream.BaseMsg{
@ -1874,7 +1875,7 @@ func TestTask_VarCharPrimaryKey(t *testing.T) {
defer segAllocator.Close()
t.Run("insert", func(t *testing.T) {
hash := generateHashKeys(nb)
hash := testutils.GenerateHashKeys(nb)
task := &insertTask{
insertMsg: &BaseInsertTask{
BaseMsg: msgstream.BaseMsg{
@ -1929,7 +1930,7 @@ func TestTask_VarCharPrimaryKey(t *testing.T) {
})
t.Run("upsert", func(t *testing.T) {
hash := generateHashKeys(nb)
hash := testutils.GenerateHashKeys(nb)
task := &upsertTask{
upsertMsg: &msgstream.UpsertMsg{
InsertMsg: &BaseInsertTask{
@ -3339,7 +3340,7 @@ func TestPartitionKey(t *testing.T) {
})
t.Run("Upsert", func(t *testing.T) {
hash := generateHashKeys(nb)
hash := testutils.GenerateHashKeys(nb)
ut := &upsertTask{
ctx: ctx,
Condition: NewTaskCondition(ctx),

View File

@ -28,6 +28,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/mq/msgstream"
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
"github.com/milvus-io/milvus/pkg/util/testutils"
)
func TestUpsertTask_CheckAligned(t *testing.T) {
@ -96,8 +97,8 @@ func TestUpsertTask_CheckAligned(t *testing.T) {
NumRows: uint32(numRows),
FieldsData: []*schemapb.FieldData{},
},
rowIDs: generateInt64Array(numRows),
timestamps: generateUint64Array(numRows),
rowIDs: testutils.GenerateInt64Array(numRows),
timestamps: testutils.GenerateUint64Array(numRows),
schema: schema,
upsertMsg: &msgstream.UpsertMsg{
InsertMsg: &msgstream.InsertMsg{

View File

@ -12,6 +12,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -291,8 +292,8 @@ func Test_validateUtil_checkFloatVectorFieldData(t *testing.T) {
func Test_validateUtil_checkFloat16VectorFieldData(t *testing.T) {
nb := 5
dim := int64(8)
data := generateFloat16Vectors(nb, int(dim))
invalidData := generateFloat16VectorsWithInvalidData(nb, int(dim))
data := testutils.GenerateFloat16Vectors(nb, int(dim))
invalidData := testutils.GenerateFloat16VectorsWithInvalidData(nb, int(dim))
t.Run("not float16 vector", func(t *testing.T) {
f := &schemapb.FieldData{}
@ -382,8 +383,8 @@ func Test_validateUtil_checkFloat16VectorFieldData(t *testing.T) {
func Test_validateUtil_checkBfloatVectorFieldData(t *testing.T) {
nb := 5
dim := int64(8)
data := generateFloat16Vectors(nb, int(dim))
invalidData := generateBFloat16VectorsWithInvalidData(nb, int(dim))
data := testutils.GenerateFloat16Vectors(nb, int(dim))
invalidData := testutils.GenerateBFloat16VectorsWithInvalidData(nb, int(dim))
t.Run("not float vector", func(t *testing.T) {
f := &schemapb.FieldData{}
v := newValidateUtil()
@ -1196,7 +1197,7 @@ func Test_validateUtil_checkAligned(t *testing.T) {
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_FloatVector{
FloatVector: &schemapb.FloatArray{
Data: generateFloatVectors(10, 8),
Data: testutils.GenerateFloatVectors(10, 8),
},
},
Dim: 8,
@ -1209,7 +1210,7 @@ func Test_validateUtil_checkAligned(t *testing.T) {
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_BinaryVector{
BinaryVector: generateBinaryVectors(10, 8),
BinaryVector: testutils.GenerateBinaryVectors(10, 8),
},
Dim: 8,
},
@ -1222,7 +1223,7 @@ func Test_validateUtil_checkAligned(t *testing.T) {
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: generateVarCharArray(10, 8),
Data: testutils.GenerateVarCharArray(10, 8),
},
},
},
@ -1356,7 +1357,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_BinaryVector{
BinaryVector: generateBinaryVectors(2, 8),
BinaryVector: testutils.GenerateBinaryVectors(2, 8),
},
},
},
@ -1368,7 +1369,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: generateVarCharArray(2, 8),
Data: testutils.GenerateVarCharArray(2, 8),
},
},
},
@ -1380,7 +1381,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_Float16Vector{
Float16Vector: generateFloat16Vectors(2, 8),
Float16Vector: testutils.GenerateFloat16Vectors(2, 8),
},
},
},
@ -1391,7 +1392,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_Bfloat16Vector{
Bfloat16Vector: generateBFloat16Vectors(2, 8),
Bfloat16Vector: testutils.GenerateBFloat16Vectors(2, 8),
},
},
},
@ -1474,7 +1475,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_FloatVector{
FloatVector: &schemapb.FloatArray{
Data: generateFloatVectors(2, 1),
Data: testutils.GenerateFloatVectors(2, 1),
},
},
},
@ -1486,7 +1487,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_BinaryVector{
BinaryVector: generateBinaryVectors(2, 8),
BinaryVector: testutils.GenerateBinaryVectors(2, 8),
},
},
},
@ -1497,7 +1498,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_Float16Vector{
Float16Vector: generateFloat16Vectors(2, 8),
Float16Vector: testutils.GenerateFloat16Vectors(2, 8),
},
},
},
@ -1508,7 +1509,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_Bfloat16Vector{
Bfloat16Vector: generateBFloat16Vectors(2, 8),
Bfloat16Vector: testutils.GenerateBFloat16Vectors(2, 8),
},
},
},
@ -2323,7 +2324,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Dim: 8,
Data: &schemapb.VectorField_FloatVector{
FloatVector: &schemapb.FloatArray{
Data: generateFloatVectors(2, 8),
Data: testutils.GenerateFloatVectors(2, 8),
},
},
},
@ -2336,7 +2337,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Vectors: &schemapb.VectorField{
Dim: 8,
Data: &schemapb.VectorField_BinaryVector{
BinaryVector: generateBinaryVectors(2, 8),
BinaryVector: testutils.GenerateBinaryVectors(2, 8),
},
},
},
@ -2348,7 +2349,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: generateVarCharArray(2, 8),
Data: testutils.GenerateVarCharArray(2, 8),
},
},
},
@ -2568,7 +2569,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_FloatData{
FloatData: &schemapb.FloatArray{
Data: generateFloat32Array(2),
Data: testutils.GenerateFloat32Array(2),
},
},
},
@ -2581,7 +2582,7 @@ func Test_validateUtil_Validate(t *testing.T) {
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_DoubleData{
DoubleData: &schemapb.DoubleArray{
Data: generateFloat64Array(2),
Data: testutils.GenerateFloat64Array(2),
},
},
},

View File

@ -22,9 +22,9 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/querynodev2/segments"
"github.com/milvus-io/milvus/pkg/mq/msgstream"
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
"github.com/milvus-io/milvus/pkg/util/testutils"
)
const defaultDim = 128
@ -164,9 +164,9 @@ func genFiledDataWithSchema(schema *schemapb.CollectionSchema, numRows int) []*s
fieldsData := make([]*schemapb.FieldData, 0)
for _, field := range schema.Fields {
if field.DataType < 100 {
fieldsData = append(fieldsData, segments.GenTestScalarFieldData(field.DataType, field.DataType.String(), field.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(field.DataType, field.DataType.String(), field.GetFieldID(), numRows))
} else {
fieldsData = append(fieldsData, segments.GenTestVectorFiledData(field.DataType, field.DataType.String(), field.GetFieldID(), numRows, defaultDim))
fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(field.DataType, field.DataType.String(), field.GetFieldID(), numRows, defaultDim))
}
}
return fieldsData

View File

@ -28,7 +28,6 @@ import (
"github.com/cockroachdb/errors"
"github.com/golang/protobuf/proto"
"github.com/x448/float16"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
@ -397,338 +396,6 @@ func GenTestIndexMeta(collectionID int64, schema *schemapb.CollectionSchema) *se
return &indexMeta
}
// ---------- unittest util functions ----------
// gen field data
// generateBoolArray produces numRows pseudo-random booleans. Each entry
// reports whether a fresh rand.Int() draw was even.
func generateBoolArray(numRows int) []bool {
	out := make([]bool, 0, numRows)
	for len(out) < numRows {
		isEven := rand.Int()%2 == 0
		out = append(out, isEven)
	}
	return out
}
// generateInt8Array produces numRows pseudo-random int8 values by truncating
// rand.Int() draws to 8 bits.
func generateInt8Array(numRows int) []int8 {
	out := make([]int8, 0, numRows)
	for len(out) < numRows {
		out = append(out, int8(rand.Int()))
	}
	return out
}
// generateInt16Array produces numRows pseudo-random int16 values by
// truncating rand.Int() draws to 16 bits.
func generateInt16Array(numRows int) []int16 {
	out := make([]int16, 0, numRows)
	for len(out) < numRows {
		out = append(out, int16(rand.Int()))
	}
	return out
}
// generateInt32Array produces numRows pseudo-random, non-negative int32
// values drawn with rand.Int31().
func generateInt32Array(numRows int) []int32 {
	out := make([]int32, 0, numRows)
	for len(out) < numRows {
		out = append(out, rand.Int31())
	}
	return out
}
// generateInt64Array produces the deterministic sequence 0, 1, ...,
// numRows-1 as int64 values (no randomness is involved).
func generateInt64Array(numRows int) []int64 {
	seq := make([]int64, 0, numRows)
	for next := int64(0); len(seq) < numRows; next++ {
		seq = append(seq, next)
	}
	return seq
}
// generateFloat32Array produces numRows pseudo-random float32 values from
// rand.Float32(), each in [0.0, 1.0).
func generateFloat32Array(numRows int) []float32 {
	out := make([]float32, 0, numRows)
	for len(out) < numRows {
		out = append(out, rand.Float32())
	}
	return out
}
// generateStringArray produces numRows strings, each the decimal rendering
// of a fresh rand.Int() draw.
func generateStringArray(numRows int) []string {
	out := make([]string, 0, numRows)
	for len(out) < numRows {
		digits := strconv.Itoa(rand.Int())
		out = append(out, digits)
	}
	return out
}
// generateArrayArray produces numRows array-typed rows. Every row is a
// ScalarField wrapping an IntArray of 10 values from generateInt32Array.
func generateArrayArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, 0, numRows)
	for len(rows) < numRows {
		elems := &schemapb.IntArray{Data: generateInt32Array(10)}
		row := &schemapb.ScalarField{
			Data: &schemapb.ScalarField_IntData{IntData: elems},
		}
		rows = append(rows, row)
	}
	return rows
}
// generateJSONArray produces numRows JSON documents of the form {"key":N},
// where N counts up from 1 for the first row.
func generateJSONArray(numRows int) [][]byte {
	docs := make([][]byte, 0, numRows)
	for seq := 1; seq <= numRows; seq++ {
		doc := fmt.Sprintf(`{"key":%d}`, seq)
		docs = append(docs, []byte(doc))
	}
	return docs
}
// generateFloat64Array produces numRows pseudo-random float64 values from
// rand.Float64(), each in [0.0, 1.0).
func generateFloat64Array(numRows int) []float64 {
	out := make([]float64, 0, numRows)
	for len(out) < numRows {
		out = append(out, rand.Float64())
	}
	return out
}
// generateFloatVectors produces the flattened data for numRows float vectors
// of dimension dim: numRows*dim values from rand.Float32().
func generateFloatVectors(numRows, dim int) []float32 {
	want := numRows * dim
	flat := make([]float32, 0, want)
	for len(flat) < want {
		flat = append(flat, rand.Float32())
	}
	return flat
}
// generateBinaryVectors produces (numRows*dim)/8 pseudo-random bytes, the
// packed data for numRows binary vectors of dim bits. A failure reported by
// rand.Read is turned into a panic.
func generateBinaryVectors(numRows, dim int) []byte {
	nBytes := (numRows * dim) / 8
	data := make([]byte, nBytes)
	if _, readErr := rand.Read(data); readErr != nil {
		panic(readErr)
	}
	return data
}
// generateFloat16Vectors produces numRows*dim float16 values as a byte
// slice, two little-endian bytes per value. Values come from rand.Float32()
// converted through float16.Fromfloat32.
func generateFloat16Vectors(numRows, dim int) []byte {
	elems := numRows * dim
	data := make([]byte, elems*2)
	for pos := 0; pos < len(data); pos += 2 {
		encoded := float16.Fromfloat32(rand.Float32()).Bits()
		binary.LittleEndian.PutUint16(data[pos:], encoded)
	}
	return data
}
// generateBFloat16Vectors produces numRows*dim bfloat16 values as a byte
// slice, two little-endian bytes per value.
//
// A value is the upper 16 bits of the float32 encoding of a rand.Float32()
// draw with the sign bit cleared. Each value is encoded directly into the
// output, which drops the temporary []uint16 and the second copy loop the
// earlier version needed.
func generateBFloat16Vectors(numRows, dim int) []byte {
	total := numRows * dim
	ret := make([]byte, total*2)
	for i := 0; i < total; i++ {
		// Keep only the high half of the float32 bit pattern.
		bits := math.Float32bits(rand.Float32()) >> 16
		bits &= 0x7FFF
		binary.LittleEndian.PutUint16(ret[i*2:], uint16(bits))
	}
	return ret
}
// GenTestScalarFieldData builds a FieldData message of scalar type dType
// carrying numRows generated values, labelled with fieldName and fieldID.
//
// Supported types are Bool, Int8, Int16, Int32, Int64, Float, Double,
// VarChar, Array and JSON. Int8, Int16 and Int32 all carry their values in an
// IntArray filled by generateInt32Array. Any other type panics with
// "data type not supported".
func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, fieldID int64, numRows int) *schemapb.FieldData {
	// Pick the type-specific payload first; the envelope is identical for
	// every supported type and is assembled once at the end.
	var payload *schemapb.ScalarField
	switch dType {
	case schemapb.DataType_Bool:
		payload = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_BoolData{
				BoolData: &schemapb.BoolArray{Data: generateBoolArray(numRows)},
			},
		}
	case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
		payload = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_IntData{
				IntData: &schemapb.IntArray{Data: generateInt32Array(numRows)},
			},
		}
	case schemapb.DataType_Int64:
		payload = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_LongData{
				LongData: &schemapb.LongArray{Data: generateInt64Array(numRows)},
			},
		}
	case schemapb.DataType_Float:
		payload = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_FloatData{
				FloatData: &schemapb.FloatArray{Data: generateFloat32Array(numRows)},
			},
		}
	case schemapb.DataType_Double:
		payload = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_DoubleData{
				DoubleData: &schemapb.DoubleArray{Data: generateFloat64Array(numRows)},
			},
		}
	case schemapb.DataType_VarChar:
		payload = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_StringData{
				StringData: &schemapb.StringArray{Data: generateStringArray(numRows)},
			},
		}
	case schemapb.DataType_Array:
		payload = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_ArrayData{
				ArrayData: &schemapb.ArrayArray{Data: generateArrayArray(numRows)},
			},
		}
	case schemapb.DataType_JSON:
		payload = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_JsonData{
				JsonData: &schemapb.JSONArray{Data: generateJSONArray(numRows)},
			},
		}
	default:
		panic("data type not supported")
	}

	return &schemapb.FieldData{
		Type:      dType,
		FieldName: fieldName,
		FieldId:   fieldID,
		Field:     &schemapb.FieldData_Scalars{Scalars: payload},
	}
}
// GenTestVectorFiledData builds a FieldData message of vector type dType
// carrying numRows generated vectors, labelled with fieldName and fieldID.
//
// Supported types are BinaryVector, FloatVector, Float16Vector,
// BFloat16Vector and SparseFloatVector. For the dense types the vector
// dimension is dim. For SparseFloatVector dim is ignored and the dimension
// reported by testutils.GenerateSparseFloatVectors is used instead. Any other
// type panics with "data type not supported".
func GenTestVectorFiledData(dType schemapb.DataType, fieldName string, fieldID int64, numRows int, dim int) *schemapb.FieldData {
	// Start from the dense-vector default; the sparse case overrides Dim.
	vec := &schemapb.VectorField{Dim: int64(dim)}
	switch dType {
	case schemapb.DataType_BinaryVector:
		vec.Data = &schemapb.VectorField_BinaryVector{
			BinaryVector: generateBinaryVectors(numRows, dim),
		}
	case schemapb.DataType_FloatVector:
		vec.Data = &schemapb.VectorField_FloatVector{
			FloatVector: &schemapb.FloatArray{Data: generateFloatVectors(numRows, dim)},
		}
	case schemapb.DataType_Float16Vector:
		vec.Data = &schemapb.VectorField_Float16Vector{
			Float16Vector: generateFloat16Vectors(numRows, dim),
		}
	case schemapb.DataType_BFloat16Vector:
		vec.Data = &schemapb.VectorField_Bfloat16Vector{
			Bfloat16Vector: generateBFloat16Vectors(numRows, dim),
		}
	case schemapb.DataType_SparseFloatVector:
		sparse := testutils.GenerateSparseFloatVectors(numRows)
		vec.Dim = sparse.Dim
		vec.Data = &schemapb.VectorField_SparseFloatVector{
			SparseFloatVector: &schemapb.SparseFloatArray{
				Dim:      sparse.Dim,
				Contents: sparse.Contents,
			},
		}
	default:
		panic("data type not supported")
	}

	return &schemapb.FieldData{
		Type:      dType,
		FieldName: fieldName,
		FieldId:   fieldID,
		Field:     &schemapb.FieldData_Vectors{Vectors: vec},
	}
}
func NewTestChunkManagerFactory(params *paramtable.ComponentParam, rootPath string) *storage.ChunkManagerFactory {
return storage.NewChunkManagerFactory("minio",
storage.RootPath(rootPath),
@ -854,67 +521,67 @@ func genInsertData(msgLength int, schema *schemapb.CollectionSchema) (*storage.I
switch f.DataType {
case schemapb.DataType_Bool:
insertData.Data[f.FieldID] = &storage.BoolFieldData{
Data: generateBoolArray(msgLength),
Data: testutils.GenerateBoolArray(msgLength),
}
case schemapb.DataType_Int8:
insertData.Data[f.FieldID] = &storage.Int8FieldData{
Data: generateInt8Array(msgLength),
Data: testutils.GenerateInt8Array(msgLength),
}
case schemapb.DataType_Int16:
insertData.Data[f.FieldID] = &storage.Int16FieldData{
Data: generateInt16Array(msgLength),
Data: testutils.GenerateInt16Array(msgLength),
}
case schemapb.DataType_Int32:
insertData.Data[f.FieldID] = &storage.Int32FieldData{
Data: generateInt32Array(msgLength),
Data: testutils.GenerateInt32Array(msgLength),
}
case schemapb.DataType_Int64:
insertData.Data[f.FieldID] = &storage.Int64FieldData{
Data: generateInt64Array(msgLength),
Data: testutils.GenerateInt64Array(msgLength),
}
case schemapb.DataType_Float:
insertData.Data[f.FieldID] = &storage.FloatFieldData{
Data: generateFloat32Array(msgLength),
Data: testutils.GenerateFloat32Array(msgLength),
}
case schemapb.DataType_Double:
insertData.Data[f.FieldID] = &storage.DoubleFieldData{
Data: generateFloat64Array(msgLength),
Data: testutils.GenerateFloat64Array(msgLength),
}
case schemapb.DataType_String, schemapb.DataType_VarChar:
insertData.Data[f.FieldID] = &storage.StringFieldData{
Data: generateStringArray(msgLength),
Data: testutils.GenerateStringArray(msgLength),
}
case schemapb.DataType_Array:
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
ElementType: schemapb.DataType_Int32,
Data: generateArrayArray(msgLength),
Data: testutils.GenerateArrayOfIntArray(msgLength),
}
case schemapb.DataType_JSON:
insertData.Data[f.FieldID] = &storage.JSONFieldData{
Data: generateJSONArray(msgLength),
Data: testutils.GenerateJSONArray(msgLength),
}
case schemapb.DataType_FloatVector:
dim := simpleFloatVecField.dim // if no dim specified, use simpleFloatVecField's dim
insertData.Data[f.FieldID] = &storage.FloatVectorFieldData{
Data: generateFloatVectors(msgLength, dim),
Data: testutils.GenerateFloatVectors(msgLength, dim),
Dim: dim,
}
case schemapb.DataType_Float16Vector:
dim := simpleFloat16VecField.dim
insertData.Data[f.FieldID] = &storage.Float16VectorFieldData{
Data: generateFloat16Vectors(msgLength, dim),
Data: testutils.GenerateFloat16Vectors(msgLength, dim),
Dim: dim,
}
case schemapb.DataType_BFloat16Vector:
dim := simpleFloat16VecField.dim
insertData.Data[f.FieldID] = &storage.BFloat16VectorFieldData{
Data: generateBFloat16Vectors(msgLength, dim),
Data: testutils.GenerateBFloat16Vectors(msgLength, dim),
Dim: dim,
}
case schemapb.DataType_BinaryVector:
dim := simpleBinVecField.dim
insertData.Data[f.FieldID] = &storage.BinaryVectorFieldData{
Data: generateBinaryVectors(msgLength, dim),
Data: testutils.GenerateBinaryVectors(msgLength, dim),
Dim: dim,
}
case schemapb.DataType_SparseFloatVector:
@ -929,7 +596,7 @@ func genInsertData(msgLength int, schema *schemapb.CollectionSchema) (*storage.I
}
// set data for rowID field
insertData.Data[rowIDFieldID] = &storage.Int64FieldData{
Data: generateInt64Array(msgLength),
Data: testutils.GenerateInt64Array(msgLength),
}
// set data for ts field
insertData.Data[timestampFieldID] = &storage.Int64FieldData{
@ -1018,13 +685,13 @@ func GenAndSaveIndexV2(collectionID, partitionID, segmentID, buildID int64,
var dataset *indexcgowrapper.Dataset
switch fieldSchema.DataType {
case schemapb.DataType_BinaryVector:
dataset = indexcgowrapper.GenBinaryVecDataset(generateBinaryVectors(msgLength, defaultDim))
dataset = indexcgowrapper.GenBinaryVecDataset(testutils.GenerateBinaryVectors(msgLength, defaultDim))
case schemapb.DataType_FloatVector:
dataset = indexcgowrapper.GenFloatVecDataset(generateFloatVectors(msgLength, defaultDim))
dataset = indexcgowrapper.GenFloatVecDataset(testutils.GenerateFloatVectors(msgLength, defaultDim))
case schemapb.DataType_Float16Vector:
dataset = indexcgowrapper.GenFloat16VecDataset(generateFloat16Vectors(msgLength, defaultDim))
dataset = indexcgowrapper.GenFloat16VecDataset(testutils.GenerateFloat16Vectors(msgLength, defaultDim))
case schemapb.DataType_BFloat16Vector:
dataset = indexcgowrapper.GenBFloat16VecDataset(generateBFloat16Vectors(msgLength, defaultDim))
dataset = indexcgowrapper.GenBFloat16VecDataset(testutils.GenerateBFloat16Vectors(msgLength, defaultDim))
case schemapb.DataType_SparseFloatVector:
data := testutils.GenerateSparseFloatVectors(msgLength)
dataset = indexcgowrapper.GenSparseFloatVecDataset(&storage.SparseFloatVectorFieldData{
@ -1091,7 +758,7 @@ func GenAndSaveIndex(collectionID, partitionID, segmentID, fieldID int64, msgLen
}
defer index.Delete()
err = index.Build(indexcgowrapper.GenFloatVecDataset(generateFloatVectors(msgLength, defaultDim)))
err = index.Build(indexcgowrapper.GenFloatVecDataset(testutils.GenerateFloatVectors(msgLength, defaultDim)))
if err != nil {
return nil, err
}
@ -1400,39 +1067,39 @@ func genInsertMsg(collection *Collection, partitionID, segment int64, numRows in
for _, f := range collection.Schema().Fields {
switch f.DataType {
case schemapb.DataType_Bool:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleBoolField.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleBoolField.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_Int8:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt8Field.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleInt8Field.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_Int16:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt16Field.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleInt16Field.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_Int32:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt32Field.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleInt32Field.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_Int64:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt64Field.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleInt64Field.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_Float:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleFloatField.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleFloatField.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_Double:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleDoubleField.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleDoubleField.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_VarChar:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleVarCharField.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleVarCharField.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_Array:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleArrayField.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleArrayField.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_JSON:
fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleJSONField.fieldName, f.GetFieldID(), numRows))
fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleJSONField.fieldName, f.GetFieldID(), numRows))
case schemapb.DataType_FloatVector:
dim := simpleFloatVecField.dim // if no dim specified, use simpleFloatVecField's dim
fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim))
fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, dim))
case schemapb.DataType_BinaryVector:
dim := simpleBinVecField.dim // if no dim specified, use simpleFloatVecField's dim
fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim))
fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, dim))
case schemapb.DataType_Float16Vector:
dim := simpleFloat16VecField.dim // if no dim specified, use simpleFloatVecField's dim
fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim))
fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, dim))
case schemapb.DataType_BFloat16Vector:
dim := simpleBFloat16VecField.dim // if no dim specified, use simpleFloatVecField's dim
fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim))
fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, dim))
case schemapb.DataType_SparseFloatVector:
fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, 0))
fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, 0))
default:
err := errors.New("data type not supported")
return nil, err
@ -1544,192 +1211,10 @@ func genSimpleRetrievePlanExpr(schema *schemapb.CollectionSchema) ([]byte, error
}
func genFieldData(fieldName string, fieldID int64, fieldType schemapb.DataType, fieldValue interface{}, dim int64) *schemapb.FieldData {
var fieldData *schemapb.FieldData
switch fieldType {
case schemapb.DataType_Bool:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_Bool,
FieldName: fieldName,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_BoolData{
BoolData: &schemapb.BoolArray{
Data: fieldValue.([]bool),
},
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_Int32:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_Int32,
FieldName: fieldName,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: fieldValue.([]int32),
},
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_Int64:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_Int64,
FieldName: fieldName,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
LongData: &schemapb.LongArray{
Data: fieldValue.([]int64),
},
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_Float:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_Float,
FieldName: fieldName,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_FloatData{
FloatData: &schemapb.FloatArray{
Data: fieldValue.([]float32),
},
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_Double:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_Double,
FieldName: fieldName,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_DoubleData{
DoubleData: &schemapb.DoubleArray{
Data: fieldValue.([]float64),
},
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_VarChar:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_VarChar,
FieldName: fieldName,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: fieldValue.([]string),
},
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_BinaryVector:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_BinaryVector,
FieldName: fieldName,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: dim,
Data: &schemapb.VectorField_BinaryVector{
BinaryVector: fieldValue.([]byte),
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_FloatVector:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_FloatVector,
FieldName: fieldName,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: dim,
Data: &schemapb.VectorField_FloatVector{
FloatVector: &schemapb.FloatArray{
Data: fieldValue.([]float32),
},
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_Float16Vector:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_Float16Vector,
FieldName: fieldName,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: dim,
Data: &schemapb.VectorField_Float16Vector{
Float16Vector: fieldValue.([]byte),
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_BFloat16Vector:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_BFloat16Vector,
FieldName: fieldName,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: dim,
Data: &schemapb.VectorField_Bfloat16Vector{
Bfloat16Vector: fieldValue.([]byte),
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_JSON:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_JSON,
FieldName: fieldName,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_JsonData{
JsonData: &schemapb.JSONArray{
Data: fieldValue.([][]byte),
},
},
},
},
FieldId: fieldID,
}
case schemapb.DataType_Array:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_Array,
FieldName: fieldName,
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_ArrayData{
ArrayData: &schemapb.ArrayArray{
Data: fieldValue.([]*schemapb.ScalarField),
},
},
},
},
FieldId: fieldID,
}
default:
log.Error("not supported field type", zap.String("field type", fieldType.String()))
if fieldType < 100 {
return testutils.GenerateScalarFieldDataWithValue(fieldType, fieldName, fieldID, fieldValue)
}
return fieldData
return testutils.GenerateVectorFieldDataWithValue(fieldType, fieldName, fieldID, fieldValue, int(dim))
}
func genSearchResultData(nq int64, topk int64, ids []int64, scores []float32, topks []int64) *schemapb.SearchResultData {

View File

@ -36,6 +36,7 @@ import (
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -128,7 +129,7 @@ func (suite *ReduceSuite) TestReduceAllFunc() {
nq := int64(10)
// TODO: replace below by genPlaceholderGroup(nq)
vec := generateFloatVectors(1, defaultDim)
vec := testutils.GenerateFloatVectors(1, defaultDim)
var searchRawData []byte
for i, ele := range vec {
buf := make([]byte, 4)

View File

@ -20,7 +20,6 @@ import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"math/rand"
"strconv"
"testing"
@ -435,93 +434,6 @@ func genAllFieldsSchema(fVecDim, bVecDim, f16VecDim, bf16VecDim int, withSparse
return schema, pkFieldID, fieldIDs
}
// generateFloatVectors returns numRows*dim float32 values drawn from
// rand.Float32, laid out as one flat slice (row after row).
func generateFloatVectors(numRows, dim int) []float32 {
	vec := make([]float32, numRows*dim)
	for i := range vec {
		vec[i] = rand.Float32()
	}
	return vec
}
// generateBinaryVectors returns (numRows*dim)/8 random bytes, i.e. one bit
// per dimension packed eight to a byte. The division is integer division, so
// any remainder bits are dropped. Panics if rand.Read reports an error.
func generateBinaryVectors(numRows, dim int) []byte {
	buf := make([]byte, (numRows*dim)/8)
	if _, err := rand.Read(buf); err != nil {
		panic(err)
	}
	return buf
}
// generateFloat16Vectors returns numRows*dim*2 random bytes, two bytes per
// vector element. Panics if rand.Read reports an error.
//
// NOTE(review): the bytes are raw random data, not encoded from real numbers,
// so individual elements are not guaranteed to be finite half-precision
// values — confirm consumers tolerate that.
func generateFloat16Vectors(numRows, dim int) []byte {
	buf := make([]byte, (numRows*dim)*2)
	if _, err := rand.Read(buf); err != nil {
		panic(err)
	}
	return buf
}
// generateBFloat16Vectors returns numRows*dim*2 random bytes, two bytes per
// vector element. Panics if rand.Read reports an error.
//
// NOTE(review): the bytes are raw random data, not encoded from real numbers,
// so individual elements are not guaranteed to be finite bfloat16 values —
// confirm consumers tolerate that.
func generateBFloat16Vectors(numRows, dim int) []byte {
	buf := make([]byte, (numRows*dim)*2)
	if _, err := rand.Read(buf); err != nil {
		panic(err)
	}
	return buf
}
// generateBoolArray returns numRows booleans; each is true when the
// corresponding rand.Int() draw is even.
func generateBoolArray(numRows int) []bool {
	flags := make([]bool, numRows)
	for i := range flags {
		flags[i] = rand.Int()%2 == 0
	}
	return flags
}
// generateInt32Array returns numRows values, each a rand.Int() draw
// converted (truncated) to int32.
func generateInt32Array(numRows int) []int32 {
	vals := make([]int32, numRows)
	for i := range vals {
		vals[i] = int32(rand.Int())
	}
	return vals
}
// generateInt64Array returns numRows values, each a rand.Int() draw
// converted to int64.
func generateInt64Array(numRows int) []int64 {
	vals := make([]int64, numRows)
	for i := range vals {
		vals[i] = int64(rand.Int())
	}
	return vals
}
// generateFloat32Array returns numRows values drawn from rand.Float32.
func generateFloat32Array(numRows int) []float32 {
	vals := make([]float32, numRows)
	for i := range vals {
		vals[i] = rand.Float32()
	}
	return vals
}
// generateFloat64Array returns numRows values drawn from rand.Float64.
func generateFloat64Array(numRows int) []float64 {
	vals := make([]float64, numRows)
	for i := range vals {
		vals[i] = rand.Float64()
	}
	return vals
}
// generateBytesArray returns numRows byte slices, each holding the decimal
// text of one rand.Int() draw.
func generateBytesArray(numRows int) [][]byte {
	rows := make([][]byte, numRows)
	for i := range rows {
		rows[i] = []byte(fmt.Sprint(rand.Int()))
	}
	return rows
}
func generateInt32ArrayList(numRows int) []*schemapb.ScalarField {
ret := make([]*schemapb.ScalarField, 0, numRows)
for i := 0; i < numRows; i++ {
@ -546,22 +458,22 @@ func genRowWithAllFields(fVecDim, bVecDim, f16VecDim, bf16VecDim int) (blob *com
var buffer bytes.Buffer
switch field.DataType {
case schemapb.DataType_FloatVector:
fVec := generateFloatVectors(1, fVecDim)
fVec := testutils.GenerateFloatVectors(1, fVecDim)
_ = binary.Write(&buffer, common.Endian, fVec)
ret.Value = append(ret.Value, buffer.Bytes()...)
row = append(row, fVec)
case schemapb.DataType_BinaryVector:
bVec := generateBinaryVectors(1, bVecDim)
bVec := testutils.GenerateBinaryVectors(1, bVecDim)
_ = binary.Write(&buffer, common.Endian, bVec)
ret.Value = append(ret.Value, buffer.Bytes()...)
row = append(row, bVec)
case schemapb.DataType_Float16Vector:
f16Vec := generateFloat16Vectors(1, f16VecDim)
f16Vec := testutils.GenerateFloat16Vectors(1, f16VecDim)
_ = binary.Write(&buffer, common.Endian, f16Vec)
ret.Value = append(ret.Value, buffer.Bytes()...)
row = append(row, f16Vec)
case schemapb.DataType_BFloat16Vector:
bf16Vec := generateBFloat16Vectors(1, bf16VecDim)
bf16Vec := testutils.GenerateBFloat16Vectors(1, bf16VecDim)
_ = binary.Write(&buffer, common.Endian, bf16Vec)
ret.Value = append(ret.Value, buffer.Bytes()...)
row = append(row, bf16Vec)
@ -689,7 +601,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
for idx, field := range schema.Fields {
switch field.DataType {
case schemapb.DataType_Bool:
data := generateBoolArray(numRows)
data := testutils.GenerateBoolArray(numRows)
f := &schemapb.FieldData{
Type: field.DataType,
FieldName: field.Name,
@ -709,7 +621,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], d)
}
case schemapb.DataType_Int8:
data := generateInt32Array(numRows)
data := testutils.GenerateInt32Array(numRows)
f := &schemapb.FieldData{
Type: field.DataType,
FieldName: field.Name,
@ -729,7 +641,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], int8(d))
}
case schemapb.DataType_Int16:
data := generateInt32Array(numRows)
data := testutils.GenerateInt32Array(numRows)
f := &schemapb.FieldData{
Type: field.DataType,
FieldName: field.Name,
@ -749,7 +661,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], int16(d))
}
case schemapb.DataType_Int32:
data := generateInt32Array(numRows)
data := testutils.GenerateInt32Array(numRows)
f := &schemapb.FieldData{
Type: field.DataType,
FieldName: field.Name,
@ -769,7 +681,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], d)
}
case schemapb.DataType_Int64:
data := generateInt64Array(numRows)
data := testutils.GenerateInt64Array(numRows)
f := &schemapb.FieldData{
Type: field.DataType,
FieldName: field.Name,
@ -790,7 +702,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}
pks = data
case schemapb.DataType_Float:
data := generateFloat32Array(numRows)
data := testutils.GenerateFloat32Array(numRows)
f := &schemapb.FieldData{
Type: field.DataType,
FieldName: field.Name,
@ -810,7 +722,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], d)
}
case schemapb.DataType_Double:
data := generateFloat64Array(numRows)
data := testutils.GenerateFloat64Array(numRows)
f := &schemapb.FieldData{
Type: field.DataType,
FieldName: field.Name,
@ -830,7 +742,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], d)
}
case schemapb.DataType_FloatVector:
data := generateFloatVectors(numRows, fVecDim)
data := testutils.GenerateFloatVectors(numRows, fVecDim)
f := &schemapb.FieldData{
Type: schemapb.DataType_FloatVector,
FieldName: field.Name,
@ -851,7 +763,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], data[nrows*fVecDim:(nrows+1)*fVecDim])
}
case schemapb.DataType_BinaryVector:
data := generateBinaryVectors(numRows, bVecDim)
data := testutils.GenerateBinaryVectors(numRows, bVecDim)
f := &schemapb.FieldData{
Type: schemapb.DataType_BinaryVector,
FieldName: field.Name,
@ -870,7 +782,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], data[nrows*bVecDim/8:(nrows+1)*bVecDim/8])
}
case schemapb.DataType_Float16Vector:
data := generateFloat16Vectors(numRows, f16VecDim)
data := testutils.GenerateFloat16Vectors(numRows, f16VecDim)
f := &schemapb.FieldData{
Type: schemapb.DataType_Float16Vector,
FieldName: field.Name,
@ -889,7 +801,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
columns[idx] = append(columns[idx], data[nrows*f16VecDim*2:(nrows+1)*f16VecDim*2])
}
case schemapb.DataType_BFloat16Vector:
data := generateBFloat16Vectors(numRows, bf16VecDim)
data := testutils.GenerateBFloat16Vectors(numRows, bf16VecDim)
f := &schemapb.FieldData{
Type: schemapb.DataType_BFloat16Vector,
FieldName: field.Name,
@ -950,7 +862,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
}
case schemapb.DataType_JSON:
data := generateBytesArray(numRows)
data := testutils.GenerateBytesArray(numRows)
f := &schemapb.FieldData{
Type: schemapb.DataType_Array,
FieldName: field.GetName(),

View File

@ -1,8 +1,6 @@
package testutil
import (
rand2 "crypto/rand"
"encoding/json"
"fmt"
"math/rand"
"strconv"
@ -108,210 +106,117 @@ func CreateInsertData(schema *schemapb.CollectionSchema, rows int) (*storage.Ins
if err != nil {
return nil, err
}
for _, field := range schema.GetFields() {
if field.GetAutoID() {
for _, f := range schema.GetFields() {
if f.GetAutoID() {
continue
}
switch field.GetDataType() {
switch f.GetDataType() {
case schemapb.DataType_Bool:
boolData := make([]bool, 0)
for i := 0; i < rows; i++ {
boolData = append(boolData, i%3 != 0)
insertData.Data[f.FieldID] = &storage.BoolFieldData{
Data: testutils.GenerateBoolArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.BoolFieldData{Data: boolData}
case schemapb.DataType_Float:
floatData := make([]float32, 0)
for i := 0; i < rows; i++ {
floatData = append(floatData, float32(i/2))
}
insertData.Data[field.GetFieldID()] = &storage.FloatFieldData{Data: floatData}
case schemapb.DataType_Double:
doubleData := make([]float64, 0)
for i := 0; i < rows; i++ {
doubleData = append(doubleData, float64(i/5))
}
insertData.Data[field.GetFieldID()] = &storage.DoubleFieldData{Data: doubleData}
case schemapb.DataType_Int8:
int8Data := make([]int8, 0)
for i := 0; i < rows; i++ {
int8Data = append(int8Data, int8(i%256))
insertData.Data[f.FieldID] = &storage.Int8FieldData{
Data: testutils.GenerateInt8Array(rows),
}
insertData.Data[field.GetFieldID()] = &storage.Int8FieldData{Data: int8Data}
case schemapb.DataType_Int16:
int16Data := make([]int16, 0)
for i := 0; i < rows; i++ {
int16Data = append(int16Data, int16(i%65536))
insertData.Data[f.FieldID] = &storage.Int16FieldData{
Data: testutils.GenerateInt16Array(rows),
}
insertData.Data[field.GetFieldID()] = &storage.Int16FieldData{Data: int16Data}
case schemapb.DataType_Int32:
int32Data := make([]int32, 0)
for i := 0; i < rows; i++ {
int32Data = append(int32Data, int32(i%1000))
insertData.Data[f.FieldID] = &storage.Int32FieldData{
Data: testutils.GenerateInt32Array(rows),
}
insertData.Data[field.GetFieldID()] = &storage.Int32FieldData{Data: int32Data}
case schemapb.DataType_Int64:
int64Data := make([]int64, 0)
for i := 0; i < rows; i++ {
int64Data = append(int64Data, int64(i))
insertData.Data[f.FieldID] = &storage.Int64FieldData{
Data: testutils.GenerateInt64Array(rows),
}
case schemapb.DataType_Float:
insertData.Data[f.FieldID] = &storage.FloatFieldData{
Data: testutils.GenerateFloat32Array(rows),
}
case schemapb.DataType_Double:
insertData.Data[f.FieldID] = &storage.DoubleFieldData{
Data: testutils.GenerateFloat64Array(rows),
}
insertData.Data[field.GetFieldID()] = &storage.Int64FieldData{Data: int64Data}
case schemapb.DataType_BinaryVector:
dim, err := typeutil.GetDim(field)
dim, err := typeutil.GetDim(f)
if err != nil {
return nil, err
}
binVecData := make([]byte, 0)
total := rows * int(dim) / 8
for i := 0; i < total; i++ {
binVecData = append(binVecData, byte(i%256))
insertData.Data[f.FieldID] = &storage.BinaryVectorFieldData{
Data: testutils.GenerateBinaryVectors(rows, int(dim)),
Dim: int(dim),
}
insertData.Data[field.GetFieldID()] = &storage.BinaryVectorFieldData{Data: binVecData, Dim: int(dim)}
case schemapb.DataType_FloatVector:
dim, err := typeutil.GetDim(field)
dim, err := typeutil.GetDim(f)
if err != nil {
return nil, err
}
floatVecData := make([]float32, 0)
total := rows * int(dim)
for i := 0; i < total; i++ {
floatVecData = append(floatVecData, rand.Float32())
insertData.Data[f.GetFieldID()] = &storage.FloatVectorFieldData{
Data: testutils.GenerateFloatVectors(rows, int(dim)),
Dim: int(dim),
}
insertData.Data[field.GetFieldID()] = &storage.FloatVectorFieldData{Data: floatVecData, Dim: int(dim)}
case schemapb.DataType_Float16Vector:
dim, err := typeutil.GetDim(field)
dim, err := typeutil.GetDim(f)
if err != nil {
return nil, err
}
total := int64(rows) * dim * 2
float16VecData := make([]byte, total)
_, err = rand2.Read(float16VecData)
if err != nil {
return nil, err
insertData.Data[f.FieldID] = &storage.Float16VectorFieldData{
Data: testutils.GenerateFloat16Vectors(rows, int(dim)),
Dim: int(dim),
}
insertData.Data[field.GetFieldID()] = &storage.Float16VectorFieldData{Data: float16VecData, Dim: int(dim)}
case schemapb.DataType_BFloat16Vector:
dim, err := typeutil.GetDim(field)
dim, err := typeutil.GetDim(f)
if err != nil {
return nil, err
}
total := int64(rows) * dim * 2
bfloat16VecData := make([]byte, total)
_, err = rand2.Read(bfloat16VecData)
if err != nil {
return nil, err
insertData.Data[f.FieldID] = &storage.BFloat16VectorFieldData{
Data: testutils.GenerateBFloat16Vectors(rows, int(dim)),
Dim: int(dim),
}
insertData.Data[field.GetFieldID()] = &storage.BFloat16VectorFieldData{Data: bfloat16VecData, Dim: int(dim)}
case schemapb.DataType_SparseFloatVector:
sparseFloatVecData := testutils.GenerateSparseFloatVectors(rows)
insertData.Data[field.GetFieldID()] = &storage.SparseFloatVectorFieldData{
insertData.Data[f.FieldID] = &storage.SparseFloatVectorFieldData{
SparseFloatArray: *sparseFloatVecData,
}
case schemapb.DataType_String, schemapb.DataType_VarChar:
varcharData := make([]string, 0)
for i := 0; i < rows; i++ {
varcharData = append(varcharData, strconv.Itoa(i))
insertData.Data[f.FieldID] = &storage.StringFieldData{
Data: testutils.GenerateStringArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.StringFieldData{Data: varcharData}
case schemapb.DataType_JSON:
jsonData := make([][]byte, 0)
for i := 0; i < rows; i++ {
if i%4 == 0 {
v, _ := json.Marshal("{\"a\": \"%s\", \"b\": %d}")
jsonData = append(jsonData, v)
} else if i%4 == 1 {
v, _ := json.Marshal(i)
jsonData = append(jsonData, v)
} else if i%4 == 2 {
v, _ := json.Marshal(float32(i) * 0.1)
jsonData = append(jsonData, v)
} else if i%4 == 3 {
v, _ := json.Marshal(strconv.Itoa(i))
jsonData = append(jsonData, v)
}
insertData.Data[f.FieldID] = &storage.JSONFieldData{
Data: testutils.GenerateJSONArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.JSONFieldData{Data: jsonData}
case schemapb.DataType_Array:
arrayData := make([]*schemapb.ScalarField, 0)
switch field.GetElementType() {
switch f.GetElementType() {
case schemapb.DataType_Bool:
for i := 0; i < rows; i++ {
data := []bool{i%2 == 0, i%3 == 0, i%4 == 0}
arrayData = append(arrayData, &schemapb.ScalarField{
Data: &schemapb.ScalarField_BoolData{
BoolData: &schemapb.BoolArray{
Data: data,
},
},
})
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfBoolArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData}
case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
for i := 0; i < rows; i++ {
data := []int32{int32(i), int32(i + 1), int32(i + 2)}
arrayData = append(arrayData, &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: data,
},
},
})
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfIntArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData}
case schemapb.DataType_Int64:
for i := 0; i < rows; i++ {
data := []int64{int64(i), int64(i + 1), int64(i + 2)}
arrayData = append(arrayData, &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
LongData: &schemapb.LongArray{
Data: data,
},
},
})
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfLongArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData}
case schemapb.DataType_Float:
for i := 0; i < rows; i++ {
data := []float32{float32(i) * 0.1, float32(i+1) * 0.1, float32(i+2) * 0.1}
arrayData = append(arrayData, &schemapb.ScalarField{
Data: &schemapb.ScalarField_FloatData{
FloatData: &schemapb.FloatArray{
Data: data,
},
},
})
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfFloatArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData}
case schemapb.DataType_Double:
for i := 0; i < rows; i++ {
data := []float64{float64(i) * 0.02, float64(i+1) * 0.02, float64(i+2) * 0.02}
arrayData = append(arrayData, &schemapb.ScalarField{
Data: &schemapb.ScalarField_DoubleData{
DoubleData: &schemapb.DoubleArray{
Data: data,
},
},
})
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfDoubleArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData}
case schemapb.DataType_String, schemapb.DataType_VarChar:
for i := 0; i < rows; i++ {
data := []string{
randomString(5) + "-" + fmt.Sprintf("%d", i),
randomString(5) + "-" + fmt.Sprintf("%d", i),
randomString(5) + "-" + fmt.Sprintf("%d", i),
}
arrayData = append(arrayData, &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: data,
},
},
})
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfStringArray(rows),
}
insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData}
}
default:
panic(fmt.Sprintf("unexpected data type: %s", field.GetDataType().String()))
panic(fmt.Sprintf("unsupported data type: %s", f.GetDataType().String()))
}
}
return insertData, nil

View File

@ -146,6 +146,7 @@ require (
github.com/tklauser/numcpus v0.4.0 // indirect
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
github.com/twmb/murmur3 v1.1.3 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.etcd.io/bbolt v1.3.6 // indirect

View File

@ -730,6 +730,8 @@ github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBn
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=

View File

@ -0,0 +1,912 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package testutils
import (
"encoding/binary"
"encoding/json"
"fmt"
"math"
"math/rand"
"sort"
"strconv"
"github.com/x448/float16"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
// elemCountOfArray is the number of elements placed in every row produced by
// the GenerateArrayOf*Array helpers in this file.
const elemCountOfArray = 10
// generate data
// GenerateBoolArray returns numRows booleans that alternate starting with
// true: even indexes hold true, odd indexes hold false.
func GenerateBoolArray(numRows int) []bool {
	flags := make([]bool, numRows)
	for idx := range flags {
		flags[idx] = idx%2 == 0
	}
	return flags
}
// GenerateInt8Array returns numRows values where element i is int8(i).
// Indexes above 127 wrap around through the int8 conversion.
func GenerateInt8Array(numRows int) []int8 {
	values := make([]int8, numRows)
	for idx := range values {
		values[idx] = int8(idx)
	}
	return values
}
// GenerateInt16Array returns numRows values where element i is int16(i).
func GenerateInt16Array(numRows int) []int16 {
	values := make([]int16, numRows)
	for idx := range values {
		values[idx] = int16(idx)
	}
	return values
}
// GenerateInt32Array returns the ascending sequence 0..numRows-1 as int32.
func GenerateInt32Array(numRows int) []int32 {
	values := make([]int32, numRows)
	for idx := range values {
		values[idx] = int32(idx)
	}
	return values
}
// GenerateInt64Array returns the ascending sequence 0..numRows-1 as int64.
func GenerateInt64Array(numRows int) []int64 {
	values := make([]int64, numRows)
	for idx := range values {
		values[idx] = int64(idx)
	}
	return values
}
// GenerateUint64Array returns the ascending sequence 0..numRows-1 as uint64.
func GenerateUint64Array(numRows int) []uint64 {
	values := make([]uint64, numRows)
	for idx := range values {
		values[idx] = uint64(idx)
	}
	return values
}
// GenerateFloat32Array returns numRows values where element i is float32(i).
func GenerateFloat32Array(numRows int) []float32 {
	values := make([]float32, numRows)
	for idx := range values {
		values[idx] = float32(idx)
	}
	return values
}
// GenerateFloat64Array returns numRows values where element i is float64(i).
func GenerateFloat64Array(numRows int) []float64 {
	values := make([]float64, numRows)
	for idx := range values {
		values[idx] = float64(idx)
	}
	return values
}
// GenerateVarCharArray returns numRows strings, each built by
// funcutil.RandomString with a length drawn from rand.Intn(maxLen), i.e. in
// [0, maxLen). maxLen must be positive because rand.Intn panics otherwise.
func GenerateVarCharArray(numRows int, maxLen int) []string {
	texts := make([]string, 0, numRows)
	for len(texts) < numRows {
		size := rand.Intn(maxLen)
		texts = append(texts, funcutil.RandomString(size))
	}
	return texts
}
// GenerateStringArray returns numRows strings where element i is the decimal
// representation of i ("0", "1", ...).
func GenerateStringArray(numRows int) []string {
	texts := make([]string, numRows)
	for idx := range texts {
		texts[idx] = strconv.Itoa(idx)
	}
	return texts
}
// GenerateJSONArray returns numRows JSON-encoded values that cycle through
// four shapes by row index modulo 4:
//
//	0: a JSON string holding the literal template text (the %s and %d verbs
//	   are not substituted)
//	1: the row index as a number
//	2: float32(index) * 0.1 as a number
//	3: the row index as a decimal string
func GenerateJSONArray(numRows int) [][]byte {
	docs := make([][]byte, 0, numRows)
	for row := 0; row < numRows; row++ {
		var payload interface{}
		switch row % 4 {
		case 0:
			payload = "{\"a\": \"%s\", \"b\": %d}"
		case 1:
			payload = row
		case 2:
			payload = float32(row) * 0.1
		case 3:
			payload = strconv.Itoa(row)
		}
		// The marshal error is discarded, as in the original code.
		encoded, _ := json.Marshal(payload)
		docs = append(docs, encoded)
	}
	return docs
}
// GenerateArrayOfBoolArray returns numRows scalar fields, each wrapping a
// bool array of elemCountOfArray elements from GenerateBoolArray.
func GenerateArrayOfBoolArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.BoolArray{Data: GenerateBoolArray(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_BoolData{BoolData: elems},
		}
	}
	return rows
}
// GenerateArrayOfIntArray returns numRows scalar fields, each wrapping an
// int32 array of elemCountOfArray elements from GenerateInt32Array.
func GenerateArrayOfIntArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.IntArray{Data: GenerateInt32Array(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_IntData{IntData: elems},
		}
	}
	return rows
}
// GenerateArrayOfLongArray returns numRows scalar fields, each wrapping an
// int64 array of elemCountOfArray elements from GenerateInt64Array.
func GenerateArrayOfLongArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.LongArray{Data: GenerateInt64Array(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_LongData{LongData: elems},
		}
	}
	return rows
}
// GenerateArrayOfFloatArray returns numRows scalar fields, each wrapping a
// float32 array of elemCountOfArray elements from GenerateFloat32Array.
func GenerateArrayOfFloatArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.FloatArray{Data: GenerateFloat32Array(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_FloatData{FloatData: elems},
		}
	}
	return rows
}
// GenerateArrayOfDoubleArray returns numRows scalar fields, each wrapping a
// float64 array of elemCountOfArray elements from GenerateFloat64Array.
func GenerateArrayOfDoubleArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.DoubleArray{Data: GenerateFloat64Array(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_DoubleData{DoubleData: elems},
		}
	}
	return rows
}
// GenerateArrayOfStringArray returns numRows scalar fields, each wrapping a
// string array of elemCountOfArray elements from GenerateStringArray.
func GenerateArrayOfStringArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.StringArray{Data: GenerateStringArray(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_StringData{StringData: elems},
		}
	}
	return rows
}
// GenerateBytesArray returns numRows byte slices, each holding the decimal
// text of a fresh rand.Int() value.
func GenerateBytesArray(numRows int) [][]byte {
	blobs := make([][]byte, numRows)
	for idx := range blobs {
		text := fmt.Sprint(rand.Int())
		blobs[idx] = []byte(text)
	}
	return blobs
}
// GenerateBinaryVectors returns (numRows*dim)/8 random bytes, i.e. dim bits
// per row packed eight to a byte. The division truncates, so the byte count
// is exact only when numRows*dim is a multiple of 8.
func GenerateBinaryVectors(numRows, dim int) []byte {
	packed := make([]byte, (numRows*dim)/8)
	if _, err := rand.Read(packed); err != nil {
		panic(err)
	}
	return packed
}
// GenerateFloatVectors returns numRows*dim values drawn from rand.Float32,
// laid out as one flat slice.
func GenerateFloatVectors(numRows, dim int) []float32 {
	flat := make([]float32, numRows*dim)
	for idx := range flat {
		flat[idx] = rand.Float32()
	}
	return flat
}
// GenerateFloat16Vectors returns numRows*dim random half-precision values,
// each converted from rand.Float32 and written as two little-endian bytes.
func GenerateFloat16Vectors(numRows, dim int) []byte {
	count := numRows * dim
	encoded := make([]byte, count*2)
	for idx := 0; idx < count; idx++ {
		half := float16.Fromfloat32(rand.Float32())
		binary.LittleEndian.PutUint16(encoded[idx*2:], half.Bits())
	}
	return encoded
}
// GenerateBFloat16Vectors returns numRows*dim random bfloat16 values, two
// little-endian bytes each. Every value is the upper 16 bits of a
// rand.Float32 bit pattern with the top (sign) bit masked off.
func GenerateBFloat16Vectors(numRows, dim int) []byte {
	count := numRows * dim
	encoded := make([]byte, count*2)
	for idx := 0; idx < count; idx++ {
		upper := (math.Float32bits(rand.Float32()) >> 16) & 0x7FFF
		binary.LittleEndian.PutUint16(encoded[idx*2:], uint16(upper))
	}
	return encoded
}
// GenerateBFloat16VectorsWithInvalidData returns numRows*dim bfloat16 values
// (two little-endian bytes each) that alternate between NaN at even indexes
// and +Inf at odd indexes.
func GenerateBFloat16VectorsWithInvalidData(numRows, dim int) []byte {
	// truncate keeps the upper 16 bits of the float32 pattern, sign cleared.
	truncate := func(f float32) uint16 {
		return uint16((math.Float32bits(f) >> 16) & 0x7FFF)
	}
	count := numRows * dim
	encoded := make([]byte, count*2)
	for idx := 0; idx < count; idx++ {
		invalid := float32(math.Inf(1))
		if idx%2 == 0 {
			invalid = float32(math.NaN())
		}
		binary.LittleEndian.PutUint16(encoded[idx*2:], truncate(invalid))
	}
	return encoded
}
// GenerateFloat16VectorsWithInvalidData returns numRows*dim half-precision
// values (two little-endian bytes each) that alternate between +Inf at even
// indexes and NaN at odd indexes.
func GenerateFloat16VectorsWithInvalidData(numRows, dim int) []byte {
	count := numRows * dim
	encoded := make([]byte, count*2)
	for idx := 0; idx < count; idx++ {
		slot := encoded[idx*2:]
		switch idx % 2 {
		case 0:
			binary.LittleEndian.PutUint16(slot, uint16(float16.Inf(1)))
		default:
			binary.LittleEndian.PutUint16(slot, uint16(float16.NaN()))
		}
	}
	return encoded
}
// GenerateSparseFloatVectors builds numRows random sparse float rows.
//
// For each row it draws between 1 and 40 indexes below 700, removes
// duplicates, sorts them ascending, pairs each surviving index with a
// rand.Float32 value and encodes the row with typeutil.CreateSparseFloatRow.
// The returned Dim is one more than the largest index used by any row.
func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray {
	const (
		indexLimit = 700
		avgNnz     = 20
	)

	// dedupSorted drops repeated indexes and returns the rest in ascending order.
	dedupSorted := func(raw []uint32) []uint32 {
		visited := make(map[uint32]bool)
		var uniq []uint32
		for _, idx := range raw {
			if visited[idx] {
				continue
			}
			visited[idx] = true
			uniq = append(uniq, idx)
		}
		sort.Slice(uniq, func(a, b int) bool { return uniq[a] < uniq[b] })
		return uniq
	}

	var rows [][]byte
	dimSeen := 0
	for row := 0; row < numRows; row++ {
		picked := make([]uint32, rand.Intn(avgNnz*2)+1)
		for k := range picked {
			picked[k] = uint32(rand.Intn(indexLimit))
		}
		picked = dedupSorted(picked)

		weights := make([]float32, len(picked))
		for k := range weights {
			weights[k] = rand.Float32()
		}

		if n := len(picked); n > 0 {
			if top := int(picked[n-1]) + 1; top > dimSeen {
				dimSeen = top
			}
		}
		rows = append(rows, typeutil.CreateSparseFloatRow(picked, weights))
	}

	return &schemapb.SparseFloatArray{
		Dim:      int64(dimSeen),
		Contents: rows,
	}
}
// GenerateHashKeys returns numRows values drawn from rand.Uint32.
func GenerateHashKeys(numRows int) []uint32 {
	keys := make([]uint32, numRows)
	for idx := range keys {
		keys[idx] = rand.Uint32()
	}
	return keys
}
// generate FieldData
// NewBoolFieldData returns a Bool field named fieldName whose numRows values
// come from GenerateBoolArray.
func NewBoolFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.BoolArray{Data: GenerateBoolArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_BoolData{BoolData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Bool,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewBoolFieldDataWithValue returns a Bool field named fieldName holding
// fieldValue, which must be a []bool; any other dynamic type makes the type
// assertion panic.
func NewBoolFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.BoolArray{Data: fieldValue.([]bool)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_BoolData{BoolData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Bool,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt8FieldData returns an Int8 field named fieldName with numRows values.
// The values come from GenerateInt32Array (0..numRows-1 stored as int32) and
// are not wrapped to the int8 range.
func NewInt8FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: GenerateInt32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int8,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt16FieldData returns an Int16 field named fieldName with numRows
// values. The values come from GenerateInt32Array (0..numRows-1 stored as
// int32) and are not wrapped to the int16 range.
func NewInt16FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: GenerateInt32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int16,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt32FieldData returns an Int32 field named fieldName whose numRows
// values come from GenerateInt32Array.
func NewInt32FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: GenerateInt32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int32,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt32FieldDataWithValue returns an Int32 field named fieldName holding
// fieldValue, which must be a []int32; any other dynamic type makes the type
// assertion panic.
func NewInt32FieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: fieldValue.([]int32)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int32,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt64FieldData returns an Int64 field named fieldName whose numRows
// values come from GenerateInt64Array.
func NewInt64FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.LongArray{Data: GenerateInt64Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_LongData{LongData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int64,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt64FieldDataWithValue returns an Int64 field named fieldName holding
// fieldValue, which must be a []int64; any other dynamic type makes the type
// assertion panic.
func NewInt64FieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.LongArray{Data: fieldValue.([]int64)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_LongData{LongData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int64,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewFloatFieldData returns a Float field named fieldName whose numRows
// values come from GenerateFloat32Array.
func NewFloatFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.FloatArray{Data: GenerateFloat32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_FloatData{FloatData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewFloatFieldDataWithValue returns a Float field named fieldName holding
// fieldValue, which must be a []float32; any other dynamic type makes the
// type assertion panic.
func NewFloatFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.FloatArray{Data: fieldValue.([]float32)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_FloatData{FloatData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewDoubleFieldData returns a Double field named fieldName whose numRows
// values come from GenerateFloat64Array.
func NewDoubleFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.DoubleArray{Data: GenerateFloat64Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_DoubleData{DoubleData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Double,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewDoubleFieldDataWithValue returns a Double field named fieldName holding
// fieldValue, which must be a []float64; any other dynamic type makes the
// type assertion panic.
func NewDoubleFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.DoubleArray{Data: fieldValue.([]float64)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_DoubleData{DoubleData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Double,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewVarCharFieldData returns a VarChar field named fieldName whose numRows
// values come from GenerateVarCharArray with a maxLen of 10.
func NewVarCharFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: GenerateVarCharArray(numRows, 10)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_VarChar,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewVarCharFieldDataWithValue returns a VarChar field named fieldName
// holding fieldValue, which must be a []string; any other dynamic type makes
// the type assertion panic.
func NewVarCharFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: fieldValue.([]string)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_VarChar,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewStringFieldData returns a String field named fieldName whose numRows
// values come from GenerateStringArray.
func NewStringFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: GenerateStringArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_String,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewJSONFieldData returns a JSON field named fieldName whose numRows
// documents come from GenerateJSONArray.
func NewJSONFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.JSONArray{Data: GenerateJSONArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_JsonData{JsonData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_JSON,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewJSONFieldDataWithValue returns a JSON field named fieldName holding
// fieldValue, which must be a [][]byte; any other dynamic type makes the
// type assertion panic.
func NewJSONFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.JSONArray{Data: fieldValue.([][]byte)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_JsonData{JsonData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_JSON,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewArrayFieldData returns an Array field named fieldName with numRows rows
// produced by GenerateArrayOfIntArray, so every row is an int32 array.
func NewArrayFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.ArrayArray{Data: GenerateArrayOfIntArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_ArrayData{ArrayData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Array,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewArrayFieldDataWithValue returns an Array field named fieldName holding
// fieldValue, which must be a []*schemapb.ScalarField; any other dynamic
// type makes the type assertion panic.
func NewArrayFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.ArrayArray{Data: fieldValue.([]*schemapb.ScalarField)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_ArrayData{ArrayData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Array,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewBinaryVectorFieldData returns a BinaryVector field named fieldName with
// dimension dim, filled by GenerateBinaryVectors(numRows, dim).
func NewBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim: int64(dim),
		Data: &schemapb.VectorField_BinaryVector{
			BinaryVector: GenerateBinaryVectors(numRows, dim),
		},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BinaryVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBinaryVectorFieldDataWithValue returns a BinaryVector field named
// fieldName with dimension dim holding fieldValue, which must be a []byte;
// any other dynamic type makes the type assertion panic.
func NewBinaryVectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim: int64(dim),
		Data: &schemapb.VectorField_BinaryVector{
			BinaryVector: fieldValue.([]byte),
		},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BinaryVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloatVectorFieldData returns a FloatVector field named fieldName with
// dimension dim, filled by GenerateFloatVectors(numRows, dim).
func NewFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	flat := &schemapb.FloatArray{Data: GenerateFloatVectors(numRows, dim)}
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_FloatVector{FloatVector: flat},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_FloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloatVectorFieldDataWithValue returns a FloatVector field named
// fieldName with dimension dim holding fieldValue, which must be a
// []float32; any other dynamic type makes the type assertion panic.
func NewFloatVectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	flat := &schemapb.FloatArray{Data: fieldValue.([]float32)}
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_FloatVector{FloatVector: flat},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_FloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloat16VectorFieldData returns a Float16Vector field named fieldName
// with dimension dim, filled by GenerateFloat16Vectors(numRows, dim).
func NewFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim: int64(dim),
		Data: &schemapb.VectorField_Float16Vector{
			Float16Vector: GenerateFloat16Vectors(numRows, dim),
		},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloat16VectorFieldDataWithValue returns a Float16Vector field named
// fieldName with dimension dim holding fieldValue, which must be a []byte;
// any other dynamic type makes the type assertion panic.
func NewFloat16VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim: int64(dim),
		Data: &schemapb.VectorField_Float16Vector{
			Float16Vector: fieldValue.([]byte),
		},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBFloat16VectorFieldData returns a BFloat16Vector field named fieldName
// with dimension dim, filled by GenerateBFloat16Vectors(numRows, dim).
func NewBFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim: int64(dim),
		Data: &schemapb.VectorField_Bfloat16Vector{
			Bfloat16Vector: GenerateBFloat16Vectors(numRows, dim),
		},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BFloat16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBFloat16VectorFieldDataWithValue returns a BFloat16Vector field named
// fieldName with dimension dim holding fieldValue, which must be a []byte;
// any other dynamic type makes the type assertion panic.
func NewBFloat16VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim: int64(dim),
		Data: &schemapb.VectorField_Bfloat16Vector{
			Bfloat16Vector: fieldValue.([]byte),
		},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BFloat16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewSparseFloatVectorFieldData returns a SparseFloatVector field named
// fieldName with numRows rows from GenerateSparseFloatVectors. The generated
// Dim is used both for the vector field and for the embedded sparse array,
// which is a new struct that shares the generated Contents.
func NewSparseFloatVectorFieldData(fieldName string, numRows int) *schemapb.FieldData {
	generated := GenerateSparseFloatVectors(numRows)
	sparse := &schemapb.SparseFloatArray{
		Dim:      generated.Dim,
		Contents: generated.Contents,
	}
	vectors := &schemapb.VectorField{
		Dim:  generated.Dim,
		Data: &schemapb.VectorField_SparseFloatVector{SparseFloatVector: sparse},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_SparseFloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// GenerateScalarFieldData dispatches on dType to the matching New*FieldData
// constructor and returns a field named fieldName with numRows generated
// values. Supported types are Bool, Int8, Int16, Int32, Int64, Float, Double,
// VarChar, String, Array and JSON; any other type panics with a message that
// names the rejected type.
func GenerateScalarFieldData(dType schemapb.DataType, fieldName string, numRows int) *schemapb.FieldData {
	switch dType {
	case schemapb.DataType_Bool:
		return NewBoolFieldData(fieldName, numRows)
	case schemapb.DataType_Int8:
		return NewInt8FieldData(fieldName, numRows)
	case schemapb.DataType_Int16:
		return NewInt16FieldData(fieldName, numRows)
	case schemapb.DataType_Int32:
		return NewInt32FieldData(fieldName, numRows)
	case schemapb.DataType_Int64:
		return NewInt64FieldData(fieldName, numRows)
	case schemapb.DataType_Float:
		return NewFloatFieldData(fieldName, numRows)
	case schemapb.DataType_Double:
		return NewDoubleFieldData(fieldName, numRows)
	case schemapb.DataType_VarChar:
		return NewVarCharFieldData(fieldName, numRows)
	case schemapb.DataType_String:
		return NewStringFieldData(fieldName, numRows)
	case schemapb.DataType_Array:
		return NewArrayFieldData(fieldName, numRows)
	case schemapb.DataType_JSON:
		return NewJSONFieldData(fieldName, numRows)
	default:
		// Include the type so a failing test shows which schema entry was rejected.
		panic(fmt.Sprintf("unsupported data type: %s", dType.String()))
	}
}
// GenerateScalarFieldDataWithID builds a field with GenerateScalarFieldData
// and then stamps fieldID onto its FieldId.
func GenerateScalarFieldDataWithID(dType schemapb.DataType, fieldName string, fieldID int64, numRows int) *schemapb.FieldData {
	generated := GenerateScalarFieldData(dType, fieldName, numRows)
	generated.FieldId = fieldID
	return generated
}
// GenerateScalarFieldDataWithValue dispatches on dType to the matching
// New*FieldDataWithValue constructor, wraps the caller-supplied fieldValue
// and sets FieldId to fieldID. Supported types are Bool, Int32, Int64, Float,
// Double, VarChar, Array and JSON; any other type panics with a message that
// names the rejected type. fieldValue must have the slice type the chosen
// constructor asserts, otherwise that assertion panics.
func GenerateScalarFieldDataWithValue(dType schemapb.DataType, fieldName string, fieldID int64, fieldValue interface{}) *schemapb.FieldData {
	var fieldData *schemapb.FieldData
	switch dType {
	case schemapb.DataType_Bool:
		fieldData = NewBoolFieldDataWithValue(fieldName, fieldValue)
	case schemapb.DataType_Int32:
		fieldData = NewInt32FieldDataWithValue(fieldName, fieldValue)
	case schemapb.DataType_Int64:
		fieldData = NewInt64FieldDataWithValue(fieldName, fieldValue)
	case schemapb.DataType_Float:
		fieldData = NewFloatFieldDataWithValue(fieldName, fieldValue)
	case schemapb.DataType_Double:
		fieldData = NewDoubleFieldDataWithValue(fieldName, fieldValue)
	case schemapb.DataType_VarChar:
		fieldData = NewVarCharFieldDataWithValue(fieldName, fieldValue)
	case schemapb.DataType_Array:
		fieldData = NewArrayFieldDataWithValue(fieldName, fieldValue)
	case schemapb.DataType_JSON:
		fieldData = NewJSONFieldDataWithValue(fieldName, fieldValue)
	default:
		// Include the type so a failing test shows which schema entry was rejected.
		panic(fmt.Sprintf("unsupported data type: %s", dType.String()))
	}
	fieldData.FieldId = fieldID
	return fieldData
}
// GenerateVectorFieldData dispatches on dType to the matching vector
// constructor and returns a field named fieldName with numRows generated
// vectors. Supported types are BinaryVector, FloatVector, Float16Vector,
// BFloat16Vector and SparseFloatVector; dim is not passed on for
// SparseFloatVector. Any other type panics with a message that names the
// rejected type.
func GenerateVectorFieldData(dType schemapb.DataType, fieldName string, numRows int, dim int) *schemapb.FieldData {
	switch dType {
	case schemapb.DataType_BinaryVector:
		return NewBinaryVectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_FloatVector:
		return NewFloatVectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_Float16Vector:
		return NewFloat16VectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_BFloat16Vector:
		return NewBFloat16VectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_SparseFloatVector:
		return NewSparseFloatVectorFieldData(fieldName, numRows)
	default:
		// Include the type so a failing test shows which schema entry was rejected.
		panic(fmt.Sprintf("unsupported data type: %s", dType.String()))
	}
}
// GenerateVectorFieldDataWithID builds a field with GenerateVectorFieldData
// and then stamps fieldID onto its FieldId.
func GenerateVectorFieldDataWithID(dType schemapb.DataType, fieldName string, fieldID int64, numRows int, dim int) *schemapb.FieldData {
	generated := GenerateVectorFieldData(dType, fieldName, numRows, dim)
	generated.FieldId = fieldID
	return generated
}
// GenerateVectorFieldDataWithValue dispatches on dType to the matching
// New*VectorFieldDataWithValue constructor, wraps the caller-supplied
// fieldValue with dimension dim and sets FieldId to fieldID. Supported types
// are BinaryVector, FloatVector, Float16Vector and BFloat16Vector; any other
// type (including SparseFloatVector) panics with a message that names the
// rejected type.
func GenerateVectorFieldDataWithValue(dType schemapb.DataType, fieldName string, fieldID int64, fieldValue interface{}, dim int) *schemapb.FieldData {
	var fieldData *schemapb.FieldData
	switch dType {
	case schemapb.DataType_BinaryVector:
		fieldData = NewBinaryVectorFieldDataWithValue(fieldName, fieldValue, dim)
	case schemapb.DataType_FloatVector:
		fieldData = NewFloatVectorFieldDataWithValue(fieldName, fieldValue, dim)
	case schemapb.DataType_Float16Vector:
		fieldData = NewFloat16VectorFieldDataWithValue(fieldName, fieldValue, dim)
	case schemapb.DataType_BFloat16Vector:
		fieldData = NewBFloat16VectorFieldDataWithValue(fieldName, fieldValue, dim)
	default:
		// Include the type so a failing test shows which schema entry was rejected.
		panic(fmt.Sprintf("unsupported data type: %s", dType.String()))
	}
	fieldData.FieldId = fieldID
	return fieldData
}

View File

@ -1,70 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package testutils
import (
"math/rand"
"sort"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
// GenerateSparseFloatVectors builds numRows random sparse float rows.
//
// For each row it draws between 1 and 40 indexes below 700, removes
// duplicates, sorts them ascending, pairs each surviving index with a
// rand.Float32 value and encodes the row with typeutil.CreateSparseFloatRow.
// The returned Dim is one more than the largest index used by any row.
func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray {
	const (
		indexLimit = 700
		avgNnz     = 20
	)

	// dedupSorted drops repeated indexes and returns the rest in ascending order.
	dedupSorted := func(raw []uint32) []uint32 {
		visited := make(map[uint32]bool)
		var uniq []uint32
		for _, idx := range raw {
			if visited[idx] {
				continue
			}
			visited[idx] = true
			uniq = append(uniq, idx)
		}
		sort.Slice(uniq, func(a, b int) bool { return uniq[a] < uniq[b] })
		return uniq
	}

	var rows [][]byte
	dimSeen := 0
	for row := 0; row < numRows; row++ {
		picked := make([]uint32, rand.Intn(avgNnz*2)+1)
		for k := range picked {
			picked[k] = uint32(rand.Intn(indexLimit))
		}
		picked = dedupSorted(picked)

		weights := make([]float32, len(picked))
		for k := range weights {
			weights[k] = rand.Float32()
		}

		if n := len(picked); n > 0 {
			if top := int(picked[n-1]) + 1; top > dimSeen {
				dimSeen = top
			}
		}
		rows = append(rows, typeutil.CreateSparseFloatRow(picked, weights))
	}

	return &schemapb.SparseFloatArray{
		Dim:      int64(dimSeen),
		Contents: rows,
	}
}

View File

@ -18,14 +18,8 @@ package integration
import (
"context"
"encoding/binary"
"fmt"
"math"
"math/rand"
"time"
"github.com/x448/float16"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/util/testutils"
@ -120,97 +114,27 @@ func NewVarCharSameFieldData(fieldName string, numRows int, value string) *schem
}
// NewStringFieldData returns a string field named fieldName with numRows
// generated values by delegating to testutils.NewStringFieldData. The
// previous inline body tagged the string data as DataType_Int64 and left the
// delegating return unreachable.
func NewStringFieldData(fieldName string, numRows int) *schemapb.FieldData {
	return testutils.NewStringFieldData(fieldName, numRows)
}
// NewFloatVectorFieldData returns a float vector field named fieldName with
// numRows vectors of dimension dim by delegating to
// testutils.NewFloatVectorFieldData, replacing the duplicated inline body
// that made the delegating return unreachable.
func NewFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	return testutils.NewFloatVectorFieldData(fieldName, numRows, dim)
}
// NewFloat16VectorFieldData returns a float16 vector field named fieldName
// with numRows vectors of dimension dim by delegating to
// testutils.NewFloat16VectorFieldData, replacing the duplicated inline body
// that made the delegating return unreachable.
func NewFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	return testutils.NewFloat16VectorFieldData(fieldName, numRows, dim)
}
// NewBFloat16VectorFieldData returns a bfloat16 vector field named fieldName
// with numRows vectors of dimension dim by delegating to
// testutils.NewBFloat16VectorFieldData, replacing the duplicated inline body
// that made the delegating return unreachable.
func NewBFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	return testutils.NewBFloat16VectorFieldData(fieldName, numRows, dim)
}
// NewBinaryVectorFieldData returns a binary vector field named fieldName
// with numRows vectors of dimension dim by delegating to
// testutils.NewBinaryVectorFieldData, replacing the duplicated inline body
// that made the delegating return unreachable.
func NewBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	return testutils.NewBinaryVectorFieldData(fieldName, numRows, dim)
}
// NewSparseFloatVectorFieldData returns a sparse float vector field named
// fieldName with numRows rows by delegating to
// testutils.NewSparseFloatVectorFieldData, replacing the duplicated inline
// body that made the delegating return unreachable.
func NewSparseFloatVectorFieldData(fieldName string, numRows int) *schemapb.FieldData {
	return testutils.NewSparseFloatVectorFieldData(fieldName, numRows)
}
func GenerateInt64Array(numRows int, start int64) []int64 {
@ -237,68 +161,10 @@ func GenerateSameStringArray(numRows int, value string) []string {
return ret
}
// GenerateStringArray returns numRows strings where element i is the decimal
// representation of i.
func GenerateStringArray(numRows int) []string {
	texts := make([]string, numRows)
	for idx := range texts {
		texts[idx] = fmt.Sprintf("%d", idx)
	}
	return texts
}
// GenerateFloatVectors returns numRows*dim values drawn from rand.Float32,
// laid out as one flat slice.
func GenerateFloatVectors(numRows, dim int) []float32 {
	flat := make([]float32, numRows*dim)
	for idx := range flat {
		flat[idx] = rand.Float32()
	}
	return flat
}
// GenerateBinaryVectors returns (numRows*dim)/8 random bytes, i.e. dim bits
// per row packed eight to a byte. The division truncates.
func GenerateBinaryVectors(numRows, dim int) []byte {
	packed := make([]byte, (numRows*dim)/8)
	if _, err := rand.Read(packed); err != nil {
		panic(err)
	}
	return packed
}
// GenerateFloat16Vectors returns numRows*dim random half-precision values,
// each converted from rand.Float32 and written as two little-endian bytes.
func GenerateFloat16Vectors(numRows, dim int) []byte {
	count := numRows * dim
	encoded := make([]byte, count*2)
	for idx := 0; idx < count; idx++ {
		half := float16.Fromfloat32(rand.Float32())
		binary.LittleEndian.PutUint16(encoded[idx*2:], half.Bits())
	}
	return encoded
}
// GenerateBFloat16Vectors returns numRows*dim random bfloat16 values, two
// little-endian bytes each. Every value is the upper 16 bits of a
// rand.Float32 bit pattern with the top (sign) bit masked off.
func GenerateBFloat16Vectors(numRows, dim int) []byte {
	count := numRows * dim
	encoded := make([]byte, count*2)
	for idx := 0; idx < count; idx++ {
		upper := (math.Float32bits(rand.Float32()) >> 16) & 0x7FFF
		binary.LittleEndian.PutUint16(encoded[idx*2:], uint16(upper))
	}
	return encoded
}
// GenerateSparseFloatArray returns numRows random sparse float rows; it is a
// thin wrapper over testutils.GenerateSparseFloatVectors.
func GenerateSparseFloatArray(numRows int) *schemapb.SparseFloatArray {
	sparse := testutils.GenerateSparseFloatVectors(numRows)
	return sparse
}
func GenerateHashKeys(numRows int) []uint32 {
ret := make([]uint32, 0, numRows)
for i := 0; i < numRows; i++ {
ret = append(ret, rand.Uint32())
}
return ret
return testutils.GenerateHashKeys(numRows)
}