enhance: add helpers to parse sparse float vector in JSON (#32543)

issue: #29419

Added helper functions to parse the JSON representation of sparse float
vectors; they will be used by both the RESTful server and the import utils.

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
pull/32605/head
Buqian Zheng 2024-04-25 14:47:24 +08:00 committed by GitHub
parent b287fbaa2e
commit 8a1017a152
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 287 additions and 111 deletions

View File

@ -43,8 +43,8 @@ import (
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
var compactTestDir = "/tmp/milvus_test/compact"
@ -110,8 +110,8 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
{false, schemapb.DataType_SparseFloatVector, []interface{}{nil, nil}, "invalid sparsefloatvector"},
{false, schemapb.DataType_SparseFloatVector, []interface{}{[]byte{255}, []byte{15}}, "invalid sparsefloatvector"},
{true, schemapb.DataType_SparseFloatVector, []interface{}{
testutils.CreateSparseFloatRow([]uint32{1, 2}, []float32{1.0, 2.0}),
testutils.CreateSparseFloatRow([]uint32{3, 4}, []float32{1.0, 2.0}),
typeutil.CreateSparseFloatRow([]uint32{1, 2}, []float32{1.0, 2.0}),
typeutil.CreateSparseFloatRow([]uint32{3, 4}, []float32{1.0, 2.0}),
}, "valid sparsefloatvector"},
}

View File

@ -26,7 +26,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
func generateTestSchema() *schemapb.CollectionSchema {
@ -128,7 +128,7 @@ func generateTestData(num int) ([]*Blob, error) {
field104 = append(field104, f104...)
field105 = append(field105, f104...)
field106 = append(field106, testutils.CreateSparseFloatRow([]uint32{0, uint32(18 * i), uint32(284 * i)}, []float32{1.1, 0.3, 2.4}))
field106 = append(field106, typeutil.CreateSparseFloatRow([]uint32{0, uint32(18 * i), uint32(284 * i)}, []float32{1.1, 0.3, 2.4}))
}
data := &InsertData{Data: map[FieldID]FieldData{
@ -186,7 +186,7 @@ func assertTestData(t *testing.T, i int, value *Value) {
f104[j] = byte(i)
}
f106 := testutils.CreateSparseFloatRow([]uint32{0, uint32(18 * i), uint32(284 * i)}, []float32{1.1, 0.3, 2.4})
f106 := typeutil.CreateSparseFloatRow([]uint32{0, uint32(18 * i), uint32(284 * i)}, []float32{1.1, 0.3, 2.4})
assert.EqualValues(t, &Value{
int64(i),

View File

@ -30,7 +30,7 @@ import (
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
const (
@ -279,9 +279,9 @@ func TestInsertCodec(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
},
},
},
@ -342,9 +342,9 @@ func TestInsertCodec(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 300,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{5, 6, 7}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{15, 26, 37}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{105, 207, 299}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{5, 6, 7}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{15, 26, 37}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{105, 207, 299}, []float32{3.1, 3.2, 3.3}),
},
},
},
@ -453,12 +453,12 @@ func TestInsertCodec(t *testing.T) {
// merged dim should be max of all dims
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{5, 6, 7}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{15, 26, 37}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{105, 207, 299}, []float32{3.1, 3.2, 3.3}),
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{5, 6, 7}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{15, 26, 37}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{105, 207, 299}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
},
}, resultData.Data[SparseFloatVectorField].(*SparseFloatVectorFieldData).SparseFloatArray)

View File

@ -24,7 +24,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
func TestDataSorter(t *testing.T) {
@ -200,9 +200,9 @@ func TestDataSorter(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
},
},
},
@ -273,9 +273,9 @@ func TestDataSorter(t *testing.T) {
assert.Equal(t, schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
},
}, dataSorter.InsertData.Data[112].(*SparseFloatVectorFieldData).SparseFloatArray)
}

View File

@ -9,7 +9,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
func TestInsertDataSuite(t *testing.T) {
@ -202,7 +202,7 @@ func (s *InsertDataSuite) SetupTest() {
FloatVectorField: []float32{4, 5, 6, 7},
Float16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255},
BFloat16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255},
SparseFloatVectorField: testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{4, 5, 6}),
SparseFloatVectorField: typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{4, 5, 6}),
ArrayField: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}},
@ -235,7 +235,7 @@ func (s *InsertDataSuite) SetupTest() {
FloatVectorField: []float32{4, 5, 6, 7},
Float16VectorField: []byte{1, 2, 3, 4, 5, 6, 7, 8},
BFloat16VectorField: []byte{1, 2, 3, 4, 5, 6, 7, 8},
SparseFloatVectorField: testutils.CreateSparseFloatRow([]uint32{2, 3, 4}, []float32{4, 5, 6}),
SparseFloatVectorField: typeutil.CreateSparseFloatRow([]uint32{2, 3, 4}, []float32{4, 5, 6}),
ArrayField: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}},

View File

@ -26,7 +26,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
func TestPayload_ReaderAndWriter(t *testing.T) {
@ -629,9 +629,9 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
},
},
})
@ -640,9 +640,9 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
})
@ -672,12 +672,12 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
// merged dim should be max of all dims
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
}, floatVecs.SparseFloatArray)
@ -735,14 +735,14 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
t.Run("TestSparseFloatVector_emptyRow", func(t *testing.T) {
testSparseOneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{}, []float32{}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
}, 600)
testSparseOneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
typeutil.CreateSparseFloatRow([]uint32{}, []float32{}),
typeutil.CreateSparseFloatRow([]uint32{}, []float32{}),
typeutil.CreateSparseFloatRow([]uint32{}, []float32{}),
}, 0)
})
@ -757,17 +757,17 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
}
dim := int(indices[nnz-1]) + 1
testSparseOneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow(indices, values),
typeutil.CreateSparseFloatRow([]uint32{}, []float32{}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow(indices, values),
}, dim)
})
t.Run("TestSparseFloatVector_negativeValues", func(t *testing.T) {
testSparseOneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{-2.1, 2.2, -2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, -3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{}, []float32{}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{-2.1, 2.2, -2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, -3.2, 3.3}),
}, 600)
})
@ -778,9 +778,9 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
t.Run("TestSparseFloatVector_largeIndex", func(t *testing.T) {
int32Max := uint32(math.MaxInt32)
testSparseOneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{-2.1, 2.2, -2.3}),
testutils.CreateSparseFloatRow([]uint32{100, int32Max / 2, int32Max - 1}, []float32{3.1, -3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{}, []float32{}),
typeutil.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{-2.1, 2.2, -2.3}),
typeutil.CreateSparseFloatRow([]uint32{100, int32Max / 2, int32Max - 1}, []float32{3.1, -3.2, 3.3}),
}, int(int32Max))
})
@ -1041,7 +1041,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 53,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
},
},
})
@ -1050,7 +1050,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
},
},
})
@ -1618,7 +1618,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 53,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
},
},
})

View File

@ -36,6 +36,7 @@ import (
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/mq/msgstream"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
func TestCheckTsField(t *testing.T) {
@ -1284,8 +1285,8 @@ func TestMergeInsertData(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
},
},
},
@ -1358,7 +1359,7 @@ func TestMergeInsertData(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
},
@ -1444,9 +1445,9 @@ func TestMergeInsertData(t *testing.T) {
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
}, f.(*SparseFloatVectorFieldData))

View File

@ -17,27 +17,13 @@
package testutils
import (
"encoding/binary"
"math"
"math/rand"
"sort"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
// SparseFloatRowSetAt stores one (index, value) entry into row at entry
// position pos. Every entry occupies 8 bytes: idx is written as a
// little-endian uint32 at byte offset pos*8, followed by the IEEE 754 bits of
// value as a little-endian uint32 at byte offset pos*8+4.
func SparseFloatRowSetAt(row []byte, pos int, idx uint32, value float32) {
	entry := row[pos*8:]
	binary.LittleEndian.PutUint32(entry, idx)
	binary.LittleEndian.PutUint32(entry[4:], math.Float32bits(value))
}
// CreateSparseFloatRow encodes parallel slices of indices and values into a
// single sparse float row. The result holds len(indices) entries of 8 bytes
// each, written in input order through SparseFloatRowSetAt; entry i pairs
// indices[i] with values[i].
func CreateSparseFloatRow(indices []uint32, values []float32) []byte {
	encoded := make([]byte, 8*len(indices))
	for pos, idx := range indices {
		SparseFloatRowSetAt(encoded, pos, idx, values[pos])
	}
	return encoded
}
func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray {
dim := 700
avgNnz := 20
@ -73,7 +59,7 @@ func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray {
if len(indices) > 0 && int(indices[len(indices)-1])+1 > maxDim {
maxDim = int(indices[len(indices)-1]) + 1
}
rowBytes := CreateSparseFloatRow(indices, values)
rowBytes := typeutil.CreateSparseFloatRow(indices, values)
contents = append(contents, rowBytes)
}

View File

@ -17,8 +17,12 @@
package typeutil
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"math"
"sort"
"strconv"
"unsafe"
@ -1496,6 +1500,85 @@ func SparseFloatRowValueAt(row []byte, idx int) float32 {
return math.Float32frombits(common.Endian.Uint32(row[idx*8+4:]))
}
// SparseFloatRowSetAt writes the entry at position pos of a sparse float row.
// An entry is 8 bytes wide: the first 4 bytes hold idx as a little-endian
// uint32 and the last 4 bytes hold the IEEE 754 bit pattern of value, also
// little-endian.
func SparseFloatRowSetAt(row []byte, pos int, idx uint32, value float32) {
	slot := row[pos*8:]
	binary.LittleEndian.PutUint32(slot, idx)
	binary.LittleEndian.PutUint32(slot[4:], math.Float32bits(value))
}
// CreateSparseFloatRow builds the binary form of a sparse float row from
// parallel slices: entry i of the result is (indices[i], values[i]), stored
// with SparseFloatRowSetAt. The returned slice is len(indices)*8 bytes long
// and keeps the entries in the order they were given.
func CreateSparseFloatRow(indices []uint32, values []float32) []byte {
	buf := make([]byte, 8*len(indices))
	for pos, idx := range indices {
		SparseFloatRowSetAt(buf, pos, idx, values[pos])
	}
	return buf
}
// sparseFloatVectorJSONRepresentation is the decoding target for the
// {"indices": [...], "values": [...]} JSON form of a sparse float vector.
type sparseFloatVectorJSONRepresentation struct {
// Indices is read from the "indices" JSON key.
Indices []uint32 `json:"indices"`
// Values is read from the "values" JSON key.
Values []float32 `json:"values"`
}
// CreateSparseFloatRowFromJSON parses the JSON representation of one sparse
// float vector and returns the row in its binary encoding.
//
// Accepted formats:
//   - {"indices": [1, 2, 3], "values": [0.1, 0.2, 0.3]}
//   - {"1": 0.1, "2": 0.2, "3": 0.3}
//
// The indices do not have to be sorted in the input, but the returned byte
// representation always has its entries sorted by index.
//
// An error is returned when the input cannot be decoded in either format,
// when anything other than whitespace follows the JSON value, when indices and
// values differ in length, when the indices/values form is empty, or when the
// resulting row is rejected by ValidateSparseFloatRows.
func CreateSparseFloatRowFromJSON(input []byte) ([]byte, error) {
	var indices []uint32
	var values []float32

	newDecoder := func() *json.Decoder {
		d := json.NewDecoder(bytes.NewReader(input))
		d.DisallowUnknownFields()
		return d
	}
	// json.Decoder.Decode stops after the first JSON value, so content that
	// follows it (for example a second object) would otherwise be ignored.
	hasTrailingData := func(d *json.Decoder) bool {
		return len(bytes.TrimSpace(input[d.InputOffset():])) > 0
	}

	var vec sparseFloatVectorJSONRepresentation
	decoder := newDecoder()
	if err := decoder.Decode(&vec); err == nil {
		if hasTrailingData(decoder) {
			return nil, fmt.Errorf("failed to parse JSON input: unexpected data after JSON value")
		}
		if len(vec.Indices) != len(vec.Values) {
			return nil, fmt.Errorf("indices and values length mismatch")
		}
		if len(vec.Indices) == 0 {
			return nil, fmt.Errorf("empty indices/values in JSON input")
		}
		indices = vec.Indices
		values = vec.Values
	} else {
		// The input is not in the indices/values form (unknown key or type
		// mismatch); try the {"index": value} dictionary form instead.
		var dict map[uint32]float32
		decoder = newDecoder()
		if err := decoder.Decode(&dict); err != nil {
			return nil, fmt.Errorf("failed to parse JSON input: %w", err)
		}
		if hasTrailingData(decoder) {
			return nil, fmt.Errorf("failed to parse JSON input: unexpected data after JSON value")
		}
		indices = make([]uint32, 0, len(dict))
		values = make([]float32, 0, len(dict))
		for idx, val := range dict {
			indices = append(indices, idx)
			values = append(values, val)
		}
	}

	// Sort a permutation rather than the slices themselves so that every
	// value stays paired with its index.
	indexOrder := make([]int, len(indices))
	for i := range indexOrder {
		indexOrder[i] = i
	}
	sort.Slice(indexOrder, func(i, j int) bool {
		return indices[indexOrder[i]] < indices[indexOrder[j]]
	})

	sortedIndices := make([]uint32, len(indices))
	sortedValues := make([]float32, len(values))
	for i, index := range indexOrder {
		sortedIndices[i] = indices[index]
		sortedValues[i] = values[index]
	}

	row := CreateSparseFloatRow(sortedIndices, sortedValues)
	if err := ValidateSparseFloatRows(row); err != nil {
		return nil, err
	}
	return row, nil
}
// dim of a sparse float vector is the maximum/last index + 1
func SparseFloatRowDim(row []byte) int64 {
if len(row) == 0 {

View File

@ -32,7 +32,6 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/testutils"
)
func TestSchema(t *testing.T) {
@ -821,8 +820,8 @@ func TestAppendFieldData(t *testing.T) {
SparseFloatVector := &schemapb.SparseFloatArray{
Dim: 231,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
},
}
@ -917,8 +916,8 @@ func TestDeleteFieldData(t *testing.T) {
SparseFloatVector := &schemapb.SparseFloatArray{
Dim: 231,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
},
}
@ -1363,8 +1362,8 @@ func TestGetDataAndGetDataSize(t *testing.T) {
SparseFloatVector := &schemapb.SparseFloatArray{
Dim: 231,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
},
}
@ -1429,13 +1428,13 @@ func TestGetDataAndGetDataSize(t *testing.T) {
func TestMergeFieldData(t *testing.T) {
sparseFloatRows := [][]byte{
// 3 rows for dst
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{300, 410, 520}, []float32{1.1, 1.2, 1.3}),
CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
CreateSparseFloatRow([]uint32{300, 410, 520}, []float32{1.1, 1.2, 1.3}),
// 3 rows for src
testutils.CreateSparseFloatRow([]uint32{600, 800, 2300}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{90, 141, 352}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{160, 280, 340}, []float32{2.1, 2.2, 2.3}),
CreateSparseFloatRow([]uint32{600, 800, 2300}, []float32{2.1, 2.2, 2.3}),
CreateSparseFloatRow([]uint32{90, 141, 352}, []float32{1.1, 1.2, 1.3}),
CreateSparseFloatRow([]uint32{160, 280, 340}, []float32{2.1, 2.2, 2.3}),
}
t.Run("merge data", func(t *testing.T) {
@ -2032,9 +2031,9 @@ func TestFieldData(t *testing.T) {
func TestValidateSparseFloatRows(t *testing.T) {
t.Run("valid rows", func(t *testing.T) {
rows := [][]byte{
testutils.CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}),
testutils.CreateSparseFloatRow([]uint32{2, 4, 6}, []float32{4.0, 5.0, 6.0}),
testutils.CreateSparseFloatRow([]uint32{0, 7, 8}, []float32{7.0, 8.0, 9.0}),
CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}),
CreateSparseFloatRow([]uint32{2, 4, 6}, []float32{4.0, 5.0, 6.0}),
CreateSparseFloatRow([]uint32{0, 7, 8}, []float32{7.0, 8.0, 9.0}),
}
err := ValidateSparseFloatRows(rows...)
assert.NoError(t, err)
@ -2055,7 +2054,7 @@ func TestValidateSparseFloatRows(t *testing.T) {
t.Run("unordered index", func(t *testing.T) {
rows := [][]byte{
testutils.CreateSparseFloatRow([]uint32{100, 2000, 500}, []float32{1.0, 2.0, 3.0}),
CreateSparseFloatRow([]uint32{100, 2000, 500}, []float32{1.0, 2.0, 3.0}),
}
err := ValidateSparseFloatRows(rows...)
assert.Error(t, err)
@ -2063,7 +2062,7 @@ func TestValidateSparseFloatRows(t *testing.T) {
t.Run("same index", func(t *testing.T) {
rows := [][]byte{
testutils.CreateSparseFloatRow([]uint32{100, 100, 500}, []float32{1.0, 2.0, 3.0}),
CreateSparseFloatRow([]uint32{100, 100, 500}, []float32{1.0, 2.0, 3.0}),
}
err := ValidateSparseFloatRows(rows...)
assert.Error(t, err)
@ -2071,7 +2070,7 @@ func TestValidateSparseFloatRows(t *testing.T) {
t.Run("negative value", func(t *testing.T) {
rows := [][]byte{
testutils.CreateSparseFloatRow([]uint32{100, 200, 500}, []float32{-1.0, 2.0, 3.0}),
CreateSparseFloatRow([]uint32{100, 200, 500}, []float32{-1.0, 2.0, 3.0}),
}
err := ValidateSparseFloatRows(rows...)
assert.Error(t, err)
@ -2079,19 +2078,19 @@ func TestValidateSparseFloatRows(t *testing.T) {
t.Run("invalid value", func(t *testing.T) {
rows := [][]byte{
testutils.CreateSparseFloatRow([]uint32{100, 200, 500}, []float32{float32(math.NaN()), 2.0, 3.0}),
CreateSparseFloatRow([]uint32{100, 200, 500}, []float32{float32(math.NaN()), 2.0, 3.0}),
}
err := ValidateSparseFloatRows(rows...)
assert.Error(t, err)
rows = [][]byte{
testutils.CreateSparseFloatRow([]uint32{100, 200, 500}, []float32{float32(math.Inf(1)), 2.0, 3.0}),
CreateSparseFloatRow([]uint32{100, 200, 500}, []float32{float32(math.Inf(1)), 2.0, 3.0}),
}
err = ValidateSparseFloatRows(rows...)
assert.Error(t, err)
rows = [][]byte{
testutils.CreateSparseFloatRow([]uint32{100, 200, 500}, []float32{float32(math.Inf(-1)), 2.0, 3.0}),
CreateSparseFloatRow([]uint32{100, 200, 500}, []float32{float32(math.Inf(-1)), 2.0, 3.0}),
}
err = ValidateSparseFloatRows(rows...)
assert.Error(t, err)
@ -2099,7 +2098,7 @@ func TestValidateSparseFloatRows(t *testing.T) {
t.Run("invalid index", func(t *testing.T) {
rows := [][]byte{
testutils.CreateSparseFloatRow([]uint32{3, 5, math.MaxUint32}, []float32{1.0, 2.0, 3.0}),
CreateSparseFloatRow([]uint32{3, 5, math.MaxUint32}, []float32{1.0, 2.0, 3.0}),
}
err := ValidateSparseFloatRows(rows...)
assert.Error(t, err)
@ -2107,7 +2106,7 @@ func TestValidateSparseFloatRows(t *testing.T) {
t.Run("empty indices or values", func(t *testing.T) {
rows := [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
CreateSparseFloatRow([]uint32{}, []float32{}),
}
err := ValidateSparseFloatRows(rows...)
assert.Error(t, err)
@ -2118,3 +2117,111 @@ func TestValidateSparseFloatRows(t *testing.T) {
assert.NoError(t, err)
})
}
// TestParseJsonSparseFloatRow exercises CreateSparseFloatRowFromJSON with both
// accepted JSON forms (indices/values and dictionary) and with inputs that
// must be rejected. Apart from the cases that deliberately use malformed JSON
// ("invalid row 1" and "invalid dict row 4"), every invalid input is
// well-formed JSON so that the check named by the case is what rejects it.
func TestParseJsonSparseFloatRow(t *testing.T) {
	t.Run("valid row 1", func(t *testing.T) {
		row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0,3.0]}`)
		res, err := CreateSparseFloatRowFromJSON(row)
		assert.NoError(t, err)
		assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res)
	})

	// Unsorted input indices are accepted and sorted in the result.
	t.Run("valid row 2", func(t *testing.T) {
		row := []byte(`{"indices":[3,1,5],"values":[1.0,2.0,3.0]}`)
		res, err := CreateSparseFloatRowFromJSON(row)
		assert.NoError(t, err)
		assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res)
	})

	// Truncated JSON.
	t.Run("invalid row 1", func(t *testing.T) {
		row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0,3.0`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// More indices than values.
	t.Run("invalid row 2", func(t *testing.T) {
		row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0]}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// More values than indices.
	t.Run("invalid row 3", func(t *testing.T) {
		row := []byte(`{"indices":[1],"values":[1.0,2.0]}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Empty indices and values.
	t.Run("invalid row 4", func(t *testing.T) {
		row := []byte(`{"indices":[],"values":[]}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Negative index.
	t.Run("invalid row 5", func(t *testing.T) {
		row := []byte(`{"indices":[-3],"values":[0.2]}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Negative value.
	t.Run("invalid row 6", func(t *testing.T) {
		row := []byte(`{"indices":[3],"values":[-0.2]}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	t.Run("valid dict row 1", func(t *testing.T) {
		row := []byte(`{"1": 1.0, "3": 2.0, "5": 3.0}`)
		res, err := CreateSparseFloatRowFromJSON(row)
		assert.NoError(t, err)
		assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res)
	})

	// Unsorted dictionary keys are accepted and sorted in the result.
	t.Run("valid dict row 2", func(t *testing.T) {
		row := []byte(`{"3": 1.0, "1": 2.0, "5": 3.0}`)
		res, err := CreateSparseFloatRowFromJSON(row)
		assert.NoError(t, err)
		assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res)
	})

	// Non-numeric key.
	t.Run("invalid dict row 1", func(t *testing.T) {
		row := []byte(`{"a": 1.0, "3": 2.0, "5": 3.0}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Non-numeric string value.
	t.Run("invalid dict row 2", func(t *testing.T) {
		row := []byte(`{"1": "a", "3": 2.0, "5": 3.0}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Numeric value given as a JSON string.
	t.Run("invalid dict row 3", func(t *testing.T) {
		row := []byte(`{"1": "1.0", "3": 2.0, "5": 3.0}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Missing value (malformed JSON).
	t.Run("invalid dict row 4", func(t *testing.T) {
		row := []byte(`{"1": 1.0, "3": 2.0, "5": }`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Negative key.
	t.Run("invalid dict row 5", func(t *testing.T) {
		row := []byte(`{"-1": 1.0, "3": 2.0, "5": 3.0}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Negative value.
	t.Run("invalid dict row 6", func(t *testing.T) {
		row := []byte(`{"1": -1.0, "3": 2.0, "5": 3.0}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})

	// Empty object.
	t.Run("invalid dict row 7", func(t *testing.T) {
		row := []byte(`{}`)
		_, err := CreateSparseFloatRowFromJSON(row)
		assert.Error(t, err)
	})
}

View File

@ -33,7 +33,6 @@ import (
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/metric"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/typeutil"
"github.com/milvus-io/milvus/tests/integration"
)
@ -213,8 +212,8 @@ func (s *SparseTestSuite) TestSparse_invalid_insert() {
// unsorted column index is not allowed
sparseVecs.Contents[0] = make([]byte, 16)
testutils.SparseFloatRowSetAt(sparseVecs.Contents[0], 0, 20, 0.1)
testutils.SparseFloatRowSetAt(sparseVecs.Contents[0], 1, 10, 0.2)
typeutil.SparseFloatRowSetAt(sparseVecs.Contents[0], 0, 20, 0.1)
typeutil.SparseFloatRowSetAt(sparseVecs.Contents[0], 1, 10, 0.2)
insertResult, err = c.Proxy.Insert(ctx, &milvuspb.InsertRequest{
DbName: dbName,
CollectionName: collectionName,
@ -536,8 +535,8 @@ func (s *SparseTestSuite) TestSparse_invalid_search_request() {
// column index in the same row must be ordered
sparseVecs.Contents[0] = make([]byte, 16)
testutils.SparseFloatRowSetAt(sparseVecs.Contents[0], 0, 20, 0.1)
testutils.SparseFloatRowSetAt(sparseVecs.Contents[0], 1, 10, 0.2)
typeutil.SparseFloatRowSetAt(sparseVecs.Contents[0], 0, 20, 0.1)
typeutil.SparseFloatRowSetAt(sparseVecs.Contents[0], 1, 10, 0.2)
replaceQuery(sparseVecs)
searchResult, err = c.Proxy.Search(ctx, searchReq)
s.NoError(err)