enhance: import supports null in parquet and json formats (#35558)

#31728

---------

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>
pull/35359/head
smellthemoon 2024-08-20 16:50:55 +08:00 committed by GitHub
parent 2fbc628994
commit 80a7c78f28
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 1705 additions and 405 deletions

View File

@ -181,9 +181,7 @@ func (s *ChannelManagerSuite) TearDownTest() {
}
func (s *ChannelManagerSuite) TestReleaseStuck() {
var (
channel = "by-dev-rootcoord-dml-2"
)
channel := "by-dev-rootcoord-dml-2"
s.manager.releaseFunc = func(channel string) {
time.Sleep(1 * time.Second)
}

View File

@ -213,12 +213,12 @@ func (s *BFWriteBufferSuite) TestBufferData() {
value, err := metrics.DataNodeFlowGraphBufferDataSize.GetMetricWithLabelValues(fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(s.metacacheInt64.Collection()))
s.NoError(err)
s.MetricsEqual(value, 5604)
s.MetricsEqual(value, 5607)
delMsg = s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) }))
err = wb.BufferData([]*msgstream.InsertMsg{}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
s.NoError(err)
s.MetricsEqual(value, 5844)
s.MetricsEqual(value, 5847)
})
s.Run("normal_run_varchar", func() {
@ -240,7 +240,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
value, err := metrics.DataNodeFlowGraphBufferDataSize.GetMetricWithLabelValues(fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(s.metacacheInt64.Collection()))
s.NoError(err)
s.MetricsEqual(value, 7224)
s.MetricsEqual(value, 7227)
})
s.Run("int_pk_type_not_match", func() {

View File

@ -142,7 +142,7 @@ func (s *InsertBufferSuite) TestBuffer() {
memSize := insertBuffer.Buffer(groups[0], &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
s.EqualValues(100, insertBuffer.MinTimestamp())
s.EqualValues(5364, memSize)
s.EqualValues(5367, memSize)
}
func (s *InsertBufferSuite) TestYield() {

View File

@ -188,12 +188,12 @@ func (s *L0WriteBufferSuite) TestBufferData() {
value, err := metrics.DataNodeFlowGraphBufferDataSize.GetMetricWithLabelValues(fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(s.metacache.Collection()))
s.NoError(err)
s.MetricsEqual(value, 5604)
s.MetricsEqual(value, 5607)
delMsg = s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) }))
err = wb.BufferData([]*msgstream.InsertMsg{}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
s.NoError(err)
s.MetricsEqual(value, 5844)
s.MetricsEqual(value, 5847)
})
s.Run("pk_type_not_match", func() {

View File

@ -491,11 +491,11 @@ func (wb *writeBufferBase) prepareInsert(insertMsgs []*msgstream.InsertMsg) ([]*
return nil, merr.WrapErrServiceInternal("timestamp column row num not match")
}
timestamps := tsFieldData.GetRows().([]int64)
timestamps := tsFieldData.GetDataRows().([]int64)
switch wb.pkField.GetDataType() {
case schemapb.DataType_Int64:
pks := pkFieldData.GetRows().([]int64)
pks := pkFieldData.GetDataRows().([]int64)
for idx, pk := range pks {
ts, ok := inData.intPKTs[pk]
if !ok || timestamps[idx] < ts {
@ -503,7 +503,7 @@ func (wb *writeBufferBase) prepareInsert(insertMsgs []*msgstream.InsertMsg) ([]*
}
}
case schemapb.DataType_VarChar:
pks := pkFieldData.GetRows().([]string)
pks := pkFieldData.GetDataRows().([]string)
for idx, pk := range pks {
ts, ok := inData.strPKTs[pk]
if !ok || timestamps[idx] < ts {

View File

@ -246,8 +246,8 @@ func TestInsertCodecFailed(t *testing.T) {
insertCodec := NewInsertCodecWithSchema(schema)
insertDataEmpty := &InsertData{
Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}, nil},
RowIDField: &Int64FieldData{[]int64{}, nil, false},
TimestampField: &Int64FieldData{[]int64{}, nil, false},
},
}
_, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty)
@ -430,16 +430,16 @@ func TestInsertCodec(t *testing.T) {
insertDataEmpty := &InsertData{
Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}, nil},
BoolField: &BoolFieldData{[]bool{}, nil},
Int8Field: &Int8FieldData{[]int8{}, nil},
Int16Field: &Int16FieldData{[]int16{}, nil},
Int32Field: &Int32FieldData{[]int32{}, nil},
Int64Field: &Int64FieldData{[]int64{}, nil},
FloatField: &FloatFieldData{[]float32{}, nil},
DoubleField: &DoubleFieldData{[]float64{}, nil},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil},
RowIDField: &Int64FieldData{[]int64{}, nil, false},
TimestampField: &Int64FieldData{[]int64{}, nil, false},
BoolField: &BoolFieldData{[]bool{}, nil, false},
Int8Field: &Int8FieldData{[]int8{}, nil, false},
Int16Field: &Int16FieldData{[]int16{}, nil, false},
Int32Field: &Int32FieldData{[]int32{}, nil, false},
Int64Field: &Int64FieldData{[]int64{}, nil, false},
FloatField: &FloatFieldData{[]float32{}, nil, false},
DoubleField: &DoubleFieldData{[]float64{}, nil, false},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil, false},
BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8},
FloatVectorField: &FloatVectorFieldData{[]float32{}, 4},
Float16VectorField: &Float16VectorFieldData{[]byte{}, 4},
@ -450,8 +450,8 @@ func TestInsertCodec(t *testing.T) {
Contents: [][]byte{},
},
},
ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}, nil},
JSONField: &JSONFieldData{[][]byte{}, nil},
ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}, nil, false},
JSONField: &JSONFieldData{[][]byte{}, nil, false},
},
}
b, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty)
@ -828,20 +828,20 @@ func TestMemorySize(t *testing.T) {
},
},
}
assert.Equal(t, insertData1.Data[RowIDField].GetMemorySize(), 8)
assert.Equal(t, insertData1.Data[TimestampField].GetMemorySize(), 8)
assert.Equal(t, insertData1.Data[BoolField].GetMemorySize(), 1)
assert.Equal(t, insertData1.Data[Int8Field].GetMemorySize(), 1)
assert.Equal(t, insertData1.Data[Int16Field].GetMemorySize(), 2)
assert.Equal(t, insertData1.Data[Int32Field].GetMemorySize(), 4)
assert.Equal(t, insertData1.Data[Int64Field].GetMemorySize(), 8)
assert.Equal(t, insertData1.Data[FloatField].GetMemorySize(), 4)
assert.Equal(t, insertData1.Data[DoubleField].GetMemorySize(), 8)
assert.Equal(t, insertData1.Data[StringField].GetMemorySize(), 17)
assert.Equal(t, insertData1.Data[RowIDField].GetMemorySize(), 9)
assert.Equal(t, insertData1.Data[TimestampField].GetMemorySize(), 9)
assert.Equal(t, insertData1.Data[BoolField].GetMemorySize(), 2)
assert.Equal(t, insertData1.Data[Int8Field].GetMemorySize(), 2)
assert.Equal(t, insertData1.Data[Int16Field].GetMemorySize(), 3)
assert.Equal(t, insertData1.Data[Int32Field].GetMemorySize(), 5)
assert.Equal(t, insertData1.Data[Int64Field].GetMemorySize(), 9)
assert.Equal(t, insertData1.Data[FloatField].GetMemorySize(), 5)
assert.Equal(t, insertData1.Data[DoubleField].GetMemorySize(), 9)
assert.Equal(t, insertData1.Data[StringField].GetMemorySize(), 18)
assert.Equal(t, insertData1.Data[BinaryVectorField].GetMemorySize(), 5)
assert.Equal(t, insertData1.Data[FloatField].GetMemorySize(), 4)
assert.Equal(t, insertData1.Data[ArrayField].GetMemorySize(), 3*4)
assert.Equal(t, insertData1.Data[JSONField].GetMemorySize(), len([]byte(`{"batch":1}`))+16)
assert.Equal(t, insertData1.Data[FloatField].GetMemorySize(), 5)
assert.Equal(t, insertData1.Data[ArrayField].GetMemorySize(), 3*4+1)
assert.Equal(t, insertData1.Data[JSONField].GetMemorySize(), len([]byte(`{"batch":1}`))+16+1)
insertData2 := &InsertData{
Data: map[int64]FieldData{
@ -886,46 +886,46 @@ func TestMemorySize(t *testing.T) {
},
}
assert.Equal(t, insertData2.Data[RowIDField].GetMemorySize(), 16)
assert.Equal(t, insertData2.Data[TimestampField].GetMemorySize(), 16)
assert.Equal(t, insertData2.Data[BoolField].GetMemorySize(), 2)
assert.Equal(t, insertData2.Data[Int8Field].GetMemorySize(), 2)
assert.Equal(t, insertData2.Data[Int16Field].GetMemorySize(), 4)
assert.Equal(t, insertData2.Data[Int32Field].GetMemorySize(), 8)
assert.Equal(t, insertData2.Data[Int64Field].GetMemorySize(), 16)
assert.Equal(t, insertData2.Data[FloatField].GetMemorySize(), 8)
assert.Equal(t, insertData2.Data[DoubleField].GetMemorySize(), 16)
assert.Equal(t, insertData2.Data[StringField].GetMemorySize(), 35)
assert.Equal(t, insertData2.Data[RowIDField].GetMemorySize(), 17)
assert.Equal(t, insertData2.Data[TimestampField].GetMemorySize(), 17)
assert.Equal(t, insertData2.Data[BoolField].GetMemorySize(), 3)
assert.Equal(t, insertData2.Data[Int8Field].GetMemorySize(), 3)
assert.Equal(t, insertData2.Data[Int16Field].GetMemorySize(), 5)
assert.Equal(t, insertData2.Data[Int32Field].GetMemorySize(), 9)
assert.Equal(t, insertData2.Data[Int64Field].GetMemorySize(), 17)
assert.Equal(t, insertData2.Data[FloatField].GetMemorySize(), 9)
assert.Equal(t, insertData2.Data[DoubleField].GetMemorySize(), 17)
assert.Equal(t, insertData2.Data[StringField].GetMemorySize(), 36)
assert.Equal(t, insertData2.Data[BinaryVectorField].GetMemorySize(), 6)
assert.Equal(t, insertData2.Data[FloatField].GetMemorySize(), 8)
assert.Equal(t, insertData2.Data[FloatField].GetMemorySize(), 9)
insertDataEmpty := &InsertData{
Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}, nil},
BoolField: &BoolFieldData{[]bool{}, nil},
Int8Field: &Int8FieldData{[]int8{}, nil},
Int16Field: &Int16FieldData{[]int16{}, nil},
Int32Field: &Int32FieldData{[]int32{}, nil},
Int64Field: &Int64FieldData{[]int64{}, nil},
FloatField: &FloatFieldData{[]float32{}, nil},
DoubleField: &DoubleFieldData{[]float64{}, nil},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil},
RowIDField: &Int64FieldData{[]int64{}, nil, false},
TimestampField: &Int64FieldData{[]int64{}, nil, false},
BoolField: &BoolFieldData{[]bool{}, nil, false},
Int8Field: &Int8FieldData{[]int8{}, nil, false},
Int16Field: &Int16FieldData{[]int16{}, nil, false},
Int32Field: &Int32FieldData{[]int32{}, nil, false},
Int64Field: &Int64FieldData{[]int64{}, nil, false},
FloatField: &FloatFieldData{[]float32{}, nil, false},
DoubleField: &DoubleFieldData{[]float64{}, nil, false},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil, false},
BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8},
FloatVectorField: &FloatVectorFieldData{[]float32{}, 4},
},
}
assert.Equal(t, insertDataEmpty.Data[RowIDField].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[TimestampField].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[BoolField].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[Int8Field].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[Int16Field].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[Int32Field].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[Int64Field].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[FloatField].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[DoubleField].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[StringField].GetMemorySize(), 0)
assert.Equal(t, insertDataEmpty.Data[RowIDField].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[TimestampField].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[BoolField].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[Int8Field].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[Int16Field].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[Int32Field].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[Int64Field].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[FloatField].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[DoubleField].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[StringField].GetMemorySize(), 1)
assert.Equal(t, insertDataEmpty.Data[BinaryVectorField].GetMemorySize(), 4)
assert.Equal(t, insertDataEmpty.Data[FloatVectorField].GetMemorySize(), 4)
}
@ -979,21 +979,21 @@ func TestAddFieldDataToPayload(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e, _ := w.NextInsertEventWriter()
var err error
err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int8, &Int8FieldData{[]int8{}, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_Int8, &Int8FieldData{[]int8{}, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int16, &Int16FieldData{[]int16{}, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_Int16, &Int16FieldData{[]int16{}, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int32, &Int32FieldData{[]int32{}, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_Int32, &Int32FieldData{[]int32{}, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int64, &Int64FieldData{[]int64{}, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_Int64, &Int64FieldData{[]int64{}, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Float, &FloatFieldData{[]float32{}, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_Float, &FloatFieldData{[]float32{}, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Double, &DoubleFieldData{[]float64{}, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_Double, &DoubleFieldData{[]float64{}, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_String, &StringFieldData{[]string{"test"}, schemapb.DataType_VarChar, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_String, &StringFieldData{[]string{"test"}, schemapb.DataType_VarChar, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Array, &ArrayFieldData{
ElementType: schemapb.DataType_VarChar,
@ -1004,7 +1004,7 @@ func TestAddFieldDataToPayload(t *testing.T) {
}},
})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_JSON, &JSONFieldData{[][]byte{[]byte(`"batch":2}`)}, nil})
err = AddFieldDataToPayload(e, schemapb.DataType_JSON, &JSONFieldData{[][]byte{[]byte(`"batch":2}`)}, nil, false})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_BinaryVector, &BinaryVectorFieldData{[]byte{}, 8})
assert.Error(t, err)

File diff suppressed because it is too large Load Diff

View File

@ -114,15 +114,15 @@ func (s *InsertDataSuite) TestInsertData() {
s.Run("init by New", func() {
s.True(s.iDataEmpty.IsEmpty())
s.Equal(0, s.iDataEmpty.GetRowNum())
s.Equal(16, s.iDataEmpty.GetMemorySize())
s.Equal(28, s.iDataEmpty.GetMemorySize())
s.False(s.iDataOneRow.IsEmpty())
s.Equal(1, s.iDataOneRow.GetRowNum())
s.Equal(179, s.iDataOneRow.GetMemorySize())
s.Equal(191, s.iDataOneRow.GetMemorySize())
s.False(s.iDataTwoRows.IsEmpty())
s.Equal(2, s.iDataTwoRows.GetRowNum())
s.Equal(340, s.iDataTwoRows.GetMemorySize())
s.Equal(352, s.iDataTwoRows.GetMemorySize())
for _, field := range s.iDataTwoRows.Data {
s.Equal(2, field.RowNum())
@ -135,52 +135,52 @@ func (s *InsertDataSuite) TestInsertData() {
}
func (s *InsertDataSuite) TestMemorySize() {
s.Equal(s.iDataEmpty.Data[RowIDField].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[TimestampField].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[BoolField].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[Int8Field].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[Int16Field].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[Int32Field].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[Int64Field].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[FloatField].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[DoubleField].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[StringField].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[ArrayField].GetMemorySize(), 0)
s.Equal(s.iDataEmpty.Data[RowIDField].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[TimestampField].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[BoolField].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[Int8Field].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[Int16Field].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[Int32Field].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[Int64Field].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[FloatField].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[DoubleField].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[StringField].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[ArrayField].GetMemorySize(), 1)
s.Equal(s.iDataEmpty.Data[BinaryVectorField].GetMemorySize(), 4)
s.Equal(s.iDataEmpty.Data[FloatVectorField].GetMemorySize(), 4)
s.Equal(s.iDataEmpty.Data[Float16VectorField].GetMemorySize(), 4)
s.Equal(s.iDataEmpty.Data[BFloat16VectorField].GetMemorySize(), 4)
s.Equal(s.iDataEmpty.Data[SparseFloatVectorField].GetMemorySize(), 0)
s.Equal(s.iDataOneRow.Data[RowIDField].GetMemorySize(), 8)
s.Equal(s.iDataOneRow.Data[TimestampField].GetMemorySize(), 8)
s.Equal(s.iDataOneRow.Data[BoolField].GetMemorySize(), 1)
s.Equal(s.iDataOneRow.Data[Int8Field].GetMemorySize(), 1)
s.Equal(s.iDataOneRow.Data[Int16Field].GetMemorySize(), 2)
s.Equal(s.iDataOneRow.Data[Int32Field].GetMemorySize(), 4)
s.Equal(s.iDataOneRow.Data[Int64Field].GetMemorySize(), 8)
s.Equal(s.iDataOneRow.Data[FloatField].GetMemorySize(), 4)
s.Equal(s.iDataOneRow.Data[DoubleField].GetMemorySize(), 8)
s.Equal(s.iDataOneRow.Data[StringField].GetMemorySize(), 19)
s.Equal(s.iDataOneRow.Data[JSONField].GetMemorySize(), len([]byte(`{"batch":1}`))+16)
s.Equal(s.iDataOneRow.Data[ArrayField].GetMemorySize(), 3*4)
s.Equal(s.iDataOneRow.Data[RowIDField].GetMemorySize(), 9)
s.Equal(s.iDataOneRow.Data[TimestampField].GetMemorySize(), 9)
s.Equal(s.iDataOneRow.Data[BoolField].GetMemorySize(), 2)
s.Equal(s.iDataOneRow.Data[Int8Field].GetMemorySize(), 2)
s.Equal(s.iDataOneRow.Data[Int16Field].GetMemorySize(), 3)
s.Equal(s.iDataOneRow.Data[Int32Field].GetMemorySize(), 5)
s.Equal(s.iDataOneRow.Data[Int64Field].GetMemorySize(), 9)
s.Equal(s.iDataOneRow.Data[FloatField].GetMemorySize(), 5)
s.Equal(s.iDataOneRow.Data[DoubleField].GetMemorySize(), 9)
s.Equal(s.iDataOneRow.Data[StringField].GetMemorySize(), 20)
s.Equal(s.iDataOneRow.Data[JSONField].GetMemorySize(), len([]byte(`{"batch":1}`))+16+1)
s.Equal(s.iDataOneRow.Data[ArrayField].GetMemorySize(), 3*4+1)
s.Equal(s.iDataOneRow.Data[BinaryVectorField].GetMemorySize(), 5)
s.Equal(s.iDataOneRow.Data[FloatVectorField].GetMemorySize(), 20)
s.Equal(s.iDataOneRow.Data[Float16VectorField].GetMemorySize(), 12)
s.Equal(s.iDataOneRow.Data[BFloat16VectorField].GetMemorySize(), 12)
s.Equal(s.iDataOneRow.Data[SparseFloatVectorField].GetMemorySize(), 28)
s.Equal(s.iDataTwoRows.Data[RowIDField].GetMemorySize(), 16)
s.Equal(s.iDataTwoRows.Data[TimestampField].GetMemorySize(), 16)
s.Equal(s.iDataTwoRows.Data[BoolField].GetMemorySize(), 2)
s.Equal(s.iDataTwoRows.Data[Int8Field].GetMemorySize(), 2)
s.Equal(s.iDataTwoRows.Data[Int16Field].GetMemorySize(), 4)
s.Equal(s.iDataTwoRows.Data[Int32Field].GetMemorySize(), 8)
s.Equal(s.iDataTwoRows.Data[Int64Field].GetMemorySize(), 16)
s.Equal(s.iDataTwoRows.Data[FloatField].GetMemorySize(), 8)
s.Equal(s.iDataTwoRows.Data[DoubleField].GetMemorySize(), 16)
s.Equal(s.iDataTwoRows.Data[StringField].GetMemorySize(), 38)
s.Equal(s.iDataTwoRows.Data[ArrayField].GetMemorySize(), 24)
s.Equal(s.iDataTwoRows.Data[RowIDField].GetMemorySize(), 17)
s.Equal(s.iDataTwoRows.Data[TimestampField].GetMemorySize(), 17)
s.Equal(s.iDataTwoRows.Data[BoolField].GetMemorySize(), 3)
s.Equal(s.iDataTwoRows.Data[Int8Field].GetMemorySize(), 3)
s.Equal(s.iDataTwoRows.Data[Int16Field].GetMemorySize(), 5)
s.Equal(s.iDataTwoRows.Data[Int32Field].GetMemorySize(), 9)
s.Equal(s.iDataTwoRows.Data[Int64Field].GetMemorySize(), 17)
s.Equal(s.iDataTwoRows.Data[FloatField].GetMemorySize(), 9)
s.Equal(s.iDataTwoRows.Data[DoubleField].GetMemorySize(), 17)
s.Equal(s.iDataTwoRows.Data[StringField].GetMemorySize(), 39)
s.Equal(s.iDataTwoRows.Data[ArrayField].GetMemorySize(), 25)
s.Equal(s.iDataTwoRows.Data[BinaryVectorField].GetMemorySize(), 6)
s.Equal(s.iDataTwoRows.Data[FloatVectorField].GetMemorySize(), 36)
s.Equal(s.iDataTwoRows.Data[Float16VectorField].GetMemorySize(), 20)
@ -230,7 +230,7 @@ func (s *InsertDataSuite) SetupTest() {
s.Require().NoError(err)
s.True(s.iDataEmpty.IsEmpty())
s.Equal(0, s.iDataEmpty.GetRowNum())
s.Equal(16, s.iDataEmpty.GetMemorySize())
s.Equal(28, s.iDataEmpty.GetMemorySize())
row1 := map[FieldID]interface{}{
RowIDField: int64(3),
@ -343,7 +343,7 @@ func (s *ArrayFieldDataSuite) TestArrayFieldData() {
s.NoError(err)
s.Equal(0, insertData.GetRowNum())
s.Equal(0, insertData.GetMemorySize())
s.Equal(11, insertData.GetMemorySize())
s.True(insertData.IsEmpty())
fieldIDToData := map[int64]interface{}{
@ -395,7 +395,7 @@ func (s *ArrayFieldDataSuite) TestArrayFieldData() {
err = insertData.Append(fieldIDToData)
s.NoError(err)
s.Equal(1, insertData.GetRowNum())
s.Equal(114, insertData.GetMemorySize())
s.Equal(126, insertData.GetMemorySize())
s.False(insertData.IsEmpty())
s.Equal(114, insertData.GetRowSize(0))
s.Equal(115, insertData.GetRowSize(0))
}

View File

@ -48,8 +48,9 @@ func (r *fieldReader) Next() (storage.FieldData, error) {
if err != nil {
return nil, err
}
// need append nulls
for _, rows := range rowsSet {
err = fieldData.AppendRows(rows)
err = fieldData.AppendRows(rows, nil)
if err != nil {
return nil, err
}

View File

@ -26,7 +26,6 @@ import (
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"golang.org/x/exp/slices"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
@ -35,7 +34,6 @@ import (
"github.com/milvus-io/milvus/internal/util/testutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
type ReaderSuite struct {
@ -57,7 +55,7 @@ func (suite *ReaderSuite) SetupTest() {
suite.vecDataType = schemapb.DataType_FloatVector
}
func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.DataType) {
func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.DataType, nullable bool) {
schema := &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
@ -98,6 +96,7 @@ func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.Data
Value: "128",
},
},
Nullable: nullable,
},
},
}
@ -129,15 +128,10 @@ func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.Data
expectInsertData := insertData
for fieldID, data := range actualInsertData.Data {
suite.Equal(expectRows, data.RowNum())
fieldDataType := typeutil.GetField(schema, fieldID).GetDataType()
for i := 0; i < expectRows; i++ {
expect := expectInsertData.Data[fieldID].GetRow(i + offsetBegin)
actual := data.GetRow(i)
if fieldDataType == schemapb.DataType_Array {
suite.True(slices.Equal(expect.(*schemapb.ScalarField).GetIntData().GetData(), actual.(*schemapb.ScalarField).GetIntData().GetData()))
} else {
suite.Equal(expect, actual)
}
suite.Equal(expect, actual)
}
}
}
@ -148,43 +142,63 @@ func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.Data
}
func (suite *ReaderSuite) TestReadScalarFields() {
suite.run(schemapb.DataType_Bool, schemapb.DataType_None)
suite.run(schemapb.DataType_Int8, schemapb.DataType_None)
suite.run(schemapb.DataType_Int16, schemapb.DataType_None)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None)
suite.run(schemapb.DataType_Int64, schemapb.DataType_None)
suite.run(schemapb.DataType_Float, schemapb.DataType_None)
suite.run(schemapb.DataType_Double, schemapb.DataType_None)
suite.run(schemapb.DataType_String, schemapb.DataType_None)
suite.run(schemapb.DataType_VarChar, schemapb.DataType_None)
suite.run(schemapb.DataType_JSON, schemapb.DataType_None)
suite.run(schemapb.DataType_Bool, schemapb.DataType_None, false)
suite.run(schemapb.DataType_Int8, schemapb.DataType_None, false)
suite.run(schemapb.DataType_Int16, schemapb.DataType_None, false)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
suite.run(schemapb.DataType_Int64, schemapb.DataType_None, false)
suite.run(schemapb.DataType_Float, schemapb.DataType_None, false)
suite.run(schemapb.DataType_Double, schemapb.DataType_None, false)
suite.run(schemapb.DataType_String, schemapb.DataType_None, false)
suite.run(schemapb.DataType_VarChar, schemapb.DataType_None, false)
suite.run(schemapb.DataType_JSON, schemapb.DataType_None, false)
suite.run(schemapb.DataType_Array, schemapb.DataType_Bool)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int8)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int16)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int32)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int64)
suite.run(schemapb.DataType_Array, schemapb.DataType_Float)
suite.run(schemapb.DataType_Array, schemapb.DataType_Double)
suite.run(schemapb.DataType_Array, schemapb.DataType_String)
suite.run(schemapb.DataType_Array, schemapb.DataType_Bool, false)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int8, false)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int16, false)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int32, false)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int64, false)
suite.run(schemapb.DataType_Array, schemapb.DataType_Float, false)
suite.run(schemapb.DataType_Array, schemapb.DataType_Double, false)
suite.run(schemapb.DataType_Array, schemapb.DataType_String, false)
suite.run(schemapb.DataType_Bool, schemapb.DataType_None, true)
suite.run(schemapb.DataType_Int8, schemapb.DataType_None, true)
suite.run(schemapb.DataType_Int16, schemapb.DataType_None, true)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, true)
suite.run(schemapb.DataType_Int64, schemapb.DataType_None, true)
suite.run(schemapb.DataType_Float, schemapb.DataType_None, true)
suite.run(schemapb.DataType_Double, schemapb.DataType_None, true)
suite.run(schemapb.DataType_String, schemapb.DataType_None, true)
suite.run(schemapb.DataType_VarChar, schemapb.DataType_None, true)
suite.run(schemapb.DataType_JSON, schemapb.DataType_None, true)
suite.run(schemapb.DataType_Array, schemapb.DataType_Bool, true)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int8, true)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int16, true)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int32, true)
suite.run(schemapb.DataType_Array, schemapb.DataType_Int64, true)
suite.run(schemapb.DataType_Array, schemapb.DataType_Float, true)
suite.run(schemapb.DataType_Array, schemapb.DataType_Double, true)
suite.run(schemapb.DataType_Array, schemapb.DataType_String, true)
}
func (suite *ReaderSuite) TestStringPK() {
suite.pkDataType = schemapb.DataType_VarChar
suite.run(schemapb.DataType_Int32, schemapb.DataType_None)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
}
func (suite *ReaderSuite) TestVector() {
suite.vecDataType = schemapb.DataType_BinaryVector
suite.run(schemapb.DataType_Int32, schemapb.DataType_None)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
suite.vecDataType = schemapb.DataType_FloatVector
suite.run(schemapb.DataType_Int32, schemapb.DataType_None)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
suite.vecDataType = schemapb.DataType_Float16Vector
suite.run(schemapb.DataType_Int32, schemapb.DataType_None)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
suite.vecDataType = schemapb.DataType_BFloat16Vector
suite.run(schemapb.DataType_Int32, schemapb.DataType_None)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
suite.vecDataType = schemapb.DataType_SparseFloatVector
suite.run(schemapb.DataType_Int32, schemapb.DataType_None)
suite.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
}
func TestUtil(t *testing.T) {

View File

@ -199,6 +199,9 @@ func (r *rowParser) combineDynamicRow(dynamicValues map[string]any, row Row) err
}
func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
if r.id2Field[fieldID].GetNullable() {
return r.parseNullableEntity(fieldID, obj)
}
switch r.id2Field[fieldID].GetDataType() {
case schemapb.DataType_Bool:
b, ok := obj.(bool)
@ -418,6 +421,147 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
}
}
// parseNullableEntity converts a raw JSON-decoded value (obj) for a nullable
// field into its storage representation. A nil obj represents a JSON null and
// yields (nil, nil) for every supported scalar type; vector fields may not be
// nullable and are rejected with an error regardless of obj.
//
// Numeric values are expected to arrive as json.Number (callers decode with
// Decoder.UseNumber), strings as string, JSON either as a string holding JSON
// text or as an already-decoded map, and arrays as []interface{}.
func (r *rowParser) parseNullableEntity(fieldID int64, obj any) (any, error) {
	dataType := r.id2Field[fieldID].GetDataType()

	// First pass: reject vector types outright and translate JSON null for
	// every supported scalar type, so the parsing switch below never sees nil.
	// An unsupported type errors here even when obj is nil, matching the
	// per-case structure this replaces.
	switch dataType {
	case schemapb.DataType_BinaryVector, schemapb.DataType_FloatVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector:
		return nil, merr.WrapErrParameterInvalidMsg("not support nullable in vector")
	case schemapb.DataType_Bool,
		schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32, schemapb.DataType_Int64,
		schemapb.DataType_Float, schemapb.DataType_Double,
		schemapb.DataType_String, schemapb.DataType_VarChar,
		schemapb.DataType_JSON, schemapb.DataType_Array:
		if obj == nil {
			return nil, nil
		}
	default:
		return nil, merr.WrapErrImportFailed(fmt.Sprintf("parse json failed, unsupport data type: %s",
			dataType.String()))
	}

	// parseInt/parseFloat factor out the json.Number handling shared by the
	// integer and floating-point branches; bitSize selects the target width.
	parseInt := func(bitSize int) (int64, error) {
		value, ok := obj.(json.Number)
		if !ok {
			return 0, r.wrapTypeError(obj, fieldID)
		}
		return strconv.ParseInt(value.String(), 0, bitSize)
	}
	parseFloat := func(bitSize int) (float64, error) {
		value, ok := obj.(json.Number)
		if !ok {
			return 0, r.wrapTypeError(obj, fieldID)
		}
		return strconv.ParseFloat(value.String(), bitSize)
	}

	switch dataType {
	case schemapb.DataType_Bool:
		value, ok := obj.(bool)
		if !ok {
			return nil, r.wrapTypeError(obj, fieldID)
		}
		return value, nil
	case schemapb.DataType_Int8:
		num, err := parseInt(8)
		if err != nil {
			return nil, err
		}
		return int8(num), nil
	case schemapb.DataType_Int16:
		num, err := parseInt(16)
		if err != nil {
			return nil, err
		}
		return int16(num), nil
	case schemapb.DataType_Int32:
		num, err := parseInt(32)
		if err != nil {
			return nil, err
		}
		return int32(num), nil
	case schemapb.DataType_Int64:
		num, err := parseInt(64)
		if err != nil {
			return nil, err
		}
		return num, nil
	case schemapb.DataType_Float:
		num, err := parseFloat(32)
		if err != nil {
			return nil, err
		}
		return float32(num), nil
	case schemapb.DataType_Double:
		num, err := parseFloat(64)
		if err != nil {
			return nil, err
		}
		return num, nil
	case schemapb.DataType_String, schemapb.DataType_VarChar:
		value, ok := obj.(string)
		if !ok {
			return nil, r.wrapTypeError(obj, fieldID)
		}
		return value, nil
	case schemapb.DataType_JSON:
		// for JSON data, we accept two kinds input: string and map[string]interface
		// user can write JSON content as {"FieldJSON": "{\"x\": 8}"} or {"FieldJSON": {"x": 8}}
		if value, ok := obj.(string); ok {
			var dummy interface{}
			if err := json.Unmarshal([]byte(value), &dummy); err != nil {
				return nil, err
			}
			return []byte(value), nil
		} else if mp, ok := obj.(map[string]interface{}); ok {
			bs, err := json.Marshal(mp)
			if err != nil {
				return nil, err
			}
			return bs, nil
		}
		return nil, r.wrapTypeError(obj, fieldID)
	case schemapb.DataType_Array:
		arr, ok := obj.([]interface{})
		if !ok {
			return nil, r.wrapTypeError(obj, fieldID)
		}
		return r.arrayToFieldData(arr, r.id2Field[fieldID].GetElementType())
	default:
		// Unreachable: every type admitted by the first switch is handled above.
		return nil, merr.WrapErrImportFailed(fmt.Sprintf("parse json failed, unsupport data type: %s",
			dataType.String()))
	}
}
func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataType) (*schemapb.ScalarField, error) {
switch eleType {
case schemapb.DataType_Bool:

View File

@ -98,6 +98,7 @@ func TestRowParser_Parse_Valid(t *testing.T) {
var mp map[string]interface{}
desc := json.NewDecoder(strings.NewReader(c.name))
desc.UseNumber()
err = desc.Decode(&mp)
assert.NoError(t, err)

View File

@ -89,7 +89,7 @@ func (r *reader) Read() (*storage.InsertData, error) {
if data == nil {
return nil, io.EOF
}
err = insertData.Data[fieldID].AppendRows(data)
err = insertData.Data[fieldID].AppendRows(data, nil)
if err != nil {
return nil, err
}

View File

@ -142,7 +142,7 @@ func (suite *ReaderSuite) run(dt schemapb.DataType) {
}
data = jsonStrs
case schemapb.DataType_BinaryVector:
rows := fieldData.GetRows().([]byte)
rows := fieldData.GetDataRows().([]byte)
const rowBytes = dim / 8
chunked := lo.Chunk(rows, rowBytes)
chunkedRows := make([][rowBytes]byte, len(chunked))
@ -151,7 +151,7 @@ func (suite *ReaderSuite) run(dt schemapb.DataType) {
}
data = chunkedRows
case schemapb.DataType_FloatVector:
rows := fieldData.GetRows().([]float32)
rows := fieldData.GetDataRows().([]float32)
chunked := lo.Chunk(rows, dim)
chunkedRows := make([][dim]float32, len(chunked))
for i, innerSlice := range chunked {
@ -159,7 +159,7 @@ func (suite *ReaderSuite) run(dt schemapb.DataType) {
}
data = chunkedRows
case schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector:
rows := fieldData.GetRows().([]byte)
rows := fieldData.GetDataRows().([]byte)
const rowBytes = dim * 2
chunked := lo.Chunk(rows, rowBytes)
chunkedRows := make([][rowBytes]byte, len(chunked))
@ -168,7 +168,7 @@ func (suite *ReaderSuite) run(dt schemapb.DataType) {
}
data = chunkedRows
default:
data = fieldData.GetRows()
data = fieldData.GetDataRows()
}
reader, err := CreateReader(data)
@ -276,7 +276,7 @@ func (suite *ReaderSuite) failRun(dt schemapb.DataType, isDynamic bool) {
}
data = jsonStrs
case schemapb.DataType_BinaryVector:
rows := fieldData.GetRows().([]byte)
rows := fieldData.GetDataRows().([]byte)
const rowBytes = dim / 8
chunked := lo.Chunk(rows, rowBytes)
chunkedRows := make([][rowBytes]byte, len(chunked))
@ -285,7 +285,7 @@ func (suite *ReaderSuite) failRun(dt schemapb.DataType, isDynamic bool) {
}
data = chunkedRows
case schemapb.DataType_FloatVector:
rows := fieldData.GetRows().([]float32)
rows := fieldData.GetDataRows().([]float32)
chunked := lo.Chunk(rows, dim)
chunkedRows := make([][dim]float32, len(chunked))
for i, innerSlice := range chunked {
@ -293,7 +293,7 @@ func (suite *ReaderSuite) failRun(dt schemapb.DataType, isDynamic bool) {
}
data = chunkedRows
case schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector:
rows := fieldData.GetRows().([]byte)
rows := fieldData.GetDataRows().([]byte)
const rowBytes = dim * 2
chunked := lo.Chunk(rows, rowBytes)
chunkedRows := make([][rowBytes]byte, len(chunked))
@ -302,7 +302,7 @@ func (suite *ReaderSuite) failRun(dt schemapb.DataType, isDynamic bool) {
}
data = chunkedRows
default:
data = fieldData.GetRows()
data = fieldData.GetDataRows()
}
reader, err := CreateReader(data)

View File

@ -66,58 +66,121 @@ func NewFieldReader(ctx context.Context, reader *pqarrow.FileReader, columnIndex
return cr, nil
}
func (c *FieldReader) Next(count int64) (any, error) {
func (c *FieldReader) Next(count int64) (any, any, error) {
switch c.field.GetDataType() {
case schemapb.DataType_Bool:
return ReadBoolData(c, count)
if c.field.GetNullable() {
return ReadNullableBoolData(c, count)
}
data, err := ReadBoolData(c, count)
return data, nil, err
case schemapb.DataType_Int8:
return ReadIntegerOrFloatData[int8](c, count)
if c.field.GetNullable() {
return ReadNullableIntegerOrFloatData[int8](c, count)
}
data, err := ReadIntegerOrFloatData[int8](c, count)
return data, nil, err
case schemapb.DataType_Int16:
return ReadIntegerOrFloatData[int16](c, count)
if c.field.GetNullable() {
return ReadNullableIntegerOrFloatData[int16](c, count)
}
data, err := ReadIntegerOrFloatData[int16](c, count)
return data, nil, err
case schemapb.DataType_Int32:
return ReadIntegerOrFloatData[int32](c, count)
if c.field.GetNullable() {
return ReadNullableIntegerOrFloatData[int32](c, count)
}
data, err := ReadIntegerOrFloatData[int32](c, count)
return data, nil, err
case schemapb.DataType_Int64:
return ReadIntegerOrFloatData[int64](c, count)
if c.field.GetNullable() {
return ReadNullableIntegerOrFloatData[int64](c, count)
}
data, err := ReadIntegerOrFloatData[int64](c, count)
return data, nil, err
case schemapb.DataType_Float:
if c.field.GetNullable() {
data, validData, err := ReadNullableIntegerOrFloatData[float32](c, count)
if err != nil {
return nil, nil, err
}
if data == nil {
return nil, nil, nil
}
return data, validData, typeutil.VerifyFloats32(data.([]float32))
}
data, err := ReadIntegerOrFloatData[float32](c, count)
if err != nil {
return nil, err
return nil, nil, err
}
if data == nil {
return nil, nil
return nil, nil, nil
}
return data, typeutil.VerifyFloats32(data.([]float32))
return data, nil, typeutil.VerifyFloats32(data.([]float32))
case schemapb.DataType_Double:
if c.field.GetNullable() {
data, validData, err := ReadNullableIntegerOrFloatData[float64](c, count)
if err != nil {
return nil, nil, err
}
if data == nil {
return nil, nil, nil
}
return data, validData, typeutil.VerifyFloats64(data.([]float64))
}
data, err := ReadIntegerOrFloatData[float64](c, count)
if err != nil {
return nil, err
return nil, nil, err
}
if data == nil {
return nil, nil
return nil, nil, nil
}
return data, typeutil.VerifyFloats64(data.([]float64))
return data, nil, typeutil.VerifyFloats64(data.([]float64))
case schemapb.DataType_VarChar, schemapb.DataType_String:
return ReadVarcharData(c, count)
if c.field.GetNullable() {
return ReadNullableVarcharData(c, count)
}
data, err := ReadVarcharData(c, count)
return data, nil, err
case schemapb.DataType_JSON:
return ReadJSONData(c, count)
if c.field.GetNullable() {
return ReadNullableJSONData(c, count)
}
data, err := ReadJSONData(c, count)
return data, nil, err
case schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector:
return ReadBinaryData(c, count)
if c.field.GetNullable() {
return nil, nil, merr.WrapErrParameterInvalidMsg("not support nullable in vector")
}
data, err := ReadBinaryData(c, count)
return data, nil, err
case schemapb.DataType_FloatVector:
if c.field.GetNullable() {
return nil, nil, merr.WrapErrParameterInvalidMsg("not support nullable in vector")
}
arrayData, err := ReadIntegerOrFloatArrayData[float32](c, count)
if err != nil {
return nil, err
return nil, nil, err
}
if arrayData == nil {
return nil, nil
return nil, nil, nil
}
vectors := lo.Flatten(arrayData.([][]float32))
return vectors, nil
return vectors, nil, nil
case schemapb.DataType_SparseFloatVector:
return ReadSparseFloatVectorData(c, count)
if c.field.GetNullable() {
return nil, nil, merr.WrapErrParameterInvalidMsg("not support nullable in vector")
}
data, err := ReadSparseFloatVectorData(c, count)
return data, nil, err
case schemapb.DataType_Array:
return ReadArrayData(c, count)
if c.field.GetNullable() {
return ReadNullableArrayData(c, count)
}
data, err := ReadArrayData(c, count)
return data, nil, err
default:
return nil, merr.WrapErrImportFailed(fmt.Sprintf("unsupported data type '%s' for field '%s'",
return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("unsupported data type '%s' for field '%s'",
c.field.GetDataType().String(), c.field.GetName()))
}
}
@ -133,6 +196,9 @@ func ReadBoolData(pcr *FieldReader, count int64) (any, error) {
for _, chunk := range chunked.Chunks() {
dataNums := chunk.Data().Len()
boolReader, ok := chunk.(*array.Boolean)
if boolReader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
if !ok {
return nil, WrapTypeErr("bool", chunk.DataType().Name(), pcr.field)
}
@ -146,6 +212,34 @@ func ReadBoolData(pcr *FieldReader, count int64) (any, error) {
return data, nil
}
// ReadNullableBoolData reads a nullable bool column from the next Parquet
// batch. It returns the values, a parallel validity slice derived from the
// Arrow null bitmap, or (nil, nil, nil) when the column is exhausted.
func ReadNullableBoolData(pcr *FieldReader, count int64) (any, []bool, error) {
	chunked, err := pcr.columnReader.NextBatch(count)
	if err != nil {
		return nil, nil, err
	}
	values := make([]bool, 0, count)
	valid := make([]bool, 0, count)
	for _, chunk := range chunked.Chunks() {
		reader, ok := chunk.(*array.Boolean)
		if !ok {
			return nil, nil, WrapTypeErr("bool", chunk.DataType().Name(), pcr.field)
		}
		n := chunk.Data().Len()
		valid = append(valid, bytesToBoolArray(n, reader.NullBitmapBytes())...)
		// Null slots still contribute a placeholder value; consumers must
		// consult the validity slice.
		for i := 0; i < n; i++ {
			values = append(values, reader.Value(i))
		}
	}
	if len(values) == 0 {
		return nil, nil, nil
	}
	if len(values) != len(valid) {
		return nil, nil, merr.WrapErrParameterInvalid(len(values), len(valid), "length of data is not equal to length of valid_data")
	}
	return values, valid, nil
}
func ReadIntegerOrFloatData[T constraints.Integer | constraints.Float](pcr *FieldReader, count int64) (any, error) {
chunked, err := pcr.columnReader.NextBatch(count)
if err != nil {
@ -157,31 +251,49 @@ func ReadIntegerOrFloatData[T constraints.Integer | constraints.Float](pcr *Fiel
switch chunk.DataType().ID() {
case arrow.INT8:
int8Reader := chunk.(*array.Int8)
if int8Reader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
for i := 0; i < dataNums; i++ {
data = append(data, T(int8Reader.Value(i)))
}
case arrow.INT16:
int16Reader := chunk.(*array.Int16)
if int16Reader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
for i := 0; i < dataNums; i++ {
data = append(data, T(int16Reader.Value(i)))
}
case arrow.INT32:
int32Reader := chunk.(*array.Int32)
if int32Reader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
for i := 0; i < dataNums; i++ {
data = append(data, T(int32Reader.Value(i)))
}
case arrow.INT64:
int64Reader := chunk.(*array.Int64)
if int64Reader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
for i := 0; i < dataNums; i++ {
data = append(data, T(int64Reader.Value(i)))
}
case arrow.FLOAT32:
float32Reader := chunk.(*array.Float32)
if float32Reader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
for i := 0; i < dataNums; i++ {
data = append(data, T(float32Reader.Value(i)))
}
case arrow.FLOAT64:
float64Reader := chunk.(*array.Float64)
if float64Reader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
for i := 0; i < dataNums; i++ {
data = append(data, T(float64Reader.Value(i)))
}
@ -195,6 +307,65 @@ func ReadIntegerOrFloatData[T constraints.Integer | constraints.Float](pcr *Fiel
return data, nil
}
// ReadNullableIntegerOrFloatData reads a nullable numeric column, widening
// each element to T. It returns the values, a parallel validity slice built
// from the Arrow null bitmap, or (nil, nil, nil) when the column is exhausted.
func ReadNullableIntegerOrFloatData[T constraints.Integer | constraints.Float](pcr *FieldReader, count int64) (any, []bool, error) {
	chunked, err := pcr.columnReader.NextBatch(count)
	if err != nil {
		return nil, nil, err
	}
	values := make([]T, 0, count)
	valid := make([]bool, 0, count)
	for _, chunk := range chunked.Chunks() {
		n := chunk.Data().Len()
		// collect records the chunk's validity bitmap and appends all n
		// elements widened to T; values at null slots are placeholders that
		// consumers must ignore via the validity slice.
		collect := func(nullBitmap []byte, value func(int) T) {
			valid = append(valid, bytesToBoolArray(n, nullBitmap)...)
			for i := 0; i < n; i++ {
				values = append(values, value(i))
			}
		}
		switch chunk.DataType().ID() {
		case arrow.INT8:
			r := chunk.(*array.Int8)
			collect(r.NullBitmapBytes(), func(i int) T { return T(r.Value(i)) })
		case arrow.INT16:
			r := chunk.(*array.Int16)
			collect(r.NullBitmapBytes(), func(i int) T { return T(r.Value(i)) })
		case arrow.INT32:
			r := chunk.(*array.Int32)
			collect(r.NullBitmapBytes(), func(i int) T { return T(r.Value(i)) })
		case arrow.INT64:
			r := chunk.(*array.Int64)
			collect(r.NullBitmapBytes(), func(i int) T { return T(r.Value(i)) })
		case arrow.FLOAT32:
			r := chunk.(*array.Float32)
			collect(r.NullBitmapBytes(), func(i int) T { return T(r.Value(i)) })
		case arrow.FLOAT64:
			r := chunk.(*array.Float64)
			collect(r.NullBitmapBytes(), func(i int) T { return T(r.Value(i)) })
		default:
			return nil, nil, WrapTypeErr("integer|float", chunk.DataType().Name(), pcr.field)
		}
	}
	if len(values) == 0 {
		return nil, nil, nil
	}
	if len(values) != len(valid) {
		return nil, nil, merr.WrapErrParameterInvalid(len(values), len(valid), "length of data is not equal to length of valid_data")
	}
	return values, valid, nil
}
func ReadStringData(pcr *FieldReader, count int64) (any, error) {
chunked, err := pcr.columnReader.NextBatch(count)
if err != nil {
@ -204,6 +375,9 @@ func ReadStringData(pcr *FieldReader, count int64) (any, error) {
for _, chunk := range chunked.Chunks() {
dataNums := chunk.Data().Len()
stringReader, ok := chunk.(*array.String)
if stringReader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
if !ok {
return nil, WrapTypeErr("string", chunk.DataType().Name(), pcr.field)
}
@ -217,6 +391,37 @@ func ReadStringData(pcr *FieldReader, count int64) (any, error) {
return data, nil
}
// ReadNullableStringData reads a nullable string column. Null slots are
// represented by "" in the returned data; the parallel validity slice marks
// which entries are real. Returns (nil, nil, nil) when the column is exhausted.
func ReadNullableStringData(pcr *FieldReader, count int64) (any, []bool, error) {
	chunked, err := pcr.columnReader.NextBatch(count)
	if err != nil {
		return nil, nil, err
	}
	values := make([]string, 0, count)
	valid := make([]bool, 0, count)
	for _, chunk := range chunked.Chunks() {
		reader, ok := chunk.(*array.String)
		if !ok {
			return nil, nil, WrapTypeErr("string", chunk.DataType().Name(), pcr.field)
		}
		n := chunk.Data().Len()
		valid = append(valid, bytesToBoolArray(n, reader.NullBitmapBytes())...)
		for i := 0; i < n; i++ {
			if reader.IsNull(i) {
				// Placeholder for a null slot; validity slice carries the truth.
				values = append(values, "")
			} else {
				values = append(values, reader.ValueStr(i))
			}
		}
	}
	if len(values) == 0 {
		return nil, nil, nil
	}
	if len(values) != len(valid) {
		return nil, nil, merr.WrapErrParameterInvalid(len(values), len(valid), "length of data is not equal to length of valid_data")
	}
	return values, valid, nil
}
func ReadVarcharData(pcr *FieldReader, count int64) (any, error) {
chunked, err := pcr.columnReader.NextBatch(count)
if err != nil {
@ -230,6 +435,9 @@ func ReadVarcharData(pcr *FieldReader, count int64) (any, error) {
for _, chunk := range chunked.Chunks() {
dataNums := chunk.Data().Len()
stringReader, ok := chunk.(*array.String)
if stringReader.NullN() > 0 {
return nil, merr.WrapErrParameterInvalidMsg("not nullable, but has null value")
}
if !ok {
return nil, WrapTypeErr("string", chunk.DataType().Name(), pcr.field)
}
@ -246,6 +454,44 @@ func ReadVarcharData(pcr *FieldReader, count int64) (any, error) {
return data, nil
}
// ReadNullableVarcharData reads a nullable varchar column, enforcing the
// field's max_length on every non-null value. Null slots are stored as "" and
// flagged in the returned validity slice. Returns (nil, nil, nil) when the
// column is exhausted.
func ReadNullableVarcharData(pcr *FieldReader, count int64) (any, []bool, error) {
	chunked, err := pcr.columnReader.NextBatch(count)
	if err != nil {
		return nil, nil, err
	}
	maxLength, err := parameterutil.GetMaxLength(pcr.field)
	if err != nil {
		return nil, nil, err
	}
	values := make([]string, 0, count)
	valid := make([]bool, 0, count)
	for _, chunk := range chunked.Chunks() {
		reader, ok := chunk.(*array.String)
		if !ok {
			return nil, nil, WrapTypeErr("string", chunk.DataType().Name(), pcr.field)
		}
		n := chunk.Data().Len()
		valid = append(valid, bytesToBoolArray(n, reader.NullBitmapBytes())...)
		for i := 0; i < n; i++ {
			if reader.IsNull(i) {
				// Null slot: skip the length check, keep a placeholder.
				values = append(values, "")
				continue
			}
			if err := common.CheckVarcharLength(reader.Value(i), maxLength); err != nil {
				return nil, nil, err
			}
			values = append(values, reader.ValueStr(i))
		}
	}
	if len(values) == 0 {
		return nil, nil, nil
	}
	if len(values) != len(valid) {
		return nil, nil, merr.WrapErrParameterInvalid(len(values), len(valid), "length of data is not equal to length of valid_data")
	}
	return values, valid, nil
}
func ReadJSONData(pcr *FieldReader, count int64) (any, error) {
// JSON field read data from string array Parquet
data, err := ReadStringData(pcr, count)
@ -274,6 +520,38 @@ func ReadJSONData(pcr *FieldReader, count int64) (any, error) {
return byteArr, nil
}
// ReadNullableJSONData reads a nullable JSON column. JSON fields are stored as
// strings in Parquet; each non-null entry is validated as JSON (and, for
// dynamic fields, as a JSON object) before being returned as raw bytes. Null
// slots yield a nil byte slice plus a false entry in the validity slice.
func ReadNullableJSONData(pcr *FieldReader, count int64) (any, []bool, error) {
	// JSON field read data from string array Parquet
	strData, validData, err := ReadNullableStringData(pcr, count)
	if err != nil {
		return nil, nil, err
	}
	if strData == nil {
		return nil, nil, nil
	}
	strs := strData.([]string)
	byteArr := make([][]byte, 0, len(strs))
	for i, str := range strs {
		if !validData[i] {
			byteArr = append(byteArr, nil)
			continue
		}
		var parsed interface{}
		if err = json.Unmarshal([]byte(str), &parsed); err != nil {
			return nil, nil, err
		}
		if pcr.field.GetIsDynamic() {
			// Dynamic fields must be JSON objects, not bare scalars/arrays.
			var asMap map[string]interface{}
			if err = json.Unmarshal([]byte(str), &asMap); err != nil {
				return nil, nil, err
			}
		}
		byteArr = append(byteArr, []byte(str))
	}
	return byteArr, validData, nil
}
func ReadBinaryData(pcr *FieldReader, count int64) (any, error) {
dataType := pcr.field.GetDataType()
chunked, err := pcr.columnReader.NextBatch(count)
@ -398,6 +676,46 @@ func ReadBoolArrayData(pcr *FieldReader, count int64) (any, error) {
return data, nil
}
// ReadNullableBoolArrayData reads a nullable bool-array column from Parquet
// list chunks. It returns the rows as [][]bool plus a parallel validity slice;
// a row is reported null when its list offsets are equal (zero-length list).
// NOTE(review): start == end also matches a genuinely empty list, so an empty
// array is indistinguishable from null here — confirm this matches the
// intended import semantics.
func ReadNullableBoolArrayData(pcr *FieldReader, count int64) (any, []bool, error) {
	chunked, err := pcr.columnReader.NextBatch(count)
	if err != nil {
		return nil, nil, err
	}
	data := make([][]bool, 0, count)
	validData := make([]bool, 0, count)
	for _, chunk := range chunked.Chunks() {
		listReader, ok := chunk.(*array.List)
		if !ok {
			return nil, nil, WrapTypeErr("list", chunk.DataType().Name(), pcr.field)
		}
		boolReader, ok := listReader.ListValues().(*array.Boolean)
		if !ok {
			return nil, nil, WrapTypeErr("boolArray", chunk.DataType().Name(), pcr.field)
		}
		// offsets[i-1]..offsets[i] delimit the i-th row within the flat child array.
		offsets := listReader.Offsets()
		for i := 1; i < len(offsets); i++ {
			start, end := offsets[i-1], offsets[i]
			elementData := make([]bool, 0, end-start)
			for j := start; j < end; j++ {
				elementData = append(elementData, boolReader.Value(int(j)))
			}
			data = append(data, elementData)
			// Zero-length rows are flagged as null in the validity slice.
			elementDataValid := true
			if start == end {
				elementDataValid = false
			}
			validData = append(validData, elementDataValid)
		}
	}
	if len(data) == 0 {
		return nil, nil, nil
	}
	if len(data) != len(validData) {
		return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
	}
	return data, validData, nil
}
func ReadIntegerOrFloatArrayData[T constraints.Integer | constraints.Float](pcr *FieldReader, count int64) (any, error) {
chunked, err := pcr.columnReader.NextBatch(count)
if err != nil {
@ -469,6 +787,86 @@ func ReadIntegerOrFloatArrayData[T constraints.Integer | constraints.Float](pcr
return data, nil
}
// ReadNullableIntegerOrFloatArrayData reads a nullable list column whose
// elements are integers or floats, widening every element to T. It returns the
// rows as [][]T plus a parallel validity slice; a row with equal list offsets
// (zero-length) is treated as null.
// NOTE(review): a genuinely empty list also has start == end and is therefore
// indistinguishable from null here.
// For vector-typed fields the offsets are first checked against the field's
// dimension so that every row holds exactly dim elements.
func ReadNullableIntegerOrFloatArrayData[T constraints.Integer | constraints.Float](pcr *FieldReader, count int64) (any, []bool, error) {
	chunked, err := pcr.columnReader.NextBatch(count)
	if err != nil {
		return nil, nil, err
	}
	data := make([][]T, 0, count)
	validData := make([]bool, 0, count)
	// getDataFunc slices the flat child array into per-row slices using the
	// list offsets, recording each row's validity as it goes.
	getDataFunc := func(offsets []int32, getValue func(int) T) {
		for i := 1; i < len(offsets); i++ {
			start, end := offsets[i-1], offsets[i]
			elementData := make([]T, 0, end-start)
			for j := start; j < end; j++ {
				elementData = append(elementData, getValue(int(j)))
			}
			data = append(data, elementData)
			elementDataValid := true
			if start == end {
				elementDataValid = false
			}
			validData = append(validData, elementDataValid)
		}
	}
	for _, chunk := range chunked.Chunks() {
		listReader, ok := chunk.(*array.List)
		if !ok {
			return nil, nil, WrapTypeErr("list", chunk.DataType().Name(), pcr.field)
		}
		offsets := listReader.Offsets()
		dataType := pcr.field.GetDataType()
		if typeutil.IsVectorType(dataType) {
			if err = checkVectorAligned(offsets, pcr.dim, dataType); err != nil {
				return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("length of vector is not aligned: %s, data type: %s", err.Error(), dataType.String()))
			}
		}
		valueReader := listReader.ListValues()
		// Dispatch on the physical Arrow element type; every branch widens to T.
		switch valueReader.DataType().ID() {
		case arrow.INT8:
			int8Reader := valueReader.(*array.Int8)
			getDataFunc(offsets, func(i int) T {
				return T(int8Reader.Value(i))
			})
		case arrow.INT16:
			int16Reader := valueReader.(*array.Int16)
			getDataFunc(offsets, func(i int) T {
				return T(int16Reader.Value(i))
			})
		case arrow.INT32:
			int32Reader := valueReader.(*array.Int32)
			getDataFunc(offsets, func(i int) T {
				return T(int32Reader.Value(i))
			})
		case arrow.INT64:
			int64Reader := valueReader.(*array.Int64)
			getDataFunc(offsets, func(i int) T {
				return T(int64Reader.Value(i))
			})
		case arrow.FLOAT32:
			float32Reader := valueReader.(*array.Float32)
			getDataFunc(offsets, func(i int) T {
				return T(float32Reader.Value(i))
			})
		case arrow.FLOAT64:
			float64Reader := valueReader.(*array.Float64)
			getDataFunc(offsets, func(i int) T {
				return T(float64Reader.Value(i))
			})
		default:
			return nil, nil, WrapTypeErr("integerArray|floatArray", chunk.DataType().Name(), pcr.field)
		}
	}
	if len(data) == 0 {
		return nil, nil, nil
	}
	if len(data) != len(validData) {
		return nil, nil, merr.WrapErrParameterInvalid(len(data), len(validData), "length of data is not equal to length of valid_data")
	}
	return data, validData, nil
}
func ReadStringArrayData(pcr *FieldReader, count int64) (any, error) {
chunked, err := pcr.columnReader.NextBatch(count)
if err != nil {
@ -500,6 +898,46 @@ func ReadStringArrayData(pcr *FieldReader, count int64) (any, error) {
return data, nil
}
// ReadNullableStringArrayData reads a nullable string-array column from
// Parquet list chunks. It returns the rows as [][]string plus a parallel
// validity slice; a row with equal list offsets (zero-length) is flagged null.
func ReadNullableStringArrayData(pcr *FieldReader, count int64) (any, []bool, error) {
	chunked, err := pcr.columnReader.NextBatch(count)
	if err != nil {
		return nil, nil, err
	}
	rows := make([][]string, 0, count)
	valid := make([]bool, 0, count)
	for _, chunk := range chunked.Chunks() {
		listReader, ok := chunk.(*array.List)
		if !ok {
			return nil, nil, WrapTypeErr("list", chunk.DataType().Name(), pcr.field)
		}
		strReader, ok := listReader.ListValues().(*array.String)
		if !ok {
			return nil, nil, WrapTypeErr("stringArray", chunk.DataType().Name(), pcr.field)
		}
		// offsets[i-1]..offsets[i] delimit row i within the flat child array.
		offsets := listReader.Offsets()
		for i := 1; i < len(offsets); i++ {
			begin, stop := offsets[i-1], offsets[i]
			row := make([]string, 0, stop-begin)
			for j := begin; j < stop; j++ {
				row = append(row, strReader.Value(int(j)))
			}
			rows = append(rows, row)
			valid = append(valid, begin != stop)
		}
	}
	if len(rows) == 0 {
		return nil, nil, nil
	}
	if len(rows) != len(valid) {
		return nil, nil, merr.WrapErrParameterInvalid(len(rows), len(valid), "length of data is not equal to length of valid_data")
	}
	return rows, valid, nil
}
func ReadArrayData(pcr *FieldReader, count int64) (any, error) {
data := make([]*schemapb.ScalarField, 0, count)
maxCapacity, err := parameterutil.GetMaxCapacity(pcr.field)
@ -674,3 +1112,185 @@ func ReadArrayData(pcr *FieldReader, count int64) (any, error) {
}
return data, nil
}
// ReadNullableArrayData reads a nullable Array field from Parquet list
// columns, converting each row into a schemapb.ScalarField of the field's
// element type and enforcing the field's max_capacity on every row. The
// returned []bool marks which rows are valid (non-null); it returns
// (nil, nil, nil) when the column is exhausted.
func ReadNullableArrayData(pcr *FieldReader, count int64) (any, []bool, error) {
	maxCapacity, err := parameterutil.GetMaxCapacity(pcr.field)
	if err != nil {
		return nil, nil, err
	}
	data := make([]*schemapb.ScalarField, 0, count)
	// checkCap guards each row against the field's max_capacity limit.
	checkCap := func(rowLen int) error {
		return common.CheckArrayCapacity(rowLen, maxCapacity)
	}
	switch elementType := pcr.field.GetElementType(); elementType {
	case schemapb.DataType_Bool:
		rawData, validData, err := ReadNullableBoolArrayData(pcr, count)
		if err != nil {
			return nil, nil, err
		}
		if rawData == nil {
			return nil, nil, nil
		}
		for _, row := range rawData.([][]bool) {
			if err := checkCap(len(row)); err != nil {
				return nil, nil, err
			}
			data = append(data, &schemapb.ScalarField{
				Data: &schemapb.ScalarField_BoolData{
					BoolData: &schemapb.BoolArray{Data: row},
				},
			})
		}
		return data, validData, nil
	// Int8, Int16 and Int32 elements are all read through int32 and share the
	// IntData wire representation.
	case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
		rawData, validData, err := ReadNullableIntegerOrFloatArrayData[int32](pcr, count)
		if err != nil {
			return nil, nil, err
		}
		if rawData == nil {
			return nil, nil, nil
		}
		for _, row := range rawData.([][]int32) {
			if err := checkCap(len(row)); err != nil {
				return nil, nil, err
			}
			data = append(data, &schemapb.ScalarField{
				Data: &schemapb.ScalarField_IntData{
					IntData: &schemapb.IntArray{Data: row},
				},
			})
		}
		return data, validData, nil
	case schemapb.DataType_Int64:
		rawData, validData, err := ReadNullableIntegerOrFloatArrayData[int64](pcr, count)
		if err != nil {
			return nil, nil, err
		}
		if rawData == nil {
			return nil, nil, nil
		}
		for _, row := range rawData.([][]int64) {
			if err := checkCap(len(row)); err != nil {
				return nil, nil, err
			}
			data = append(data, &schemapb.ScalarField{
				Data: &schemapb.ScalarField_LongData{
					LongData: &schemapb.LongArray{Data: row},
				},
			})
		}
		return data, validData, nil
	case schemapb.DataType_Float:
		rawData, validData, err := ReadNullableIntegerOrFloatArrayData[float32](pcr, count)
		if err != nil {
			return nil, nil, err
		}
		if rawData == nil {
			return nil, nil, nil
		}
		for _, row := range rawData.([][]float32) {
			if err := checkCap(len(row)); err != nil {
				return nil, nil, err
			}
			data = append(data, &schemapb.ScalarField{
				Data: &schemapb.ScalarField_FloatData{
					FloatData: &schemapb.FloatArray{Data: row},
				},
			})
		}
		return data, validData, nil
	case schemapb.DataType_Double:
		rawData, validData, err := ReadNullableIntegerOrFloatArrayData[float64](pcr, count)
		if err != nil {
			return nil, nil, err
		}
		if rawData == nil {
			return nil, nil, nil
		}
		for _, row := range rawData.([][]float64) {
			if err := checkCap(len(row)); err != nil {
				return nil, nil, err
			}
			data = append(data, &schemapb.ScalarField{
				Data: &schemapb.ScalarField_DoubleData{
					DoubleData: &schemapb.DoubleArray{Data: row},
				},
			})
		}
		return data, validData, nil
	case schemapb.DataType_VarChar, schemapb.DataType_String:
		rawData, validData, err := ReadNullableStringArrayData(pcr, count)
		if err != nil {
			return nil, nil, err
		}
		if rawData == nil {
			return nil, nil, nil
		}
		for _, row := range rawData.([][]string) {
			if err := checkCap(len(row)); err != nil {
				return nil, nil, err
			}
			data = append(data, &schemapb.ScalarField{
				Data: &schemapb.ScalarField_StringData{
					StringData: &schemapb.StringArray{Data: row},
				},
			})
		}
		return data, validData, nil
	default:
		return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("unsupported data type '%s' for array field '%s'",
			elementType.String(), pcr.field.GetName()))
	}
}

View File

@ -99,14 +99,14 @@ func (r *reader) Read() (*storage.InsertData, error) {
OUTER:
for {
for fieldID, cr := range r.frs {
data, err := cr.Next(r.count)
data, validData, err := cr.Next(r.count)
if err != nil {
return nil, err
}
if data == nil {
break OUTER
}
err = insertData.Data[fieldID].AppendRows(data)
err = insertData.Data[fieldID].AppendRows(data, validData)
if err != nil {
return nil, err
}

View File

@ -98,7 +98,7 @@ func writeParquet(w io.Writer, schema *schemapb.CollectionSchema, numRows int) (
return insertData, nil
}
func (s *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.DataType) {
func (s *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.DataType, nullable bool) {
schema := &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
@ -139,6 +139,7 @@ func (s *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.DataType
Value: "256",
},
},
Nullable: nullable,
},
},
}
@ -166,7 +167,7 @@ func (s *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.DataType
for i := 0; i < expectRows; i++ {
expect := expectInsertData.Data[fieldID].GetRow(i + offsetBegin)
actual := data.GetRow(i)
if fieldDataType == schemapb.DataType_Array {
if fieldDataType == schemapb.DataType_Array && expect != nil {
switch elementType {
case schemapb.DataType_Bool:
actualArray := actual.(*schemapb.ScalarField).GetBoolData().GetData()
@ -264,45 +265,66 @@ func (s *ReaderSuite) failRun(dt schemapb.DataType, isDynamic bool) {
}
func (s *ReaderSuite) TestReadScalarFields() {
s.run(schemapb.DataType_Bool, schemapb.DataType_None)
s.run(schemapb.DataType_Int8, schemapb.DataType_None)
s.run(schemapb.DataType_Int16, schemapb.DataType_None)
s.run(schemapb.DataType_Int32, schemapb.DataType_None)
s.run(schemapb.DataType_Int64, schemapb.DataType_None)
s.run(schemapb.DataType_Float, schemapb.DataType_None)
s.run(schemapb.DataType_Double, schemapb.DataType_None)
s.run(schemapb.DataType_String, schemapb.DataType_None)
s.run(schemapb.DataType_VarChar, schemapb.DataType_None)
s.run(schemapb.DataType_JSON, schemapb.DataType_None)
s.run(schemapb.DataType_Bool, schemapb.DataType_None, false)
s.run(schemapb.DataType_Int8, schemapb.DataType_None, false)
s.run(schemapb.DataType_Int16, schemapb.DataType_None, false)
s.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
s.run(schemapb.DataType_Int64, schemapb.DataType_None, false)
s.run(schemapb.DataType_Float, schemapb.DataType_None, false)
s.run(schemapb.DataType_Double, schemapb.DataType_None, false)
s.run(schemapb.DataType_String, schemapb.DataType_None, false)
s.run(schemapb.DataType_VarChar, schemapb.DataType_None, false)
s.run(schemapb.DataType_JSON, schemapb.DataType_None, false)
s.run(schemapb.DataType_Array, schemapb.DataType_Bool)
s.run(schemapb.DataType_Array, schemapb.DataType_Int8)
s.run(schemapb.DataType_Array, schemapb.DataType_Int16)
s.run(schemapb.DataType_Array, schemapb.DataType_Int32)
s.run(schemapb.DataType_Array, schemapb.DataType_Int64)
s.run(schemapb.DataType_Array, schemapb.DataType_Float)
s.run(schemapb.DataType_Array, schemapb.DataType_Double)
s.run(schemapb.DataType_Array, schemapb.DataType_String)
s.run(schemapb.DataType_Array, schemapb.DataType_Bool, false)
s.run(schemapb.DataType_Array, schemapb.DataType_Int8, false)
s.run(schemapb.DataType_Array, schemapb.DataType_Int16, false)
s.run(schemapb.DataType_Array, schemapb.DataType_Int32, false)
s.run(schemapb.DataType_Array, schemapb.DataType_Int64, false)
s.run(schemapb.DataType_Array, schemapb.DataType_Float, false)
s.run(schemapb.DataType_Array, schemapb.DataType_Double, false)
s.run(schemapb.DataType_Array, schemapb.DataType_String, false)
s.run(schemapb.DataType_Bool, schemapb.DataType_None, true)
s.run(schemapb.DataType_Int8, schemapb.DataType_None, true)
s.run(schemapb.DataType_Int16, schemapb.DataType_None, true)
s.run(schemapb.DataType_Int32, schemapb.DataType_None, true)
s.run(schemapb.DataType_Int64, schemapb.DataType_None, true)
s.run(schemapb.DataType_Float, schemapb.DataType_None, true)
s.run(schemapb.DataType_Double, schemapb.DataType_None, true)
s.run(schemapb.DataType_String, schemapb.DataType_None, true)
s.run(schemapb.DataType_VarChar, schemapb.DataType_None, true)
s.run(schemapb.DataType_JSON, schemapb.DataType_None, true)
s.run(schemapb.DataType_Array, schemapb.DataType_Bool, true)
s.run(schemapb.DataType_Array, schemapb.DataType_Int8, true)
s.run(schemapb.DataType_Array, schemapb.DataType_Int16, true)
s.run(schemapb.DataType_Array, schemapb.DataType_Int32, true)
s.run(schemapb.DataType_Array, schemapb.DataType_Int64, true)
s.run(schemapb.DataType_Array, schemapb.DataType_Float, true)
s.run(schemapb.DataType_Array, schemapb.DataType_Double, true)
s.run(schemapb.DataType_Array, schemapb.DataType_String, true)
s.failRun(schemapb.DataType_JSON, true)
}
// TestStringPK exercises the reader with a VarChar primary key over an
// Int32 scalar field: once with the nullable flag defaulted, then with it
// explicitly disabled and explicitly enabled.
func (s *ReaderSuite) TestStringPK() {
	s.pkDataType = schemapb.DataType_VarChar
	s.run(schemapb.DataType_Int32, schemapb.DataType_None)
	for _, nullable := range []bool{false, true} {
		s.run(schemapb.DataType_Int32, schemapb.DataType_None, nullable)
	}
}
// TestVector runs the reader against every supported vector field type,
// first with the nullable flag defaulted and then explicitly disabled.
func (s *ReaderSuite) TestVector() {
	vectorTypes := []schemapb.DataType{
		schemapb.DataType_BinaryVector,
		schemapb.DataType_FloatVector,
		schemapb.DataType_Float16Vector,
		schemapb.DataType_BFloat16Vector,
		schemapb.DataType_SparseFloatVector,
	}
	for _, vecType := range vectorTypes {
		s.vecDataType = vecType
		s.run(schemapb.DataType_Int32, schemapb.DataType_None)
		s.run(schemapb.DataType_Int32, schemapb.DataType_None, false)
	}
}
func TestUtil(t *testing.T) {

View File

@ -261,3 +261,20 @@ func estimateReadCountPerBatch(bufferSize int, schema *schemapb.CollectionSchema
}
return int64(bufferSize) / int64(sizePerRecord), nil
}
// todo(smellthemoon): use byte to store valid_data
//
// bytesToBoolArray unpacks the first length bits of src into a []bool,
// least-significant bit first within each byte: bit i of the stream is
// src[i/8] & (1 << (i % 8)). It panics (index out of range) when src
// holds fewer than length bits, matching the original table-based lookup.
func bytesToBoolArray(length int, src []byte) []bool {
	bools := make([]bool, length)
	for i := range bools {
		// Shift instead of the BitMask table; the int-typed casts in the
		// original (uint(i)/8, byte(i)%8) were equivalent but obscure.
		bools[i] = src[i/8]&(1<<(i%8)) != 0
	}
	return bools
}
// Lookup tables for manipulating a packed LSB-first bitmap (bit i of the
// stream lives in byte i/8 at bit position i%8):
//   - BitMask[k] has only bit k set, so b&BitMask[k] tests bit k.
//   - FlippedBitMask[k] is the complement of BitMask[k] (255-BitMask[k]),
//     so b&FlippedBitMask[k] clears bit k.
var (
	BitMask = [8]byte{1, 2, 4, 8, 16, 32, 64, 128}
	FlippedBitMask = [8]byte{254, 253, 251, 247, 239, 223, 191, 127}
)

View File

@ -113,33 +113,19 @@ func CreateInsertData(schema *schemapb.CollectionSchema, rows int) (*storage.Ins
}
switch f.GetDataType() {
case schemapb.DataType_Bool:
insertData.Data[f.FieldID] = &storage.BoolFieldData{
Data: testutils.GenerateBoolArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateBoolArray(rows))
case schemapb.DataType_Int8:
insertData.Data[f.FieldID] = &storage.Int8FieldData{
Data: testutils.GenerateInt8Array(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateInt8Array(rows))
case schemapb.DataType_Int16:
insertData.Data[f.FieldID] = &storage.Int16FieldData{
Data: testutils.GenerateInt16Array(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateInt16Array(rows))
case schemapb.DataType_Int32:
insertData.Data[f.FieldID] = &storage.Int32FieldData{
Data: testutils.GenerateInt32Array(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateInt32Array(rows))
case schemapb.DataType_Int64:
insertData.Data[f.FieldID] = &storage.Int64FieldData{
Data: testutils.GenerateInt64Array(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateInt64Array(rows))
case schemapb.DataType_Float:
insertData.Data[f.FieldID] = &storage.FloatFieldData{
Data: testutils.GenerateFloat32Array(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateFloat32Array(rows))
case schemapb.DataType_Double:
insertData.Data[f.FieldID] = &storage.DoubleFieldData{
Data: testutils.GenerateFloat64Array(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateFloat64Array(rows))
case schemapb.DataType_BinaryVector:
dim, err := typeutil.GetDim(f)
if err != nil {
@ -185,43 +171,30 @@ func CreateInsertData(schema *schemapb.CollectionSchema, rows int) (*storage.Ins
},
}
case schemapb.DataType_String, schemapb.DataType_VarChar:
insertData.Data[f.FieldID] = &storage.StringFieldData{
Data: testutils.GenerateStringArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateStringArray(rows))
case schemapb.DataType_JSON:
insertData.Data[f.FieldID] = &storage.JSONFieldData{
Data: testutils.GenerateJSONArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateJSONArray(rows))
case schemapb.DataType_Array:
switch f.GetElementType() {
case schemapb.DataType_Bool:
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfBoolArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateArrayOfBoolArray(rows))
case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfIntArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateArrayOfIntArray(rows))
case schemapb.DataType_Int64:
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfLongArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateArrayOfLongArray(rows))
case schemapb.DataType_Float:
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfFloatArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateArrayOfFloatArray(rows))
case schemapb.DataType_Double:
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfDoubleArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateArrayOfDoubleArray(rows))
case schemapb.DataType_String, schemapb.DataType_VarChar:
insertData.Data[f.FieldID] = &storage.ArrayFieldData{
Data: testutils.GenerateArrayOfStringArray(rows),
}
insertData.Data[f.FieldID].AppendDataRows(testutils.GenerateArrayOfStringArray(rows))
}
default:
panic(fmt.Sprintf("unsupported data type: %s", f.GetDataType().String()))
}
if f.GetNullable() {
insertData.Data[f.FieldID].AppendValidDataRows(testutils.GenerateBoolArray(rows))
}
}
return insertData, nil
}
@ -240,42 +213,51 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
case schemapb.DataType_Bool:
builder := array.NewBooleanBuilder(mem)
boolData := insertData.Data[fieldID].(*storage.BoolFieldData).Data
builder.AppendValues(boolData, nil)
validData := insertData.Data[fieldID].(*storage.BoolFieldData).ValidData
builder.AppendValues(boolData, validData)
columns = append(columns, builder.NewBooleanArray())
case schemapb.DataType_Int8:
builder := array.NewInt8Builder(mem)
int8Data := insertData.Data[fieldID].(*storage.Int8FieldData).Data
builder.AppendValues(int8Data, nil)
validData := insertData.Data[fieldID].(*storage.Int8FieldData).ValidData
builder.AppendValues(int8Data, validData)
columns = append(columns, builder.NewInt8Array())
case schemapb.DataType_Int16:
builder := array.NewInt16Builder(mem)
int16Data := insertData.Data[fieldID].(*storage.Int16FieldData).Data
builder.AppendValues(int16Data, nil)
validData := insertData.Data[fieldID].(*storage.Int16FieldData).ValidData
builder.AppendValues(int16Data, validData)
columns = append(columns, builder.NewInt16Array())
case schemapb.DataType_Int32:
builder := array.NewInt32Builder(mem)
int32Data := insertData.Data[fieldID].(*storage.Int32FieldData).Data
builder.AppendValues(int32Data, nil)
validData := insertData.Data[fieldID].(*storage.Int32FieldData).ValidData
builder.AppendValues(int32Data, validData)
columns = append(columns, builder.NewInt32Array())
case schemapb.DataType_Int64:
builder := array.NewInt64Builder(mem)
int64Data := insertData.Data[fieldID].(*storage.Int64FieldData).Data
builder.AppendValues(int64Data, nil)
validData := insertData.Data[fieldID].(*storage.Int64FieldData).ValidData
builder.AppendValues(int64Data, validData)
columns = append(columns, builder.NewInt64Array())
case schemapb.DataType_Float:
builder := array.NewFloat32Builder(mem)
floatData := insertData.Data[fieldID].(*storage.FloatFieldData).Data
builder.AppendValues(floatData, nil)
validData := insertData.Data[fieldID].(*storage.FloatFieldData).ValidData
builder.AppendValues(floatData, validData)
columns = append(columns, builder.NewFloat32Array())
case schemapb.DataType_Double:
builder := array.NewFloat64Builder(mem)
doubleData := insertData.Data[fieldID].(*storage.DoubleFieldData).Data
builder.AppendValues(doubleData, nil)
validData := insertData.Data[fieldID].(*storage.DoubleFieldData).ValidData
builder.AppendValues(doubleData, validData)
columns = append(columns, builder.NewFloat64Array())
case schemapb.DataType_String, schemapb.DataType_VarChar:
builder := array.NewStringBuilder(mem)
stringData := insertData.Data[fieldID].(*storage.StringFieldData).Data
builder.AppendValues(stringData, nil)
validData := insertData.Data[fieldID].(*storage.StringFieldData).ValidData
builder.AppendValues(stringData, validData)
columns = append(columns, builder.NewStringArray())
case schemapb.DataType_BinaryVector:
builder := array.NewListBuilder(mem, &arrow.Uint8Type{})
@ -358,12 +340,14 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
case schemapb.DataType_JSON:
builder := array.NewStringBuilder(mem)
jsonData := insertData.Data[fieldID].(*storage.JSONFieldData).Data
validData := insertData.Data[fieldID].(*storage.JSONFieldData).ValidData
builder.AppendValues(lo.Map(jsonData, func(bs []byte, _ int) string {
return string(bs)
}), nil)
}), validData)
columns = append(columns, builder.NewStringArray())
case schemapb.DataType_Array:
data := insertData.Data[fieldID].(*storage.ArrayFieldData).Data
validData := insertData.Data[fieldID].(*storage.ArrayFieldData).ValidData
rows := len(data)
offsets := make([]int32, 0, rows)
valid := make([]bool, 0, rows)
@ -374,12 +358,16 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
builder := array.NewListBuilder(mem, &arrow.BooleanType{})
valueBuilder := builder.ValueBuilder().(*array.BooleanBuilder)
for i := 0; i < rows; i++ {
boolData := data[i].Data.(*schemapb.ScalarField_BoolData).BoolData.GetData()
valueBuilder.AppendValues(boolData, nil)
offsets = append(offsets, currOffset)
valid = append(valid, true)
currOffset = currOffset + int32(len(boolData))
if field.GetNullable() && !validData[i] {
offsets = append(offsets, currOffset)
valid = append(valid, false)
} else {
boolData := data[i].Data.(*schemapb.ScalarField_BoolData).BoolData.GetData()
valueBuilder.AppendValues(boolData, nil)
offsets = append(offsets, currOffset)
currOffset = currOffset + int32(len(boolData))
valid = append(valid, true)
}
}
builder.AppendValues(offsets, valid)
columns = append(columns, builder.NewListArray())
@ -387,16 +375,20 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
builder := array.NewListBuilder(mem, &arrow.Int8Type{})
valueBuilder := builder.ValueBuilder().(*array.Int8Builder)
for i := 0; i < rows; i++ {
intData := data[i].Data.(*schemapb.ScalarField_IntData).IntData.GetData()
int8Data := make([]int8, 0)
for j := 0; j < len(intData); j++ {
int8Data = append(int8Data, int8(intData[j]))
if field.GetNullable() && !validData[i] {
offsets = append(offsets, currOffset)
valid = append(valid, false)
} else {
intData := data[i].Data.(*schemapb.ScalarField_IntData).IntData.GetData()
int8Data := make([]int8, 0)
for j := 0; j < len(intData); j++ {
int8Data = append(int8Data, int8(intData[j]))
}
valueBuilder.AppendValues(int8Data, nil)
offsets = append(offsets, currOffset)
currOffset = currOffset + int32(len(int8Data))
valid = append(valid, true)
}
valueBuilder.AppendValues(int8Data, nil)
offsets = append(offsets, currOffset)
valid = append(valid, true)
currOffset = currOffset + int32(len(int8Data))
}
builder.AppendValues(offsets, valid)
columns = append(columns, builder.NewListArray())
@ -404,16 +396,20 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
builder := array.NewListBuilder(mem, &arrow.Int16Type{})
valueBuilder := builder.ValueBuilder().(*array.Int16Builder)
for i := 0; i < rows; i++ {
intData := data[i].Data.(*schemapb.ScalarField_IntData).IntData.GetData()
int16Data := make([]int16, 0)
for j := 0; j < len(intData); j++ {
int16Data = append(int16Data, int16(intData[j]))
if field.GetNullable() && !validData[i] {
offsets = append(offsets, currOffset)
valid = append(valid, false)
} else {
intData := data[i].Data.(*schemapb.ScalarField_IntData).IntData.GetData()
int16Data := make([]int16, 0)
for j := 0; j < len(intData); j++ {
int16Data = append(int16Data, int16(intData[j]))
}
valueBuilder.AppendValues(int16Data, nil)
offsets = append(offsets, currOffset)
currOffset = currOffset + int32(len(int16Data))
valid = append(valid, true)
}
valueBuilder.AppendValues(int16Data, nil)
offsets = append(offsets, currOffset)
valid = append(valid, true)
currOffset = currOffset + int32(len(int16Data))
}
builder.AppendValues(offsets, valid)
columns = append(columns, builder.NewListArray())
@ -421,12 +417,16 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
builder := array.NewListBuilder(mem, &arrow.Int32Type{})
valueBuilder := builder.ValueBuilder().(*array.Int32Builder)
for i := 0; i < rows; i++ {
intData := data[i].Data.(*schemapb.ScalarField_IntData).IntData.GetData()
valueBuilder.AppendValues(intData, nil)
offsets = append(offsets, currOffset)
valid = append(valid, true)
currOffset = currOffset + int32(len(intData))
if field.GetNullable() && !validData[i] {
offsets = append(offsets, currOffset)
valid = append(valid, false)
} else {
intData := data[i].Data.(*schemapb.ScalarField_IntData).IntData.GetData()
valueBuilder.AppendValues(intData, nil)
offsets = append(offsets, currOffset)
currOffset = currOffset + int32(len(intData))
valid = append(valid, true)
}
}
builder.AppendValues(offsets, valid)
columns = append(columns, builder.NewListArray())
@ -434,12 +434,16 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
builder := array.NewListBuilder(mem, &arrow.Int64Type{})
valueBuilder := builder.ValueBuilder().(*array.Int64Builder)
for i := 0; i < rows; i++ {
longData := data[i].Data.(*schemapb.ScalarField_LongData).LongData.GetData()
valueBuilder.AppendValues(longData, nil)
offsets = append(offsets, currOffset)
valid = append(valid, true)
currOffset = currOffset + int32(len(longData))
if field.GetNullable() && !validData[i] {
offsets = append(offsets, currOffset)
valid = append(valid, false)
} else {
longData := data[i].Data.(*schemapb.ScalarField_LongData).LongData.GetData()
valueBuilder.AppendValues(longData, nil)
offsets = append(offsets, currOffset)
currOffset = currOffset + int32(len(longData))
valid = append(valid, true)
}
}
builder.AppendValues(offsets, valid)
columns = append(columns, builder.NewListArray())
@ -447,12 +451,16 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
builder := array.NewListBuilder(mem, &arrow.Float32Type{})
valueBuilder := builder.ValueBuilder().(*array.Float32Builder)
for i := 0; i < rows; i++ {
floatData := data[i].Data.(*schemapb.ScalarField_FloatData).FloatData.GetData()
valueBuilder.AppendValues(floatData, nil)
offsets = append(offsets, currOffset)
valid = append(valid, true)
currOffset = currOffset + int32(len(floatData))
if field.GetNullable() && !validData[i] {
offsets = append(offsets, currOffset)
valid = append(valid, false)
} else {
floatData := data[i].Data.(*schemapb.ScalarField_FloatData).FloatData.GetData()
valueBuilder.AppendValues(floatData, nil)
offsets = append(offsets, currOffset)
currOffset = currOffset + int32(len(floatData))
valid = append(valid, true)
}
}
builder.AppendValues(offsets, valid)
columns = append(columns, builder.NewListArray())
@ -460,12 +468,16 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
builder := array.NewListBuilder(mem, &arrow.Float64Type{})
valueBuilder := builder.ValueBuilder().(*array.Float64Builder)
for i := 0; i < rows; i++ {
doubleData := data[i].Data.(*schemapb.ScalarField_DoubleData).DoubleData.GetData()
valueBuilder.AppendValues(doubleData, nil)
offsets = append(offsets, currOffset)
valid = append(valid, true)
currOffset = currOffset + int32(len(doubleData))
if field.GetNullable() && !validData[i] {
offsets = append(offsets, currOffset)
valid = append(valid, false)
} else {
doubleData := data[i].Data.(*schemapb.ScalarField_DoubleData).DoubleData.GetData()
valueBuilder.AppendValues(doubleData, nil)
offsets = append(offsets, currOffset)
currOffset = currOffset + int32(len(doubleData))
valid = append(valid, true)
}
}
builder.AppendValues(offsets, valid)
columns = append(columns, builder.NewListArray())
@ -473,12 +485,16 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser
builder := array.NewListBuilder(mem, &arrow.StringType{})
valueBuilder := builder.ValueBuilder().(*array.StringBuilder)
for i := 0; i < rows; i++ {
stringData := data[i].Data.(*schemapb.ScalarField_StringData).StringData.GetData()
valueBuilder.AppendValues(stringData, nil)
offsets = append(offsets, currOffset)
valid = append(valid, true)
currOffset = currOffset + int32(len(stringData))
if field.GetNullable() && !validData[i] {
offsets = append(offsets, currOffset)
valid = append(valid, false)
} else {
stringData := data[i].Data.(*schemapb.ScalarField_StringData).StringData.GetData()
valueBuilder.AppendValues(stringData, nil)
offsets = append(offsets, currOffset)
currOffset = currOffset + int32(len(stringData))
valid = append(valid, true)
}
}
builder.AppendValues(offsets, valid)
columns = append(columns, builder.NewListArray())
@ -504,6 +520,10 @@ func CreateInsertDataRowsForJSON(schema *schemapb.CollectionSchema, insertData *
if field.GetAutoID() {
continue
}
if v.GetRow(i) == nil {
data[fieldID] = nil
continue
}
switch dataType {
case schemapb.DataType_Array:
switch elemType {

View File

@ -156,7 +156,7 @@ func (s *BulkInsertSuite) TestImportWithPartitionKey() {
// query partition key, TermExpr
queryNum := 10
partitionKeyData := insertData.Data[int64(102)].GetRows().([]string)
partitionKeyData := insertData.Data[int64(102)].GetDataRows().([]string)
queryData := partitionKeyData[:queryNum]
strs := lo.Map(queryData, func(str string, _ int) string {
return fmt.Sprintf("\"%s\"", str)

View File

@ -125,7 +125,7 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche
dType := field.GetDataType()
switch dType {
case schemapb.DataType_BinaryVector:
rows := fieldData.GetRows().([]byte)
rows := fieldData.GetDataRows().([]byte)
if dim != fieldData.(*storage.BinaryVectorFieldData).Dim {
panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.BinaryVectorFieldData).Dim))
}
@ -137,7 +137,7 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche
}
data = chunkedRows
case schemapb.DataType_FloatVector:
rows := fieldData.GetRows().([]float32)
rows := fieldData.GetDataRows().([]float32)
if dim != fieldData.(*storage.FloatVectorFieldData).Dim {
panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.FloatVectorFieldData).Dim))
}
@ -148,7 +148,7 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche
}
data = chunkedRows
case schemapb.DataType_Float16Vector:
rows := insertData.Data[fieldID].GetRows().([]byte)
rows := insertData.Data[fieldID].GetDataRows().([]byte)
if dim != fieldData.(*storage.Float16VectorFieldData).Dim {
panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.Float16VectorFieldData).Dim))
}
@ -160,7 +160,7 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche
}
data = chunkedRows
case schemapb.DataType_BFloat16Vector:
rows := insertData.Data[fieldID].GetRows().([]byte)
rows := insertData.Data[fieldID].GetDataRows().([]byte)
if dim != fieldData.(*storage.BFloat16VectorFieldData).Dim {
panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.BFloat16VectorFieldData).Dim))
}
@ -174,7 +174,7 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche
case schemapb.DataType_SparseFloatVector:
data = insertData.Data[fieldID].(*storage.SparseFloatVectorFieldData).GetContents()
default:
data = insertData.Data[fieldID].GetRows()
data = insertData.Data[fieldID].GetDataRows()
}
err := writeFn(path, data)