enhance: support null in go payload (#32296)

#31728

---------

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>
smellthemoon 2024-06-19 17:08:00 +08:00 committed by GitHub
35 changed files with 3048 additions and 1195 deletions


@ -34,7 +34,7 @@ GetFixPartSize(DescriptorEventData& data) {
sizeof(data.fix_part.segment_id) + sizeof(data.fix_part.field_id) +
sizeof(data.fix_part.start_timestamp) +
sizeof(data.fix_part.end_timestamp) +
sizeof(data.fix_part.data_type);
sizeof(data.fix_part.data_type) + sizeof(data.fix_part.nullable);
}
int
GetFixPartSize(BaseEventData& data) {
@ -107,6 +107,8 @@ DescriptorEventDataFixPart::DescriptorEventDataFixPart(BinlogReaderPtr reader) {
assert(ast.ok());
ast = reader->Read(sizeof(field_id), &field_id);
assert(ast.ok());
ast = reader->Read(sizeof(nullable), &nullable);
assert(ast.ok());
ast = reader->Read(sizeof(start_timestamp), &start_timestamp);
assert(ast.ok());
ast = reader->Read(sizeof(end_timestamp), &end_timestamp);
@ -120,7 +122,7 @@ DescriptorEventDataFixPart::Serialize() {
auto fix_part_size = sizeof(collection_id) + sizeof(partition_id) +
sizeof(segment_id) + sizeof(field_id) +
sizeof(start_timestamp) + sizeof(end_timestamp) +
sizeof(data_type);
sizeof(data_type) + sizeof(nullable);
std::vector<uint8_t> res(fix_part_size);
int offset = 0;
memcpy(res.data() + offset, &collection_id, sizeof(collection_id));
@ -131,6 +133,8 @@ DescriptorEventDataFixPart::Serialize() {
offset += sizeof(segment_id);
memcpy(res.data() + offset, &field_id, sizeof(field_id));
offset += sizeof(field_id);
memcpy(res.data() + offset, &nullable, sizeof(nullable));
offset += sizeof(nullable);
memcpy(res.data() + offset, &start_timestamp, sizeof(start_timestamp));
offset += sizeof(start_timestamp);
memcpy(res.data() + offset, &end_timestamp, sizeof(end_timestamp));


@ -46,6 +46,8 @@ struct DescriptorEventDataFixPart {
int64_t partition_id;
int64_t segment_id;
int64_t field_id;
// TODO(smellthemoon): nullable is temporarily set to false; it will be changed later.
bool nullable = false;
Timestamp start_timestamp;
Timestamp end_timestamp;
milvus::proto::schema::DataType data_type;


@ -61,6 +61,8 @@ InsertData::serialize_to_remote_file() {
des_fix_part.start_timestamp = time_range_.first;
des_fix_part.end_timestamp = time_range_.second;
des_fix_part.data_type = milvus::proto::schema::DataType(data_type);
// TODO(smellthemoon): nullable is temporarily set to false; it will be changed later.
des_fix_part.nullable = false;
for (auto i = int8_t(EventType::DescriptorEvent);
i < int8_t(EventType::EventTypeEnd);
i++) {


@ -77,6 +77,7 @@ PayloadReader::init(std::shared_ptr<arrow::io::BufferReader> input) {
*rb_reader) {
AssertInfo(maybe_batch.ok(), "get batch record success");
auto array = maybe_batch.ValueOrDie()->column(column_index);
// TODO: also read the valid (null) data here when filling field data
field_data_->FillFieldData(array);
}
AssertInfo(field_data_->IsFull(), "field data hasn't been filled done");


@ -321,6 +321,81 @@ func (s *SyncTaskSuiteV2) TestBuildRecord() {
s.EqualValues(2, b.NewRecord().NumRows())
}
func (s *SyncTaskSuiteV2) TestBuildRecordNullable() {
fieldSchemas := []*schemapb.FieldSchema{
{FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool},
{FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8},
{FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16},
{FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32},
{FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64},
{FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float},
{FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double},
{FieldID: 8, Name: "field7", DataType: schemapb.DataType_String},
{FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar},
{FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
{FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
{FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32},
{FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON},
{FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
}
schema, err := typeutil.ConvertToArrowSchema(fieldSchemas)
s.NoError(err)
b := array.NewRecordBuilder(memory.NewGoAllocator(), schema)
defer b.Release()
data := &storage.InsertData{
Data: map[int64]storage.FieldData{
1: &storage.BoolFieldData{Data: []bool{true, false}, ValidData: []bool{true, true}},
2: &storage.Int8FieldData{Data: []int8{3, 4}, ValidData: []bool{true, true}},
3: &storage.Int16FieldData{Data: []int16{3, 4}, ValidData: []bool{true, true}},
4: &storage.Int32FieldData{Data: []int32{3, 4}, ValidData: []bool{true, true}},
5: &storage.Int64FieldData{Data: []int64{3, 4}, ValidData: []bool{true, true}},
6: &storage.FloatFieldData{Data: []float32{3, 4}, ValidData: []bool{true, true}},
7: &storage.DoubleFieldData{Data: []float64{3, 4}, ValidData: []bool{true, true}},
8: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
9: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8},
11: &storage.FloatVectorFieldData{
Data: []float32{4, 5, 6, 7, 4, 5, 6, 7},
Dim: 4,
},
12: &storage.ArrayFieldData{
ElementType: schemapb.DataType_Int32,
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}},
},
},
},
ValidData: []bool{true, true},
},
13: &storage.JSONFieldData{
Data: [][]byte{
[]byte(`{"batch":2}`),
[]byte(`{"key":"world"}`),
},
ValidData: []bool{true, true},
},
14: &storage.Float16VectorFieldData{
Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
Dim: 4,
},
},
}
err = typeutil.BuildRecord(b, data, fieldSchemas)
s.NoError(err)
s.EqualValues(2, b.NewRecord().NumRows())
}
func TestSyncTaskV2(t *testing.T) {
suite.Run(t, new(SyncTaskSuiteV2))
}
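The nullable variant above leans on the convention this PR adds to the storage FieldData types: every scalar field carries a ValidData []bool parallel to Data, one entry per row, true meaning the row holds a real value, while vector fields stay without it. A minimal sketch of producing such a field from nil-able rows follows; the helper name, the zero-value placeholder for null rows, and the assumption that this code lives inside the Milvus module (internal packages are not importable from outside) are illustrative, not part of the PR.

package example

import "github.com/milvus-io/milvus/internal/storage"

// int64FieldFromPtrs builds an Int64FieldData from nil-able rows.
// Data and ValidData always have the same length; a null row keeps a
// zero placeholder in Data and false in ValidData.
func int64FieldFromPtrs(rows []*int64) *storage.Int64FieldData {
	fd := &storage.Int64FieldData{
		Data:      make([]int64, 0, len(rows)),
		ValidData: make([]bool, 0, len(rows)),
	}
	for _, r := range rows {
		if r == nil {
			fd.Data = append(fd.Data, 0) // placeholder, ignored by readers
			fd.ValidData = append(fd.ValidData, false)
			continue
		}
		fd.Data = append(fd.Data, *r)
		fd.ValidData = append(fd.ValidData, true)
	}
	return fd
}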


@ -632,7 +632,7 @@ func SaveDeltaLog(collectionID int64,
for i := int64(0); i < dData.RowCount; i++ {
int64PkValue := dData.Pks[i].(*storage.Int64PrimaryKey).Value
ts := dData.Tss[i]
eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts))
eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts), true)
sizeTotal += binary.Size(int64PkValue)
sizeTotal += binary.Size(ts)
}


@ -1451,7 +1451,7 @@ func (loader *segmentLoader) patchEntryNumber(ctx context.Context, segment *Loca
return err
}
rowIDs, err := er.GetInt64FromPayload()
rowIDs, _, err := er.GetInt64FromPayload()
if err != nil {
return err
}


@ -50,7 +50,7 @@ func (reader *BinlogReader) NextEventReader() (*EventReader, error) {
reader.eventReader.Close()
}
var err error
reader.eventReader, err = newEventReader(reader.descriptorEvent.PayloadDataType, reader.buffer)
reader.eventReader, err = newEventReader(reader.descriptorEvent.PayloadDataType, reader.buffer, reader.descriptorEvent.Nullable)
if err != nil {
return nil, err
}


@ -37,25 +37,25 @@ import (
/* #nosec G103 */
func TestInsertBinlog(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40)
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e1, err := w.NextInsertEventWriter()
e1, err := w.NextInsertEventWriter(false)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6})
err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6})
err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(100, 200)
e2, err := w.NextInsertEventWriter()
e2, err := w.NextInsertEventWriter(false)
assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9})
err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true})
err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12})
err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err)
e2.SetEventTimestamp(300, 400)
@ -123,6 +123,11 @@ func TestInsertBinlog(t *testing.T) {
assert.Equal(t, fieldID, int64(40))
pos += int(unsafe.Sizeof(fieldID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(1000))
@ -201,11 +206,12 @@ func TestInsertBinlog(t *testing.T) {
// insert e1, payload
e1Payload := buf[pos:e1NxtPos]
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload)
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false)
assert.NoError(t, err)
e1a, err := e1r.GetInt64FromPayload()
e1a, valids, err := e1r.GetInt64FromPayload()
assert.NoError(t, err)
assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6})
assert.Nil(t, valids)
e1r.Close()
// start of e2
@ -243,11 +249,12 @@ func TestInsertBinlog(t *testing.T) {
// insert e2, payload
e2Payload := buf[pos:]
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload)
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false)
assert.NoError(t, err)
e2a, err := e2r.GetInt64FromPayload()
e2a, valids, err := e2r.GetInt64FromPayload()
assert.NoError(t, err)
assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12})
assert.Nil(t, valids)
e2r.Close()
assert.Equal(t, int(e2NxtPos), len(buf))
@ -258,8 +265,9 @@ func TestInsertBinlog(t *testing.T) {
event1, err := r.NextEventReader()
assert.NoError(t, err)
assert.NotNil(t, event1)
p1, err := event1.GetInt64FromPayload()
p1, valids, err := event1.GetInt64FromPayload()
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6})
assert.Nil(t, valids)
assert.NoError(t, err)
assert.Equal(t, event1.TypeCode, InsertEventType)
ed1, ok := (event1.eventData).(*insertEventData)
@ -270,9 +278,10 @@ func TestInsertBinlog(t *testing.T) {
event2, err := r.NextEventReader()
assert.NoError(t, err)
assert.NotNil(t, event2)
p2, err := event2.GetInt64FromPayload()
p2, valids, err := event2.GetInt64FromPayload()
assert.NoError(t, err)
assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12})
assert.Nil(t, valids)
assert.Equal(t, event2.TypeCode, InsertEventType)
ed2, ok := (event2.eventData).(*insertEventData)
assert.True(t, ok)
@ -288,21 +297,21 @@ func TestDeleteBinlog(t *testing.T) {
e1, err := w.NextDeleteEventWriter()
assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6})
err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6})
err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(100, 200)
e2, err := w.NextDeleteEventWriter()
assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9})
err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true})
err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12})
err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err)
e2.SetEventTimestamp(300, 400)
@ -370,6 +379,11 @@ func TestDeleteBinlog(t *testing.T) {
assert.Equal(t, fieldID, int64(-1))
pos += int(unsafe.Sizeof(fieldID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(1000))
@ -448,11 +462,12 @@ func TestDeleteBinlog(t *testing.T) {
// insert e1, payload
e1Payload := buf[pos:e1NxtPos]
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload)
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false)
assert.NoError(t, err)
e1a, err := e1r.GetInt64FromPayload()
e1a, valids, err := e1r.GetInt64FromPayload()
assert.NoError(t, err)
assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6})
assert.Nil(t, valids)
e1r.Close()
// start of e2
@ -490,10 +505,11 @@ func TestDeleteBinlog(t *testing.T) {
// insert e2, payload
e2Payload := buf[pos:]
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload)
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false)
assert.NoError(t, err)
e2a, err := e2r.GetInt64FromPayload()
e2a, valids, err := e2r.GetInt64FromPayload()
assert.NoError(t, err)
assert.Nil(t, valids)
assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12})
e2r.Close()
@ -505,7 +521,8 @@ func TestDeleteBinlog(t *testing.T) {
event1, err := r.NextEventReader()
assert.NoError(t, err)
assert.NotNil(t, event1)
p1, err := event1.GetInt64FromPayload()
p1, valids, err := event1.GetInt64FromPayload()
assert.Nil(t, valids)
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6})
assert.NoError(t, err)
assert.Equal(t, event1.TypeCode, DeleteEventType)
@ -517,7 +534,8 @@ func TestDeleteBinlog(t *testing.T) {
event2, err := r.NextEventReader()
assert.NoError(t, err)
assert.NotNil(t, event2)
p2, err := event2.GetInt64FromPayload()
p2, valids, err := event2.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err)
assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12})
assert.Equal(t, event2.TypeCode, DeleteEventType)
@ -535,21 +553,21 @@ func TestDDLBinlog1(t *testing.T) {
e1, err := w.NextCreateCollectionEventWriter()
assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6})
err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6})
err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(100, 200)
e2, err := w.NextDropCollectionEventWriter()
assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9})
err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true})
err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12})
err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err)
e2.SetEventTimestamp(300, 400)
@ -617,6 +635,11 @@ func TestDDLBinlog1(t *testing.T) {
assert.Equal(t, fieldID, int64(-1))
pos += int(unsafe.Sizeof(fieldID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(1000))
@ -695,9 +718,10 @@ func TestDDLBinlog1(t *testing.T) {
// insert e1, payload
e1Payload := buf[pos:e1NxtPos]
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload)
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false)
assert.NoError(t, err)
e1a, err := e1r.GetInt64FromPayload()
e1a, valids, err := e1r.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err)
assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6})
e1r.Close()
@ -737,9 +761,10 @@ func TestDDLBinlog1(t *testing.T) {
// insert e2, payload
e2Payload := buf[pos:]
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload)
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false)
assert.NoError(t, err)
e2a, err := e2r.GetInt64FromPayload()
e2a, valids, err := e2r.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err)
assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12})
e2r.Close()
@ -752,7 +777,8 @@ func TestDDLBinlog1(t *testing.T) {
event1, err := r.NextEventReader()
assert.NoError(t, err)
assert.NotNil(t, event1)
p1, err := event1.GetInt64FromPayload()
p1, valids, err := event1.GetInt64FromPayload()
assert.Nil(t, valids)
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6})
assert.NoError(t, err)
assert.Equal(t, event1.TypeCode, CreateCollectionEventType)
@ -764,7 +790,8 @@ func TestDDLBinlog1(t *testing.T) {
event2, err := r.NextEventReader()
assert.NoError(t, err)
assert.NotNil(t, event2)
p2, err := event2.GetInt64FromPayload()
p2, valids, err := event2.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err)
assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12})
assert.Equal(t, event2.TypeCode, DropCollectionEventType)
@ -782,21 +809,21 @@ func TestDDLBinlog2(t *testing.T) {
e1, err := w.NextCreatePartitionEventWriter()
assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6})
err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6})
err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(100, 200)
e2, err := w.NextDropPartitionEventWriter()
assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9})
err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true})
err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12})
err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err)
e2.SetEventTimestamp(300, 400)
@ -863,6 +890,11 @@ func TestDDLBinlog2(t *testing.T) {
assert.Equal(t, fieldID, int64(-1))
pos += int(unsafe.Sizeof(fieldID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(1000))
@ -941,9 +973,10 @@ func TestDDLBinlog2(t *testing.T) {
// insert e1, payload
e1Payload := buf[pos:e1NxtPos]
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload)
e1r, err := NewPayloadReader(schemapb.DataType_Int64, e1Payload, false)
assert.NoError(t, err)
e1a, err := e1r.GetInt64FromPayload()
e1a, valids, err := e1r.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err)
assert.Equal(t, e1a, []int64{1, 2, 3, 4, 5, 6})
e1r.Close()
@ -983,9 +1016,10 @@ func TestDDLBinlog2(t *testing.T) {
// insert e2, payload
e2Payload := buf[pos:]
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload)
e2r, err := NewPayloadReader(schemapb.DataType_Int64, e2Payload, false)
assert.NoError(t, err)
e2a, err := e2r.GetInt64FromPayload()
e2a, valids, err := e2r.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err)
assert.Equal(t, e2a, []int64{7, 8, 9, 10, 11, 12})
e2r.Close()
@ -998,7 +1032,8 @@ func TestDDLBinlog2(t *testing.T) {
event1, err := r.NextEventReader()
assert.NoError(t, err)
assert.NotNil(t, event1)
p1, err := event1.GetInt64FromPayload()
p1, valids, err := event1.GetInt64FromPayload()
assert.Nil(t, valids)
assert.Equal(t, p1, []int64{1, 2, 3, 4, 5, 6})
assert.NoError(t, err)
assert.Equal(t, event1.TypeCode, CreatePartitionEventType)
@ -1010,7 +1045,8 @@ func TestDDLBinlog2(t *testing.T) {
event2, err := r.NextEventReader()
assert.NoError(t, err)
assert.NotNil(t, event2)
p2, err := event2.GetInt64FromPayload()
p2, valids, err := event2.GetInt64FromPayload()
assert.Nil(t, valids)
assert.NoError(t, err)
assert.Equal(t, p2, []int64{7, 8, 9, 10, 11, 12})
assert.Equal(t, event2.TypeCode, DropPartitionEventType)
@ -1042,7 +1078,7 @@ func TestIndexFileBinlog(t *testing.T) {
e, err := w.NextIndexFileEventWriter()
assert.NoError(t, err)
err = e.AddByteToPayload(payload)
err = e.AddByteToPayload(payload, nil)
assert.NoError(t, err)
e.SetEventTimestamp(timestamp, timestamp)
@ -1104,6 +1140,11 @@ func TestIndexFileBinlog(t *testing.T) {
assert.Equal(t, fieldID, fID)
pos += int(unsafe.Sizeof(fID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(timestamp))
@ -1171,7 +1212,7 @@ func TestIndexFileBinlogV2(t *testing.T) {
e, err := w.NextIndexFileEventWriter()
assert.NoError(t, err)
err = e.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload))
err = e.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload), true)
assert.NoError(t, err)
e.SetEventTimestamp(timestamp, timestamp)
@ -1233,6 +1274,11 @@ func TestIndexFileBinlogV2(t *testing.T) {
assert.Equal(t, fieldID, fID)
pos += int(unsafe.Sizeof(fID))
// descriptor data fix, nullable
nullable := UnsafeReadBool(buf, pos)
assert.Equal(t, nullable, false)
pos += int(unsafe.Sizeof(nullable))
// descriptor data fix, start time stamp
startts := UnsafeReadInt64(buf, pos)
assert.Equal(t, startts, int64(timestamp))
@ -1309,17 +1355,17 @@ func TestNewBinlogReaderError(t *testing.T) {
assert.Nil(t, reader)
assert.Error(t, err)
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40)
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
w.SetEventTimeStamp(1000, 2000)
e1, err := w.NextInsertEventWriter()
e1, err := w.NextInsertEventWriter(false)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6})
err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6})
err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(100, 200)
@ -1348,7 +1394,7 @@ func TestNewBinlogReaderError(t *testing.T) {
}
func TestNewBinlogWriterTsError(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40)
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
_, err := w.GetBuffer()
assert.Error(t, err)
@ -1376,21 +1422,21 @@ func TestNewBinlogWriterTsError(t *testing.T) {
}
func TestInsertBinlogWriterCloseError(t *testing.T) {
insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40)
e1, err := insertWriter.NextInsertEventWriter()
insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e1, err := insertWriter.NextInsertEventWriter(false)
assert.NoError(t, err)
sizeTotal := 2000000
insertWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(100, 200)
insertWriter.SetEventTimeStamp(1000, 2000)
err = insertWriter.Finish()
assert.NoError(t, err)
assert.NotNil(t, insertWriter.buffer)
insertEventWriter, err := insertWriter.NextInsertEventWriter()
insertEventWriter, err := insertWriter.NextInsertEventWriter(false)
assert.Nil(t, insertEventWriter)
assert.Error(t, err)
insertWriter.Close()
@ -1402,7 +1448,7 @@ func TestDeleteBinlogWriteCloseError(t *testing.T) {
assert.NoError(t, err)
sizeTotal := 2000000
deleteWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(100, 200)
deleteWriter.SetEventTimeStamp(1000, 2000)
@ -1423,7 +1469,7 @@ func TestDDBinlogWriteCloseError(t *testing.T) {
sizeTotal := 2000000
ddBinlogWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(100, 200)
@ -1499,7 +1545,7 @@ func (e *testEvent) SetOffset(offset int32) {
var _ EventWriter = (*testEvent)(nil)
func TestWriterListError(t *testing.T) {
insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40)
insertWriter := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
sizeTotal := 2000000
insertWriter.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))
errorEvent := &testEvent{}


@ -150,7 +150,7 @@ type InsertBinlogWriter struct {
}
// NextInsertEventWriter returns an event writer to write insert data to an event.
func (writer *InsertBinlogWriter) NextInsertEventWriter(dim ...int) (*insertEventWriter, error) {
func (writer *InsertBinlogWriter) NextInsertEventWriter(nullable bool, dim ...int) (*insertEventWriter, error) {
if writer.isClosed() {
return nil, fmt.Errorf("binlog has closed")
}
@ -161,9 +161,9 @@ func (writer *InsertBinlogWriter) NextInsertEventWriter(dim ...int) (*insertEven
if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers")
}
event, err = newInsertEventWriter(writer.PayloadDataType, dim[0])
event, err = newInsertEventWriter(writer.PayloadDataType, nullable, dim[0])
} else {
event, err = newInsertEventWriter(writer.PayloadDataType)
event, err = newInsertEventWriter(writer.PayloadDataType, nullable)
}
if err != nil {
return nil, err
@ -271,13 +271,14 @@ func (writer *IndexFileBinlogWriter) NextIndexFileEventWriter() (*indexFileEvent
}
// NewInsertBinlogWriter creates InsertBinlogWriter to write binlog file.
func NewInsertBinlogWriter(dataType schemapb.DataType, collectionID, partitionID, segmentID, FieldID int64) *InsertBinlogWriter {
func NewInsertBinlogWriter(dataType schemapb.DataType, collectionID, partitionID, segmentID, FieldID int64, nullable bool) *InsertBinlogWriter {
descriptorEvent := newDescriptorEvent()
descriptorEvent.PayloadDataType = dataType
descriptorEvent.CollectionID = collectionID
descriptorEvent.PartitionID = partitionID
descriptorEvent.SegmentID = segmentID
descriptorEvent.FieldID = FieldID
descriptorEvent.Nullable = nullable
w := &InsertBinlogWriter{
baseBinlogWriter: baseBinlogWriter{


@ -26,15 +26,15 @@ import (
)
func TestBinlogWriterReader(t *testing.T) {
binlogWriter := NewInsertBinlogWriter(schemapb.DataType_Int32, 10, 20, 30, 40)
binlogWriter := NewInsertBinlogWriter(schemapb.DataType_Int32, 10, 20, 30, 40, false)
tp := binlogWriter.GetBinlogType()
assert.Equal(t, tp, InsertBinlog)
binlogWriter.SetEventTimeStamp(1000, 2000)
defer binlogWriter.Close()
eventWriter, err := binlogWriter.NextInsertEventWriter()
eventWriter, err := binlogWriter.NextInsertEventWriter(false)
assert.NoError(t, err)
err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3})
err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}, nil)
assert.NoError(t, err)
_, err = binlogWriter.GetBuffer()
assert.Error(t, err)
@ -50,7 +50,7 @@ func TestBinlogWriterReader(t *testing.T) {
nums, err = binlogWriter.GetRowNums()
assert.NoError(t, err)
assert.EqualValues(t, 3, nums)
err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3})
err = eventWriter.AddInt32ToPayload([]int32{1, 2, 3}, nil)
assert.Error(t, err)
nums, err = binlogWriter.GetRowNums()
assert.NoError(t, err)
@ -64,9 +64,9 @@ func TestBinlogWriterReader(t *testing.T) {
assert.NoError(t, err)
eventReader, err := binlogReader.NextEventReader()
assert.NoError(t, err)
_, err = eventReader.GetInt8FromPayload()
_, _, err = eventReader.GetInt8FromPayload()
assert.Error(t, err)
payload, err := eventReader.GetInt32FromPayload()
payload, _, err := eventReader.GetInt32FromPayload()
assert.NoError(t, err)
assert.EqualValues(t, 3, len(payload))
assert.EqualValues(t, 1, payload[0])


@ -247,11 +247,14 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique
for _, field := range insertCodec.Schema.Schema.Fields {
// encode fields
writer = NewInsertBinlogWriter(field.DataType, insertCodec.Schema.ID, partitionID, segmentID, field.FieldID)
writer = NewInsertBinlogWriter(field.DataType, insertCodec.Schema.ID, partitionID, segmentID, field.FieldID, field.GetNullable())
var eventWriter *insertEventWriter
var err error
var dim int64
if typeutil.IsVectorType(field.DataType) {
if field.GetNullable() {
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("vector type does not support null, field name: %s", field.GetName()))
}
switch field.DataType {
case schemapb.DataType_FloatVector,
schemapb.DataType_BinaryVector,
@ -261,14 +264,14 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique
if err != nil {
return nil, err
}
eventWriter, err = writer.NextInsertEventWriter(int(dim))
eventWriter, err = writer.NextInsertEventWriter(field.GetNullable(), int(dim))
case schemapb.DataType_SparseFloatVector:
eventWriter, err = writer.NextInsertEventWriter()
eventWriter, err = writer.NextInsertEventWriter(field.GetNullable())
default:
return nil, fmt.Errorf("undefined data type %d", field.DataType)
}
} else {
eventWriter, err = writer.NextInsertEventWriter()
eventWriter, err = writer.NextInsertEventWriter(field.GetNullable())
}
if err != nil {
writer.Close()
@ -323,48 +326,60 @@ func AddFieldDataToPayload(eventWriter *insertEventWriter, dataType schemapb.Dat
var err error
switch dataType {
case schemapb.DataType_Bool:
if err = eventWriter.AddBoolToPayload(singleData.(*BoolFieldData).Data); err != nil {
if err = eventWriter.AddBoolToPayload(singleData.(*BoolFieldData).Data, singleData.(*BoolFieldData).ValidData); err != nil {
return err
}
case schemapb.DataType_Int8:
if err = eventWriter.AddInt8ToPayload(singleData.(*Int8FieldData).Data); err != nil {
if err = eventWriter.AddInt8ToPayload(singleData.(*Int8FieldData).Data, singleData.(*Int8FieldData).ValidData); err != nil {
return err
}
case schemapb.DataType_Int16:
if err = eventWriter.AddInt16ToPayload(singleData.(*Int16FieldData).Data); err != nil {
if err = eventWriter.AddInt16ToPayload(singleData.(*Int16FieldData).Data, singleData.(*Int16FieldData).ValidData); err != nil {
return err
}
case schemapb.DataType_Int32:
if err = eventWriter.AddInt32ToPayload(singleData.(*Int32FieldData).Data); err != nil {
if err = eventWriter.AddInt32ToPayload(singleData.(*Int32FieldData).Data, singleData.(*Int32FieldData).ValidData); err != nil {
return err
}
case schemapb.DataType_Int64:
if err = eventWriter.AddInt64ToPayload(singleData.(*Int64FieldData).Data); err != nil {
if err = eventWriter.AddInt64ToPayload(singleData.(*Int64FieldData).Data, singleData.(*Int64FieldData).ValidData); err != nil {
return err
}
case schemapb.DataType_Float:
if err = eventWriter.AddFloatToPayload(singleData.(*FloatFieldData).Data); err != nil {
if err = eventWriter.AddFloatToPayload(singleData.(*FloatFieldData).Data, singleData.(*FloatFieldData).ValidData); err != nil {
return err
}
case schemapb.DataType_Double:
if err = eventWriter.AddDoubleToPayload(singleData.(*DoubleFieldData).Data); err != nil {
if err = eventWriter.AddDoubleToPayload(singleData.(*DoubleFieldData).Data, singleData.(*DoubleFieldData).ValidData); err != nil {
return err
}
case schemapb.DataType_String, schemapb.DataType_VarChar:
for _, singleString := range singleData.(*StringFieldData).Data {
if err = eventWriter.AddOneStringToPayload(singleString); err != nil {
for i, singleString := range singleData.(*StringFieldData).Data {
isValid := true
if len(singleData.(*StringFieldData).ValidData) != 0 {
isValid = singleData.(*StringFieldData).ValidData[i]
}
if err = eventWriter.AddOneStringToPayload(singleString, isValid); err != nil {
return err
}
}
case schemapb.DataType_Array:
for _, singleArray := range singleData.(*ArrayFieldData).Data {
if err = eventWriter.AddOneArrayToPayload(singleArray); err != nil {
for i, singleArray := range singleData.(*ArrayFieldData).Data {
isValid := true
if len(singleData.(*ArrayFieldData).ValidData) != 0 {
isValid = singleData.(*ArrayFieldData).ValidData[i]
}
if err = eventWriter.AddOneArrayToPayload(singleArray, isValid); err != nil {
return err
}
}
case schemapb.DataType_JSON:
for _, singleJSON := range singleData.(*JSONFieldData).Data {
if err = eventWriter.AddOneJSONToPayload(singleJSON); err != nil {
for i, singleJSON := range singleData.(*JSONFieldData).Data {
isValid := true
if len(singleData.(*JSONFieldData).ValidData) != 0 {
isValid = singleData.(*JSONFieldData).ValidData[i]
}
if err = eventWriter.AddOneJSONToPayload(singleJSON, isValid); err != nil {
return err
}
}
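For the variable-length types above (string/varchar, array, JSON) the writer walks rows one by one and derives a per-row validity, treating an empty ValidData slice as all rows valid so that legacy non-nullable data keeps working. That repeated inline pattern boils down to a tiny helper; the sketch below is illustrative and not part of the PR.

// validAt mirrors the inline pattern used above for strings, arrays and JSON:
// an empty (or nil) ValidData slice means every row is valid.
func validAt(valid []bool, i int) bool {
	if len(valid) == 0 {
		return true
	}
	return valid[i]
}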
@ -448,7 +463,7 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
}
switch dataType {
case schemapb.DataType_Bool:
singleData, err := eventReader.GetBoolFromPayload()
singleData, validData, err := eventReader.GetBoolFromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -463,11 +478,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
boolFieldData := insertData.Data[fieldID].(*BoolFieldData)
boolFieldData.Data = append(boolFieldData.Data, singleData...)
boolFieldData.ValidData = append(boolFieldData.ValidData, validData...)
totalLength += len(singleData)
insertData.Data[fieldID] = boolFieldData
case schemapb.DataType_Int8:
singleData, err := eventReader.GetInt8FromPayload()
singleData, validData, err := eventReader.GetInt8FromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -482,11 +498,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
int8FieldData := insertData.Data[fieldID].(*Int8FieldData)
int8FieldData.Data = append(int8FieldData.Data, singleData...)
int8FieldData.ValidData = append(int8FieldData.ValidData, validData...)
totalLength += len(singleData)
insertData.Data[fieldID] = int8FieldData
case schemapb.DataType_Int16:
singleData, err := eventReader.GetInt16FromPayload()
singleData, validData, err := eventReader.GetInt16FromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -501,11 +518,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
int16FieldData := insertData.Data[fieldID].(*Int16FieldData)
int16FieldData.Data = append(int16FieldData.Data, singleData...)
int16FieldData.ValidData = append(int16FieldData.ValidData, validData...)
totalLength += len(singleData)
insertData.Data[fieldID] = int16FieldData
case schemapb.DataType_Int32:
singleData, err := eventReader.GetInt32FromPayload()
singleData, validData, err := eventReader.GetInt32FromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -520,11 +538,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
int32FieldData := insertData.Data[fieldID].(*Int32FieldData)
int32FieldData.Data = append(int32FieldData.Data, singleData...)
int32FieldData.ValidData = append(int32FieldData.ValidData, validData...)
totalLength += len(singleData)
insertData.Data[fieldID] = int32FieldData
case schemapb.DataType_Int64:
singleData, err := eventReader.GetInt64FromPayload()
singleData, validData, err := eventReader.GetInt64FromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -539,11 +558,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
int64FieldData := insertData.Data[fieldID].(*Int64FieldData)
int64FieldData.Data = append(int64FieldData.Data, singleData...)
int64FieldData.ValidData = append(int64FieldData.ValidData, validData...)
totalLength += len(singleData)
insertData.Data[fieldID] = int64FieldData
case schemapb.DataType_Float:
singleData, err := eventReader.GetFloatFromPayload()
singleData, validData, err := eventReader.GetFloatFromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -558,11 +578,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
floatFieldData := insertData.Data[fieldID].(*FloatFieldData)
floatFieldData.Data = append(floatFieldData.Data, singleData...)
floatFieldData.ValidData = append(floatFieldData.ValidData, validData...)
totalLength += len(singleData)
insertData.Data[fieldID] = floatFieldData
case schemapb.DataType_Double:
singleData, err := eventReader.GetDoubleFromPayload()
singleData, validData, err := eventReader.GetDoubleFromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -577,11 +598,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
doubleFieldData := insertData.Data[fieldID].(*DoubleFieldData)
doubleFieldData.Data = append(doubleFieldData.Data, singleData...)
doubleFieldData.ValidData = append(doubleFieldData.ValidData, validData...)
totalLength += len(singleData)
insertData.Data[fieldID] = doubleFieldData
case schemapb.DataType_String, schemapb.DataType_VarChar:
stringPayload, err := eventReader.GetStringFromPayload()
stringPayload, validData, err := eventReader.GetStringFromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -594,14 +616,15 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
}
}
stringFieldData := insertData.Data[fieldID].(*StringFieldData)
stringFieldData.DataType = dataType
stringFieldData.Data = append(stringFieldData.Data, stringPayload...)
stringFieldData.DataType = dataType
stringFieldData.ValidData = append(stringFieldData.ValidData, validData...)
totalLength += len(stringPayload)
insertData.Data[fieldID] = stringFieldData
case schemapb.DataType_Array:
arrayPayload, err := eventReader.GetArrayFromPayload()
arrayPayload, validData, err := eventReader.GetArrayFromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -616,11 +639,12 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
arrayFieldData := insertData.Data[fieldID].(*ArrayFieldData)
arrayFieldData.Data = append(arrayFieldData.Data, arrayPayload...)
arrayFieldData.ValidData = append(arrayFieldData.ValidData, validData...)
totalLength += len(arrayPayload)
insertData.Data[fieldID] = arrayFieldData
case schemapb.DataType_JSON:
jsonPayload, err := eventReader.GetJSONFromPayload()
jsonPayload, validData, err := eventReader.GetJSONFromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -635,6 +659,7 @@ func (insertCodec *InsertCodec) DeserializeInto(fieldBinlogs []*Blob, rowNum int
jsonFieldData := insertData.Data[fieldID].(*JSONFieldData)
jsonFieldData.Data = append(jsonFieldData.Data, jsonPayload...)
jsonFieldData.ValidData = append(jsonFieldData.ValidData, validData...)
totalLength += len(jsonPayload)
insertData.Data[fieldID] = jsonFieldData
@ -934,7 +959,7 @@ func (deleteCodec *DeleteCodec) Serialize(collectionID UniqueID, partitionID Uni
if err != nil {
return nil, err
}
err = eventWriter.AddOneStringToPayload(string(serializedPayload))
err = eventWriter.AddOneStringToPayload(string(serializedPayload), true)
if err != nil {
return nil, err
}
@ -1084,7 +1109,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
for _, singleTs := range ts {
int64Ts = append(int64Ts, int64(singleTs))
}
err = eventWriter.AddInt64ToPayload(int64Ts)
err = eventWriter.AddInt64ToPayload(int64Ts, nil)
if err != nil {
return nil, err
}
@ -1120,7 +1145,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
if err != nil {
return nil, err
}
err = eventWriter.AddOneStringToPayload(req)
err = eventWriter.AddOneStringToPayload(req, true)
if err != nil {
return nil, err
}
@ -1130,7 +1155,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
if err != nil {
return nil, err
}
err = eventWriter.AddOneStringToPayload(req)
err = eventWriter.AddOneStringToPayload(req, true)
if err != nil {
return nil, err
}
@ -1140,7 +1165,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
if err != nil {
return nil, err
}
err = eventWriter.AddOneStringToPayload(req)
err = eventWriter.AddOneStringToPayload(req, true)
if err != nil {
return nil, err
}
@ -1150,7 +1175,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
if err != nil {
return nil, err
}
err = eventWriter.AddOneStringToPayload(req)
err = eventWriter.AddOneStringToPayload(req, true)
if err != nil {
return nil, err
}
@ -1211,7 +1236,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Deserialize(blobs []*Blob) (ts [
}
switch dataType {
case schemapb.DataType_Int64:
int64Ts, err := eventReader.GetInt64FromPayload()
int64Ts, _, err := eventReader.GetInt64FromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
@ -1221,7 +1246,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Deserialize(blobs []*Blob) (ts [
resultTs = append(resultTs, Timestamp(singleTs))
}
case schemapb.DataType_String:
stringPayload, err := eventReader.GetStringFromPayload()
stringPayload, _, err := eventReader.GetStringFromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()


@ -201,6 +201,62 @@ func genTestCollectionMeta() *etcdpb.CollectionMeta {
}
}
func TestInsertCodecFailed(t *testing.T) {
t.Run("vector field not support null", func(t *testing.T) {
tests := []struct {
description string
dataType schemapb.DataType
}{
{"nullable FloatVector field", schemapb.DataType_FloatVector},
{"nullable Float16Vector field", schemapb.DataType_Float16Vector},
{"nullable BinaryVector field", schemapb.DataType_BinaryVector},
{"nullable BFloat16Vector field", schemapb.DataType_BFloat16Vector},
{"nullable SparseFloatVector field", schemapb.DataType_SparseFloatVector},
}
for _, test := range tests {
t.Run(test.description, func(t *testing.T) {
schema := &etcdpb.CollectionMeta{
ID: CollectionID,
CreateTime: 1,
SegmentIDs: []int64{SegmentID},
PartitionTags: []string{"partition_0", "partition_1"},
Schema: &schemapb.CollectionSchema{
Name: "schema",
Description: "schema",
Fields: []*schemapb.FieldSchema{
{
FieldID: RowIDField,
Name: "row_id",
Description: "row_id",
DataType: schemapb.DataType_Int64,
},
{
FieldID: TimestampField,
Name: "Timestamp",
Description: "Timestamp",
DataType: schemapb.DataType_Int64,
},
{
DataType: test.dataType,
},
},
},
}
insertCodec := NewInsertCodecWithSchema(schema)
insertDataEmpty := &InsertData{
Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}, nil},
},
}
_, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty)
assert.Error(t, err)
})
}
})
}
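The table-driven test above exercises the new fail-fast path in InsertCodec.Serialize: a vector field marked nullable is rejected before any event writer is created, since vector payloads carry no per-row validity. Pulled out as a standalone validation helper, the same check could look like the sketch below; the helper and the import paths are illustrative, and in the PR the check is inline in Serialize.

package example

import (
	"fmt"

	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/pkg/util/merr"
	"github.com/milvus-io/milvus/pkg/util/typeutil"
)

// checkNullableVectors rejects schemas that mark a vector field as nullable.
func checkNullableVectors(fields []*schemapb.FieldSchema) error {
	for _, f := range fields {
		if typeutil.IsVectorType(f.GetDataType()) && f.GetNullable() {
			return merr.WrapErrParameterInvalidMsg(
				fmt.Sprintf("vector type does not support null, field name: %s", f.GetName()))
		}
	}
	return nil
}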
func TestInsertCodec(t *testing.T) {
schema := genTestCollectionMeta()
insertCodec := NewInsertCodecWithSchema(schema)
@ -374,16 +430,16 @@ func TestInsertCodec(t *testing.T) {
insertDataEmpty := &InsertData{
Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}},
TimestampField: &Int64FieldData{[]int64{}},
BoolField: &BoolFieldData{[]bool{}},
Int8Field: &Int8FieldData{[]int8{}},
Int16Field: &Int16FieldData{[]int16{}},
Int32Field: &Int32FieldData{[]int32{}},
Int64Field: &Int64FieldData{[]int64{}},
FloatField: &FloatFieldData{[]float32{}},
DoubleField: &DoubleFieldData{[]float64{}},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar},
RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}, nil},
BoolField: &BoolFieldData{[]bool{}, nil},
Int8Field: &Int8FieldData{[]int8{}, nil},
Int16Field: &Int16FieldData{[]int16{}, nil},
Int32Field: &Int32FieldData{[]int32{}, nil},
Int64Field: &Int64FieldData{[]int64{}, nil},
FloatField: &FloatFieldData{[]float32{}, nil},
DoubleField: &DoubleFieldData{[]float64{}, nil},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil},
BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8},
FloatVectorField: &FloatVectorFieldData{[]float32{}, 4},
Float16VectorField: &Float16VectorFieldData{[]byte{}, 4},
@ -394,8 +450,8 @@ func TestInsertCodec(t *testing.T) {
Contents: [][]byte{},
},
},
ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}},
JSONField: &JSONFieldData{[][]byte{}},
ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}, nil},
JSONField: &JSONFieldData{[][]byte{}, nil},
},
}
b, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty)
@ -557,7 +613,7 @@ func TestUpgradeDeleteLog(t *testing.T) {
for i := int64(0); i < dData.RowCount; i++ {
int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value
ts := dData.Tss[i]
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts))
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d", int64PkValue, ts), true)
assert.NoError(t, err)
sizeTotal += binary.Size(int64PkValue)
sizeTotal += binary.Size(ts)
@ -595,7 +651,7 @@ func TestUpgradeDeleteLog(t *testing.T) {
for i := int64(0); i < dData.RowCount; i++ {
int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value
ts := dData.Tss[i]
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d,?", int64PkValue, ts))
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,%d,?", int64PkValue, ts), true)
assert.NoError(t, err)
}
eventWriter.SetEventTimestamp(100, 200)
@ -626,7 +682,7 @@ func TestUpgradeDeleteLog(t *testing.T) {
for i := int64(0); i < dData.RowCount; i++ {
ts := dData.Tss[i]
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("abc,%d", ts))
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("abc,%d", ts), true)
assert.NoError(t, err)
}
eventWriter.SetEventTimestamp(100, 200)
@ -657,7 +713,7 @@ func TestUpgradeDeleteLog(t *testing.T) {
for i := int64(0); i < dData.RowCount; i++ {
int64PkValue := dData.Pks[i].(*Int64PrimaryKey).Value
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,abc", int64PkValue))
err = eventWriter.AddOneStringToPayload(fmt.Sprintf("%d,abc", int64PkValue), true)
assert.NoError(t, err)
}
eventWriter.SetEventTimestamp(100, 200)
@ -845,16 +901,16 @@ func TestMemorySize(t *testing.T) {
insertDataEmpty := &InsertData{
Data: map[int64]FieldData{
RowIDField: &Int64FieldData{[]int64{}},
TimestampField: &Int64FieldData{[]int64{}},
BoolField: &BoolFieldData{[]bool{}},
Int8Field: &Int8FieldData{[]int8{}},
Int16Field: &Int16FieldData{[]int16{}},
Int32Field: &Int32FieldData{[]int32{}},
Int64Field: &Int64FieldData{[]int64{}},
FloatField: &FloatFieldData{[]float32{}},
DoubleField: &DoubleFieldData{[]float64{}},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar},
RowIDField: &Int64FieldData{[]int64{}, nil},
TimestampField: &Int64FieldData{[]int64{}, nil},
BoolField: &BoolFieldData{[]bool{}, nil},
Int8Field: &Int8FieldData{[]int8{}, nil},
Int16Field: &Int16FieldData{[]int16{}, nil},
Int32Field: &Int32FieldData{[]int32{}, nil},
Int64Field: &Int64FieldData{[]int64{}, nil},
FloatField: &FloatFieldData{[]float32{}, nil},
DoubleField: &DoubleFieldData{[]float64{}, nil},
StringField: &StringFieldData{[]string{}, schemapb.DataType_VarChar, nil},
BinaryVectorField: &BinaryVectorFieldData{[]byte{}, 8},
FloatVectorField: &FloatVectorFieldData{[]float32{}, 4},
},
@ -920,24 +976,24 @@ func TestDeleteData(t *testing.T) {
}
func TestAddFieldDataToPayload(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40)
e, _ := w.NextInsertEventWriter()
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
e, _ := w.NextInsertEventWriter(false)
var err error
err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}})
err = AddFieldDataToPayload(e, schemapb.DataType_Bool, &BoolFieldData{[]bool{}, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int8, &Int8FieldData{[]int8{}})
err = AddFieldDataToPayload(e, schemapb.DataType_Int8, &Int8FieldData{[]int8{}, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int16, &Int16FieldData{[]int16{}})
err = AddFieldDataToPayload(e, schemapb.DataType_Int16, &Int16FieldData{[]int16{}, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int32, &Int32FieldData{[]int32{}})
err = AddFieldDataToPayload(e, schemapb.DataType_Int32, &Int32FieldData{[]int32{}, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Int64, &Int64FieldData{[]int64{}})
err = AddFieldDataToPayload(e, schemapb.DataType_Int64, &Int64FieldData{[]int64{}, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Float, &FloatFieldData{[]float32{}})
err = AddFieldDataToPayload(e, schemapb.DataType_Float, &FloatFieldData{[]float32{}, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Double, &DoubleFieldData{[]float64{}})
err = AddFieldDataToPayload(e, schemapb.DataType_Double, &DoubleFieldData{[]float64{}, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_String, &StringFieldData{[]string{"test"}, schemapb.DataType_VarChar})
err = AddFieldDataToPayload(e, schemapb.DataType_String, &StringFieldData{[]string{"test"}, schemapb.DataType_VarChar, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_Array, &ArrayFieldData{
ElementType: schemapb.DataType_VarChar,
@ -948,7 +1004,7 @@ func TestAddFieldDataToPayload(t *testing.T) {
}},
})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_JSON, &JSONFieldData{[][]byte{[]byte(`"batch":2}`)}})
err = AddFieldDataToPayload(e, schemapb.DataType_JSON, &JSONFieldData{[][]byte{[]byte(`"batch":2}`)}, nil})
assert.Error(t, err)
err = AddFieldDataToPayload(e, schemapb.DataType_BinaryVector, &BinaryVectorFieldData{[]byte{}, 8})
assert.Error(t, err)


@ -46,6 +46,7 @@ type DescriptorEventDataFixPart struct {
PartitionID int64
SegmentID int64
FieldID int64
Nullable bool
StartTimestamp typeutil.Timestamp
EndTimestamp typeutil.Timestamp
PayloadDataType schemapb.DataType
@ -350,6 +351,7 @@ func newDescriptorEventData() *descriptorEventData {
StartTimestamp: 0,
EndTimestamp: 0,
PayloadDataType: -1,
Nullable: false,
},
PostHeaderLengths: []uint8{},
Extras: make(map[string]interface{}),
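As on the C++ side earlier in this diff, the one-byte Nullable flag sits between FieldID and StartTimestamp, so the descriptor fixed part grows from 52 to 53 bytes. A throwaway sketch that reproduces the layout with encoding/binary, assuming little-endian encoding, typeutil.Timestamp being a uint64 and schemapb.DataType a 32-bit enum:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// fixPart mirrors DescriptorEventDataFixPart above, field for field.
type fixPart struct {
	CollectionID    int64
	PartitionID     int64
	SegmentID       int64
	FieldID         int64
	Nullable        bool // the new one-byte flag
	StartTimestamp  uint64
	EndTimestamp    uint64
	PayloadDataType int32
}

func main() {
	var buf bytes.Buffer
	// binary.Write lays the struct out in declaration order; a bool takes one byte.
	_ = binary.Write(&buf, binary.LittleEndian, fixPart{})
	fmt.Println(buf.Len()) // 4*8 + 1 + 2*8 + 4 = 53
}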


@ -85,7 +85,7 @@ func (reader *EventReader) Close() {
}
}
func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer) (*EventReader, error) {
func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer, nullable bool) (*EventReader, error) {
reader := &EventReader{
eventHeader: eventHeader{
baseEventHeader{},
@ -103,7 +103,7 @@ func newEventReader(datatype schemapb.DataType, buffer *bytes.Buffer) (*EventRea
next := int(reader.EventLength - reader.eventHeader.GetMemoryUsageInBytes() - reader.GetEventDataFixPartSize())
payloadBuffer := buffer.Next(next)
payloadReader, err := NewPayloadReader(datatype, payloadBuffer)
payloadReader, err := NewPayloadReader(datatype, payloadBuffer, nullable)
if err != nil {
return nil, err
}
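On the read path the descriptor's Nullable flag is handed from the binlog reader through newEventReader into NewPayloadReader, and every Get*FromPayload now returns a parallel valid slice next to the values; a nil slice, as asserted throughout the tests above, means the payload was written without null support and every row is valid. A small consumer-side sketch, with the function name, the pointer representation and the import path as illustrative assumptions:

package example

import "github.com/milvus-io/milvus/internal/storage"

// collectInt64 turns an int64 payload plus its valid slice back into nil-able rows.
func collectInt64(ev *storage.EventReader) ([]*int64, error) {
	vals, valids, err := ev.GetInt64FromPayload()
	if err != nil {
		return nil, err
	}
	out := make([]*int64, len(vals))
	for i := range vals {
		if valids != nil && !valids[i] {
			continue // null row: leave nil, the stored value is only a placeholder
		}
		v := vals[i]
		out[i] = &v
	}
	return out, nil
}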


@ -89,17 +89,25 @@ func TestDescriptorEvent(t *testing.T) {
int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID)))
assert.Equal(t, fieldID, int64(-1))
startTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+
nullable := UnsafeReadBool(buffer, binary.Size(eventHeader{})+
int(unsafe.Sizeof(collID))+
int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))+
int(unsafe.Sizeof(fieldID)))
assert.Equal(t, nullable, false)
startTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+
int(unsafe.Sizeof(collID))+
int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))+
int(unsafe.Sizeof(fieldID))+
int(unsafe.Sizeof(nullable)))
assert.Equal(t, startTs, int64(0))
endTs := UnsafeReadInt64(buffer, binary.Size(eventHeader{})+
int(unsafe.Sizeof(collID))+
int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))+
int(unsafe.Sizeof(fieldID))+
int(unsafe.Sizeof(nullable))+
int(unsafe.Sizeof(startTs)))
assert.Equal(t, endTs, int64(0))
colType := UnsafeReadInt32(buffer, binary.Size(eventHeader{})+
@ -107,6 +115,7 @@ func TestDescriptorEvent(t *testing.T) {
int(unsafe.Sizeof(partID))+
int(unsafe.Sizeof(segID))+
int(unsafe.Sizeof(fieldID))+
int(unsafe.Sizeof(nullable))+
int(unsafe.Sizeof(startTs))+
int(unsafe.Sizeof(endTs)))
assert.Equal(t, colType, int32(-1))
@ -116,6 +125,7 @@ func TestDescriptorEvent(t *testing.T) {
int(unsafe.Sizeof(partID)) +
int(unsafe.Sizeof(segID)) +
int(unsafe.Sizeof(fieldID)) +
int(unsafe.Sizeof(nullable)) +
int(unsafe.Sizeof(startTs)) +
int(unsafe.Sizeof(endTs)) +
int(unsafe.Sizeof(colType))
@ -161,177 +171,178 @@ func TestInsertEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(dt, pBuf)
pR, err := NewPayloadReader(dt, pBuf, false)
assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload()
values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, values, ev)
pR.Close()
r, err := newEventReader(dt, bytes.NewBuffer(wBuf))
r, err := newEventReader(dt, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload()
payload, nulls, _, err := r.GetDataFromPayload()
assert.NoError(t, err)
assert.Nil(t, nulls)
assert.Equal(t, payload, ev)
r.Close()
}
t.Run("insert_bool", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Bool)
w, err := newInsertEventWriter(schemapb.DataType_Bool, false)
assert.NoError(t, err)
insertT(t, schemapb.DataType_Bool, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]bool{true, false, true})
return w.AddDataToPayload([]bool{true, false, true}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]bool{false, true, false})
return w.AddDataToPayload([]bool{false, true, false}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5})
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
},
[]bool{true, false, true, false, true, false})
})
t.Run("insert_int8", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int8)
w, err := newInsertEventWriter(schemapb.DataType_Int8, false)
assert.NoError(t, err)
insertT(t, schemapb.DataType_Int8, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int8{1, 2, 3})
return w.AddDataToPayload([]int8{1, 2, 3}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int8{4, 5, 6})
return w.AddDataToPayload([]int8{4, 5, 6}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5})
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
},
[]int8{1, 2, 3, 4, 5, 6})
})
t.Run("insert_int16", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int16)
w, err := newInsertEventWriter(schemapb.DataType_Int16, false)
assert.NoError(t, err)
insertT(t, schemapb.DataType_Int16, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int16{1, 2, 3})
return w.AddDataToPayload([]int16{1, 2, 3}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int16{4, 5, 6})
return w.AddDataToPayload([]int16{4, 5, 6}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5})
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
},
[]int16{1, 2, 3, 4, 5, 6})
})
t.Run("insert_int32", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int32)
w, err := newInsertEventWriter(schemapb.DataType_Int32, false)
assert.NoError(t, err)
insertT(t, schemapb.DataType_Int32, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int32{1, 2, 3})
return w.AddDataToPayload([]int32{1, 2, 3}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int32{4, 5, 6})
return w.AddDataToPayload([]int32{4, 5, 6}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5})
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
},
[]int32{1, 2, 3, 4, 5, 6})
})
t.Run("insert_int64", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Int64)
w, err := newInsertEventWriter(schemapb.DataType_Int64, false)
assert.NoError(t, err)
insertT(t, schemapb.DataType_Int64, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int64{1, 2, 3})
return w.AddDataToPayload([]int64{1, 2, 3}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int64{4, 5, 6})
return w.AddDataToPayload([]int64{4, 5, 6}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5})
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
},
[]int64{1, 2, 3, 4, 5, 6})
})
t.Run("insert_float32", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Float)
w, err := newInsertEventWriter(schemapb.DataType_Float, false)
assert.NoError(t, err)
insertT(t, schemapb.DataType_Float, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]float32{1, 2, 3})
return w.AddDataToPayload([]float32{1, 2, 3}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]float32{4, 5, 6})
return w.AddDataToPayload([]float32{4, 5, 6}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5})
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
},
[]float32{1, 2, 3, 4, 5, 6})
})
t.Run("insert_float64", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_Double)
w, err := newInsertEventWriter(schemapb.DataType_Double, false)
assert.NoError(t, err)
insertT(t, schemapb.DataType_Double, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]float64{1, 2, 3})
return w.AddDataToPayload([]float64{1, 2, 3}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]float64{4, 5, 6})
return w.AddDataToPayload([]float64{4, 5, 6}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5})
return w.AddDataToPayload([]int{1, 2, 3, 4, 5}, nil)
},
[]float64{1, 2, 3, 4, 5, 6})
})
t.Run("insert_binary_vector", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_BinaryVector, 16)
w, err := newInsertEventWriter(schemapb.DataType_BinaryVector, false, 16)
assert.NoError(t, err)
insertT(t, schemapb.DataType_BinaryVector, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]byte{1, 2, 3, 4}, 16)
return w.AddDataToPayload([]byte{1, 2, 3, 4}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]byte{5, 6, 7, 8}, 16)
return w.AddDataToPayload([]byte{5, 6, 7, 8}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, 16)
return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, nil)
},
[]byte{1, 2, 3, 4, 5, 6, 7, 8})
})
t.Run("insert_float_vector", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_FloatVector, 2)
w, err := newInsertEventWriter(schemapb.DataType_FloatVector, false, 2)
assert.NoError(t, err)
insertT(t, schemapb.DataType_FloatVector, w,
func(w *insertEventWriter) error {
return w.AddDataToPayload([]float32{1, 2, 3, 4}, 2)
return w.AddDataToPayload([]float32{1, 2, 3, 4}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]float32{5, 6, 7, 8}, 2)
return w.AddDataToPayload([]float32{5, 6, 7, 8}, nil)
},
func(w *insertEventWriter) error {
return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, 2)
return w.AddDataToPayload([]int{1, 2, 3, 4, 5, 6}, nil)
},
[]float32{1, 2, 3, 4, 5, 6, 7, 8})
})
t.Run("insert_string", func(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_String)
w, err := newInsertEventWriter(schemapb.DataType_String, false)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234")
err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err)
err = w.AddOneStringToPayload("567890")
err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg")
err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3})
err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -349,20 +360,20 @@ func TestInsertEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err)
s, err := pR.GetStringFromPayload()
s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
assert.Equal(t, s[2], "abcdefg")
pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
s, err = pR.GetStringFromPayload()
s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -379,13 +390,13 @@ func TestDeleteEvent(t *testing.T) {
w, err := newDeleteEventWriter(schemapb.DataType_String)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234")
err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err)
err = w.AddOneStringToPayload("567890")
err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg")
err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3})
err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -403,10 +414,10 @@ func TestDeleteEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err)
s, err := pR.GetStringFromPayload()
s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -414,10 +425,10 @@ func TestDeleteEvent(t *testing.T) {
pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
s, err = pR.GetStringFromPayload()
s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -439,11 +450,11 @@ func TestCreateCollectionEvent(t *testing.T) {
w, err := newCreateCollectionEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload([]int64{1, 2, 3})
err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{4, 5, 6})
err = w.AddDataToPayload([]int{4, 5, 6}, nil)
assert.Error(t, err)
err = w.AddDataToPayload([]int64{4, 5, 6})
err = w.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -461,16 +472,16 @@ func TestCreateCollectionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false)
assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload()
values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6})
pR.Close()
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload()
payload, _, _, err := r.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6})
@ -481,13 +492,13 @@ func TestCreateCollectionEvent(t *testing.T) {
w, err := newCreateCollectionEventWriter(schemapb.DataType_String)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234")
err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err)
err = w.AddOneStringToPayload("567890")
err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg")
err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3})
err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -505,10 +516,10 @@ func TestCreateCollectionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err)
s, err := pR.GetStringFromPayload()
s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -516,10 +527,10 @@ func TestCreateCollectionEvent(t *testing.T) {
pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), true)
assert.NoError(t, err)
s, err = pR.GetStringFromPayload()
s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -541,11 +552,11 @@ func TestDropCollectionEvent(t *testing.T) {
w, err := newDropCollectionEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload([]int64{1, 2, 3})
err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{4, 5, 6})
err = w.AddDataToPayload([]int{4, 5, 6}, nil)
assert.Error(t, err)
err = w.AddDataToPayload([]int64{4, 5, 6})
err = w.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -563,16 +574,16 @@ func TestDropCollectionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false)
assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload()
values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6})
pR.Close()
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload()
payload, _, _, err := r.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6})
@ -583,13 +594,13 @@ func TestDropCollectionEvent(t *testing.T) {
w, err := newDropCollectionEventWriter(schemapb.DataType_String)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234")
err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err)
err = w.AddOneStringToPayload("567890")
err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg")
err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3})
err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -607,10 +618,10 @@ func TestDropCollectionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err)
s, err := pR.GetStringFromPayload()
s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -618,10 +629,10 @@ func TestDropCollectionEvent(t *testing.T) {
pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
s, err = r.GetStringFromPayload()
s, _, err = r.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -643,11 +654,11 @@ func TestCreatePartitionEvent(t *testing.T) {
w, err := newCreatePartitionEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload([]int64{1, 2, 3})
err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{4, 5, 6})
err = w.AddDataToPayload([]int{4, 5, 6}, nil)
assert.Error(t, err)
err = w.AddDataToPayload([]int64{4, 5, 6})
err = w.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -665,16 +676,16 @@ func TestCreatePartitionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false)
assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload()
values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6})
pR.Close()
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload()
payload, _, _, err := r.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6})
@ -685,13 +696,13 @@ func TestCreatePartitionEvent(t *testing.T) {
w, err := newCreatePartitionEventWriter(schemapb.DataType_String)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234")
err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err)
err = w.AddOneStringToPayload("567890")
err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg")
err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3})
err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -709,10 +720,10 @@ func TestCreatePartitionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err)
s, err := pR.GetStringFromPayload()
s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -720,10 +731,10 @@ func TestCreatePartitionEvent(t *testing.T) {
pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
s, err = pR.GetStringFromPayload()
s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -745,11 +756,11 @@ func TestDropPartitionEvent(t *testing.T) {
w, err := newDropPartitionEventWriter(schemapb.DataType_Int64)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload([]int64{1, 2, 3})
err = w.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{4, 5, 6})
err = w.AddDataToPayload([]int{4, 5, 6}, nil)
assert.Error(t, err)
err = w.AddDataToPayload([]int64{4, 5, 6})
err = w.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -767,16 +778,16 @@ func TestDropPartitionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(createCollectionEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_Int64, pBuf, false)
assert.NoError(t, err)
values, _, err := pR.GetDataFromPayload()
values, _, _, err := pR.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, values, []int64{1, 2, 3, 4, 5, 6})
pR.Close()
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_Int64, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
payload, _, err := r.GetDataFromPayload()
payload, _, _, err := r.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, payload, []int64{1, 2, 3, 4, 5, 6})
@ -787,13 +798,13 @@ func TestDropPartitionEvent(t *testing.T) {
w, err := newDropPartitionEventWriter(schemapb.DataType_String)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234")
err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err)
err = w.AddOneStringToPayload("567890")
err = w.AddOneStringToPayload("567890", true)
assert.NoError(t, err)
err = w.AddOneStringToPayload("abcdefg")
err = w.AddOneStringToPayload("abcdefg", true)
assert.NoError(t, err)
err = w.AddDataToPayload([]int{1, 2, 3})
err = w.AddDataToPayload([]int{1, 2, 3}, nil)
assert.Error(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -811,10 +822,10 @@ func TestDropPartitionEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(insertEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err)
s, err := pR.GetStringFromPayload()
s, _, err := pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -822,10 +833,10 @@ func TestDropPartitionEvent(t *testing.T) {
pR.Close()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
s, err = pR.GetStringFromPayload()
s, _, err = pR.GetStringFromPayload()
assert.NoError(t, err)
assert.Equal(t, s[0], "1234")
assert.Equal(t, s[1], "567890")
@ -843,7 +854,7 @@ func TestIndexFileEvent(t *testing.T) {
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
payload := funcutil.GenRandomBytes()
err = w.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload))
err = w.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload), true)
assert.NoError(t, err)
err = w.Finish()
@ -862,10 +873,10 @@ func TestIndexFileEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf, false)
assert.NoError(t, err)
assert.Equal(t, pR.numRows, int64(1))
value, err := pR.GetStringFromPayload()
value, _, err := pR.GetStringFromPayload()
assert.Equal(t, len(value), 1)
@ -880,7 +891,7 @@ func TestIndexFileEvent(t *testing.T) {
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
payload := funcutil.GenRandomBytes()
err = w.AddByteToPayload(payload)
err = w.AddByteToPayload(payload, nil)
assert.NoError(t, err)
err = w.Finish()
@ -899,10 +910,10 @@ func TestIndexFileEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf, false)
assert.Equal(t, pR.numRows, int64(len(payload)))
assert.NoError(t, err)
value, err := pR.GetByteFromPayload()
value, _, err := pR.GetByteFromPayload()
assert.NoError(t, err)
assert.Equal(t, payload, value)
pR.Close()
@ -914,7 +925,7 @@ func TestIndexFileEvent(t *testing.T) {
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
payload := funcutil.GenRandomBytesWithLength(1000)
err = w.AddByteToPayload(payload)
err = w.AddByteToPayload(payload, nil)
assert.NoError(t, err)
err = w.Finish()
@ -933,10 +944,10 @@ func TestIndexFileEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf)
pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf, false)
assert.Equal(t, pR.numRows, int64(len(payload)))
assert.NoError(t, err)
value, err := pR.GetByteFromPayload()
value, _, err := pR.GetByteFromPayload()
assert.NoError(t, err)
assert.Equal(t, payload, value)
pR.Close()
@ -1044,7 +1055,7 @@ func TestReadFixPartError(t *testing.T) {
func TestEventReaderError(t *testing.T) {
buf := new(bytes.Buffer)
r, err := newEventReader(schemapb.DataType_Int64, buf)
r, err := newEventReader(schemapb.DataType_Int64, buf, false)
assert.Nil(t, r)
assert.Error(t, err)
@ -1052,7 +1063,7 @@ func TestEventReaderError(t *testing.T) {
err = header.Write(buf)
assert.NoError(t, err)
r, err = newEventReader(schemapb.DataType_Int64, buf)
r, err = newEventReader(schemapb.DataType_Int64, buf, false)
assert.Nil(t, r)
assert.Error(t, err)
@ -1061,7 +1072,7 @@ func TestEventReaderError(t *testing.T) {
err = header.Write(buf)
assert.NoError(t, err)
r, err = newEventReader(schemapb.DataType_Int64, buf)
r, err = newEventReader(schemapb.DataType_Int64, buf, false)
assert.Nil(t, r)
assert.Error(t, err)
@ -1078,16 +1089,16 @@ func TestEventReaderError(t *testing.T) {
err = binary.Write(buf, common.Endian, insertData)
assert.NoError(t, err)
r, err = newEventReader(schemapb.DataType_Int64, buf)
r, err = newEventReader(schemapb.DataType_Int64, buf, false)
assert.Nil(t, r)
assert.Error(t, err)
}
func TestEventClose(t *testing.T) {
w, err := newInsertEventWriter(schemapb.DataType_String)
w, err := newInsertEventWriter(schemapb.DataType_String, false)
assert.NoError(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
err = w.AddDataToPayload("1234")
err = w.AddDataToPayload("1234", nil)
assert.NoError(t, err)
err = w.Finish()
assert.NoError(t, err)
@ -1098,7 +1109,7 @@ func TestEventClose(t *testing.T) {
w.Close()
wBuf := buf.Bytes()
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf))
r, err := newEventReader(schemapb.DataType_String, bytes.NewBuffer(wBuf), false)
assert.NoError(t, err)
r.Close()


@ -212,16 +212,16 @@ func newDescriptorEvent() *descriptorEvent {
}
}
func newInsertEventWriter(dataType schemapb.DataType, dim ...int) (*insertEventWriter, error) {
func newInsertEventWriter(dataType schemapb.DataType, nullable bool, dim ...int) (*insertEventWriter, error) {
var payloadWriter PayloadWriterInterface
var err error
if typeutil.IsVectorType(dataType) && !typeutil.IsSparseFloatVectorType(dataType) {
if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers")
}
payloadWriter, err = NewPayloadWriter(dataType, dim[0])
payloadWriter, err = NewPayloadWriter(dataType, nullable, dim[0])
} else {
payloadWriter, err = NewPayloadWriter(dataType)
payloadWriter, err = NewPayloadWriter(dataType, nullable)
}
if err != nil {
return nil, err
@ -244,7 +244,7 @@ func newInsertEventWriter(dataType schemapb.DataType, dim ...int) (*insertEventW
}
func newDeleteEventWriter(dataType schemapb.DataType) (*deleteEventWriter, error) {
payloadWriter, err := NewPayloadWriter(dataType)
payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil {
return nil, err
}
@ -270,7 +270,7 @@ func newCreateCollectionEventWriter(dataType schemapb.DataType) (*createCollecti
return nil, errors.New("incorrect data type")
}
payloadWriter, err := NewPayloadWriter(dataType)
payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil {
return nil, err
}
@ -296,7 +296,7 @@ func newDropCollectionEventWriter(dataType schemapb.DataType) (*dropCollectionEv
return nil, errors.New("incorrect data type")
}
payloadWriter, err := NewPayloadWriter(dataType)
payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil {
return nil, err
}
@ -322,7 +322,7 @@ func newCreatePartitionEventWriter(dataType schemapb.DataType) (*createPartition
return nil, errors.New("incorrect data type")
}
payloadWriter, err := NewPayloadWriter(dataType)
payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil {
return nil, err
}
@ -348,7 +348,7 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven
return nil, errors.New("incorrect data type")
}
payloadWriter, err := NewPayloadWriter(dataType)
payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil {
return nil, err
}
@ -370,7 +370,7 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven
}
func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) {
payloadWriter, err := NewPayloadWriter(dataType)
payloadWriter, err := NewPayloadWriter(dataType, false)
if err != nil {
return nil, err
}
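
For readers skimming the new writer signatures above, here is a minimal usage sketch, not part of this diff (error handling written as if inside a helper returning error): nullable is now fixed at construction time, and the scalar Add* calls take a per-call valids slice, which is nil for a non-nullable column.

w, err := newInsertEventWriter(schemapb.DataType_Int64, false)
if err != nil {
	return err
}
defer w.Close()
// nil valids: the column is not nullable
if err := w.AddDataToPayload([]int64{1, 2, 3}, nil); err != nil {
	return err
}
// for a nullable column the caller would presumably pass a per-row slice
// such as []bool{true, false, true} instead of nil
return w.Finish()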


@ -59,17 +59,17 @@ func TestSizeofStruct(t *testing.T) {
}
func TestEventWriter(t *testing.T) {
insertEvent, err := newInsertEventWriter(schemapb.DataType_Int32)
insertEvent, err := newInsertEventWriter(schemapb.DataType_Int32, false)
assert.NoError(t, err)
insertEvent.Close()
insertEvent, err = newInsertEventWriter(schemapb.DataType_Int32)
insertEvent, err = newInsertEventWriter(schemapb.DataType_Int32, false)
assert.NoError(t, err)
defer insertEvent.Close()
err = insertEvent.AddInt64ToPayload([]int64{1, 1})
err = insertEvent.AddInt64ToPayload([]int64{1, 1}, nil)
assert.Error(t, err)
err = insertEvent.AddInt32ToPayload([]int32{1, 2, 3})
err = insertEvent.AddInt32ToPayload([]int32{1, 2, 3}, nil)
assert.NoError(t, err)
nums, err := insertEvent.GetPayloadLengthFromWriter()
assert.NoError(t, err)
@ -79,7 +79,7 @@ func TestEventWriter(t *testing.T) {
length, err := insertEvent.GetMemoryUsageInBytes()
assert.NoError(t, err)
assert.EqualValues(t, length, insertEvent.EventLength)
err = insertEvent.AddInt32ToPayload([]int32{1})
err = insertEvent.AddInt32ToPayload([]int32{1}, nil)
assert.Error(t, err)
buffer := new(bytes.Buffer)
insertEvent.SetEventTimestamp(100, 200)


@ -59,7 +59,7 @@ func (codec *IndexFileBinlogCodec) serializeImpl(
}
defer eventWriter.Close()
err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value))
err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value), true)
if err != nil {
return nil, err
}
@ -221,7 +221,8 @@ func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
switch dataType {
// just for backward compatibility
case schemapb.DataType_Int8:
content, err := eventReader.GetByteFromPayload()
// todo(smellthemoon): valid_data may need to be checked when creating index
content, _, err := eventReader.GetByteFromPayload()
if err != nil {
log.Warn("failed to get byte from payload",
zap.Error(err))
@ -239,7 +240,7 @@ func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
}
case schemapb.DataType_String:
content, err := eventReader.GetStringFromPayload()
content, _, err := eventReader.GetStringFromPayload()
if err != nil {
log.Warn("failed to get string from payload", zap.Error(err))
eventReader.Close()


@ -149,6 +149,7 @@ type FieldData interface {
AppendRow(row interface{}) error
AppendRows(rows interface{}) error
GetDataType() schemapb.DataType
GetNullable() bool
}
func NewFieldData(dataType schemapb.DataType, fieldSchema *schemapb.FieldSchema, cap int) (FieldData, error) {
@ -193,88 +194,142 @@ func NewFieldData(dataType schemapb.DataType, fieldSchema *schemapb.FieldSchema,
case schemapb.DataType_SparseFloatVector:
return &SparseFloatVectorFieldData{}, nil
case schemapb.DataType_Bool:
return &BoolFieldData{
data := &BoolFieldData{
Data: make([]bool, 0, cap),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Int8:
return &Int8FieldData{
data := &Int8FieldData{
Data: make([]int8, 0, cap),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Int16:
return &Int16FieldData{
data := &Int16FieldData{
Data: make([]int16, 0, cap),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Int32:
return &Int32FieldData{
data := &Int32FieldData{
Data: make([]int32, 0, cap),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Int64:
return &Int64FieldData{
data := &Int64FieldData{
Data: make([]int64, 0, cap),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Float:
return &FloatFieldData{
data := &FloatFieldData{
Data: make([]float32, 0, cap),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Double:
return &DoubleFieldData{
data := &DoubleFieldData{
Data: make([]float64, 0, cap),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_JSON:
return &JSONFieldData{
data := &JSONFieldData{
Data: make([][]byte, 0, cap),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_Array:
return &ArrayFieldData{
data := &ArrayFieldData{
Data: make([]*schemapb.ScalarField, 0, cap),
ElementType: fieldSchema.GetElementType(),
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
case schemapb.DataType_String, schemapb.DataType_VarChar:
return &StringFieldData{
data := &StringFieldData{
Data: make([]string, 0, cap),
DataType: dataType,
}, nil
}
if fieldSchema.GetNullable() {
data.ValidData = make([]bool, 0, cap)
}
return data, nil
default:
return nil, fmt.Errorf("Unexpected schema data type: %d", dataType)
}
}
type BoolFieldData struct {
Data []bool
Data []bool
ValidData []bool
}
type Int8FieldData struct {
Data []int8
Data []int8
ValidData []bool
}
type Int16FieldData struct {
Data []int16
Data []int16
ValidData []bool
}
type Int32FieldData struct {
Data []int32
Data []int32
ValidData []bool
}
type Int64FieldData struct {
Data []int64
Data []int64
ValidData []bool
}
type FloatFieldData struct {
Data []float32
Data []float32
ValidData []bool
}
type DoubleFieldData struct {
Data []float64
Data []float64
ValidData []bool
}
type StringFieldData struct {
Data []string
DataType schemapb.DataType
Data []string
DataType schemapb.DataType
ValidData []bool
}
type ArrayFieldData struct {
ElementType schemapb.DataType
Data []*schemapb.ScalarField
ValidData []bool
}
type JSONFieldData struct {
Data [][]byte
Data [][]byte
ValidData []bool
}
type BinaryVectorFieldData struct {
Data []byte
@ -671,13 +726,33 @@ func (data *SparseFloatVectorFieldData) AppendRows(rows interface{}) error {
}
// GetMemorySize implements FieldData.GetMemorySize
func (data *BoolFieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *Int8FieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *Int16FieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *Int32FieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *Int64FieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *FloatFieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *DoubleFieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *BoolFieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *Int8FieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *Int16FieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *Int32FieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *Int64FieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *FloatFieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *DoubleFieldData) GetMemorySize() int {
return binary.Size(data.Data) + binary.Size(data.ValidData)
}
func (data *BinaryVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 }
func (data *FloatVectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 }
func (data *Float16VectorFieldData) GetMemorySize() int { return binary.Size(data.Data) + 4 }
@ -802,3 +877,63 @@ func (data *ArrayFieldData) GetRowSize(i int) int {
func (data *SparseFloatVectorFieldData) GetRowSize(i int) int {
return len(data.Contents[i])
}
func (data *BoolFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *Int8FieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *Int16FieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *Int32FieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *Int64FieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *FloatFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *DoubleFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *BFloat16VectorFieldData) GetNullable() bool {
return false
}
func (data *BinaryVectorFieldData) GetNullable() bool {
return false
}
func (data *FloatVectorFieldData) GetNullable() bool {
return false
}
func (data *SparseFloatVectorFieldData) GetNullable() bool {
return false
}
func (data *Float16VectorFieldData) GetNullable() bool {
return false
}
func (data *StringFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *ArrayFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
func (data *JSONFieldData) GetNullable() bool {
return len(data.ValidData) != 0
}
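
A small sketch (not part of the diff) of how GetNullable is driven purely by whether ValidData has been filled; note that NewFieldData above only reserves capacity, so a freshly constructed nullable field reports false until valid flags are appended:

fd := &Int64FieldData{
	Data:      []int64{1, 2, 3},
	ValidData: []bool{true, false, true}, // row 1 holds a null
}
_ = fd.GetNullable() // true: ValidData has been filled
plain := &Int64FieldData{Data: []int64{1, 2, 3}}
_ = plain.GetNullable() // false: ValidData was never populated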


@ -41,6 +41,39 @@ func (s *InsertDataSuite) TestInsertData() {
s.Nil(idata)
})
s.Run("nullable field schema", func() {
tests := []struct {
description string
dataType schemapb.DataType
}{
{"nullable bool field", schemapb.DataType_Bool},
{"nullable int8 field", schemapb.DataType_Int8},
{"nullable int16 field", schemapb.DataType_Int16},
{"nullable int32 field", schemapb.DataType_Int32},
{"nullable int64 field", schemapb.DataType_Int64},
{"nullable float field", schemapb.DataType_Float},
{"nullable double field", schemapb.DataType_Double},
{"nullable json field", schemapb.DataType_JSON},
{"nullable array field", schemapb.DataType_Array},
{"nullable string/varchar field", schemapb.DataType_String},
}
for _, test := range tests {
s.Run(test.description, func() {
schema := &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
DataType: test.dataType,
Nullable: true,
},
},
}
_, err := NewInsertData(schema)
s.Nil(err)
})
}
})
s.Run("invalid schema", func() {
tests := []struct {
description string
@ -183,6 +216,14 @@ func (s *InsertDataSuite) TestGetDataType() {
}
}
func (s *InsertDataSuite) TestGetNullable() {
for _, field := range s.schema.GetFields() {
fieldData, ok := s.iDataOneRow.Data[field.GetFieldID()]
s.True(ok)
s.Equal(field.GetNullable(), fieldData.GetNullable())
}
}
func (s *InsertDataSuite) SetupTest() {
var err error
s.iDataEmpty, err = NewInsertData(s.schema)


@ -26,18 +26,18 @@ import (
// PayloadWriterInterface abstracts PayloadWriter
type PayloadWriterInterface interface {
AddDataToPayload(msgs any, dim ...int) error
AddBoolToPayload(msgs []bool) error
AddByteToPayload(msgs []byte) error
AddInt8ToPayload(msgs []int8) error
AddInt16ToPayload(msgs []int16) error
AddInt32ToPayload(msgs []int32) error
AddInt64ToPayload(msgs []int64) error
AddFloatToPayload(msgs []float32) error
AddDoubleToPayload(msgs []float64) error
AddOneStringToPayload(msgs string) error
AddOneArrayToPayload(msg *schemapb.ScalarField) error
AddOneJSONToPayload(msg []byte) error
AddDataToPayload(msgs any, valids []bool) error
AddBoolToPayload(msgs []bool, valids []bool) error
AddByteToPayload(msgs []byte, valids []bool) error
AddInt8ToPayload(msgs []int8, valids []bool) error
AddInt16ToPayload(msgs []int16, valids []bool) error
AddInt32ToPayload(msgs []int32, valids []bool) error
AddInt64ToPayload(msgs []int64, valids []bool) error
AddFloatToPayload(msgs []float32, valids []bool) error
AddDoubleToPayload(msgs []float64, valids []bool) error
AddOneStringToPayload(msgs string, isValid bool) error
AddOneArrayToPayload(msg *schemapb.ScalarField, isValid bool) error
AddOneJSONToPayload(msg []byte, isValid bool) error
AddBinaryVectorToPayload(binVec []byte, dim int) error
AddFloatVectorToPayload(binVec []float32, dim int) error
AddFloat16VectorToPayload(binVec []byte, dim int) error
@ -53,18 +53,18 @@ type PayloadWriterInterface interface {
// PayloadReaderInterface abstracts PayloadReader
type PayloadReaderInterface interface {
GetDataFromPayload() (any, int, error)
GetBoolFromPayload() ([]bool, error)
GetByteFromPayload() ([]byte, error)
GetInt8FromPayload() ([]int8, error)
GetInt16FromPayload() ([]int16, error)
GetInt32FromPayload() ([]int32, error)
GetInt64FromPayload() ([]int64, error)
GetFloatFromPayload() ([]float32, error)
GetDoubleFromPayload() ([]float64, error)
GetStringFromPayload() ([]string, error)
GetArrayFromPayload() ([]*schemapb.ScalarField, error)
GetJSONFromPayload() ([][]byte, error)
GetDataFromPayload() (any, []bool, int, error)
GetBoolFromPayload() ([]bool, []bool, error)
GetByteFromPayload() ([]byte, []bool, error)
GetInt8FromPayload() ([]int8, []bool, error)
GetInt16FromPayload() ([]int16, []bool, error)
GetInt32FromPayload() ([]int32, []bool, error)
GetInt64FromPayload() ([]int64, []bool, error)
GetFloatFromPayload() ([]float32, []bool, error)
GetDoubleFromPayload() ([]float64, []bool, error)
GetStringFromPayload() ([]string, []bool, error)
GetArrayFromPayload() ([]*schemapb.ScalarField, []bool, error)
GetJSONFromPayload() ([][]byte, []bool, error)
GetBinaryVectorFromPayload() ([]byte, int, error)
GetFloat16VectorFromPayload() ([]byte, int, error)
GetBFloat16VectorFromPayload() ([]byte, int, error)
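
On the read side, the widened signatures above mean every scalar getter now returns a parallel validData slice. A minimal sketch, not part of this diff (buf is assumed to hold a serialized nullable Int64 payload, inside a helper returning error):

pr, err := NewPayloadReader(schemapb.DataType_Int64, buf, true)
if err != nil {
	return err
}
defer pr.Close()
values, validData, err := pr.GetInt64FromPayload()
if err != nil {
	return err
}
for i, v := range values {
	if validData == nil || validData[i] {
		// v holds a real value for row i; for a non-nullable payload validData is nil
		_ = v
	} // otherwise row i is null
}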


@ -4,29 +4,35 @@ import (
"bytes"
"context"
"fmt"
"time"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/apache/arrow/go/v12/parquet"
"github.com/apache/arrow/go/v12/parquet/file"
"github.com/apache/arrow/go/v12/parquet/pqarrow"
"github.com/cockroachdb/errors"
"github.com/golang/protobuf/proto"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
// PayloadReader reads data from payload
type PayloadReader struct {
reader *file.Reader
colType schemapb.DataType
numRows int64
reader *file.Reader
colType schemapb.DataType
numRows int64
nullable bool
}
var _ PayloadReaderInterface = (*PayloadReader)(nil)
func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, error) {
func NewPayloadReader(colType schemapb.DataType, buf []byte, nullable bool) (*PayloadReader, error) {
if len(buf) == 0 {
return nil, errors.New("create Payload reader failed, buffer is empty")
}
@ -34,59 +40,66 @@ func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, er
if err != nil {
return nil, err
}
return &PayloadReader{reader: parquetReader, colType: colType, numRows: parquetReader.NumRows()}, nil
return &PayloadReader{reader: parquetReader, colType: colType, numRows: parquetReader.NumRows(), nullable: nullable}, nil
}
// GetDataFromPayload returns data, validData and length from payload, returns err if failed
// Return:
//
// `interface{}`: all types.
// `int`: dim, only meaningful to FLOAT/BINARY VECTOR type.
// `error`: error.
func (r *PayloadReader) GetDataFromPayload() (interface{}, int, error) {
// `interface{}`: all types.
// `[]bool`: validData, only meaningful to ScalarField.
// `int`: dim, only meaningful to FLOAT/BINARY VECTOR type.
// `error`: error.
func (r *PayloadReader) GetDataFromPayload() (interface{}, []bool, int, error) {
switch r.colType {
case schemapb.DataType_Bool:
val, err := r.GetBoolFromPayload()
return val, 0, err
val, validData, err := r.GetBoolFromPayload()
return val, validData, 0, err
case schemapb.DataType_Int8:
val, err := r.GetInt8FromPayload()
return val, 0, err
val, validData, err := r.GetInt8FromPayload()
return val, validData, 0, err
case schemapb.DataType_Int16:
val, err := r.GetInt16FromPayload()
return val, 0, err
val, validData, err := r.GetInt16FromPayload()
return val, validData, 0, err
case schemapb.DataType_Int32:
val, err := r.GetInt32FromPayload()
return val, 0, err
val, validData, err := r.GetInt32FromPayload()
return val, validData, 0, err
case schemapb.DataType_Int64:
val, err := r.GetInt64FromPayload()
return val, 0, err
val, validData, err := r.GetInt64FromPayload()
return val, validData, 0, err
case schemapb.DataType_Float:
val, err := r.GetFloatFromPayload()
return val, 0, err
val, validData, err := r.GetFloatFromPayload()
return val, validData, 0, err
case schemapb.DataType_Double:
val, err := r.GetDoubleFromPayload()
return val, 0, err
val, validData, err := r.GetDoubleFromPayload()
return val, validData, 0, err
case schemapb.DataType_BinaryVector:
return r.GetBinaryVectorFromPayload()
val, dim, err := r.GetBinaryVectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_FloatVector:
return r.GetFloatVectorFromPayload()
val, dim, err := r.GetFloatVectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_Float16Vector:
return r.GetFloat16VectorFromPayload()
val, dim, err := r.GetFloat16VectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_BFloat16Vector:
return r.GetBFloat16VectorFromPayload()
val, dim, err := r.GetBFloat16VectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_SparseFloatVector:
return r.GetSparseFloatVectorFromPayload()
val, dim, err := r.GetSparseFloatVectorFromPayload()
return val, nil, dim, err
case schemapb.DataType_String, schemapb.DataType_VarChar:
val, err := r.GetStringFromPayload()
return val, 0, err
val, validData, err := r.GetStringFromPayload()
return val, validData, 0, err
case schemapb.DataType_Array:
val, err := r.GetArrayFromPayload()
return val, 0, err
val, validData, err := r.GetArrayFromPayload()
return val, validData, 0, err
case schemapb.DataType_JSON:
val, err := r.GetJSONFromPayload()
return val, 0, err
val, validData, err := r.GetJSONFromPayload()
return val, validData, 0, err
default:
return nil, 0, errors.New("unknown type")
return nil, nil, 0, merr.WrapErrParameterInvalidMsg("unknown type")
}
}
@ -96,169 +109,327 @@ func (r *PayloadReader) ReleasePayloadReader() error {
}
// GetBoolFromPayload returns bool slice from payload.
func (r *PayloadReader) GetBoolFromPayload() ([]bool, error) {
func (r *PayloadReader) GetBoolFromPayload() ([]bool, []bool, error) {
if r.colType != schemapb.DataType_Bool {
return nil, fmt.Errorf("failed to get bool from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get bool from datatype %v", r.colType.String()))
}
values := make([]bool, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[bool, *array.Boolean](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[bool, *file.BooleanColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, err
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead)
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, nil
return values, nil, nil
}
// GetByteFromPayload returns byte slice from payload
func (r *PayloadReader) GetByteFromPayload() ([]byte, error) {
func (r *PayloadReader) GetByteFromPayload() ([]byte, []bool, error) {
if r.colType != schemapb.DataType_Int8 {
return nil, fmt.Errorf("failed to get byte from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get byte from datatype %v", r.colType.String()))
}
if r.nullable {
values := make([]int32, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int32, *array.Int32](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
ret := make([]byte, r.numRows)
for i := int64(0); i < r.numRows; i++ {
ret[i] = byte(values[i])
}
return ret, validData, nil
}
values := make([]int32, r.numRows)
valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, err
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead)
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
ret := make([]byte, r.numRows)
for i := int64(0); i < r.numRows; i++ {
ret[i] = byte(values[i])
}
return ret, nil
return ret, nil, nil
}
// GetInt8FromPayload returns int8 slice from payload
func (r *PayloadReader) GetInt8FromPayload() ([]int8, error) {
func (r *PayloadReader) GetInt8FromPayload() ([]int8, []bool, error) {
if r.colType != schemapb.DataType_Int8 {
return nil, fmt.Errorf("failed to get int8 from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int8 from datatype %v", r.colType.String()))
}
if r.nullable {
values := make([]int8, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int8, *array.Int8](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
values := make([]int32, r.numRows)
valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, err
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead)
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
ret := make([]int8, r.numRows)
for i := int64(0); i < r.numRows; i++ {
ret[i] = int8(values[i])
}
return ret, nil
return ret, nil, nil
}
func (r *PayloadReader) GetInt16FromPayload() ([]int16, error) {
func (r *PayloadReader) GetInt16FromPayload() ([]int16, []bool, error) {
if r.colType != schemapb.DataType_Int16 {
return nil, fmt.Errorf("failed to get int16 from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int16 from datatype %v", r.colType.String()))
}
if r.nullable {
values := make([]int16, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int16, *array.Int16](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
values := make([]int32, r.numRows)
valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, err
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead)
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
ret := make([]int16, r.numRows)
for i := int64(0); i < r.numRows; i++ {
ret[i] = int16(values[i])
}
return ret, nil
return ret, nil, nil
}
func (r *PayloadReader) GetInt32FromPayload() ([]int32, error) {
func (r *PayloadReader) GetInt32FromPayload() ([]int32, []bool, error) {
if r.colType != schemapb.DataType_Int32 {
return nil, fmt.Errorf("failed to get int32 from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int32 from datatype %v", r.colType.String()))
}
values := make([]int32, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int32, *array.Int32](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, err
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead)
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, nil
return values, nil, nil
}
func (r *PayloadReader) GetInt64FromPayload() ([]int64, error) {
func (r *PayloadReader) GetInt64FromPayload() ([]int64, []bool, error) {
if r.colType != schemapb.DataType_Int64 {
return nil, fmt.Errorf("failed to get int64 from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get int64 from datatype %v", r.colType.String()))
}
values := make([]int64, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[int64, *array.Int64](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[int64, *file.Int64ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, err
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead)
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, nil
return values, nil, nil
}
func (r *PayloadReader) GetFloatFromPayload() ([]float32, error) {
func (r *PayloadReader) GetFloatFromPayload() ([]float32, []bool, error) {
if r.colType != schemapb.DataType_Float {
return nil, fmt.Errorf("failed to get float32 from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get float32 from datatype %v", r.colType.String()))
}
values := make([]float32, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[float32, *array.Float32](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[float32, *file.Float32ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, err
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead)
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, nil
return values, nil, nil
}
func (r *PayloadReader) GetDoubleFromPayload() ([]float64, error) {
func (r *PayloadReader) GetDoubleFromPayload() ([]float64, []bool, error) {
if r.colType != schemapb.DataType_Double {
return nil, fmt.Errorf("failed to get float32 from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get double from datatype %v", r.colType.String()))
}
values := make([]float64, r.numRows)
if r.nullable {
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[float64, *array.Float64](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
valuesRead, err := ReadDataFromAllRowGroups[float64, *file.Float64ColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, err
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, fmt.Errorf("expect %d rows, but got valuesRead = %d", r.numRows, valuesRead)
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, nil
return values, nil, nil
}
func (r *PayloadReader) GetStringFromPayload() ([]string, error) {
func (r *PayloadReader) GetStringFromPayload() ([]string, []bool, error) {
if r.colType != schemapb.DataType_String && r.colType != schemapb.DataType_VarChar {
return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String())
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get string from datatype %v", r.colType.String()))
}
return readByteAndConvert(r, func(bytes parquet.ByteArray) string {
if r.nullable {
values := make([]string, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[string, *array.String](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
return values, validData, nil
}
value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) string {
return bytes.String()
})
if err != nil {
return nil, nil, err
}
return value, nil, nil
}
func (r *PayloadReader) GetArrayFromPayload() ([]*schemapb.ScalarField, []bool, error) {
if r.colType != schemapb.DataType_Array {
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get array from datatype %v", r.colType.String()))
}
if r.nullable {
return readNullableByteAndConvert(r, func(bytes []byte) *schemapb.ScalarField {
v := &schemapb.ScalarField{}
proto.Unmarshal(bytes, v)
return v
})
}
value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) *schemapb.ScalarField {
v := &schemapb.ScalarField{}
proto.Unmarshal(bytes, v)
return v
})
if err != nil {
return nil, nil, err
}
return value, nil, nil
}
func (r *PayloadReader) GetJSONFromPayload() ([][]byte, []bool, error) {
if r.colType != schemapb.DataType_JSON {
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("failed to get json from datatype %v", r.colType.String()))
}
if r.nullable {
return readNullableByteAndConvert(r, func(bytes []byte) []byte {
return bytes
})
}
value, err := readByteAndConvert(r, func(bytes parquet.ByteArray) []byte {
return bytes
})
if err != nil {
return nil, nil, err
}
return value, nil, nil
}
func (r *PayloadReader) GetByteArrayDataSet() (*DataSet[parquet.ByteArray, *file.ByteArrayColumnChunkReader], error) {
@ -282,25 +453,23 @@ func (r *PayloadReader) GetArrowRecordReader() (pqarrow.RecordReader, error) {
return rr, nil
}
func (r *PayloadReader) GetArrayFromPayload() ([]*schemapb.ScalarField, error) {
if r.colType != schemapb.DataType_Array {
return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String())
}
return readByteAndConvert(r, func(bytes parquet.ByteArray) *schemapb.ScalarField {
v := &schemapb.ScalarField{}
proto.Unmarshal(bytes, v)
return v
})
}
func (r *PayloadReader) GetJSONFromPayload() ([][]byte, error) {
if r.colType != schemapb.DataType_JSON {
return nil, fmt.Errorf("failed to get string from datatype %v", r.colType.String())
func readNullableByteAndConvert[T any](r *PayloadReader, convert func([]byte) T) ([]T, []bool, error) {
values := make([][]byte, r.numRows)
validData := make([]bool, r.numRows)
valuesRead, err := ReadData[[]byte, *array.Binary](r.reader, values, validData, r.numRows)
if err != nil {
return nil, nil, err
}
return readByteAndConvert(r, func(bytes parquet.ByteArray) []byte {
return bytes
})
if valuesRead != r.numRows {
return nil, nil, merr.WrapErrParameterInvalid(r.numRows, valuesRead, "valuesRead is not equal to rows")
}
ret := make([]T, r.numRows)
for i := 0; i < int(r.numRows); i++ {
ret[i] = convert(values[i])
}
return ret, validData, nil
}
func readByteAndConvert[T any](r *PayloadReader, convert func(parquet.ByteArray) T) ([]T, error) {
@ -568,3 +737,67 @@ func (s *DataSet[T, E]) NextBatch(batch int64) ([]T, error) {
s.cnt += batch
return result, nil
}
func ReadData[T any, E interface {
Value(int) T
NullBitmapBytes() []byte
}](reader *file.Reader, value []T, validData []bool, numRows int64) (int64, error) {
var offset int
fileReader, err := pqarrow.NewFileReader(reader, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
// defer fileReader.ParquetReader().Close()
if err != nil {
log.Warn("create arrow parquet file reader failed", zap.Error(err))
return -1, err
}
schema, err := fileReader.Schema()
if err != nil {
log.Warn("can't schema from file", zap.Error(err))
return -1, err
}
for i, field := range schema.Fields() {
// Spawn a new context to ignore cancellation from the parent context.
newCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
columnReader, err := fileReader.GetColumn(newCtx, i)
if err != nil {
log.Warn("get column reader failed", zap.String("fieldName", field.Name), zap.Error(err))
return -1, err
}
chunked, err := columnReader.NextBatch(numRows)
if err != nil {
return -1, err
}
for _, chunk := range chunked.Chunks() {
dataNums := chunk.Data().Len()
reader, ok := chunk.(E)
if !ok {
log.Warn("the column data in parquet is not equal to field", zap.String("fieldName", field.Name), zap.String("actual type", chunk.DataType().Name()))
return -1, merr.WrapErrImportFailed(fmt.Sprintf("the column data in parquet is not equal to field: %s, but: %s", field.Name, chunk.DataType().Name()))
}
nullBitset := bytesToBoolArray(dataNums, reader.NullBitmapBytes())
for i := 0; i < dataNums; i++ {
value[offset] = reader.Value(i)
validData[offset] = nullBitset[i]
offset++
}
}
}
return int64(offset), nil
}
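ReadData only requires that each chunk expose positional values plus an LSB-first null bitmap, which is what the generic constraint on E captures. A self-contained sketch of that shape with a toy column type (names here are illustrative, not part of the package):

package main

import "fmt"

// toyColumn mimics the shape ReadData expects from an arrow chunk:
// positional values plus an LSB-first null bitmap (bit set = valid row).
type toyColumn struct {
	vals   []int64
	bitmap []byte
}

func (c toyColumn) Value(i int) int64       { return c.vals[i] }
func (c toyColumn) NullBitmapBytes() []byte { return c.bitmap }

func decode[T any, E interface {
	Value(int) T
	NullBitmapBytes() []byte
}](col E, n int) ([]T, []bool) {
	out := make([]T, n)
	valid := make([]bool, n)
	bm := col.NullBitmapBytes()
	for i := 0; i < n; i++ {
		out[i] = col.Value(i)
		valid[i] = bm[i/8]&(1<<(i%8)) != 0
	}
	return out, valid
}

func main() {
	col := toyColumn{vals: []int64{7, 0, 9}, bitmap: []byte{0b0000_0101}} // row 1 is null
	vals, valid := decode[int64](col, 3)
	fmt.Println(vals, valid) // [7 0 9] [true false true]
}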
// todo(smellthemoon): use byte to store valid_data
func bytesToBoolArray(length int, bytes []byte) []bool {
bools := make([]bool, 0, length)
for i := 0; i < length; i++ {
bit := (bytes[uint(i)/8] & BitMask[byte(i)%8]) != 0
bools = append(bools, bit)
}
return bools
}
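bytesToBoolArray reads the Arrow validity bitmap least-significant-bit first, so byte 0b00000101 with length 3 decodes to [true, false, true]. A quick, standalone check of that arithmetic (illustrative only):

package main

import "fmt"

func main() {
	bitMask := [8]byte{1, 2, 4, 8, 16, 32, 64, 128}
	bytes := []byte{0b0000_0101} // bits 0 and 2 set
	for i := 0; i < 3; i++ {
		fmt.Println(i, (bytes[i/8]&bitMask[i%8]) != 0) // 0 true, 1 false, 2 true
	}
}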
var (
BitMask = [8]byte{1, 2, 4, 8, 16, 32, 64, 128}
FlippedBitMask = [8]byte{254, 253, 251, 247, 239, 223, 191, 127}
)

View File

@ -31,7 +31,7 @@ func (s *ReadDataFromAllRowGroupsSuite) SetupSuite() {
s.size = 1 << 10
data := make([]int8, s.size)
err = ew.AddInt8ToPayload(data)
err = ew.AddInt8ToPayload(data, nil)
s.Require().NoError(err)
ew.SetEventTimestamp(1, 1)

File diff suppressed because it is too large

View File

@ -33,6 +33,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -46,18 +47,29 @@ type NativePayloadWriter struct {
flushedRows int
output *bytes.Buffer
releaseOnce sync.Once
dim int
nullable bool
}
func NewPayloadWriter(colType schemapb.DataType, dim ...int) (PayloadWriterInterface, error) {
func NewPayloadWriter(colType schemapb.DataType, nullable bool, dim ...int) (PayloadWriterInterface, error) {
var arrowType arrow.DataType
var dimension int
// writer for sparse float vector doesn't require dim
if typeutil.IsVectorType(colType) && !typeutil.IsSparseFloatVectorType(colType) {
if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers")
return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers")
}
if nullable {
return nil, merr.WrapErrParameterInvalidMsg("vector type not supprot nullable")
}
arrowType = milvusDataTypeToArrowType(colType, dim[0])
dimension = dim[0]
} else {
if len(dim) != 0 {
return nil, merr.WrapErrParameterInvalidMsg("incorrect input numbers")
}
arrowType = milvusDataTypeToArrowType(colType, 1)
dimension = 1
}
builder := array.NewBuilder(memory.DefaultAllocator, arrowType)
@ -69,117 +81,148 @@ func NewPayloadWriter(colType schemapb.DataType, dim ...int) (PayloadWriterInter
finished: false,
flushedRows: 0,
output: new(bytes.Buffer),
dim: dimension,
nullable: nullable,
}, nil
}
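A minimal usage sketch of the new constructor signature, assuming it runs inside the same storage package as the code above (error handling elided; function name is hypothetical): scalar columns may opt into nullable, vector columns must not.

// Sketch only: exercising the nullable flag of NewPayloadWriter.
func exampleNewWriters() {
	// Nullable scalar column: allowed.
	wScalar, _ := NewPayloadWriter(schemapb.DataType_Int64, true)
	_ = wScalar

	// Vector column: dim is still required and nullable must stay false.
	wVec, _ := NewPayloadWriter(schemapb.DataType_FloatVector, false, 128)
	_ = wVec

	// Nullable vector column: rejected by the constructor.
	_, err := NewPayloadWriter(schemapb.DataType_FloatVector, true, 128)
	_ = err // non-nil
}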
func (w *NativePayloadWriter) AddDataToPayload(data interface{}, dim ...int) error {
switch len(dim) {
case 0:
switch w.dataType {
case schemapb.DataType_Bool:
val, ok := data.([]bool)
if !ok {
return errors.New("incorrect data type")
}
return w.AddBoolToPayload(val)
case schemapb.DataType_Int8:
val, ok := data.([]int8)
if !ok {
return errors.New("incorrect data type")
}
return w.AddInt8ToPayload(val)
case schemapb.DataType_Int16:
val, ok := data.([]int16)
if !ok {
return errors.New("incorrect data type")
}
return w.AddInt16ToPayload(val)
case schemapb.DataType_Int32:
val, ok := data.([]int32)
if !ok {
return errors.New("incorrect data type")
}
return w.AddInt32ToPayload(val)
case schemapb.DataType_Int64:
val, ok := data.([]int64)
if !ok {
return errors.New("incorrect data type")
}
return w.AddInt64ToPayload(val)
case schemapb.DataType_Float:
val, ok := data.([]float32)
if !ok {
return errors.New("incorrect data type")
}
return w.AddFloatToPayload(val)
case schemapb.DataType_Double:
val, ok := data.([]float64)
if !ok {
return errors.New("incorrect data type")
}
return w.AddDoubleToPayload(val)
case schemapb.DataType_String, schemapb.DataType_VarChar:
val, ok := data.(string)
if !ok {
return errors.New("incorrect data type")
}
return w.AddOneStringToPayload(val)
case schemapb.DataType_Array:
val, ok := data.(*schemapb.ScalarField)
if !ok {
return errors.New("incorrect data type")
}
return w.AddOneArrayToPayload(val)
case schemapb.DataType_JSON:
val, ok := data.([]byte)
if !ok {
return errors.New("incorrect data type")
}
return w.AddOneJSONToPayload(val)
default:
return errors.New("incorrect datatype")
func (w *NativePayloadWriter) AddDataToPayload(data interface{}, validData []bool) error {
switch w.dataType {
case schemapb.DataType_Bool:
val, ok := data.([]bool)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
case 1:
switch w.dataType {
case schemapb.DataType_BinaryVector:
val, ok := data.([]byte)
if !ok {
return errors.New("incorrect data type")
}
return w.AddBinaryVectorToPayload(val, dim[0])
case schemapb.DataType_FloatVector:
val, ok := data.([]float32)
if !ok {
return errors.New("incorrect data type")
}
return w.AddFloatVectorToPayload(val, dim[0])
case schemapb.DataType_Float16Vector:
val, ok := data.([]byte)
if !ok {
return errors.New("incorrect data type")
}
return w.AddFloat16VectorToPayload(val, dim[0])
case schemapb.DataType_BFloat16Vector:
val, ok := data.([]byte)
if !ok {
return errors.New("incorrect data type")
}
return w.AddBFloat16VectorToPayload(val, dim[0])
case schemapb.DataType_SparseFloatVector:
val, ok := data.(*SparseFloatVectorFieldData)
if !ok {
return errors.New("incorrect data type")
}
return w.AddSparseFloatVectorToPayload(val)
default:
return errors.New("incorrect datatype")
return w.AddBoolToPayload(val, validData)
case schemapb.DataType_Int8:
val, ok := data.([]int8)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddInt8ToPayload(val, validData)
case schemapb.DataType_Int16:
val, ok := data.([]int16)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddInt16ToPayload(val, validData)
case schemapb.DataType_Int32:
val, ok := data.([]int32)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddInt32ToPayload(val, validData)
case schemapb.DataType_Int64:
val, ok := data.([]int64)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddInt64ToPayload(val, validData)
case schemapb.DataType_Float:
val, ok := data.([]float32)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddFloatToPayload(val, validData)
case schemapb.DataType_Double:
val, ok := data.([]float64)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddDoubleToPayload(val, validData)
case schemapb.DataType_String, schemapb.DataType_VarChar:
val, ok := data.(string)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
isValid := true
if len(validData) > 1 {
return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload")
}
if len(validData) == 0 && w.nullable {
return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true")
}
if len(validData) == 1 {
if !w.nullable {
return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false")
}
isValid = validData[0]
}
return w.AddOneStringToPayload(val, isValid)
case schemapb.DataType_Array:
val, ok := data.(*schemapb.ScalarField)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
isValid := true
if len(validData) > 1 {
return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload")
}
if len(validData) == 0 && w.nullable {
return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true")
}
if len(validData) == 1 {
if !w.nullable {
return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false")
}
isValid = validData[0]
}
return w.AddOneArrayToPayload(val, isValid)
case schemapb.DataType_JSON:
val, ok := data.([]byte)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
isValid := true
if len(validData) > 1 {
return merr.WrapErrParameterInvalidMsg("wrong input length when add data to payload")
}
if len(validData) == 0 && w.nullable {
return merr.WrapErrParameterInvalidMsg("need pass valid_data when nullable==true")
}
if len(validData) == 1 {
if !w.nullable {
return merr.WrapErrParameterInvalidMsg("no need pass valid_data when nullable==false")
}
isValid = validData[0]
}
return w.AddOneJSONToPayload(val, isValid)
case schemapb.DataType_BinaryVector:
val, ok := data.([]byte)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddBinaryVectorToPayload(val, w.dim)
case schemapb.DataType_FloatVector:
val, ok := data.([]float32)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddFloatVectorToPayload(val, w.dim)
case schemapb.DataType_Float16Vector:
val, ok := data.([]byte)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddFloat16VectorToPayload(val, w.dim)
case schemapb.DataType_BFloat16Vector:
val, ok := data.([]byte)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddBFloat16VectorToPayload(val, w.dim)
case schemapb.DataType_SparseFloatVector:
val, ok := data.(*SparseFloatVectorFieldData)
if !ok {
return merr.WrapErrParameterInvalidMsg("incorrect data type")
}
return w.AddSparseFloatVectorToPayload(val)
default:
return errors.New("incorrect input numbers")
return errors.New("unsupported datatype")
}
}
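A short sketch (same package assumed, writer variables hypothetical) of how validData is interpreted by AddDataToPayload: batch scalar types take one flag per row, single-row types (string/array/JSON) take at most one flag, and vector types ignore it.

// Sketch only: each call assumes the writer was built for the matching data
// type and with nullable set; false in validData marks a null row.
func exampleAddDataToPayload(intWriter, strWriter *NativePayloadWriter) error {
	// Batch scalar column (e.g. nullable Int64): one flag per row.
	if err := intWriter.AddDataToPayload([]int64{1, 2, 3}, []bool{true, false, true}); err != nil {
		return err
	}
	// Single-row types (string/array/JSON): at most one flag.
	return strWriter.AddDataToPayload("hello", []bool{true})
}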
func (w *NativePayloadWriter) AddBoolToPayload(data []bool) error {
func (w *NativePayloadWriter) AddBoolToPayload(data []bool, validData []bool) error {
if w.finished {
return errors.New("can't append data to finished bool payload")
}
@ -188,16 +231,26 @@ func (w *NativePayloadWriter) AddBoolToPayload(data []bool) error {
return errors.New("can't add empty msgs into bool payload")
}
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.BooleanBuilder)
if !ok {
return errors.New("failed to cast ArrayBuilder")
}
builder.AppendValues(data, nil)
builder.AppendValues(data, validData)
return nil
}
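Under the hood this relies on Arrow's AppendValues(values, valid) semantics. A standalone sketch using the Arrow Go library directly; the v12 import path is an assumption and may differ from the version actually vendored:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v12/arrow/array"
	"github.com/apache/arrow/go/v12/arrow/memory"
)

func main() {
	b := array.NewBooleanBuilder(memory.DefaultAllocator)
	defer b.Release()

	// valid[i] == false turns row i into a null, mirroring the nullable payload path.
	b.AppendValues([]bool{true, false, true}, []bool{true, false, true})

	arr := b.NewBooleanArray()
	defer arr.Release()
	for i := 0; i < arr.Len(); i++ {
		fmt.Println(i, arr.IsNull(i), arr.Value(i))
	}
}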
func (w *NativePayloadWriter) AddByteToPayload(data []byte) error {
func (w *NativePayloadWriter) AddByteToPayload(data []byte, validData []bool) error {
if w.finished {
return errors.New("can't append data to finished byte payload")
}
@ -206,6 +259,16 @@ func (w *NativePayloadWriter) AddByteToPayload(data []byte) error {
return errors.New("can't add empty msgs into byte payload")
}
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int8Builder)
if !ok {
return errors.New("failed to cast ByteBuilder")
@ -214,12 +277,15 @@ func (w *NativePayloadWriter) AddByteToPayload(data []byte) error {
builder.Reserve(len(data))
for i := range data {
builder.Append(int8(data[i]))
if w.nullable && !validData[i] {
builder.AppendNull()
}
}
return nil
}
func (w *NativePayloadWriter) AddInt8ToPayload(data []int8) error {
func (w *NativePayloadWriter) AddInt8ToPayload(data []int8, validData []bool) error {
if w.finished {
return errors.New("can't append data to finished int8 payload")
}
@ -228,16 +294,26 @@ func (w *NativePayloadWriter) AddInt8ToPayload(data []int8) error {
return errors.New("can't add empty msgs into int8 payload")
}
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int8Builder)
if !ok {
return errors.New("failed to cast Int8Builder")
}
builder.AppendValues(data, nil)
builder.AppendValues(data, validData)
return nil
}
func (w *NativePayloadWriter) AddInt16ToPayload(data []int16) error {
func (w *NativePayloadWriter) AddInt16ToPayload(data []int16, validData []bool) error {
if w.finished {
return errors.New("can't append data to finished int16 payload")
}
@ -246,16 +322,26 @@ func (w *NativePayloadWriter) AddInt16ToPayload(data []int16) error {
return errors.New("can't add empty msgs into int16 payload")
}
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int16Builder)
if !ok {
return errors.New("failed to cast Int16Builder")
}
builder.AppendValues(data, nil)
builder.AppendValues(data, validData)
return nil
}
func (w *NativePayloadWriter) AddInt32ToPayload(data []int32) error {
func (w *NativePayloadWriter) AddInt32ToPayload(data []int32, validData []bool) error {
if w.finished {
return errors.New("can't append data to finished int32 payload")
}
@ -264,16 +350,26 @@ func (w *NativePayloadWriter) AddInt32ToPayload(data []int32) error {
return errors.New("can't add empty msgs into int32 payload")
}
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int32Builder)
if !ok {
return errors.New("failed to cast Int32Builder")
}
builder.AppendValues(data, nil)
builder.AppendValues(data, validData)
return nil
}
func (w *NativePayloadWriter) AddInt64ToPayload(data []int64) error {
func (w *NativePayloadWriter) AddInt64ToPayload(data []int64, validData []bool) error {
if w.finished {
return errors.New("can't append data to finished int64 payload")
}
@ -282,16 +378,26 @@ func (w *NativePayloadWriter) AddInt64ToPayload(data []int64) error {
return errors.New("can't add empty msgs into int64 payload")
}
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Int64Builder)
if !ok {
return errors.New("failed to cast Int64Builder")
}
builder.AppendValues(data, nil)
builder.AppendValues(data, validData)
return nil
}
func (w *NativePayloadWriter) AddFloatToPayload(data []float32) error {
func (w *NativePayloadWriter) AddFloatToPayload(data []float32, validData []bool) error {
if w.finished {
return errors.New("can't append data to finished float payload")
}
@ -300,16 +406,26 @@ func (w *NativePayloadWriter) AddFloatToPayload(data []float32) error {
return errors.New("can't add empty msgs into float payload")
}
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Float32Builder)
if !ok {
return errors.New("failed to cast FloatBuilder")
}
builder.AppendValues(data, nil)
builder.AppendValues(data, validData)
return nil
}
func (w *NativePayloadWriter) AddDoubleToPayload(data []float64) error {
func (w *NativePayloadWriter) AddDoubleToPayload(data []float64, validData []bool) error {
if w.finished {
return errors.New("can't append data to finished double payload")
}
@ -318,35 +434,57 @@ func (w *NativePayloadWriter) AddDoubleToPayload(data []float64) error {
return errors.New("can't add empty msgs into double payload")
}
if !w.nullable && len(validData) != 0 {
msg := fmt.Sprintf("length of validData(%d) must be 0 when not nullable", len(validData))
return merr.WrapErrParameterInvalidMsg(msg)
}
if w.nullable && len(data) != len(validData) {
msg := fmt.Sprintf("length of validData(%d) must equal to data(%d) when nullable", len(validData), len(data))
return merr.WrapErrParameterInvalidMsg(msg)
}
builder, ok := w.builder.(*array.Float64Builder)
if !ok {
return errors.New("failed to cast DoubleBuilder")
}
builder.AppendValues(data, nil)
builder.AppendValues(data, validData)
return nil
}
func (w *NativePayloadWriter) AddOneStringToPayload(data string) error {
func (w *NativePayloadWriter) AddOneStringToPayload(data string, isValid bool) error {
if w.finished {
return errors.New("can't append data to finished string payload")
}
if !w.nullable && !isValid {
return merr.WrapErrParameterInvalidMsg("not support null when nullable is false")
}
builder, ok := w.builder.(*array.StringBuilder)
if !ok {
return errors.New("failed to cast StringBuilder")
}
builder.Append(data)
if !isValid {
builder.AppendNull()
} else {
builder.Append(data)
}
return nil
}
func (w *NativePayloadWriter) AddOneArrayToPayload(data *schemapb.ScalarField) error {
func (w *NativePayloadWriter) AddOneArrayToPayload(data *schemapb.ScalarField, isValid bool) error {
if w.finished {
return errors.New("can't append data to finished array payload")
}
if !w.nullable && !isValid {
return merr.WrapErrParameterInvalidMsg("not support null when nullable is false")
}
bytes, err := proto.Marshal(data)
if err != nil {
return errors.New("Marshal ListValue failed")
@ -357,22 +495,34 @@ func (w *NativePayloadWriter) AddOneArrayToPayload(data *schemapb.ScalarField) e
return errors.New("failed to cast BinaryBuilder")
}
builder.Append(bytes)
if !isValid {
builder.AppendNull()
} else {
builder.Append(bytes)
}
return nil
}
func (w *NativePayloadWriter) AddOneJSONToPayload(data []byte) error {
func (w *NativePayloadWriter) AddOneJSONToPayload(data []byte, isValid bool) error {
if w.finished {
return errors.New("can't append data to finished json payload")
}
if !w.nullable && !isValid {
return merr.WrapErrParameterInvalidMsg("not support null when nullable is false")
}
builder, ok := w.builder.(*array.BinaryBuilder)
if !ok {
return errors.New("failed to cast JsonBuilder")
}
builder.Append(data)
if !isValid {
builder.AppendNull()
} else {
builder.Append(data)
}
return nil
}
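The string/array/JSON paths all reduce to the same builder pattern: Append for a valid row, AppendNull otherwise. A standalone sketch with Arrow's BinaryBuilder (the v12 import path is an assumption):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v12/arrow"
	"github.com/apache/arrow/go/v12/arrow/array"
	"github.com/apache/arrow/go/v12/arrow/memory"
)

func main() {
	b := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary)
	defer b.Release()

	b.Append([]byte(`{"k":"v"}`)) // valid JSON row
	b.AppendNull()                // null JSON row, as the writer does when isValid is false

	arr := b.NewBinaryArray()
	defer arr.Release()
	for i := 0; i < arr.Len(); i++ {
		fmt.Println(i, arr.IsNull(i), string(arr.Value(i)))
	}
}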
@ -507,8 +657,9 @@ func (w *NativePayloadWriter) FinishPayloadWriter() error {
w.finished = true
field := arrow.Field{
Name: "val",
Type: w.arrowType,
Name: "val",
Type: w.arrowType,
Nullable: w.nullable,
}
schema := arrow.NewSchema([]arrow.Field{
field,
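The schema-side counterpart of the change: the single "val" field now carries the writer's nullable flag. A standalone sketch of building such a schema (Arrow Go v12 import path assumed, Int64 chosen arbitrarily):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v12/arrow"
)

func main() {
	field := arrow.Field{
		Name:     "val",
		Type:     arrow.PrimitiveTypes.Int64,
		Nullable: true, // propagated from the writer's nullable flag
	}
	schema := arrow.NewSchema([]arrow.Field{field}, nil)
	fmt.Println(schema.String())
}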

View File

@ -9,241 +9,248 @@ import (
)
func TestPayloadWriter_Failed(t *testing.T) {
t.Run("wrong input", func(t *testing.T) {
_, err := NewPayloadWriter(schemapb.DataType_FloatVector, false)
require.Error(t, err)
_, err = NewPayloadWriter(schemapb.DataType_Bool, false, 1)
require.Error(t, err)
})
t.Run("Test Bool", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool)
w, err := NewPayloadWriter(schemapb.DataType_Bool, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddBoolToPayload([]bool{})
err = w.AddBoolToPayload([]bool{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddBoolToPayload([]bool{false})
err = w.AddBoolToPayload([]bool{false}, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float)
w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddBoolToPayload([]bool{false})
err = w.AddBoolToPayload([]bool{false}, nil)
require.Error(t, err)
})
t.Run("Test Byte", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8)
w, err := NewPayloadWriter(schemapb.DataType_Int8, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddByteToPayload([]byte{})
err = w.AddByteToPayload([]byte{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddByteToPayload([]byte{0})
err = w.AddByteToPayload([]byte{0}, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float)
w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddByteToPayload([]byte{0})
err = w.AddByteToPayload([]byte{0}, nil)
require.Error(t, err)
})
t.Run("Test Int8", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int8)
w, err := NewPayloadWriter(schemapb.DataType_Int8, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddInt8ToPayload([]int8{})
err = w.AddInt8ToPayload([]int8{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddInt8ToPayload([]int8{0})
err = w.AddInt8ToPayload([]int8{0}, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float)
w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddInt8ToPayload([]int8{0})
err = w.AddInt8ToPayload([]int8{0}, nil)
require.Error(t, err)
})
t.Run("Test Int16", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int16)
w, err := NewPayloadWriter(schemapb.DataType_Int16, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddInt16ToPayload([]int16{})
err = w.AddInt16ToPayload([]int16{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddInt16ToPayload([]int16{0})
err = w.AddInt16ToPayload([]int16{0}, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float)
w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddInt16ToPayload([]int16{0})
err = w.AddInt16ToPayload([]int16{0}, nil)
require.Error(t, err)
})
t.Run("Test Int32", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int32)
w, err := NewPayloadWriter(schemapb.DataType_Int32, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddInt32ToPayload([]int32{})
err = w.AddInt32ToPayload([]int32{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddInt32ToPayload([]int32{0})
err = w.AddInt32ToPayload([]int32{0}, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float)
w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddInt32ToPayload([]int32{0})
err = w.AddInt32ToPayload([]int32{0}, nil)
require.Error(t, err)
})
t.Run("Test Int64", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Int64)
w, err := NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddInt64ToPayload([]int64{})
err = w.AddInt64ToPayload([]int64{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddInt64ToPayload([]int64{0})
err = w.AddInt64ToPayload([]int64{0}, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Float)
w, err = NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddInt64ToPayload([]int64{0})
err = w.AddInt64ToPayload([]int64{0}, nil)
require.Error(t, err)
})
t.Run("Test Float", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Float)
w, err := NewPayloadWriter(schemapb.DataType_Float, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddFloatToPayload([]float32{})
err = w.AddFloatToPayload([]float32{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddFloatToPayload([]float32{0})
err = w.AddFloatToPayload([]float32{0}, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddFloatToPayload([]float32{0})
err = w.AddFloatToPayload([]float32{0}, nil)
require.Error(t, err)
})
t.Run("Test Double", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Double)
w, err := NewPayloadWriter(schemapb.DataType_Double, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddDoubleToPayload([]float64{})
err = w.AddDoubleToPayload([]float64{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddDoubleToPayload([]float64{0})
err = w.AddDoubleToPayload([]float64{0}, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddDoubleToPayload([]float64{0})
err = w.AddDoubleToPayload([]float64{0}, nil)
require.Error(t, err)
})
t.Run("Test String", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_String)
w, err := NewPayloadWriter(schemapb.DataType_String, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddOneStringToPayload("test")
err = w.AddOneStringToPayload("test", false)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddOneStringToPayload("test")
err = w.AddOneStringToPayload("test", false)
require.Error(t, err)
})
t.Run("Test Array", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Array)
w, err := NewPayloadWriter(schemapb.DataType_Array, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddOneArrayToPayload(&schemapb.ScalarField{})
err = w.AddOneArrayToPayload(&schemapb.ScalarField{}, false)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddOneArrayToPayload(&schemapb.ScalarField{})
err = w.AddOneArrayToPayload(&schemapb.ScalarField{}, false)
require.Error(t, err)
})
t.Run("Test Json", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_JSON)
w, err := NewPayloadWriter(schemapb.DataType_JSON, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddOneJSONToPayload([]byte{0, 1})
err = w.AddOneJSONToPayload([]byte{0, 1}, false)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddOneJSONToPayload([]byte{0, 1})
err = w.AddOneJSONToPayload([]byte{0, 1}, false)
require.Error(t, err)
})
t.Run("Test BinaryVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, 8)
w, err := NewPayloadWriter(schemapb.DataType_BinaryVector, false, 8)
require.Nil(t, err)
require.NotNil(t, w)
@ -258,7 +265,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
err = w.AddBinaryVectorToPayload(data, 8)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err)
require.NotNil(t, w)
@ -267,7 +274,7 @@ func TestPayloadWriter_Failed(t *testing.T) {
})
t.Run("Test FloatVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_FloatVector, 8)
w, err := NewPayloadWriter(schemapb.DataType_FloatVector, false, 8)
require.Nil(t, err)
require.NotNil(t, w)
@ -276,20 +283,20 @@ func TestPayloadWriter_Failed(t *testing.T) {
data[i] = 1
}
err = w.AddFloatToPayload([]float32{})
err = w.AddFloatToPayload([]float32{}, nil)
require.Error(t, err)
err = w.FinishPayloadWriter()
require.NoError(t, err)
err = w.AddFloatToPayload(data)
err = w.AddFloatToPayload(data, nil)
require.Error(t, err)
w, err = NewPayloadWriter(schemapb.DataType_Int64)
w, err = NewPayloadWriter(schemapb.DataType_Int64, false)
require.Nil(t, err)
require.NotNil(t, w)
err = w.AddFloatToPayload(data)
err = w.AddFloatToPayload(data, nil)
require.Error(t, err)
})
}

View File

@ -224,7 +224,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Println("\tpayload values:")
switch colType {
case schemapb.DataType_Bool:
val, err := reader.GetBoolFromPayload()
val, _, err := reader.GetBoolFromPayload()
if err != nil {
return err
}
@ -232,7 +232,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %v\n", i, v)
}
case schemapb.DataType_Int8:
val, err := reader.GetInt8FromPayload()
val, _, err := reader.GetInt8FromPayload()
if err != nil {
return err
}
@ -240,7 +240,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %d\n", i, v)
}
case schemapb.DataType_Int16:
val, err := reader.GetInt16FromPayload()
val, _, err := reader.GetInt16FromPayload()
if err != nil {
return err
}
@ -248,7 +248,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %d\n", i, v)
}
case schemapb.DataType_Int32:
val, err := reader.GetInt32FromPayload()
val, _, err := reader.GetInt32FromPayload()
if err != nil {
return err
}
@ -256,7 +256,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %d\n", i, v)
}
case schemapb.DataType_Int64:
val, err := reader.GetInt64FromPayload()
val, _, err := reader.GetInt64FromPayload()
if err != nil {
return err
}
@ -264,7 +264,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %d\n", i, v)
}
case schemapb.DataType_Float:
val, err := reader.GetFloatFromPayload()
val, _, err := reader.GetFloatFromPayload()
if err != nil {
return err
}
@ -272,7 +272,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
fmt.Printf("\t\t%d : %f\n", i, v)
}
case schemapb.DataType_Double:
val, err := reader.GetDoubleFromPayload()
val, _, err := reader.GetDoubleFromPayload()
if err != nil {
return err
}
@ -285,7 +285,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
return err
}
val, err := reader.GetStringFromPayload()
val, _, err := reader.GetStringFromPayload()
if err != nil {
return err
}
@ -358,13 +358,16 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
if err != nil {
return err
}
val, err := reader.GetJSONFromPayload()
val, valids, err := reader.GetJSONFromPayload()
if err != nil {
return err
}
for i := 0; i < rows; i++ {
fmt.Printf("\t\t%d : %s\n", i, val[i])
}
for i, v := range valids {
fmt.Printf("\t\t%d : %v\n", i, v)
}
case schemapb.DataType_SparseFloatVector:
sparseData, _, err := reader.GetSparseFloatVectorFromPayload()
if err != nil {
@ -388,7 +391,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r
fmt.Println("\tpayload values:")
switch colType {
case schemapb.DataType_Int64:
val, err := reader.GetInt64FromPayload()
val, _, err := reader.GetInt64FromPayload()
if err != nil {
return err
}
@ -402,7 +405,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r
return err
}
val, err := reader.GetStringFromPayload()
val, _, err := reader.GetStringFromPayload()
if err != nil {
return err
}
@ -448,7 +451,7 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r
func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, dataType schemapb.DataType) error {
if dataType == schemapb.DataType_Int8 {
if key == IndexParamsKey {
content, err := reader.GetByteFromPayload()
content, _, err := reader.GetByteFromPayload()
if err != nil {
return err
}
@ -459,7 +462,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data
}
if key == "SLICE_META" {
content, err := reader.GetByteFromPayload()
content, _, err := reader.GetByteFromPayload()
if err != nil {
return err
}
@ -473,7 +476,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data
}
} else {
if key == IndexParamsKey {
content, err := reader.GetStringFromPayload()
content, _, err := reader.GetStringFromPayload()
if err != nil {
return err
}
@ -484,7 +487,7 @@ func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, data
}
if key == "SLICE_META" {
content, err := reader.GetStringFromPayload()
content, _, err := reader.GetStringFromPayload()
if err != nil {
return err
}

View File

@ -36,27 +36,27 @@ import (
)
func TestPrintBinlogFilesInt64(t *testing.T) {
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40)
w := NewInsertBinlogWriter(schemapb.DataType_Int64, 10, 20, 30, 40, false)
curTS := time.Now().UnixNano() / int64(time.Millisecond)
e1, err := w.NextInsertEventWriter()
e1, err := w.NextInsertEventWriter(false)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int64{1, 2, 3})
err = e1.AddDataToPayload([]int64{1, 2, 3}, nil)
assert.NoError(t, err)
err = e1.AddDataToPayload([]int32{4, 5, 6})
err = e1.AddDataToPayload([]int32{4, 5, 6}, nil)
assert.Error(t, err)
err = e1.AddDataToPayload([]int64{4, 5, 6})
err = e1.AddDataToPayload([]int64{4, 5, 6}, nil)
assert.NoError(t, err)
e1.SetEventTimestamp(tsoutil.ComposeTS(curTS+10*60*1000, 0), tsoutil.ComposeTS(curTS+20*60*1000, 0))
e2, err := w.NextInsertEventWriter()
e2, err := w.NextInsertEventWriter(false)
assert.NoError(t, err)
err = e2.AddDataToPayload([]int64{7, 8, 9})
err = e2.AddDataToPayload([]int64{7, 8, 9}, nil)
assert.NoError(t, err)
err = e2.AddDataToPayload([]bool{true, false, true})
err = e2.AddDataToPayload([]bool{true, false, true}, nil)
assert.Error(t, err)
err = e2.AddDataToPayload([]int64{10, 11, 12})
err = e2.AddDataToPayload([]int64{10, 11, 12}, nil)
assert.NoError(t, err)
e2.SetEventTimestamp(tsoutil.ComposeTS(curTS+30*60*1000, 0), tsoutil.ComposeTS(curTS+40*60*1000, 0))

View File

@ -59,3 +59,9 @@ func UnsafeReadFloat64(buf []byte, idx int) float64 {
ptr := unsafe.Pointer(&(buf[idx]))
return *((*float64)(ptr))
}
/* #nosec G103 */
func UnsafeReadBool(buf []byte, idx int) bool {
ptr := unsafe.Pointer(&(buf[idx]))
return *((*bool)(ptr))
}
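A tiny self-contained check of what UnsafeReadBool does, reinterpreting one byte of a buffer as a bool; the function below is a copy for illustration and the input values are made up:

package main

import (
	"fmt"
	"unsafe"
)

/* #nosec G103 */
func unsafeReadBool(buf []byte, idx int) bool {
	ptr := unsafe.Pointer(&(buf[idx]))
	return *((*bool)(ptr))
}

func main() {
	buf := []byte{0, 1}
	fmt.Println(unsafeReadBool(buf, 0), unsafeReadBool(buf, 1)) // false true
}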

View File

@ -567,30 +567,38 @@ func ColumnBasedInsertMsgToInsertData(msg *msgstream.InsertMsg, collSchema *sche
case schemapb.DataType_Bool:
srcData := srcField.GetScalars().GetBoolData().GetData()
validData := srcField.GetValidData()
fieldData = &BoolFieldData{
Data: lo.Map(srcData, func(v bool, _ int) bool { return v }),
Data: lo.Map(srcData, func(v bool, _ int) bool { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
case schemapb.DataType_Int8:
srcData := srcField.GetScalars().GetIntData().GetData()
validData := srcField.GetValidData()
fieldData = &Int8FieldData{
Data: lo.Map(srcData, func(v int32, _ int) int8 { return int8(v) }),
Data: lo.Map(srcData, func(v int32, _ int) int8 { return int8(v) }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
case schemapb.DataType_Int16:
srcData := srcField.GetScalars().GetIntData().GetData()
validData := srcField.GetValidData()
fieldData = &Int16FieldData{
Data: lo.Map(srcData, func(v int32, _ int) int16 { return int16(v) }),
Data: lo.Map(srcData, func(v int32, _ int) int16 { return int16(v) }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
case schemapb.DataType_Int32:
srcData := srcField.GetScalars().GetIntData().GetData()
validData := srcField.GetValidData()
fieldData = &Int32FieldData{
Data: lo.Map(srcData, func(v int32, _ int) int32 { return v }),
Data: lo.Map(srcData, func(v int32, _ int) int32 { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
case schemapb.DataType_Int64:
@ -605,45 +613,57 @@ func ColumnBasedInsertMsgToInsertData(msg *msgstream.InsertMsg, collSchema *sche
}
default:
srcData := srcField.GetScalars().GetLongData().GetData()
validData := srcField.GetValidData()
fieldData = &Int64FieldData{
Data: lo.Map(srcData, func(v int64, _ int) int64 { return v }),
Data: lo.Map(srcData, func(v int64, _ int) int64 { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
}
case schemapb.DataType_Float:
srcData := srcField.GetScalars().GetFloatData().GetData()
validData := srcField.GetValidData()
fieldData = &FloatFieldData{
Data: lo.Map(srcData, func(v float32, _ int) float32 { return v }),
Data: lo.Map(srcData, func(v float32, _ int) float32 { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
case schemapb.DataType_Double:
srcData := srcField.GetScalars().GetDoubleData().GetData()
validData := srcField.GetValidData()
fieldData = &DoubleFieldData{
Data: lo.Map(srcData, func(v float64, _ int) float64 { return v }),
Data: lo.Map(srcData, func(v float64, _ int) float64 { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
case schemapb.DataType_String, schemapb.DataType_VarChar:
srcData := srcField.GetScalars().GetStringData().GetData()
validData := srcField.GetValidData()
fieldData = &StringFieldData{
Data: lo.Map(srcData, func(v string, _ int) string { return v }),
Data: lo.Map(srcData, func(v string, _ int) string { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
case schemapb.DataType_Array:
srcData := srcField.GetScalars().GetArrayData().GetData()
validData := srcField.GetValidData()
fieldData = &ArrayFieldData{
ElementType: field.GetElementType(),
Data: lo.Map(srcData, func(v *schemapb.ScalarField, _ int) *schemapb.ScalarField { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
case schemapb.DataType_JSON:
srcData := srcField.GetScalars().GetJsonData().GetData()
validData := srcField.GetValidData()
fieldData = &JSONFieldData{
Data: lo.Map(srcData, func(v []byte, _ int) []byte { return v }),
Data: lo.Map(srcData, func(v []byte, _ int) []byte { return v }),
ValidData: lo.Map(validData, func(v bool, _ int) bool { return v }),
}
default:
@ -676,89 +696,105 @@ func InsertMsgToInsertData(msg *msgstream.InsertMsg, schema *schemapb.Collection
func mergeBoolField(data *InsertData, fid FieldID, field *BoolFieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &BoolFieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*BoolFieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeInt8Field(data *InsertData, fid FieldID, field *Int8FieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &Int8FieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*Int8FieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeInt16Field(data *InsertData, fid FieldID, field *Int16FieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &Int16FieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*Int16FieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeInt32Field(data *InsertData, fid FieldID, field *Int32FieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &Int32FieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*Int32FieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeInt64Field(data *InsertData, fid FieldID, field *Int64FieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &Int64FieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*Int64FieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeFloatField(data *InsertData, fid FieldID, field *FloatFieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &FloatFieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*FloatFieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeDoubleField(data *InsertData, fid FieldID, field *DoubleFieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &DoubleFieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*DoubleFieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeStringField(data *InsertData, fid FieldID, field *StringFieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &StringFieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*StringFieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeArrayField(data *InsertData, fid FieldID, field *ArrayFieldData) {
@ -766,22 +802,26 @@ func mergeArrayField(data *InsertData, fid FieldID, field *ArrayFieldData) {
fieldData := &ArrayFieldData{
ElementType: field.ElementType,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*ArrayFieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
func mergeJSONField(data *InsertData, fid FieldID, field *JSONFieldData) {
if _, ok := data.Data[fid]; !ok {
fieldData := &JSONFieldData{
Data: nil,
Data: nil,
ValidData: nil,
}
data.Data[fid] = fieldData
}
fieldData := data.Data[fid].(*JSONFieldData)
fieldData.Data = append(fieldData.Data, field.Data...)
fieldData.ValidData = append(fieldData.ValidData, field.ValidData...)
}
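Each merge helper now appends ValidData alongside Data so the validity flags stay row-aligned with the values. A toy illustration of that invariant (types and names are made up, not the package's):

package main

import "fmt"

// column is a toy stand-in for a nullable scalar field buffer: values plus row validity.
type column struct {
	data  []int64
	valid []bool
}

func merge(dst, src *column) {
	dst.data = append(dst.data, src.data...)
	dst.valid = append(dst.valid, src.valid...)
}

func main() {
	d1 := &column{data: []int64{1}, valid: []bool{true}}
	d2 := &column{data: []int64{0}, valid: []bool{false}} // second row is null
	merge(d1, d2)
	fmt.Println(d1.data, d1.valid) // [1 0] [true false]
}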
func mergeBinaryVectorField(data *InsertData, fid FieldID, field *BinaryVectorFieldData) {

View File

@ -434,6 +434,121 @@ func genAllFieldsSchema(fVecDim, bVecDim, f16VecDim, bf16VecDim int, withSparse
return schema, pkFieldID, fieldIDs
}
func genAllFieldsSchemaNullable(fVecDim, bVecDim, f16VecDim, bf16VecDim int, withSparse bool) (schema *schemapb.CollectionSchema, pkFieldID UniqueID, fieldIDs []UniqueID) {
schema = &schemapb.CollectionSchema{
Name: "all_fields_schema_nullable",
Description: "all_fields_schema_nullable",
AutoID: false,
Fields: []*schemapb.FieldSchema{
{
DataType: schemapb.DataType_Int64,
IsPrimaryKey: true,
},
{
DataType: schemapb.DataType_Bool,
Nullable: true,
},
{
DataType: schemapb.DataType_Int8,
Nullable: true,
},
{
DataType: schemapb.DataType_Int16,
Nullable: true,
},
{
DataType: schemapb.DataType_Int32,
Nullable: true,
},
{
DataType: schemapb.DataType_Int64,
Nullable: true,
},
{
DataType: schemapb.DataType_Float,
Nullable: true,
},
{
DataType: schemapb.DataType_Double,
Nullable: true,
},
{
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(fVecDim),
},
},
},
{
DataType: schemapb.DataType_BinaryVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(bVecDim),
},
},
},
{
DataType: schemapb.DataType_Float16Vector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(f16VecDim),
},
},
},
{
DataType: schemapb.DataType_BFloat16Vector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(bf16VecDim),
},
},
},
{
DataType: schemapb.DataType_Array,
Nullable: true,
},
{
DataType: schemapb.DataType_JSON,
Nullable: true,
},
},
}
if withSparse {
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
DataType: schemapb.DataType_SparseFloatVector,
})
}
fieldIDs = make([]UniqueID, 0)
for idx := range schema.Fields {
fID := int64(common.StartOfUserFieldID + idx)
schema.Fields[idx].FieldID = fID
if schema.Fields[idx].IsPrimaryKey {
pkFieldID = fID
}
fieldIDs = append(fieldIDs, fID)
}
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
FieldID: common.RowIDField,
Name: common.RowIDFieldName,
IsPrimaryKey: false,
Description: "",
DataType: schemapb.DataType_Int64,
})
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
FieldID: common.TimeStampField,
Name: common.TimeStampFieldName,
IsPrimaryKey: false,
Description: "",
DataType: schemapb.DataType_Int64,
})
return schema, pkFieldID, fieldIDs
}
func generateInt32ArrayList(numRows int) []*schemapb.ScalarField {
ret := make([]*schemapb.ScalarField, 0, numRows)
for i := 0; i < numRows; i++ {
@ -616,6 +731,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], d)
@ -636,6 +754,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], int8(d))
@ -656,6 +777,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], int16(d))
@ -676,6 +800,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], d)
@ -696,6 +823,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], d)
@ -717,6 +847,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], d)
@ -737,6 +870,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], d)
@ -856,6 +992,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], d)
@ -877,6 +1016,9 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
},
FieldId: field.FieldID,
}
if field.GetNullable() {
f.ValidData = testutils.GenerateBoolArray(numRows)
}
msg.FieldsData = append(msg.FieldsData, f)
for _, d := range data {
columns[idx] = append(columns[idx], d)
@ -1019,6 +1161,24 @@ func TestColumnBasedInsertMsgToInsertData(t *testing.T) {
}
}
func TestColumnBasedInsertMsgToInsertDataNullable(t *testing.T) {
numRows, fVecDim, bVecDim, f16VecDim, bf16VecDim := 2, 2, 8, 2, 2
schema, _, fieldIDs := genAllFieldsSchemaNullable(fVecDim, bVecDim, f16VecDim, bf16VecDim, true)
msg, _, columns := genColumnBasedInsertMsg(schema, numRows, fVecDim, bVecDim, f16VecDim, bf16VecDim)
idata, err := ColumnBasedInsertMsgToInsertData(msg, schema)
assert.NoError(t, err)
for idx, fID := range fieldIDs {
column := columns[idx]
fData, ok := idata.Data[fID]
assert.True(t, ok)
assert.Equal(t, len(column), fData.RowNum())
for j := range column {
assert.Equal(t, fData.GetRow(j), column[j])
}
}
}
func TestColumnBasedInsertMsgToInsertFloat16VectorDataError(t *testing.T) {
msg := &msgstream.InsertMsg{
BaseMsg: msgstream.BaseMsg{
@ -1145,233 +1305,391 @@ func TestInsertMsgToInsertData2(t *testing.T) {
}
func TestMergeInsertData(t *testing.T) {
d1 := &InsertData{
Data: map[int64]FieldData{
common.RowIDField: &Int64FieldData{
Data: []int64{1},
},
common.TimeStampField: &Int64FieldData{
Data: []int64{1},
},
BoolField: &BoolFieldData{
Data: []bool{true},
},
Int8Field: &Int8FieldData{
Data: []int8{1},
},
Int16Field: &Int16FieldData{
Data: []int16{1},
},
Int32Field: &Int32FieldData{
Data: []int32{1},
},
Int64Field: &Int64FieldData{
Data: []int64{1},
},
FloatField: &FloatFieldData{
Data: []float32{0},
},
DoubleField: &DoubleFieldData{
Data: []float64{0},
},
StringField: &StringFieldData{
Data: []string{"1"},
},
BinaryVectorField: &BinaryVectorFieldData{
Data: []byte{0},
Dim: 8,
},
FloatVectorField: &FloatVectorFieldData{
Data: []float32{0},
Dim: 1,
},
Float16VectorField: &Float16VectorFieldData{
Data: []byte{0, 1},
Dim: 1,
},
BFloat16VectorField: &BFloat16VectorFieldData{
Data: []byte{0, 1},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
t.Run("empty data in buffer", func(t *testing.T) {
d1 := &InsertData{
Data: make(map[FieldID]FieldData),
Infos: []BlobInfo{},
}
d2 := &InsertData{
Data: map[int64]FieldData{
common.RowIDField: &Int64FieldData{
Data: []int64{2},
},
common.TimeStampField: &Int64FieldData{
Data: []int64{2},
},
BoolField: &BoolFieldData{
Data: []bool{false},
},
Int8Field: &Int8FieldData{
Data: []int8{2},
},
Int16Field: &Int16FieldData{
Data: []int16{2},
},
Int32Field: &Int32FieldData{
Data: []int32{2},
},
Int64Field: &Int64FieldData{
Data: []int64{2},
},
FloatField: &FloatFieldData{
Data: []float32{0},
},
DoubleField: &DoubleFieldData{
Data: []float64{0},
},
StringField: &StringFieldData{
Data: []string{"2"},
},
BinaryVectorField: &BinaryVectorFieldData{
Data: []byte{0},
Dim: 8,
},
FloatVectorField: &FloatVectorFieldData{
Data: []float32{0},
Dim: 1,
},
Float16VectorField: &Float16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
BFloat16VectorField: &BFloat16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
},
},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{1, 2, 3},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{4, 5, 6},
},
},
},
},
},
},
JSONField: &JSONFieldData{
Data: [][]byte{[]byte(`{"key":"value"}`)},
},
},
Infos: nil,
}
d2 := &InsertData{
Data: map[int64]FieldData{
common.RowIDField: &Int64FieldData{
Data: []int64{2},
},
common.TimeStampField: &Int64FieldData{
Data: []int64{2},
},
BoolField: &BoolFieldData{
Data: []bool{false},
},
Int8Field: &Int8FieldData{
Data: []int8{2},
},
Int16Field: &Int16FieldData{
Data: []int16{2},
},
Int32Field: &Int32FieldData{
Data: []int32{2},
},
Int64Field: &Int64FieldData{
Data: []int64{2},
},
FloatField: &FloatFieldData{
Data: []float32{0},
},
DoubleField: &DoubleFieldData{
Data: []float64{0},
},
StringField: &StringFieldData{
Data: []string{"2"},
},
BinaryVectorField: &BinaryVectorFieldData{
Data: []byte{0},
Dim: 8,
},
FloatVectorField: &FloatVectorFieldData{
Data: []float32{0},
Dim: 1,
},
Float16VectorField: &Float16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
BFloat16VectorField: &BFloat16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
JSONField: &JSONFieldData{
Data: [][]byte{[]byte(`{"hello":"world"}`)},
},
},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{4, 5, 6},
Infos: nil,
}
MergeInsertData(d1, d2)
f, ok := d1.Data[common.RowIDField]
assert.True(t, ok)
assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[common.TimeStampField]
assert.True(t, ok)
assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[BoolField]
assert.True(t, ok)
assert.Equal(t, []bool{false}, f.(*BoolFieldData).Data)
f, ok = d1.Data[Int8Field]
assert.True(t, ok)
assert.Equal(t, []int8{2}, f.(*Int8FieldData).Data)
f, ok = d1.Data[Int16Field]
assert.True(t, ok)
assert.Equal(t, []int16{2}, f.(*Int16FieldData).Data)
f, ok = d1.Data[Int32Field]
assert.True(t, ok)
assert.Equal(t, []int32{2}, f.(*Int32FieldData).Data)
f, ok = d1.Data[Int64Field]
assert.True(t, ok)
assert.Equal(t, []int64{2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[FloatField]
assert.True(t, ok)
assert.Equal(t, []float32{0}, f.(*FloatFieldData).Data)
f, ok = d1.Data[DoubleField]
assert.True(t, ok)
assert.Equal(t, []float64{0}, f.(*DoubleFieldData).Data)
f, ok = d1.Data[StringField]
assert.True(t, ok)
assert.Equal(t, []string{"2"}, f.(*StringFieldData).Data)
f, ok = d1.Data[BinaryVectorField]
assert.True(t, ok)
assert.Equal(t, []byte{0}, f.(*BinaryVectorFieldData).Data)
f, ok = d1.Data[FloatVectorField]
assert.True(t, ok)
assert.Equal(t, []float32{0}, f.(*FloatVectorFieldData).Data)
f, ok = d1.Data[Float16VectorField]
assert.True(t, ok)
assert.Equal(t, []byte{2, 3}, f.(*Float16VectorFieldData).Data)
f, ok = d1.Data[BFloat16VectorField]
assert.True(t, ok)
assert.Equal(t, []byte{2, 3}, f.(*BFloat16VectorFieldData).Data)
f, ok = d1.Data[SparseFloatVectorField]
assert.True(t, ok)
assert.Equal(t, &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
}, f.(*SparseFloatVectorFieldData))
f, ok = d1.Data[ArrayField]
assert.True(t, ok)
assert.Equal(t, []int32{4, 5, 6}, f.(*ArrayFieldData).Data[0].GetIntData().GetData())
f, ok = d1.Data[JSONField]
assert.True(t, ok)
assert.EqualValues(t, [][]byte{[]byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data)
})
t.Run("normal case", func(t *testing.T) {
d1 := &InsertData{
Data: map[int64]FieldData{
common.RowIDField: &Int64FieldData{
Data: []int64{1},
},
common.TimeStampField: &Int64FieldData{
Data: []int64{1},
},
BoolField: &BoolFieldData{
Data: []bool{true},
},
Int8Field: &Int8FieldData{
Data: []int8{1},
},
Int16Field: &Int16FieldData{
Data: []int16{1},
},
Int32Field: &Int32FieldData{
Data: []int32{1},
},
Int64Field: &Int64FieldData{
Data: []int64{1},
},
FloatField: &FloatFieldData{
Data: []float32{0},
},
DoubleField: &DoubleFieldData{
Data: []float64{0},
},
StringField: &StringFieldData{
Data: []string{"1"},
},
BinaryVectorField: &BinaryVectorFieldData{
Data: []byte{0},
Dim: 8,
},
FloatVectorField: &FloatVectorFieldData{
Data: []float32{0},
Dim: 1,
},
Float16VectorField: &Float16VectorFieldData{
Data: []byte{0, 1},
Dim: 1,
},
BFloat16VectorField: &BFloat16VectorFieldData{
Data: []byte{0, 1},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
},
},
},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{1, 2, 3},
},
},
},
},
},
JSONField: &JSONFieldData{
Data: [][]byte{[]byte(`{"key":"value"}`)},
},
},
Infos: nil,
}
d2 := &InsertData{
Data: map[int64]FieldData{
common.RowIDField: &Int64FieldData{
Data: []int64{2},
},
common.TimeStampField: &Int64FieldData{
Data: []int64{2},
},
BoolField: &BoolFieldData{
Data: []bool{false},
},
Int8Field: &Int8FieldData{
Data: []int8{2},
},
Int16Field: &Int16FieldData{
Data: []int16{2},
},
Int32Field: &Int32FieldData{
Data: []int32{2},
},
Int64Field: &Int64FieldData{
Data: []int64{2},
},
FloatField: &FloatFieldData{
Data: []float32{0},
},
DoubleField: &DoubleFieldData{
Data: []float64{0},
},
StringField: &StringFieldData{
Data: []string{"2"},
},
BinaryVectorField: &BinaryVectorFieldData{
Data: []byte{0},
Dim: 8,
},
FloatVectorField: &FloatVectorFieldData{
Data: []float32{0},
Dim: 1,
},
Float16VectorField: &Float16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
BFloat16VectorField: &BFloat16VectorFieldData{
Data: []byte{2, 3},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{4, 5, 6},
},
},
},
},
},
JSONField: &JSONFieldData{
Data: [][]byte{[]byte(`{"hello":"world"}`)},
},
},
Infos: nil,
}
MergeInsertData(d1, d2)
f, ok := d1.Data[common.RowIDField]
assert.True(t, ok)
assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[common.TimeStampField]
assert.True(t, ok)
assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[BoolField]
assert.True(t, ok)
assert.Equal(t, []bool{true, false}, f.(*BoolFieldData).Data)
f, ok = d1.Data[Int8Field]
assert.True(t, ok)
assert.Equal(t, []int8{1, 2}, f.(*Int8FieldData).Data)
f, ok = d1.Data[Int16Field]
assert.True(t, ok)
assert.Equal(t, []int16{1, 2}, f.(*Int16FieldData).Data)
f, ok = d1.Data[Int32Field]
assert.True(t, ok)
assert.Equal(t, []int32{1, 2}, f.(*Int32FieldData).Data)
f, ok = d1.Data[Int64Field]
assert.True(t, ok)
assert.Equal(t, []int64{1, 2}, f.(*Int64FieldData).Data)
f, ok = d1.Data[FloatField]
assert.True(t, ok)
assert.Equal(t, []float32{0, 0}, f.(*FloatFieldData).Data)
f, ok = d1.Data[DoubleField]
assert.True(t, ok)
assert.Equal(t, []float64{0, 0}, f.(*DoubleFieldData).Data)
f, ok = d1.Data[StringField]
assert.True(t, ok)
assert.Equal(t, []string{"1", "2"}, f.(*StringFieldData).Data)
f, ok = d1.Data[BinaryVectorField]
assert.True(t, ok)
assert.Equal(t, []byte{0, 0}, f.(*BinaryVectorFieldData).Data)
f, ok = d1.Data[FloatVectorField]
assert.True(t, ok)
assert.Equal(t, []float32{0, 0}, f.(*FloatVectorFieldData).Data)
f, ok = d1.Data[Float16VectorField]
assert.True(t, ok)
assert.Equal(t, []byte{0, 1, 2, 3}, f.(*Float16VectorFieldData).Data)
f, ok = d1.Data[BFloat16VectorField]
assert.True(t, ok)
assert.Equal(t, []byte{0, 1, 2, 3}, f.(*BFloat16VectorFieldData).Data)
f, ok = d1.Data[SparseFloatVectorField]
assert.True(t, ok)
assert.Equal(t, &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
typeutil.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
typeutil.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
typeutil.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
}, f.(*SparseFloatVectorFieldData))
f, ok = d1.Data[ArrayField]
assert.True(t, ok)
assert.Equal(t, []int32{1, 2, 3}, f.(*ArrayFieldData).Data[0].GetIntData().GetData())
assert.Equal(t, []int32{4, 5, 6}, f.(*ArrayFieldData).Data[1].GetIntData().GetData())
f, ok = d1.Data[JSONField]
assert.True(t, ok)
assert.EqualValues(t, [][]byte{[]byte(`{"key":"value"}`), []byte(`{"hello":"world"}`)}, f.(*JSONFieldData).Data)
})
}
func TestMergeFloat16VectorField(t *testing.T) {

View File

@ -70,7 +70,7 @@ func (suite *ReaderSuite) SetupTest() {
func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.FieldData) []byte {
dataType := field.GetDataType()
w := storage.NewInsertBinlogWriter(dataType, 1, 1, 1, field.GetFieldID())
w := storage.NewInsertBinlogWriter(dataType, 1, 1, 1, field.GetFieldID(), false)
assert.NotNil(t, w)
defer w.Close()
@ -81,7 +81,7 @@ func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.Fie
dim = 1
}
evt, err := w.NextInsertEventWriter(int(dim))
evt, err := w.NextInsertEventWriter(false, int(dim))
assert.NoError(t, err)
evt.SetEventTimestamp(1, math.MaxInt64)
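Note: together with the next hunk, this helper shows the nullable-aware write path end to end: NewInsertBinlogWriter and NextInsertEventWriter gain a nullable flag, and the scalar Add*ToPayload calls take a per-row validity argument (nil when the field is not nullable). A minimal sketch of that call pattern, assuming exactly the signatures visible in this diff — the function name, IDs, field number, and values below are placeholders, not taken from the test:

func writeNonNullableInt64(values []int64) error {
	// mirrors createBinlogBuf above for a plain Int64 field; nullable=false everywhere
	w := storage.NewInsertBinlogWriter(schemapb.DataType_Int64, 1, 1, 1, 101, false)
	defer w.Close()

	// the test helper falls back to dim=1 for scalar fields, so the same is done here
	evt, err := w.NextInsertEventWriter(false, 1)
	if err != nil {
		return err
	}
	evt.SetEventTimestamp(1, math.MaxInt64)
	// validData is nil because the field is declared non-nullable
	return evt.AddInt64ToPayload(values, nil)
}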
@ -94,42 +94,42 @@ func createBinlogBuf(t *testing.T, field *schemapb.FieldSchema, data storage.Fie
switch dataType {
case schemapb.DataType_Bool:
err = evt.AddBoolToPayload(data.(*storage.BoolFieldData).Data)
err = evt.AddBoolToPayload(data.(*storage.BoolFieldData).Data, nil)
assert.NoError(t, err)
case schemapb.DataType_Int8:
err = evt.AddInt8ToPayload(data.(*storage.Int8FieldData).Data)
err = evt.AddInt8ToPayload(data.(*storage.Int8FieldData).Data, nil)
assert.NoError(t, err)
case schemapb.DataType_Int16:
err = evt.AddInt16ToPayload(data.(*storage.Int16FieldData).Data)
err = evt.AddInt16ToPayload(data.(*storage.Int16FieldData).Data, nil)
assert.NoError(t, err)
case schemapb.DataType_Int32:
err = evt.AddInt32ToPayload(data.(*storage.Int32FieldData).Data)
err = evt.AddInt32ToPayload(data.(*storage.Int32FieldData).Data, nil)
assert.NoError(t, err)
case schemapb.DataType_Int64:
err = evt.AddInt64ToPayload(data.(*storage.Int64FieldData).Data)
err = evt.AddInt64ToPayload(data.(*storage.Int64FieldData).Data, nil)
assert.NoError(t, err)
case schemapb.DataType_Float:
err = evt.AddFloatToPayload(data.(*storage.FloatFieldData).Data)
err = evt.AddFloatToPayload(data.(*storage.FloatFieldData).Data, nil)
assert.NoError(t, err)
case schemapb.DataType_Double:
err = evt.AddDoubleToPayload(data.(*storage.DoubleFieldData).Data)
err = evt.AddDoubleToPayload(data.(*storage.DoubleFieldData).Data, nil)
assert.NoError(t, err)
case schemapb.DataType_VarChar:
values := data.(*storage.StringFieldData).Data
for _, val := range values {
err = evt.AddOneStringToPayload(val)
err = evt.AddOneStringToPayload(val, true)
assert.NoError(t, err)
}
case schemapb.DataType_JSON:
rows := data.(*storage.JSONFieldData).Data
for i := 0; i < len(rows); i++ {
err = evt.AddOneJSONToPayload(rows[i])
err = evt.AddOneJSONToPayload(rows[i], true)
assert.NoError(t, err)
}
case schemapb.DataType_Array:
rows := data.(*storage.ArrayFieldData).Data
for i := 0; i < len(rows); i++ {
err = evt.AddOneArrayToPayload(rows[i])
err = evt.AddOneArrayToPayload(rows[i], true)
assert.NoError(t, err)
}
case schemapb.DataType_BinaryVector:

View File

@ -43,7 +43,7 @@ func readData(reader *storage.BinlogReader, et storage.EventTypeCode) ([]any, er
return nil, merr.WrapErrImportFailed(fmt.Sprintf("wrong binlog type, expect:%s, actual:%s",
et.String(), event.TypeCode.String()))
}
rows, _, err := event.PayloadReaderInterface.GetDataFromPayload()
rows, _, _, err := event.PayloadReaderInterface.GetDataFromPayload()
if err != nil {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read data, error: %v", err))
}
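The extra discarded return value here is new: GetDataFromPayload now also hands back per-row validity alongside the row data, and this import path simply ignores it. A caller that needs null awareness would keep it instead; a hedged sketch, where the order of the two middle return values (validity, then dimension) is an assumption based on this call site:

rows, valid, _, err := event.PayloadReaderInterface.GetDataFromPayload()
if err != nil {
	return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read data, error: %v", err))
}
for i := range rows {
	// valid is assumed to be a []bool marking which rows are set; nil means all rows are valid
	if valid != nil && !valid[i] {
		continue // treat as a null row
	}
	// ... use rows[i]
}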

View File

@ -590,6 +590,7 @@ func AppendFieldData(dst, src []*schemapb.FieldData, idx int64) (appendSize int6
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{},
},
ValidData: fieldData.GetValidData(),
}
}
dstScalar := dst[i].GetScalars()
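For context, the ValidData slice copied above is the proto-level encoding of nulls on schemapb.FieldData: one bool per row, false meaning the row is null. A hypothetical source value for a nullable scalar field might look like the following sketch (field name and numbers are made up for illustration):

src := &schemapb.FieldData{
	Type:      schemapb.DataType_Int64,
	FieldName: "age", // placeholder
	Field: &schemapb.FieldData_Scalars{
		Scalars: &schemapb.ScalarField{
			Data: &schemapb.ScalarField_LongData{
				LongData: &schemapb.LongArray{Data: []int64{0, 42}},
			},
		},
	},
	// one entry per row: row 0 is null, row 1 holds 42
	ValidData: []bool{false, true},
}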