2021-12-28 12:11:55 +00:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
2021-10-11 06:10:48 +00:00
// with the License. You may obtain a copy of the License at
//
2021-12-28 12:11:55 +00:00
// http://www.apache.org/licenses/LICENSE-2.0
2021-10-11 06:10:48 +00:00
//
2021-12-28 12:11:55 +00:00
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2021-10-11 06:10:48 +00:00
package storage
import (
"bytes"
2024-03-25 12:29:07 +00:00
"context"
2021-10-11 06:10:48 +00:00
"encoding/binary"
"fmt"
2022-03-04 07:09:56 +00:00
"io"
2023-10-31 04:18:15 +00:00
"io/fs"
"os"
2021-11-12 10:27:10 +00:00
"sort"
2021-10-12 09:00:34 +00:00
"strconv"
2021-10-11 06:10:48 +00:00
2023-02-26 03:31:49 +00:00
"github.com/cockroachdb/errors"
2022-03-11 06:39:59 +00:00
"github.com/golang/protobuf/proto"
2024-01-09 03:50:48 +00:00
"github.com/samber/lo"
2022-03-04 07:09:56 +00:00
"go.uber.org/zap"
2023-06-08 17:28:37 +00:00
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
2022-04-29 05:35:49 +00:00
"github.com/milvus-io/milvus/internal/proto/segcorepb"
2023-04-06 11:14:32 +00:00
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/mq/msgstream"
2023-10-31 04:18:15 +00:00
"github.com/milvus-io/milvus/pkg/util/merr"
2024-03-22 05:57:06 +00:00
"github.com/milvus-io/milvus/pkg/util/paramtable"
2023-04-06 11:14:32 +00:00
"github.com/milvus-io/milvus/pkg/util/typeutil"
2021-10-11 06:10:48 +00:00
)
2021-11-12 10:27:10 +00:00
//////////////////////////////////////////////////////////////////////////////////////////////////
2023-10-31 04:18:15 +00:00
// Open opens the named file for reading exactly as os.Open does, but maps the
// resulting os error onto a Milvus error: a missing file is reported through
// merr.WrapErrIoKeyNotFound, any other failure through merr.WrapErrIoFailed.
func Open(filepath string) (*os.File, error) { // NOLINT
	file, openErr := os.Open(filepath)
	switch {
	case openErr == nil:
		return file, nil
	case os.IsNotExist(openErr):
		return nil, merr.WrapErrIoKeyNotFound(filepath)
	default:
		return nil, merr.WrapErrIoFailed(filepath, openErr)
	}
}
// ReadFile reads the whole named file exactly as os.ReadFile does, but maps
// the resulting os error onto a Milvus error: a missing file is reported
// through merr.WrapErrIoKeyNotFound, any other failure through
// merr.WrapErrIoFailed.
func ReadFile(filepath string) ([]byte, error) { // NOLINT
	content, readErr := os.ReadFile(filepath)
	switch {
	case readErr == nil:
		return content, nil
	case os.IsNotExist(readErr):
		return nil, merr.WrapErrIoKeyNotFound(filepath)
	default:
		return nil, merr.WrapErrIoFailed(filepath, readErr)
	}
}
// WriteFile writes data to the named file exactly as os.WriteFile does, and
// reports any failure through merr.WrapErrIoFailed.
func WriteFile(filepath string, data []byte, perm fs.FileMode) error { // NOLINT
	if writeErr := os.WriteFile(filepath, data, perm); writeErr != nil {
		return merr.WrapErrIoFailed(filepath, writeErr)
	}
	return nil
}
2021-11-12 10:27:10 +00:00
// checkTsField reports whether data contains a column under
// common.TimeStampField and that column is an *Int64FieldData.
func checkTsField(data *InsertData) bool {
	if column, found := data.Data[common.TimeStampField]; found {
		_, isInt64 := column.(*Int64FieldData)
		return isInt64
	}
	return false
}
// checkRowIDField reports whether data contains a column under
// common.RowIDField and that column is an *Int64FieldData.
func checkRowIDField(data *InsertData) bool {
	if column, found := data.Data[common.RowIDField]; found {
		_, isInt64 := column.(*Int64FieldData)
		return isInt64
	}
	return false
}
// checkNumRows reports whether every given column has the same RowNum as the
// first one. An empty argument list is considered consistent.
func checkNumRows(fieldDatas ...FieldData) bool {
	if len(fieldDatas) == 0 {
		return true
	}
	expected := fieldDatas[0].RowNum()
	for _, column := range fieldDatas[1:] {
		if column.RowNum() != expected {
			return false
		}
	}
	return true
}
// fieldDataList holds field IDs and their column data as two parallel slices:
// datas[i] is the column that belongs to IDs[i]. It implements sort.Interface,
// ordering by ascending field ID.
type fieldDataList struct {
	IDs   []FieldID
	datas []FieldData
}

// Len returns the number of field IDs in the list.
func (ls fieldDataList) Len() int { return len(ls.IDs) }

// Less orders entries by ascending field ID.
func (ls fieldDataList) Less(i, j int) bool { return ls.IDs[i] < ls.IDs[j] }

// Swap exchanges entries i and j in both slices so that IDs and datas stay
// paired.
func (ls fieldDataList) Swap(i, j int) {
	ids, columns := ls.IDs, ls.datas
	ids[i], ids[j] = ids[j], ids[i]
	columns[i], columns[j] = columns[j], columns[i]
}

// sortFieldDataList sorts ls in place by ascending field ID. Although ls is
// passed by value, Swap writes through the slice elements, so the caller's
// slices observe the new order.
func sortFieldDataList(ls fieldDataList) {
	var byFieldID sort.Interface = ls
	sort.Sort(byFieldID)
}
// TransferColumnBasedInsertDataToRowBased transfer column-based insert data to row-based rows.
// Note:
2023-02-26 03:31:49 +00:00
// - ts column must exist in insert data;
// - row id column must exist in insert data;
// - the row num of all column must be equal;
// - num_rows = len(RowData), a row will be assembled into the value of blob with field id order;
2021-11-12 10:27:10 +00:00
func TransferColumnBasedInsertDataToRowBased ( data * InsertData ) (
Timestamps [ ] uint64 ,
RowIDs [ ] int64 ,
RowData [ ] * commonpb . Blob ,
err error ,
) {
if ! checkTsField ( data ) {
return nil , nil , nil ,
errors . New ( "cannot get timestamps from insert data" )
}
if ! checkRowIDField ( data ) {
return nil , nil , nil ,
errors . New ( "cannot get row ids from insert data" )
}
tss := data . Data [ common . TimeStampField ] . ( * Int64FieldData )
2024-03-27 22:33:11 +00:00
rowIDs := data . Data [ common . RowIDField ] . ( * Int64FieldData )
2021-11-12 10:27:10 +00:00
2021-11-30 01:57:44 +00:00
ls := fieldDataList { }
2021-11-12 10:27:10 +00:00
for fieldID := range data . Data {
if fieldID == common . TimeStampField || fieldID == common . RowIDField {
continue
}
ls . IDs = append ( ls . IDs , fieldID )
ls . datas = append ( ls . datas , data . Data [ fieldID ] )
}
2024-03-27 22:33:11 +00:00
// checkNumRows(tss, rowIDs, ls.datas...) // don't work
all := [ ] FieldData { tss , rowIDs }
2021-11-12 10:27:10 +00:00
all = append ( all , ls . datas ... )
if ! checkNumRows ( all ... ) {
return nil , nil , nil ,
errors . New ( "columns of insert data have different length" )
}
sortFieldDataList ( ls )
numRows := tss . RowNum ( )
rows := make ( [ ] * commonpb . Blob , numRows )
for i := 0 ; i < numRows ; i ++ {
2021-11-30 01:57:44 +00:00
blob := & commonpb . Blob { }
2021-11-12 10:27:10 +00:00
var buffer bytes . Buffer
for j := 0 ; j < ls . Len ( ) ; j ++ {
d := ls . datas [ j ] . GetRow ( i )
err := binary . Write ( & buffer , common . Endian , d )
if err != nil {
return nil , nil , nil ,
fmt . Errorf ( "failed to get binary row, err: %v" , err )
}
}
blob . Value = buffer . Bytes ( )
rows [ i ] = blob
}
utss := make ( [ ] uint64 , tss . RowNum ( ) )
for i := 0 ; i < tss . RowNum ( ) ; i ++ {
utss [ i ] = uint64 ( tss . Data [ i ] )
}
2024-03-27 22:33:11 +00:00
return utss , rowIDs . Data , rows , nil
2021-11-12 10:27:10 +00:00
}
2022-03-04 07:09:56 +00:00
///////////////////////////////////////////////////////////////////////////////////////////
// TODO: move these functions to a proper file.
// GetDimFromParams get dim from params.
func GetDimFromParams ( params [ ] * commonpb . KeyValuePair ) ( int , error ) {
var dim int
var err error
for _ , t := range params {
2023-05-16 09:41:22 +00:00
if t . Key == common . DimKey {
2022-03-04 07:09:56 +00:00
dim , err = strconv . Atoi ( t . Value )
if err != nil {
return - 1 , err
}
return dim , nil
}
}
return - 1 , errors . New ( "dim not found in params" )
}
// ReadBinary read data in bytes and write it into receiver.
2023-02-26 03:31:49 +00:00
//
// The receiver can be any type in int8, int16, int32, int64, float32, float64 and bool
// ReadBinary uses LittleEndian ByteOrder.
2022-03-04 07:09:56 +00:00
func ReadBinary ( reader io . Reader , receiver interface { } , dataType schemapb . DataType ) {
err := binary . Read ( reader , common . Endian , receiver )
if err != nil {
log . Error ( "binary.Read failed" , zap . Any ( "data type" , dataType ) , zap . Error ( err ) )
}
}
// It will save my life if golang support generic programming.
// TODO: string type.
func readFloatVectors ( blobReaders [ ] io . Reader , dim int ) [ ] float32 {
ret := make ( [ ] float32 , 0 )
for _ , r := range blobReaders {
2023-09-21 01:45:27 +00:00
v := make ( [ ] float32 , dim )
2022-03-04 07:09:56 +00:00
ReadBinary ( r , & v , schemapb . DataType_FloatVector )
ret = append ( ret , v ... )
}
return ret
}
func readBinaryVectors ( blobReaders [ ] io . Reader , dim int ) [ ] byte {
ret := make ( [ ] byte , 0 )
for _ , r := range blobReaders {
2023-09-21 01:45:27 +00:00
v := make ( [ ] byte , dim / 8 )
2022-03-04 07:09:56 +00:00
ReadBinary ( r , & v , schemapb . DataType_BinaryVector )
ret = append ( ret , v ... )
}
return ret
}
2023-09-08 02:03:16 +00:00
func readFloat16Vectors ( blobReaders [ ] io . Reader , dim int ) [ ] byte {
ret := make ( [ ] byte , 0 )
for _ , r := range blobReaders {
2023-09-21 01:45:27 +00:00
v := make ( [ ] byte , dim * 2 )
2023-09-08 02:03:16 +00:00
ReadBinary ( r , & v , schemapb . DataType_Float16Vector )
ret = append ( ret , v ... )
}
return ret
}
2024-01-11 07:48:51 +00:00
// readBFloat16Vectors reads one bfloat16 vector of dim*2 bytes from each
// reader, in order, and returns them concatenated in a single flat byte slice.
//
// The result is allocated once and each vector is decoded directly into its
// own region, instead of decoding into a temporary and appending. A failed
// read is only logged by ReadBinary; the corresponding region stays zero.
func readBFloat16Vectors(blobReaders []io.Reader, dim int) []byte {
	width := dim * 2
	ret := make([]byte, len(blobReaders)*width)
	for i, r := range blobReaders {
		// binary.Read accepts a slice of fixed-size values directly.
		ReadBinary(r, ret[i*width:(i+1)*width], schemapb.DataType_BFloat16Vector)
	}
	return ret
}
2022-03-04 07:09:56 +00:00
// readBoolArray reads one bool from each reader, in order, and returns the
// values as a slice with one element per reader. A failed read is only logged
// by ReadBinary and leaves the element at its zero value.
func readBoolArray(blobReaders []io.Reader) []bool {
	values := make([]bool, len(blobReaders))
	for idx, reader := range blobReaders {
		ReadBinary(reader, &values[idx], schemapb.DataType_Bool)
	}
	return values
}
// readInt8Array reads one int8 from each reader, in order, and returns the
// values as a slice with one element per reader. A failed read is only logged
// by ReadBinary and leaves the element at its zero value.
func readInt8Array(blobReaders []io.Reader) []int8 {
	values := make([]int8, len(blobReaders))
	for idx, reader := range blobReaders {
		ReadBinary(reader, &values[idx], schemapb.DataType_Int8)
	}
	return values
}
// readInt16Array reads one int16 from each reader, in order, and returns the
// values as a slice with one element per reader. A failed read is only logged
// by ReadBinary and leaves the element at its zero value.
func readInt16Array(blobReaders []io.Reader) []int16 {
	values := make([]int16, len(blobReaders))
	for idx, reader := range blobReaders {
		ReadBinary(reader, &values[idx], schemapb.DataType_Int16)
	}
	return values
}
// readInt32Array reads one int32 from each reader, in order, and returns the
// values as a slice with one element per reader. A failed read is only logged
// by ReadBinary and leaves the element at its zero value.
func readInt32Array(blobReaders []io.Reader) []int32 {
	values := make([]int32, len(blobReaders))
	for idx, reader := range blobReaders {
		ReadBinary(reader, &values[idx], schemapb.DataType_Int32)
	}
	return values
}
// readInt64Array reads one int64 from each reader, in order, and returns the
// values as a slice with one element per reader. A failed read is only logged
// by ReadBinary and leaves the element at its zero value.
func readInt64Array(blobReaders []io.Reader) []int64 {
	values := make([]int64, len(blobReaders))
	for idx, reader := range blobReaders {
		ReadBinary(reader, &values[idx], schemapb.DataType_Int64)
	}
	return values
}
// readFloatArray reads one float32 from each reader, in order, and returns
// the values as a slice with one element per reader. A failed read is only
// logged by ReadBinary and leaves the element at its zero value.
func readFloatArray(blobReaders []io.Reader) []float32 {
	values := make([]float32, len(blobReaders))
	for idx, reader := range blobReaders {
		ReadBinary(reader, &values[idx], schemapb.DataType_Float)
	}
	return values
}
// readDoubleArray reads one float64 from each reader, in order, and returns
// the values as a slice with one element per reader. A failed read is only
// logged by ReadBinary and leaves the element at its zero value.
func readDoubleArray(blobReaders []io.Reader) []float64 {
	values := make([]float64, len(blobReaders))
	for idx, reader := range blobReaders {
		ReadBinary(reader, &values[idx], schemapb.DataType_Double)
	}
	return values
}
func RowBasedInsertMsgToInsertData ( msg * msgstream . InsertMsg , collSchema * schemapb . CollectionSchema ) ( idata * InsertData , err error ) {
blobReaders := make ( [ ] io . Reader , 0 )
for _ , blob := range msg . RowData {
blobReaders = append ( blobReaders , bytes . NewReader ( blob . GetValue ( ) ) )
}
idata = & InsertData {
Data : make ( map [ FieldID ] FieldData ) ,
// TODO: handle Infos.
Infos : nil ,
}
for _ , field := range collSchema . Fields {
switch field . DataType {
case schemapb . DataType_FloatVector :
dim , err := GetDimFromParams ( field . TypeParams )
if err != nil {
log . Error ( "failed to get dim" , zap . Error ( err ) )
return nil , err
}
vecs := readFloatVectors ( blobReaders , dim )
idata . Data [ field . FieldID ] = & FloatVectorFieldData {
2023-01-28 03:09:52 +00:00
Data : vecs ,
Dim : dim ,
2022-03-04 07:09:56 +00:00
}
2023-09-08 02:03:16 +00:00
case schemapb . DataType_Float16Vector :
dim , err := GetDimFromParams ( field . TypeParams )
if err != nil {
log . Error ( "failed to get dim" , zap . Error ( err ) )
return nil , err
}
vecs := readFloat16Vectors ( blobReaders , dim )
idata . Data [ field . FieldID ] = & Float16VectorFieldData {
Data : vecs ,
Dim : dim ,
}
2024-01-11 07:48:51 +00:00
case schemapb . DataType_BFloat16Vector :
dim , err := GetDimFromParams ( field . TypeParams )
if err != nil {
log . Error ( "failed to get dim" , zap . Error ( err ) )
return nil , err
}
vecs := readBFloat16Vectors ( blobReaders , dim )
idata . Data [ field . FieldID ] = & BFloat16VectorFieldData {
Data : vecs ,
Dim : dim ,
}
2022-03-04 07:09:56 +00:00
case schemapb . DataType_BinaryVector :
var dim int
dim , err := GetDimFromParams ( field . TypeParams )
if err != nil {
log . Error ( "failed to get dim" , zap . Error ( err ) )
return nil , err
}
vecs := readBinaryVectors ( blobReaders , dim )
idata . Data [ field . FieldID ] = & BinaryVectorFieldData {
2023-01-28 03:09:52 +00:00
Data : vecs ,
Dim : dim ,
2022-03-04 07:09:56 +00:00
}
2024-03-13 21:32:54 +00:00
case schemapb . DataType_SparseFloatVector :
return nil , fmt . Errorf ( "Sparse Float Vector is not supported in row based data" )
2022-03-04 07:09:56 +00:00
case schemapb . DataType_Bool :
idata . Data [ field . FieldID ] = & BoolFieldData {
2023-01-28 03:09:52 +00:00
Data : readBoolArray ( blobReaders ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Int8 :
idata . Data [ field . FieldID ] = & Int8FieldData {
2023-01-28 03:09:52 +00:00
Data : readInt8Array ( blobReaders ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Int16 :
idata . Data [ field . FieldID ] = & Int16FieldData {
2023-01-28 03:09:52 +00:00
Data : readInt16Array ( blobReaders ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Int32 :
idata . Data [ field . FieldID ] = & Int32FieldData {
2023-01-28 03:09:52 +00:00
Data : readInt32Array ( blobReaders ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Int64 :
idata . Data [ field . FieldID ] = & Int64FieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
fieldData := idata . Data [ field . FieldID ] . ( * Int64FieldData )
switch field . FieldID {
case 0 : // rowIDs
fieldData . Data = append ( fieldData . Data , msg . RowIDs ... )
case 1 : // Timestamps
for _ , ts := range msg . Timestamps {
fieldData . Data = append ( fieldData . Data , int64 ( ts ) )
}
default :
fieldData . Data = readInt64Array ( blobReaders )
}
case schemapb . DataType_Float :
idata . Data [ field . FieldID ] = & FloatFieldData {
2023-01-28 03:09:52 +00:00
Data : readFloatArray ( blobReaders ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Double :
idata . Data [ field . FieldID ] = & DoubleFieldData {
2023-01-28 03:09:52 +00:00
Data : readDoubleArray ( blobReaders ) ,
2022-03-04 07:09:56 +00:00
}
}
}
return idata , nil
}
2024-01-09 03:50:48 +00:00
// ColumnBasedInsertMsgToInsertData converts an InsertMsg msg into InsertData based
// on provided CollectionSchema collSchema.
//
// This function checks whether all fields are provided in the collSchema.Fields.
// If any field is missing in the msg, an error will be returned.
//
// This funcion also checks the length of each column. All columns shall have the same length.
// Also, the InsertData.Infos shall have BlobInfo with this length returned.
// When the length is not aligned, an error will be returned.
2022-03-04 07:09:56 +00:00
func ColumnBasedInsertMsgToInsertData ( msg * msgstream . InsertMsg , collSchema * schemapb . CollectionSchema ) ( idata * InsertData , err error ) {
srcFields := make ( map [ FieldID ] * schemapb . FieldData )
for _ , field := range msg . FieldsData {
srcFields [ field . FieldId ] = field
}
idata = & InsertData {
Data : make ( map [ FieldID ] FieldData ) ,
}
2024-01-09 03:50:48 +00:00
length := 0
2022-03-04 07:09:56 +00:00
for _ , field := range collSchema . Fields {
2024-01-09 03:50:48 +00:00
srcField , ok := srcFields [ field . GetFieldID ( ) ]
if ! ok && field . GetFieldID ( ) >= common . StartOfUserFieldID {
return nil , merr . WrapErrFieldNotFound ( field . GetFieldID ( ) , fmt . Sprintf ( "field %s not found when converting insert msg to insert data" , field . GetName ( ) ) )
}
var fieldData FieldData
2022-03-04 07:09:56 +00:00
switch field . DataType {
case schemapb . DataType_FloatVector :
dim , err := GetDimFromParams ( field . TypeParams )
if err != nil {
log . Error ( "failed to get dim" , zap . Error ( err ) )
return nil , err
}
2024-01-09 03:50:48 +00:00
srcData := srcField . GetVectors ( ) . GetFloatVector ( ) . GetData ( )
fieldData = & FloatVectorFieldData {
Data : lo . Map ( srcData , func ( v float32 , _ int ) float32 { return v } ) ,
2023-01-28 03:09:52 +00:00
Dim : dim ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_BinaryVector :
dim , err := GetDimFromParams ( field . TypeParams )
if err != nil {
log . Error ( "failed to get dim" , zap . Error ( err ) )
return nil , err
}
2024-01-09 03:50:48 +00:00
srcData := srcField . GetVectors ( ) . GetBinaryVector ( )
2022-03-04 07:09:56 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & BinaryVectorFieldData {
Data : lo . Map ( srcData , func ( v byte , _ int ) byte { return v } ) ,
2023-01-28 03:09:52 +00:00
Dim : dim ,
2022-03-04 07:09:56 +00:00
}
2023-09-08 02:03:16 +00:00
case schemapb . DataType_Float16Vector :
dim , err := GetDimFromParams ( field . TypeParams )
if err != nil {
log . Error ( "failed to get dim" , zap . Error ( err ) )
return nil , err
}
2024-01-09 03:50:48 +00:00
srcData := srcField . GetVectors ( ) . GetFloat16Vector ( )
2023-09-08 02:03:16 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & Float16VectorFieldData {
Data : lo . Map ( srcData , func ( v byte , _ int ) byte { return v } ) ,
2023-09-08 02:03:16 +00:00
Dim : dim ,
}
2024-01-11 07:48:51 +00:00
case schemapb . DataType_BFloat16Vector :
dim , err := GetDimFromParams ( field . TypeParams )
if err != nil {
log . Error ( "failed to get dim" , zap . Error ( err ) )
return nil , err
}
srcData := srcField . GetVectors ( ) . GetBfloat16Vector ( )
fieldData = & BFloat16VectorFieldData {
Data : lo . Map ( srcData , func ( v byte , _ int ) byte { return v } ) ,
Dim : dim ,
}
2024-03-13 21:32:54 +00:00
case schemapb . DataType_SparseFloatVector :
fieldData = & SparseFloatVectorFieldData {
SparseFloatArray : * srcFields [ field . FieldID ] . GetVectors ( ) . GetSparseFloatVector ( ) ,
}
2022-03-04 07:09:56 +00:00
case schemapb . DataType_Bool :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetBoolData ( ) . GetData ( )
2022-03-04 07:09:56 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & BoolFieldData {
Data : lo . Map ( srcData , func ( v bool , _ int ) bool { return v } ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Int8 :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetIntData ( ) . GetData ( )
2022-03-04 07:09:56 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & Int8FieldData {
Data : lo . Map ( srcData , func ( v int32 , _ int ) int8 { return int8 ( v ) } ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Int16 :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetIntData ( ) . GetData ( )
2022-03-04 07:09:56 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & Int16FieldData {
Data : lo . Map ( srcData , func ( v int32 , _ int ) int16 { return int16 ( v ) } ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Int32 :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetIntData ( ) . GetData ( )
2022-03-04 07:09:56 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & Int32FieldData {
Data : lo . Map ( srcData , func ( v int32 , _ int ) int32 { return v } ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Int64 :
switch field . FieldID {
2024-01-09 03:50:48 +00:00
case common . RowIDField : // rowIDs
fieldData = & Int64FieldData {
Data : lo . Map ( msg . GetRowIDs ( ) , func ( v int64 , _ int ) int64 { return v } ) ,
}
case common . TimeStampField : // Timestamps
fieldData = & Int64FieldData {
Data : lo . Map ( msg . GetTimestamps ( ) , func ( v uint64 , _ int ) int64 { return int64 ( v ) } ) ,
2022-03-04 07:09:56 +00:00
}
default :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetLongData ( ) . GetData ( )
fieldData = & Int64FieldData {
Data : lo . Map ( srcData , func ( v int64 , _ int ) int64 { return v } ) ,
}
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Float :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetFloatData ( ) . GetData ( )
2022-03-04 07:09:56 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & FloatFieldData {
Data : lo . Map ( srcData , func ( v float32 , _ int ) float32 { return v } ) ,
2022-03-04 07:09:56 +00:00
}
case schemapb . DataType_Double :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetDoubleData ( ) . GetData ( )
2022-03-04 07:09:56 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & DoubleFieldData {
Data : lo . Map ( srcData , func ( v float64 , _ int ) float64 { return v } ) ,
2022-03-04 07:09:56 +00:00
}
2022-03-25 06:27:25 +00:00
case schemapb . DataType_String , schemapb . DataType_VarChar :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetStringData ( ) . GetData ( )
2022-03-25 06:27:25 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & StringFieldData {
Data : lo . Map ( srcData , func ( v string , _ int ) string { return v } ) ,
2022-03-25 06:27:25 +00:00
}
2023-04-20 03:32:31 +00:00
case schemapb . DataType_Array :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetArrayData ( ) . GetData ( )
2023-04-20 03:32:31 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & ArrayFieldData {
2023-11-09 06:16:20 +00:00
ElementType : field . GetElementType ( ) ,
2024-01-09 03:50:48 +00:00
Data : lo . Map ( srcData , func ( v * schemapb . ScalarField , _ int ) * schemapb . ScalarField { return v } ) ,
2023-04-20 03:32:31 +00:00
}
case schemapb . DataType_JSON :
2024-01-09 03:50:48 +00:00
srcData := srcField . GetScalars ( ) . GetJsonData ( ) . GetData ( )
2023-04-20 03:32:31 +00:00
2024-01-09 03:50:48 +00:00
fieldData = & JSONFieldData {
Data : lo . Map ( srcData , func ( v [ ] byte , _ int ) [ ] byte { return v } ) ,
2023-04-20 03:32:31 +00:00
}
2024-01-09 03:50:48 +00:00
default :
return nil , merr . WrapErrServiceInternal ( "data type not handled" , field . GetDataType ( ) . String ( ) )
}
if length == 0 {
length = fieldData . RowNum ( )
}
if fieldData . RowNum ( ) != length {
return nil , merr . WrapErrServiceInternal ( "row num not match" , fmt . Sprintf ( "field %s row num not match %d, other column %d" , field . GetName ( ) , fieldData . RowNum ( ) , length ) )
2022-03-04 07:09:56 +00:00
}
2024-01-09 03:50:48 +00:00
idata . Data [ field . FieldID ] = fieldData
2022-03-04 07:09:56 +00:00
}
2024-01-09 03:50:48 +00:00
idata . Infos = [ ] BlobInfo {
{ Length : length } ,
}
2022-03-04 07:09:56 +00:00
return idata , nil
}
// InsertMsgToInsertData converts msg into InsertData according to schema,
// dispatching to RowBasedInsertMsgToInsertData when msg.IsRowBased() is true
// and to ColumnBasedInsertMsgToInsertData otherwise.
func InsertMsgToInsertData(msg *msgstream.InsertMsg, schema *schemapb.CollectionSchema) (idata *InsertData, err error) {
	if !msg.IsRowBased() {
		return ColumnBasedInsertMsgToInsertData(msg, schema)
	}
	return RowBasedInsertMsgToInsertData(msg, schema)
}
func mergeBoolField ( data * InsertData , fid FieldID , field * BoolFieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & BoolFieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * BoolFieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
func mergeInt8Field ( data * InsertData , fid FieldID , field * Int8FieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & Int8FieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * Int8FieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
func mergeInt16Field ( data * InsertData , fid FieldID , field * Int16FieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & Int16FieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * Int16FieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
func mergeInt32Field ( data * InsertData , fid FieldID , field * Int32FieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & Int32FieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * Int32FieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
func mergeInt64Field ( data * InsertData , fid FieldID , field * Int64FieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & Int64FieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * Int64FieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
func mergeFloatField ( data * InsertData , fid FieldID , field * FloatFieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & FloatFieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * FloatFieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
func mergeDoubleField ( data * InsertData , fid FieldID , field * DoubleFieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & DoubleFieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * DoubleFieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
func mergeStringField ( data * InsertData , fid FieldID , field * StringFieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & StringFieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * StringFieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
2023-04-20 03:32:31 +00:00
func mergeArrayField ( data * InsertData , fid FieldID , field * ArrayFieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & ArrayFieldData {
2023-11-09 06:16:20 +00:00
ElementType : field . ElementType ,
Data : nil ,
2023-04-20 03:32:31 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * ArrayFieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
// mergeJSONField appends the values of field to the JSON column stored under
// fid in data, creating an empty column first when none exists. It panics if
// the existing column is not a *JSONFieldData.
func mergeJSONField(data *InsertData, fid FieldID, field *JSONFieldData) {
	existing, found := data.Data[fid]
	if !found {
		existing = &JSONFieldData{}
		data.Data[fid] = existing
	}
	target := existing.(*JSONFieldData)
	target.Data = append(target.Data, field.Data...)
}
2022-03-04 07:09:56 +00:00
func mergeBinaryVectorField ( data * InsertData , fid FieldID , field * BinaryVectorFieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & BinaryVectorFieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
Dim : field . Dim ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * BinaryVectorFieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
func mergeFloatVectorField ( data * InsertData , fid FieldID , field * FloatVectorFieldData ) {
if _ , ok := data . Data [ fid ] ; ! ok {
fieldData := & FloatVectorFieldData {
2023-01-28 03:09:52 +00:00
Data : nil ,
Dim : field . Dim ,
2022-03-04 07:09:56 +00:00
}
data . Data [ fid ] = fieldData
}
fieldData := data . Data [ fid ] . ( * FloatVectorFieldData )
fieldData . Data = append ( fieldData . Data , field . Data ... )
}
2023-09-08 02:03:16 +00:00
// mergeFloat16VectorField appends the bytes of field to the float16 vector
// column stored under fid in data. When no column exists yet, an empty one is
// created that inherits field's Dim. It panics if the existing column is not
// a *Float16VectorFieldData.
func mergeFloat16VectorField(data *InsertData, fid FieldID, field *Float16VectorFieldData) {
	existing, found := data.Data[fid]
	if !found {
		existing = &Float16VectorFieldData{Dim: field.Dim}
		data.Data[fid] = existing
	}
	target := existing.(*Float16VectorFieldData)
	target.Data = append(target.Data, field.Data...)
}
2024-01-11 07:48:51 +00:00
// mergeBFloat16VectorField appends the bytes of field to the bfloat16 vector
// column stored under fid in data. When no column exists yet, an empty one is
// created that inherits field's Dim. It panics if the existing column is not
// a *BFloat16VectorFieldData.
func mergeBFloat16VectorField(data *InsertData, fid FieldID, field *BFloat16VectorFieldData) {
	existing, found := data.Data[fid]
	if !found {
		existing = &BFloat16VectorFieldData{Dim: field.Dim}
		data.Data[fid] = existing
	}
	target := existing.(*BFloat16VectorFieldData)
	target.Data = append(target.Data, field.Data...)
}
2024-03-13 21:32:54 +00:00
// mergeSparseFloatVectorField merges field into the sparse float vector
// column stored under fid in data by calling AppendAllRows on it, creating an
// empty column first when none exists. It panics if the existing column is
// not a *SparseFloatVectorFieldData.
func mergeSparseFloatVectorField(data *InsertData, fid FieldID, field *SparseFloatVectorFieldData) {
	existing, found := data.Data[fid]
	if !found {
		existing = &SparseFloatVectorFieldData{}
		data.Data[fid] = existing
	}
	target := existing.(*SparseFloatVectorFieldData)
	target.AppendAllRows(field)
}
2022-03-04 07:09:56 +00:00
// MergeFieldData merge field into data.
func MergeFieldData ( data * InsertData , fid FieldID , field FieldData ) {
if field == nil {
return
}
switch field := field . ( type ) {
case * BoolFieldData :
mergeBoolField ( data , fid , field )
case * Int8FieldData :
mergeInt8Field ( data , fid , field )
case * Int16FieldData :
mergeInt16Field ( data , fid , field )
case * Int32FieldData :
mergeInt32Field ( data , fid , field )
case * Int64FieldData :
mergeInt64Field ( data , fid , field )
case * FloatFieldData :
mergeFloatField ( data , fid , field )
case * DoubleFieldData :
mergeDoubleField ( data , fid , field )
case * StringFieldData :
mergeStringField ( data , fid , field )
2023-04-20 03:32:31 +00:00
case * ArrayFieldData :
mergeArrayField ( data , fid , field )
case * JSONFieldData :
mergeJSONField ( data , fid , field )
2022-03-04 07:09:56 +00:00
case * BinaryVectorFieldData :
mergeBinaryVectorField ( data , fid , field )
case * FloatVectorFieldData :
mergeFloatVectorField ( data , fid , field )
2023-09-08 02:03:16 +00:00
case * Float16VectorFieldData :
mergeFloat16VectorField ( data , fid , field )
2024-01-11 07:48:51 +00:00
case * BFloat16VectorFieldData :
mergeBFloat16VectorField ( data , fid , field )
2024-03-13 21:32:54 +00:00
case * SparseFloatVectorFieldData :
mergeSparseFloatVectorField ( data , fid , field )
2022-03-04 07:09:56 +00:00
}
}
2023-09-05 13:41:48 +00:00
// MergeInsertData append the insert datas to the original buffer.
func MergeInsertData ( buffer * InsertData , datas ... * InsertData ) {
if buffer == nil {
log . Warn ( "Attempt to merge data into a nil buffer, skip the data merge." )
return
2022-03-04 07:09:56 +00:00
}
2023-09-05 13:41:48 +00:00
2022-03-04 07:09:56 +00:00
for _ , data := range datas {
if data != nil {
for fid , field := range data . Data {
2023-09-05 13:41:48 +00:00
MergeFieldData ( buffer , fid , field )
2022-03-04 07:09:56 +00:00
}
// TODO: handle storage.InsertData.Infos
2023-09-05 13:41:48 +00:00
buffer . Infos = append ( buffer . Infos , data . Infos ... )
2022-03-04 07:09:56 +00:00
}
}
}
// TODO: string type.
2022-03-25 06:27:25 +00:00
func GetPkFromInsertData ( collSchema * schemapb . CollectionSchema , data * InsertData ) ( FieldData , error ) {
2022-03-04 07:09:56 +00:00
helper , err := typeutil . CreateSchemaHelper ( collSchema )
if err != nil {
log . Error ( "failed to create schema helper" , zap . Error ( err ) )
return nil , err
}
pf , err := helper . GetPrimaryKeyField ( )
if err != nil {
log . Warn ( "no primary field found" , zap . Error ( err ) )
return nil , err
}
pfData , ok := data . Data [ pf . FieldID ]
if ! ok {
log . Warn ( "no primary field found in insert msg" , zap . Int64 ( "fieldID" , pf . FieldID ) )
return nil , errors . New ( "no primary field found in insert msg" )
}
2022-03-25 06:27:25 +00:00
var realPfData FieldData
switch pf . DataType {
case schemapb . DataType_Int64 :
realPfData , ok = pfData . ( * Int64FieldData )
case schemapb . DataType_VarChar :
realPfData , ok = pfData . ( * StringFieldData )
default :
2023-09-21 01:45:27 +00:00
// TODO
2022-03-25 06:27:25 +00:00
}
2022-03-04 07:09:56 +00:00
if ! ok {
2022-03-25 06:27:25 +00:00
log . Warn ( "primary field not in Int64 or VarChar format" , zap . Int64 ( "fieldID" , pf . FieldID ) )
return nil , errors . New ( "primary field not in Int64 or VarChar format" )
2022-03-04 07:09:56 +00:00
}
2022-03-25 06:27:25 +00:00
return realPfData , nil
2022-03-04 07:09:56 +00:00
}
2022-03-11 06:39:59 +00:00
2022-09-04 01:05:09 +00:00
// GetTimestampFromInsertData fetches the column stored under
// common.TimeStampField and returns it as *Int64FieldData.
// It fails when data is nil, when the column is absent, or when the column is
// not an *Int64FieldData.
func GetTimestampFromInsertData(data *InsertData) (*Int64FieldData, error) {
	if data == nil {
		return nil, errors.New("try to get timestamp from nil insert data")
	}

	column, found := data.Data[common.TimeStampField]
	if !found {
		return nil, errors.New("no timestamp field in insert data")
	}

	if timestamps, isInt64 := column.(*Int64FieldData); isInt64 {
		return timestamps, nil
	}
	return nil, errors.New("timestamp field is not Int64")
}
2022-03-11 06:39:59 +00:00
// boolFieldDataToPbBytes wraps the bool column in a schemapb.BoolArray and
// returns its protobuf encoding.
func boolFieldDataToPbBytes(field *BoolFieldData) ([]byte, error) {
	return proto.Marshal(&schemapb.BoolArray{
		Data: field.Data,
	})
}
// stringFieldDataToPbBytes wraps the string column in a schemapb.StringArray
// and returns its protobuf encoding.
func stringFieldDataToPbBytes(field *StringFieldData) ([]byte, error) {
	return proto.Marshal(&schemapb.StringArray{
		Data: field.Data,
	})
}
2023-04-20 03:32:31 +00:00
// arrayFieldDataToPbBytes wraps the array column in a schemapb.ArrayArray and
// returns its protobuf encoding.
func arrayFieldDataToPbBytes(field *ArrayFieldData) ([]byte, error) {
	return proto.Marshal(&schemapb.ArrayArray{
		Data: field.Data,
	})
}
// jsonFieldDataToPbBytes wraps the JSON column in a schemapb.JSONArray and
// returns its protobuf encoding.
func jsonFieldDataToPbBytes(field *JSONFieldData) ([]byte, error) {
	return proto.Marshal(&schemapb.JSONArray{
		Data: field.Data,
	})
}
2022-03-11 06:39:59 +00:00
// binaryWrite encodes data with binary.Write using the given byte order and
// returns the resulting bytes. If binary.Write reports an error, that error is
// returned and the byte slice is nil.
func binaryWrite(endian binary.ByteOrder, data interface{}) ([]byte, error) {
	var out bytes.Buffer
	if err := binary.Write(&out, endian, data); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// FieldDataToBytes encode field data to byte slice.
// For some fixed-length data, such as int32, int64, float vector, use binary.Write directly.
// For binary vector, return it directly.
// For bool data, first transfer to schemapb.BoolArray and then marshal it. (TODO: handle bool like other scalar data.)
// For variable-length data, such as string, first transfer to schemapb.StringArray and then marshal it.
// TODO: find a proper way to store variable-length data. Or we should unify to use protobuf?
func FieldDataToBytes ( endian binary . ByteOrder , fieldData FieldData ) ( [ ] byte , error ) {
switch field := fieldData . ( type ) {
case * BoolFieldData :
// return binaryWrite(endian, field.Data)
return boolFieldDataToPbBytes ( field )
case * StringFieldData :
return stringFieldDataToPbBytes ( field )
2023-04-20 03:32:31 +00:00
case * ArrayFieldData :
return arrayFieldDataToPbBytes ( field )
case * JSONFieldData :
return jsonFieldDataToPbBytes ( field )
2022-03-11 06:39:59 +00:00
case * BinaryVectorFieldData :
return field . Data , nil
case * FloatVectorFieldData :
return binaryWrite ( endian , field . Data )
case * Int8FieldData :
return binaryWrite ( endian , field . Data )
case * Int16FieldData :
return binaryWrite ( endian , field . Data )
case * Int32FieldData :
return binaryWrite ( endian , field . Data )
case * Int64FieldData :
return binaryWrite ( endian , field . Data )
case * FloatFieldData :
return binaryWrite ( endian , field . Data )
case * DoubleFieldData :
return binaryWrite ( endian , field . Data )
default :
return nil , fmt . Errorf ( "unsupported field data: %s" , field )
}
}
2022-04-29 05:35:49 +00:00
func TransferInsertDataToInsertRecord ( insertData * InsertData ) ( * segcorepb . InsertRecord , error ) {
insertRecord := & segcorepb . InsertRecord { }
for fieldID , rawData := range insertData . Data {
var fieldData * schemapb . FieldData
switch rawData := rawData . ( type ) {
case * BoolFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Bool ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_BoolData {
BoolData : & schemapb . BoolArray {
Data : rawData . Data ,
} ,
} ,
} ,
} ,
}
case * Int8FieldData :
int32Data := make ( [ ] int32 , len ( rawData . Data ) )
for index , v := range rawData . Data {
int32Data [ index ] = int32 ( v )
}
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Int8 ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_IntData {
IntData : & schemapb . IntArray {
Data : int32Data ,
} ,
} ,
} ,
} ,
}
case * Int16FieldData :
int32Data := make ( [ ] int32 , len ( rawData . Data ) )
for index , v := range rawData . Data {
int32Data [ index ] = int32 ( v )
}
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Int16 ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_IntData {
IntData : & schemapb . IntArray {
Data : int32Data ,
} ,
} ,
} ,
} ,
}
case * Int32FieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Int32 ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_IntData {
IntData : & schemapb . IntArray {
Data : rawData . Data ,
} ,
} ,
} ,
} ,
}
case * Int64FieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Int64 ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_LongData {
LongData : & schemapb . LongArray {
Data : rawData . Data ,
} ,
} ,
} ,
} ,
}
case * FloatFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Float ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_FloatData {
FloatData : & schemapb . FloatArray {
Data : rawData . Data ,
} ,
} ,
} ,
} ,
}
case * DoubleFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Double ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_DoubleData {
DoubleData : & schemapb . DoubleArray {
Data : rawData . Data ,
} ,
} ,
} ,
} ,
}
case * StringFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_VarChar ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_StringData {
StringData : & schemapb . StringArray {
Data : rawData . Data ,
} ,
} ,
} ,
} ,
}
2023-04-20 03:32:31 +00:00
case * ArrayFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Array ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_ArrayData {
ArrayData : & schemapb . ArrayArray {
Data : rawData . Data ,
} ,
} ,
} ,
} ,
}
case * JSONFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_JSON ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Scalars {
Scalars : & schemapb . ScalarField {
Data : & schemapb . ScalarField_JsonData {
JsonData : & schemapb . JSONArray {
Data : rawData . Data ,
} ,
} ,
} ,
} ,
}
2022-04-29 05:35:49 +00:00
case * FloatVectorFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_FloatVector ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Vectors {
Vectors : & schemapb . VectorField {
Data : & schemapb . VectorField_FloatVector {
FloatVector : & schemapb . FloatArray {
Data : rawData . Data ,
} ,
} ,
Dim : int64 ( rawData . Dim ) ,
} ,
} ,
}
case * BinaryVectorFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_BinaryVector ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Vectors {
Vectors : & schemapb . VectorField {
Data : & schemapb . VectorField_BinaryVector {
BinaryVector : rawData . Data ,
} ,
Dim : int64 ( rawData . Dim ) ,
} ,
} ,
}
2024-03-28 06:11:10 +00:00
case * Float16VectorFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_Float16Vector ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Vectors {
Vectors : & schemapb . VectorField {
Data : & schemapb . VectorField_Float16Vector {
Float16Vector : rawData . Data ,
} ,
Dim : int64 ( rawData . Dim ) ,
} ,
} ,
}
case * BFloat16VectorFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_BFloat16Vector ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Vectors {
Vectors : & schemapb . VectorField {
Data : & schemapb . VectorField_Bfloat16Vector {
Bfloat16Vector : rawData . Data ,
} ,
Dim : int64 ( rawData . Dim ) ,
} ,
} ,
}
2024-03-13 21:32:54 +00:00
case * SparseFloatVectorFieldData :
fieldData = & schemapb . FieldData {
Type : schemapb . DataType_SparseFloatVector ,
FieldId : fieldID ,
Field : & schemapb . FieldData_Vectors {
Vectors : & schemapb . VectorField {
Data : & schemapb . VectorField_SparseFloatVector {
SparseFloatVector : & rawData . SparseFloatArray ,
} ,
} ,
} ,
}
2022-04-29 05:35:49 +00:00
default :
return insertRecord , fmt . Errorf ( "unsupported data type when transter storage.InsertData to internalpb.InsertRecord" )
}
insertRecord . FieldsData = append ( insertRecord . FieldsData , fieldData )
insertRecord . NumRows = int64 ( rawData . RowNum ( ) )
}
return insertRecord , nil
}
// TransferInsertMsgToInsertRecord builds a segcorepb.InsertRecord from an
// insert message.
//
// A column-based message is copied over directly: NumRows comes from
// msg.NumRows and FieldsData is a fresh slice holding the message's field data.
// A row-based message is first converted with RowBasedInsertMsgToInsertData
// and then passed to TransferInsertDataToInsertRecord; a conversion error is
// returned with a nil record.
func TransferInsertMsgToInsertRecord(schema *schemapb.CollectionSchema, msg *msgstream.InsertMsg) (*segcorepb.InsertRecord, error) {
	if !msg.IsRowBased() {
		// column base insert msg
		record := &segcorepb.InsertRecord{
			NumRows:    int64(msg.NumRows),
			FieldsData: append([]*schemapb.FieldData(nil), msg.FieldsData...),
		}
		return record, nil
	}

	converted, err := RowBasedInsertMsgToInsertData(msg, schema)
	if err != nil {
		return nil, err
	}
	return TransferInsertDataToInsertRecord(converted)
}
2024-02-06 09:04:25 +00:00
// Min returns the smaller of the two int64 values a and b.
func Min(a, b int64) int64 {
	if b <= a {
		return b
	}
	return a
}
2024-03-22 05:57:06 +00:00
// NewTestChunkManagerFactory builds a ChunkManagerFactory for the "minio"
// storage type, rooted at rootPath. Connection settings (address, credentials,
// SSL, bucket, IAM, cloud provider) are read from params.MinioCfg, and bucket
// creation is always enabled.
func NewTestChunkManagerFactory(params *paramtable.ComponentParam, rootPath string) *ChunkManagerFactory {
	// Read every setting up front, in the same order the options are listed.
	address := params.MinioCfg.Address.GetValue()
	accessKeyID := params.MinioCfg.AccessKeyID.GetValue()
	secretAccessKey := params.MinioCfg.SecretAccessKey.GetValue()
	useSSL := params.MinioCfg.UseSSL.GetAsBool()
	bucketName := params.MinioCfg.BucketName.GetValue()
	useIAM := params.MinioCfg.UseIAM.GetAsBool()
	cloudProvider := params.MinioCfg.CloudProvider.GetValue()
	iamEndpoint := params.MinioCfg.IAMEndpoint.GetValue()

	return NewChunkManagerFactory(
		"minio",
		RootPath(rootPath),
		Address(address),
		AccessKeyID(accessKeyID),
		SecretAccessKeyID(secretAccessKey),
		UseSSL(useSSL),
		BucketName(bucketName),
		UseIAM(useIAM),
		CloudProvider(cloudProvider),
		IAMEndpoint(iamEndpoint),
		CreateBucket(true),
	)
}
2024-03-25 12:29:07 +00:00
// GetFilesSize sums the sizes reported by cm.Size for every path in paths.
// The first error from cm.Size aborts the walk and is returned with a size
// of 0. An empty paths slice yields 0 and no error.
func GetFilesSize(ctx context.Context, paths []string, cm ChunkManager) (int64, error) {
	var total int64
	for i := range paths {
		fileSize, err := cm.Size(ctx, paths[i])
		if err != nil {
			return 0, err
		}
		total += fileSize
	}
	return total, nil
}