mirror of https://github.com/milvus-io/milvus.git
enhance: reduce memory when read data (#30284)
Signed-off-by: lixinguo <xinguo.li@zilliz.com> Co-authored-by: lixinguo <xinguo.li@zilliz.com>pull/30326/head
parent
c3c9dcdc2b
commit
9512af357b
|
@ -50,16 +50,14 @@ func ReadBoolData(pcr *ParquetColumnReader, count int64) ([]bool, error) {
|
|||
data := make([]bool, 0, count)
|
||||
for _, chunk := range chunked.Chunks() {
|
||||
dataNums := chunk.Data().Len()
|
||||
chunkData := make([]bool, dataNums)
|
||||
boolReader, ok := chunk.(*array.Boolean)
|
||||
if !ok {
|
||||
log.Warn("the column data in parquet is not bool", zap.String("fieldName", pcr.fieldName), zap.String("actual type", chunk.DataType().Name()))
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the column data in parquet is not bool of field: %s, but: %s", pcr.fieldName, chunk.DataType().Name()))
|
||||
}
|
||||
for i := 0; i < dataNums; i++ {
|
||||
chunkData[i] = boolReader.Value(i)
|
||||
data = append(data, boolReader.Value(i))
|
||||
}
|
||||
data = append(data, chunkData...)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
@ -72,42 +70,40 @@ func ReadIntegerOrFloatData[T constraints.Integer | constraints.Float](pcr *Parq
|
|||
data := make([]T, 0, count)
|
||||
for _, chunk := range chunked.Chunks() {
|
||||
dataNums := chunk.Data().Len()
|
||||
chunkData := make([]T, dataNums)
|
||||
switch chunk.DataType().ID() {
|
||||
case arrow.INT8:
|
||||
int8Reader := chunk.(*array.Int8)
|
||||
for i := 0; i < dataNums; i++ {
|
||||
chunkData[i] = T(int8Reader.Value(i))
|
||||
data = append(data, T(int8Reader.Value(i)))
|
||||
}
|
||||
case arrow.INT16:
|
||||
int16Reader := chunk.(*array.Int16)
|
||||
for i := 0; i < dataNums; i++ {
|
||||
chunkData[i] = T(int16Reader.Value(i))
|
||||
data = append(data, T(int16Reader.Value(i)))
|
||||
}
|
||||
case arrow.INT32:
|
||||
int32Reader := chunk.(*array.Int32)
|
||||
for i := 0; i < dataNums; i++ {
|
||||
chunkData[i] = T(int32Reader.Value(i))
|
||||
data = append(data, T(int32Reader.Value(i)))
|
||||
}
|
||||
case arrow.INT64:
|
||||
int64Reader := chunk.(*array.Int64)
|
||||
for i := 0; i < dataNums; i++ {
|
||||
chunkData[i] = T(int64Reader.Value(i))
|
||||
data = append(data, T(int64Reader.Value(i)))
|
||||
}
|
||||
case arrow.FLOAT32:
|
||||
float32Reader := chunk.(*array.Float32)
|
||||
for i := 0; i < dataNums; i++ {
|
||||
chunkData[i] = T(float32Reader.Value(i))
|
||||
data = append(data, T(float32Reader.Value(i)))
|
||||
}
|
||||
case arrow.FLOAT64:
|
||||
float64Reader := chunk.(*array.Float64)
|
||||
for i := 0; i < dataNums; i++ {
|
||||
chunkData[i] = T(float64Reader.Value(i))
|
||||
data = append(data, T(float64Reader.Value(i)))
|
||||
}
|
||||
default:
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the column data type is not integer, neither float, but: %s", chunk.DataType().Name()))
|
||||
}
|
||||
data = append(data, chunkData...)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
@ -120,16 +116,14 @@ func ReadStringData(pcr *ParquetColumnReader, count int64) ([]string, error) {
|
|||
data := make([]string, 0, count)
|
||||
for _, chunk := range chunked.Chunks() {
|
||||
dataNums := chunk.Data().Len()
|
||||
chunkData := make([]string, dataNums)
|
||||
stringReader, ok := chunk.(*array.String)
|
||||
if !ok {
|
||||
log.Warn("the column data in parquet is not string", zap.String("fieldName", pcr.fieldName), zap.String("actual type", chunk.DataType().Name()))
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the column data in parquet is not string of field: %s, but: %s", pcr.fieldName, chunk.DataType().Name()))
|
||||
}
|
||||
for i := 0; i < dataNums; i++ {
|
||||
chunkData[i] = stringReader.Value(i)
|
||||
data = append(data, stringReader.Value(i))
|
||||
}
|
||||
data = append(data, chunkData...)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue