fix: fix wrong size of arrow array for zero-copy mode (#38449)

#37767

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
pull/38479/head
zhagnlu 2024-12-15 18:14:43 +08:00 committed by GitHub
parent 01de0afc4e
commit c3edc85359
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 36 additions and 7 deletions

View File

@ -17,6 +17,7 @@
package storage
import (
"encoding/binary"
"fmt"
"io"
"math"
@ -24,12 +25,10 @@ import (
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/apache/arrow/go/v12/parquet"
"github.com/apache/arrow/go/v12/parquet/compress"
"github.com/apache/arrow/go/v12/parquet/pqarrow"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"google.golang.org/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
@ -616,12 +615,42 @@ func calculateArraySize(a arrow.Array) int {
if a == nil || a.Data() == nil || a.Data().Buffers() == nil {
return 0
}
return lo.SumBy[*memory.Buffer, int](a.Data().Buffers(), func(b *memory.Buffer) int {
if b == nil {
return 0
var totalSize int
offset := a.Data().Offset()
length := a.Len()
for i, buf := range a.Data().Buffers() {
if buf == nil {
continue
}
return b.Len()
})
switch i {
case 0:
// Handle bitmap buffer
totalSize += (length + 7) / 8
case 1:
switch a.DataType().ID() {
case arrow.STRING, arrow.BINARY:
// Handle variable-length types like STRING/BINARY
startOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[offset*4:]))
endOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[(offset+length)*4:]))
totalSize += endOffset - startOffset
case arrow.LIST:
// Handle nest types like list
for i := 0; i < length; i++ {
startOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[i*4:]))
endOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[(i+1)*4:]))
totalSize += endOffset - startOffset
}
default:
// Handle fixed-length types
elementSize := buf.Len() / a.Data().Len()
totalSize += elementSize * length
}
}
}
return totalSize
}
func newSelectiveRecord(r Record, selectedFieldId FieldID) *selectiveRecord {