mirror of https://github.com/milvus-io/milvus.git
fix: fix wrong size of arrow array for zero-copy mode (#38449)
#37767 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com> pull/38479/head
parent
01de0afc4e
commit
c3edc85359
|
@ -17,6 +17,7 @@
|
||||||
package storage
|
package storage
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"math"
|
"math"
|
||||||
|
@ -24,12 +25,10 @@ import (
|
||||||
|
|
||||||
"github.com/apache/arrow/go/v12/arrow"
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
"github.com/apache/arrow/go/v12/arrow/array"
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
"github.com/apache/arrow/go/v12/arrow/memory"
|
|
||||||
"github.com/apache/arrow/go/v12/parquet"
|
"github.com/apache/arrow/go/v12/parquet"
|
||||||
"github.com/apache/arrow/go/v12/parquet/compress"
|
"github.com/apache/arrow/go/v12/parquet/compress"
|
||||||
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
||||||
"github.com/cockroachdb/errors"
|
"github.com/cockroachdb/errors"
|
||||||
"github.com/samber/lo"
|
|
||||||
"google.golang.org/protobuf/proto"
|
"google.golang.org/protobuf/proto"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
@ -616,12 +615,42 @@ func calculateArraySize(a arrow.Array) int {
|
||||||
if a == nil || a.Data() == nil || a.Data().Buffers() == nil {
|
if a == nil || a.Data() == nil || a.Data().Buffers() == nil {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
return lo.SumBy[*memory.Buffer, int](a.Data().Buffers(), func(b *memory.Buffer) int {
|
|
||||||
if b == nil {
|
var totalSize int
|
||||||
return 0
|
offset := a.Data().Offset()
|
||||||
|
length := a.Len()
|
||||||
|
|
||||||
|
for i, buf := range a.Data().Buffers() {
|
||||||
|
if buf == nil {
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
return b.Len()
|
|
||||||
})
|
switch i {
|
||||||
|
case 0:
|
||||||
|
// Handle bitmap buffer
|
||||||
|
totalSize += (length + 7) / 8
|
||||||
|
case 1:
|
||||||
|
switch a.DataType().ID() {
|
||||||
|
case arrow.STRING, arrow.BINARY:
|
||||||
|
// Handle variable-length types like STRING/BINARY
|
||||||
|
startOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[offset*4:]))
|
||||||
|
endOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[(offset+length)*4:]))
|
||||||
|
totalSize += endOffset - startOffset
|
||||||
|
case arrow.LIST:
|
||||||
|
// Handle nest types like list
|
||||||
|
for i := 0; i < length; i++ {
|
||||||
|
startOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[i*4:]))
|
||||||
|
endOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[(i+1)*4:]))
|
||||||
|
totalSize += endOffset - startOffset
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
// Handle fixed-length types
|
||||||
|
elementSize := buf.Len() / a.Data().Len()
|
||||||
|
totalSize += elementSize * length
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return totalSize
|
||||||
}
|
}
|
||||||
|
|
||||||
func newSelectiveRecord(r Record, selectedFieldId FieldID) *selectiveRecord {
|
func newSelectiveRecord(r Record, selectedFieldId FieldID) *selectiveRecord {
|
||||||
|
|
Loading…
Reference in New Issue