mirror of https://github.com/milvus-io/milvus.git
170 lines
4.4 KiB
Go
170 lines
4.4 KiB
Go
// Copyright 2023 Zilliz
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package packed
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/apache/arrow/go/v17/arrow"
|
|
"github.com/apache/arrow/go/v17/arrow/array"
|
|
"github.com/apache/arrow/go/v17/arrow/memory"
|
|
"github.com/stretchr/testify/suite"
|
|
"golang.org/x/exp/rand"
|
|
|
|
"github.com/milvus-io/milvus/internal/util/initcore"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
|
)
|
|
|
|
func TestPackedReadAndWrite(t *testing.T) {
|
|
suite.Run(t, new(PackedTestSuite))
|
|
}
|
|
|
|
type PackedTestSuite struct {
|
|
suite.Suite
|
|
schema *arrow.Schema
|
|
rec arrow.Record
|
|
localDataRootPath string
|
|
}
|
|
|
|
func (suite *PackedTestSuite) SetupSuite() {
|
|
paramtable.Init()
|
|
}
|
|
|
|
func (suite *PackedTestSuite) SetupTest() {
|
|
initcore.InitLocalArrowFileSystem("/tmp")
|
|
schema := arrow.NewSchema([]arrow.Field{
|
|
{Name: "a", Type: arrow.PrimitiveTypes.Int32},
|
|
{Name: "b", Type: arrow.PrimitiveTypes.Int64},
|
|
{Name: "c", Type: arrow.BinaryTypes.String},
|
|
}, nil)
|
|
suite.schema = schema
|
|
|
|
b := array.NewRecordBuilder(memory.DefaultAllocator, schema)
|
|
defer b.Release()
|
|
for idx := range schema.Fields() {
|
|
switch idx {
|
|
case 0:
|
|
b.Field(idx).(*array.Int32Builder).AppendValues(
|
|
[]int32{int32(1), int32(2), int32(3)}, nil,
|
|
)
|
|
case 1:
|
|
b.Field(idx).(*array.Int64Builder).AppendValues(
|
|
[]int64{int64(4), int64(5), int64(6)}, nil,
|
|
)
|
|
case 2:
|
|
b.Field(idx).(*array.StringBuilder).AppendValues(
|
|
[]string{"a", "b", "c"}, nil,
|
|
)
|
|
}
|
|
}
|
|
rec := b.NewRecord()
|
|
suite.rec = rec
|
|
}
|
|
|
|
func (suite *PackedTestSuite) TestPackedOneFile() {
|
|
batches := 100
|
|
|
|
paths := []string{"/tmp/100"}
|
|
columnGroups := [][]int{{0, 1, 2}}
|
|
bufferSize := int64(10 * 1024 * 1024) // 10MB
|
|
multiPartUploadSize := int64(0)
|
|
pw, err := NewPackedWriter(paths, suite.schema, bufferSize, multiPartUploadSize, columnGroups)
|
|
suite.NoError(err)
|
|
for i := 0; i < batches; i++ {
|
|
err = pw.WriteRecordBatch(suite.rec)
|
|
suite.NoError(err)
|
|
}
|
|
err = pw.Close()
|
|
suite.NoError(err)
|
|
|
|
reader, err := NewPackedReader(paths, suite.schema, bufferSize)
|
|
suite.NoError(err)
|
|
rr, err := reader.ReadNext()
|
|
suite.NoError(err)
|
|
defer rr.Release()
|
|
suite.Equal(int64(3*batches), rr.NumRows())
|
|
}
|
|
|
|
func (suite *PackedTestSuite) TestPackedMultiFiles() {
|
|
batches := 1000
|
|
|
|
b := array.NewRecordBuilder(memory.DefaultAllocator, suite.schema)
|
|
strLen := 1000
|
|
arrLen := 30
|
|
defer b.Release()
|
|
for idx := range suite.schema.Fields() {
|
|
switch idx {
|
|
case 0:
|
|
values := make([]int32, arrLen)
|
|
for i := 0; i < arrLen; i++ {
|
|
values[i] = int32(i + 1)
|
|
}
|
|
b.Field(idx).(*array.Int32Builder).AppendValues(values, nil)
|
|
case 1:
|
|
values := make([]int64, arrLen)
|
|
for i := 0; i < arrLen; i++ {
|
|
values[i] = int64(i + 1)
|
|
}
|
|
b.Field(idx).(*array.Int64Builder).AppendValues(values, nil)
|
|
case 2:
|
|
values := make([]string, arrLen)
|
|
for i := 0; i < arrLen; i++ {
|
|
values[i] = randomString(strLen)
|
|
}
|
|
b.Field(idx).(*array.StringBuilder).AppendValues(values, nil)
|
|
}
|
|
}
|
|
rec := b.NewRecord()
|
|
defer rec.Release()
|
|
paths := []string{"/tmp/100", "/tmp/101"}
|
|
columnGroups := [][]int{{2}, {0, 1}}
|
|
bufferSize := int64(10 * 1024 * 1024) // 10MB
|
|
multiPartUploadSize := int64(0)
|
|
pw, err := NewPackedWriter(paths, suite.schema, bufferSize, multiPartUploadSize, columnGroups)
|
|
suite.NoError(err)
|
|
for i := 0; i < batches; i++ {
|
|
err = pw.WriteRecordBatch(rec)
|
|
suite.NoError(err)
|
|
}
|
|
err = pw.Close()
|
|
suite.NoError(err)
|
|
|
|
reader, err := NewPackedReader(paths, suite.schema, bufferSize)
|
|
suite.NoError(err)
|
|
var rows int64 = 0
|
|
var rr arrow.Record
|
|
for {
|
|
rr, err = reader.ReadNext()
|
|
suite.NoError(err)
|
|
if rr == nil {
|
|
// end of file
|
|
break
|
|
}
|
|
|
|
rows += rr.NumRows()
|
|
}
|
|
|
|
suite.Equal(int64(arrLen*batches), rows)
|
|
}
|
|
|
|
// randomString returns a string of exactly `length` bytes, each drawn via
// rand.Intn from the digits and the upper- and lower-case ASCII letters.
func randomString(length int) string {
	const alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

	buf := make([]byte, length)
	for pos := 0; pos < length; pos++ {
		pick := rand.Intn(len(alphabet))
		buf[pos] = alphabet[pick]
	}
	return string(buf)
}
|