milvus/internal/storage/stats_test.go

159 lines
4.2 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"encoding/json"
"fmt"
"testing"
"github.com/bits-and-blooms/bloom/v3"
"github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/common"
)
// TestStatsWriter_Int64PrimaryKey generates primary-key stats for a small
// int64 column, reads them back through a StatsReader, and verifies the
// min/max keys and that every written key is reported by the bloom filter.
// It finishes by checking that generating stats from empty data returns no
// error.
func TestStatsWriter_Int64PrimaryKey(t *testing.T) {
	data := &Int64FieldData{
		Data: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9},
	}

	sw := &StatsWriter{}
	err := sw.GeneratePrimaryKeyStats(common.RowIDField, schemapb.DataType_Int64, data)
	if !assert.NoError(t, err) {
		return
	}

	sr := &StatsReader{}
	sr.SetBuffer(sw.GetBuffer())
	stats, err := sr.GetPrimaryKeyStats()
	// stats is dereferenced below; stop on error so the failure is reported
	// by the assertion rather than by a nil-pointer panic.
	if !assert.NoError(t, err) {
		return
	}

	maxPk := &Int64PrimaryKey{Value: 9}
	minPk := &Int64PrimaryKey{Value: 1}
	assert.True(t, stats.MaxPk.EQ(maxPk))
	assert.True(t, stats.MinPk.EQ(minPk))

	// Keys are probed using the same 8-byte encoding for every value; the
	// scratch buffer is reused across iterations.
	buffer := make([]byte, 8)
	for _, id := range data.Data {
		common.Endian.PutUint64(buffer, uint64(id))
		assert.True(t, stats.BF.Test(buffer))
	}

	// Empty input must not produce an error.
	msgs := &Int64FieldData{
		Data: []int64{},
	}
	err = sw.GeneratePrimaryKeyStats(common.RowIDField, schemapb.DataType_Int64, msgs)
	assert.NoError(t, err)
}
// TestStatsWriter_BF generates primary-key stats for one million sequential
// int64 keys, decodes the writer's buffer directly with UnmarshalJSON, and
// verifies bloom-filter membership for every key as well as the recorded
// min/max keys.
func TestStatsWriter_BF(t *testing.T) {
	const rowCount = 1000000

	value := make([]int64, rowCount)
	for i := range value {
		value[i] = int64(i)
	}
	data := &Int64FieldData{
		Data: value,
	}
	fmt.Println(data.RowNum())

	sw := &StatsWriter{}
	err := sw.GeneratePrimaryKeyStats(common.RowIDField, schemapb.DataType_Int64, data)
	if !assert.NoError(t, err) {
		return
	}

	stats := &PrimaryKeyStats{}
	// A decode failure must fail the test here instead of surfacing later as
	// a panic or a misleading bloom-filter assertion.
	err = stats.UnmarshalJSON(sw.buffer)
	if !assert.NoError(t, err) {
		return
	}

	buf := make([]byte, 8)
	for i := 0; i < rowCount; i++ {
		common.Endian.PutUint64(buf, uint64(i))
		assert.True(t, stats.BF.Test(buf))
	}

	// A key that was never inserted is expected to be absent.
	common.Endian.PutUint64(buf, uint64(rowCount+1))
	assert.False(t, stats.BF.Test(buf))

	assert.True(t, stats.MinPk.EQ(NewInt64PrimaryKey(0)))
	assert.True(t, stats.MaxPk.EQ(NewInt64PrimaryKey(rowCount-1)))
}
// TestStatsWriter_VarCharPrimaryKey generates primary-key stats for a string
// column, reads them back through a StatsReader, and verifies the min/max
// keys and that every written string is reported by the bloom filter.
func TestStatsWriter_VarCharPrimaryKey(t *testing.T) {
	data := &StringFieldData{
		Data: []string{"bc", "ac", "abd", "cd", "milvus"},
	}

	sw := &StatsWriter{}
	err := sw.GeneratePrimaryKeyStats(common.RowIDField, schemapb.DataType_VarChar, data)
	if !assert.NoError(t, err) {
		return
	}

	sr := &StatsReader{}
	sr.SetBuffer(sw.GetBuffer())
	stats, err := sr.GetPrimaryKeyStats()
	// stats is dereferenced below; stop on error so the failure is reported
	// by the assertion rather than by a nil-pointer panic.
	if !assert.NoError(t, err) {
		return
	}

	// "milvus" and "abd" are the largest and smallest inputs in byte order.
	maxPk := NewVarCharPrimaryKey("milvus")
	minPk := NewVarCharPrimaryKey("abd")
	assert.True(t, stats.MaxPk.EQ(maxPk))
	assert.True(t, stats.MinPk.EQ(minPk))

	for _, id := range data.Data {
		assert.True(t, stats.BF.TestString(id))
	}

	// Empty input must not produce an error.
	// NOTE(review): this uses empty Int64 data in a VarChar test; presumably
	// an empty StringFieldData with DataType_VarChar was intended — confirm.
	msgs := &Int64FieldData{
		Data: []int64{},
	}
	err = sw.GeneratePrimaryKeyStats(common.RowIDField, schemapb.DataType_Int64, msgs)
	assert.NoError(t, err)
}
// TestStatsWriter_UpgradePrimaryKey builds a PrimaryKeyStats that only sets
// the Min/Max fields (not MinPk/MaxPk), serializes it with json.Marshal, and
// verifies that StatsReader.GetPrimaryKeyStats returns stats whose MinPk,
// MaxPk and bloom filter match the original values.
// NOTE(review): Min/Max presumably represent an older serialized layout —
// confirm against PrimaryKeyStats.
func TestStatsWriter_UpgradePrimaryKey(t *testing.T) {
	data := &Int64FieldData{
		Data: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9},
	}

	stats := &PrimaryKeyStats{
		FieldID: common.RowIDField,
		Min:     1,
		Max:     9,
		BF:      bloom.NewWithEstimates(100000, 0.05),
	}

	// One 8-byte scratch buffer serves both for populating the filter and
	// for probing it after the round trip.
	key := make([]byte, 8)
	for _, v := range data.Data {
		common.Endian.PutUint64(key, uint64(v))
		stats.BF.Add(key)
	}

	blob, err := json.Marshal(stats)
	if !assert.NoError(t, err) {
		return
	}

	sr := &StatsReader{}
	sr.SetBuffer(blob)
	unmarshaledStats, err := sr.GetPrimaryKeyStats()
	// unmarshaledStats is dereferenced below; stop on error so the failure
	// is reported by the assertion rather than by a nil-pointer panic.
	if !assert.NoError(t, err) {
		return
	}

	maxPk := &Int64PrimaryKey{Value: 9}
	minPk := &Int64PrimaryKey{Value: 1}
	assert.True(t, unmarshaledStats.MaxPk.EQ(maxPk))
	assert.True(t, unmarshaledStats.MinPk.EQ(minPk))

	for _, id := range data.Data {
		common.Endian.PutUint64(key, uint64(id))
		assert.True(t, unmarshaledStats.BF.Test(key))
	}
}