test: supplementary json expr go-sdk test cases (#39824)

issue: #33419

---------

Signed-off-by: ThreadDao <yufen.zong@zilliz.com>
pull/39579/head
ThreadDao 2025-02-12 19:04:47 +08:00 committed by GitHub
parent 53a4207f46
commit 28c2558f5d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 118 additions and 35 deletions

View File

@ -3,9 +3,11 @@ package helper
import (
"bytes"
"encoding/json"
"fmt"
"math/rand"
"slices"
"strconv"
"strings"
"go.uber.org/zap"
@ -214,10 +216,13 @@ func GenArrayColumnData(nb int, eleType entity.FieldType, option GenDataOption)
}
// JSONStruct is the value shape that GenDefaultJSONData fills in when building JSON rows.
// Every tagged field uses `omitempty`, so zero-valued fields are omitted when the struct is JSON-encoded.
// NOTE(review): this view interleaves removed and re-aligned diff lines, so Number, String and List
// show up twice below — presumably each is declared once in the real file; confirm before editing.
type JSONStruct struct {
Number int32 `json:"number,omitempty" milvus:"name:number"`
String string `json:"string,omitempty" milvus:"name:string"`
Number int32 `json:"number,omitempty" milvus:"name:number"`
String string `json:"string,omitempty" milvus:"name:string"`
// Float is populated alongside Number by GenDefaultJSONData (float32(i) next to int32(i)).
Float float32 `json:"float,omitempty" milvus:"name:float"`
// Embedded pointer to BoolStruct, which is declared outside this view.
*BoolStruct
List []int64 `json:"list,omitempty" milvus:"name:list"`
List []int64 `json:"list,omitempty" milvus:"name:list"`
// FloatArray and StringArray are the array-valued JSON keys set by GenDefaultJSONData.
FloatArray []float64 `json:"floatArray,omitempty" milvus:"name:floatArray"`
StringArray []string `json:"stringArray,omitempty" milvus:"name:stringArray"`
}
// GenDefaultJSONData gen default column with data
@ -233,12 +238,15 @@ func GenDefaultJSONData(nb int, option GenDataOption) [][]byte {
if i < (start+nb)/2 {
if i%2 == 0 {
m = JSONStruct{
String: strconv.Itoa(i),
BoolStruct: _bool,
String: strconv.Itoa(i),
BoolStruct: _bool,
FloatArray: []float64{float64(i), float64(i), float64(i)},
StringArray: []string{fmt.Sprintf("%05d", i)},
}
} else {
m = JSONStruct{
Number: int32(i),
Float: float32(i),
String: strconv.Itoa(i),
BoolStruct: _bool,
List: []int64{int64(i), int64(i + 1)},
@ -266,6 +274,24 @@ func GenDefaultJSONData(nb int, option GenDataOption) [][]byte {
return jsonValues
}
// GenNestedJSON builds a JSON-like document nested depth levels deep.
//
// The innermost object is {"value": value}. Every additional level wraps the
// previous document under the key "level<N>", with N running from 2 (closest
// to the value) up to depth (outermost), e.g. depth 3 yields
// {"level3": {"level2": {"value": value}}}.
//
// A depth of 1 or less returns the flat {"value": value} document. This keeps
// the result addressable by GenNestedJSONExprKey for the same depth and avoids
// the unbounded recursion a non-positive depth would otherwise cause.
func GenNestedJSON(depth int, value any) map[string]interface{} {
	nested := map[string]interface{}{"value": value}
	// Wrap from the inside out so the outermost key carries the largest level number.
	for level := 2; level <= depth; level++ {
		nested = map[string]interface{}{
			fmt.Sprintf("level%d", level): nested,
		}
	}
	return nested
}
// GenNestedJSONExprKey returns the bracketed filter-expression path that
// addresses the innermost "value" key of a document nested depth levels deep
// under jsonField, e.g. depth 3 and field "json" give
// json['level3']['level2']['value'].
//
// Levels are emitted from depth down to 2; for a depth of 1 or less only the
// ['value'] segment is appended.
func GenNestedJSONExprKey(depth int, jsonField string) string {
	var expr strings.Builder
	expr.WriteString(jsonField)
	for level := depth; level > 1; level-- {
		fmt.Fprintf(&expr, "['level%d']", level)
	}
	expr.WriteString("['value']")
	return expr.String()
}
// GenColumnData GenColumnDataOption except dynamic column
func GenColumnData(nb int, fieldType entity.FieldType, option GenDataOption) column.Column {
dim := option.dim

View File

@ -17,7 +17,6 @@ import (
)
func TestIndexVectorDefault(t *testing.T) {
t.Parallel()
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout*2)
mc := createDefaultMilvusClient(ctx, t)
@ -50,7 +49,6 @@ func TestIndexVectorDefault(t *testing.T) {
}
func TestIndexVectorIP(t *testing.T) {
t.Parallel()
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout*2)
mc := createDefaultMilvusClient(ctx, t)
@ -84,7 +82,6 @@ func TestIndexVectorIP(t *testing.T) {
}
func TestIndexVectorCosine(t *testing.T) {
t.Parallel()
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout*2)
mc := createDefaultMilvusClient(ctx, t)
@ -118,7 +115,6 @@ func TestIndexVectorCosine(t *testing.T) {
}
func TestIndexAutoFloatVector(t *testing.T) {
t.Parallel()
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := createDefaultMilvusClient(ctx, t)
@ -155,7 +151,6 @@ func TestIndexAutoFloatVector(t *testing.T) {
}
func TestIndexAutoBinaryVector(t *testing.T) {
t.Parallel()
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := createDefaultMilvusClient(ctx, t)
@ -196,7 +191,6 @@ func TestIndexAutoBinaryVector(t *testing.T) {
}
func TestIndexAutoSparseVector(t *testing.T) {
t.Parallel()
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := createDefaultMilvusClient(ctx, t)
@ -758,7 +752,6 @@ func TestCreateIndexDup(t *testing.T) {
}
func TestCreateIndexSparseVectorGeneric(t *testing.T) {
t.Parallel()
idxInverted := index.NewGenericIndex(common.DefaultSparseVecFieldName, map[string]string{"drop_ratio_build": "0.2", index.MetricTypeKey: "IP", index.IndexTypeKey: "SPARSE_INVERTED_INDEX"})
idxWand := index.NewGenericIndex(common.DefaultSparseVecFieldName, map[string]string{"drop_ratio_build": "0.3", index.MetricTypeKey: "IP", index.IndexTypeKey: "SPARSE_WAND"})
@ -787,7 +780,6 @@ func TestCreateIndexSparseVectorGeneric(t *testing.T) {
}
func TestCreateIndexSparseVector(t *testing.T) {
t.Parallel()
idxInverted1 := index.NewSparseInvertedIndex(entity.IP, 0.2)
idxWand1 := index.NewSparseWANDIndex(entity.IP, 0.3)
for _, idx := range []index.Index{idxInverted1, idxWand1} {

View File

@ -1,6 +1,7 @@
package testcases
import (
"encoding/json"
"fmt"
"testing"
"time"
@ -604,6 +605,13 @@ func TestQueryCountJsonDynamicExpr(t *testing.T) {
{expr: fmt.Sprintf("%s == [1503, 1504]", common.DefaultJSONFieldName), count: 1}, // json == [1,2]
{expr: fmt.Sprintf("%s[0] > 1", common.DefaultJSONFieldName), count: 1500 / 4}, // json[0] > 1
{expr: fmt.Sprintf("%s[0][0] > 1", common.DefaultJSONFieldName), count: 0}, // json == [1,2]
// Key and value types do not match
{expr: fmt.Sprintf("%s['float'] <= 3000", common.DefaultJSONFieldName), count: common.DefaultNb / 4},
{expr: fmt.Sprintf("%s['float'] <= 3000.0", common.DefaultJSONFieldName), count: common.DefaultNb / 4},
{expr: fmt.Sprintf("%s['string'] > 0", common.DefaultJSONFieldName), count: 0},
{expr: fmt.Sprintf("%s['floatArray'][0] < 1000.0", common.DefaultJSONFieldName), count: 500},
{expr: fmt.Sprintf("%s['stringArray'][0] == '00100'", common.DefaultJSONFieldName), count: 1},
}
for _, _exprCount := range exprCounts {
@ -614,6 +622,50 @@ func TestQueryCountJsonDynamicExpr(t *testing.T) {
}
}
// TestQueryNestedJsonExpr checks filtering on a deeply nested JSON key.
//
// Rows with an even index get an empty JSON object; rows with an odd index get
// a document nested nestedDepth levels deep whose innermost "value" is the row
// index. The test then runs a count query filtered on that innermost key and
// compares the returned count with the expected one.
func TestQueryNestedJsonExpr(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := createDefaultMilvusClient(ctx, t)

	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64VecJSON), hp.TNewFieldsOption(), hp.TNewSchemaOption())
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	pkColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeInt64, *hp.TNewDataOption())
	vecColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeFloatVector, *hp.TNewDataOption())

	nestedDepth := 100
	jsonValues := make([][]byte, 0, common.DefaultNb)
	for i := 0; i < common.DefaultNb; i++ {
		// Even rows carry an empty object, odd rows the nested document.
		m := make(map[string]interface{})
		if i%2 != 0 {
			m = hp.GenNestedJSON(nestedDepth, i)
		}
		bs, err := json.Marshal(m)
		// Fail fast instead of inserting a nil/garbage JSON payload.
		require.NoError(t, err)
		jsonValues = append(jsonValues, bs)
	}
	jsonColumn := column.NewColumnJSONBytes(common.DefaultJSONFieldName, jsonValues)

	_, err := mc.Insert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName, pkColumn, vecColumn, jsonColumn))
	common.CheckErr(t, err, true)

	type exprCount struct {
		expr  string
		count int64
	}
	exprKey := hp.GenNestedJSONExprKey(nestedDepth, common.DefaultJSONFieldName)
	nestedExpr := exprKey + " < 1000 "
	t.Log("https://github.com/milvus-io/milvus/issues/39822")
	exprCounts := []exprCount{
		// Disabled while the issue logged above is open.
		//{expr: fmt.Sprintf("json_length(%s) == 0", common.DefaultJSONFieldName), count: common.DefaultNb / 2},
		// Odd indices below 1000 carry a nested value < 1000 -> 500 rows
		// (assumes common.DefaultNb >= 1000 — TODO confirm).
		{expr: nestedExpr, count: 500},
	}

	for _, tc := range exprCounts {
		log.Info("TestQueryNestedJsonExpr", zap.String("expr", tc.expr))
		countRes, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithConsistencyLevel(entity.ClStrong).WithFilter(tc.expr).WithOutputFields(common.QueryCountFieldName))
		// Surface the query error itself rather than an index panic on countRes.Fields.
		common.CheckErr(t, err, true)
		count, err := countRes.Fields[0].GetAsInt64(0)
		require.NoError(t, err)
		require.Equal(t, tc.count, count)
	}
}
// test query with all kinds of array expr
func TestQueryArrayFieldExpr(t *testing.T) {
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)

View File

@ -4,6 +4,7 @@ import (
"fmt"
"math/rand"
"strconv"
"sync"
"testing"
"time"
@ -570,32 +571,44 @@ func TestSearchInvalidScannReorderK(t *testing.T) {
// test search with scann index params: with_raw_data and metrics_type [L2, IP, COSINE]
//
// For every (withRawData, metricType) pair the test builds a collection with a SCANN index on the
// float-vector field, searches it with all output fields requested, and checks the returned fields
// and result shape.
//
// NOTE(review): this view interleaves removed and added diff lines. The inline body inside the
// nested loops below duplicates testFunc and is presumably the pre-change code that the goroutine
// launch replaces — confirm against the real file.
// NOTE(review): testFunc runs on goroutines other than the test goroutine; the Go testing docs
// require FailNow/Fatal to be called from the test goroutine, so confirm the helpers used inside
// (common.CheckErr etc.) are safe to call from there.
func TestSearchScannAllMetricsWithRawData(t *testing.T) {
t.Parallel()
// ch works as a counting semaphore: its buffer of 3 bounds how many testFunc goroutines run at once.
ch := make(chan struct{}, 3)
// wg lets the test block until every launched goroutine has finished.
wg := sync.WaitGroup{}
testFunc := func(withRawData bool, metricType entity.MetricType) {
// On exit, mark this run done and free one semaphore slot.
defer func() {
wg.Done()
<-ch
}()
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := createDefaultMilvusClient(ctx, t)
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64VecJSON),
hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())
prepare.FlushData(ctx, t, mc, schema.CollectionName)
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{
common.DefaultFloatVecFieldName: index.NewSCANNIndex(metricType, 16, withRawData),
}))
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
// search and output all fields
vectors := hp.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeFloatVector)
resSearch, errSearch := mc.Search(ctx, client.NewSearchOption(schema.CollectionName, common.DefaultLimit, vectors).
WithConsistencyLevel(entity.ClStrong).WithOutputFields("*"))
common.CheckErr(t, errSearch, true)
common.CheckOutputFields(t, []string{
common.DefaultInt64FieldName, common.DefaultJSONFieldName,
common.DefaultFloatVecFieldName, common.DefaultDynamicFieldName,
}, resSearch[0].Fields)
common.CheckSearchResult(t, resSearch, common.DefaultNq, common.DefaultLimit)
}
for _, withRawData := range []bool{true, false} {
for _, metricType := range []entity.MetricType{entity.L2, entity.IP, entity.COSINE} {
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := createDefaultMilvusClient(ctx, t)
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64VecJSON),
hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())
prepare.FlushData(ctx, t, mc, schema.CollectionName)
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{
common.DefaultFloatVecFieldName: index.NewSCANNIndex(metricType, 16, withRawData),
}))
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
// search and output all fields
vectors := hp.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeFloatVector)
resSearch, errSearch := mc.Search(ctx, client.NewSearchOption(schema.CollectionName, common.DefaultLimit, vectors).
WithConsistencyLevel(entity.ClStrong).WithOutputFields("*"))
common.CheckErr(t, errSearch, true)
common.CheckOutputFields(t, []string{
common.DefaultInt64FieldName, common.DefaultJSONFieldName,
common.DefaultFloatVecFieldName, common.DefaultDynamicFieldName,
}, resSearch[0].Fields)
common.CheckSearchResult(t, resSearch, common.DefaultNq, common.DefaultLimit)
// Acquire a semaphore slot (blocks while 3 runs are in flight), then launch this combination.
ch <- struct{}{}
wg.Add(1)
go testFunc(withRawData, metricType)
}
}
// Wait for all launched combinations to finish before the test returns.
wg.Wait()
}
// test search with valid expression