milvus/tests/go_client/testcases/full_text_search_test.go

284 lines
11 KiB
Go

package testcases
import (
"testing"
"time"
"github.com/milvus-io/milvus/client/v2/entity"
"github.com/milvus-io/milvus/client/v2/index"
"github.com/milvus-io/milvus/client/v2/milvusclient"
"github.com/milvus-io/milvus/tests/go_client/common"
hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
)
func TestFullTextSearchDefault(t *testing.T) {
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := hp.CreateDefaultMilvusClient(ctx, t)
// create -> insert -> flush -> index -> load
analyzerParams := map[string]any{"tokenizer": "standard"}
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang)
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
prepare.FlushData(ctx, t, mc, schema.CollectionName)
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
prepare.CreateIndex(ctx, t, mc, indexparams)
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
// search
queries := hp.GenFullTextQuery(common.DefaultNq, common.DefaultTextLang)
vectors := make([]entity.Vector, 0, len(queries))
for _, query := range queries {
vectors = append(vectors, entity.Text(query))
}
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, common.DefaultLimit, vectors).WithConsistencyLevel(entity.ClStrong))
common.CheckErr(t, err, true)
common.CheckSearchResult(t, resSearch, common.DefaultNq, common.DefaultLimit)
}
// TestSearchFullTextBase tests basic full text search functionality with different languages
func TestSearchFullTextWithDiffLang(t *testing.T) {
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := hp.CreateDefaultMilvusClient(ctx, t)
// Test cases for different languages and analyzers
testCases := []struct {
name string
language string
analyzer string
query string
numRows int
topK int
}{
{
name: "English_Standard",
language: "english",
analyzer: "standard",
query: "what is information retrieval and its applications?",
numRows: 3000,
topK: 10,
},
{
name: "Chinese_Jieba",
language: "chinese",
analyzer: "jieba",
query: "信息检索的应用",
numRows: 3000,
topK: 10,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows)
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
prepare.FlushData(ctx, t, mc, schema.CollectionName)
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
prepare.CreateIndex(ctx, t, mc, indexparams)
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
// search
queries := []string{tc.query}
vectors := make([]entity.Vector, 0, len(queries))
for _, query := range queries {
vectors = append(vectors, entity.Text(query))
}
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong))
common.CheckErr(t, err, true)
common.CheckSearchResult(t, resSearch, len(queries), tc.topK)
})
}
}
// TestSearchFullTextWithDynamicField tests full text search with dynamic field enabled
func TestSearchFullTextWithDynamicField(t *testing.T) {
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := hp.CreateDefaultMilvusClient(ctx, t)
// Test cases for different languages and analyzers
testCases := []struct {
name string
language string
analyzer string
query string
numRows int
topK int
}{
{
name: "English_Standard",
language: "english",
analyzer: "standard",
query: "what is information retrieval and its applications?",
numRows: 1000,
topK: 5,
},
{
name: "Chinese_Jieba",
language: "chinese",
analyzer: "jieba",
query: "信息检索的应用",
numRows: 1000,
topK: 5,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
schemaOption := hp.TNewSchemaOption().TWithFunction(function).TWithEnableDynamicField(true)
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows)
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
prepare.FlushData(ctx, t, mc, schema.CollectionName)
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
prepare.CreateIndex(ctx, t, mc, indexparams)
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
// search
queries := []string{tc.query}
vectors := make([]entity.Vector, 0, len(queries))
for _, query := range queries {
vectors = append(vectors, entity.Text(query))
}
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong))
common.CheckErr(t, err, true)
common.CheckSearchResult(t, resSearch, len(queries), tc.topK)
})
}
}
// TestSearchFullTextWithPartitionKey tests full text search with partition key
func TestSearchFullTextWithPartitionKey(t *testing.T) {
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := hp.CreateDefaultMilvusClient(ctx, t)
// Test cases for different languages and analyzers
testCases := []struct {
name string
language string
analyzer string
query string
numRows int
topK int
}{
{
name: "English_Standard",
language: "english",
analyzer: "standard",
query: "what is information retrieval and its applications?",
numRows: 1000,
topK: 5,
},
{
name: "Chinese_Jieba",
language: "chinese",
analyzer: "jieba",
query: "信息检索的应用",
numRows: 1000,
topK: 5,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams).TWithIsPartitionKey(true)
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows)
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
prepare.FlushData(ctx, t, mc, schema.CollectionName)
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
prepare.CreateIndex(ctx, t, mc, indexparams)
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
// search
queries := []string{tc.query}
vectors := make([]entity.Vector, 0, len(queries))
for _, query := range queries {
vectors = append(vectors, entity.Text(query))
}
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong))
common.CheckErr(t, err, true)
common.CheckSearchResult(t, resSearch, len(queries), tc.topK)
})
}
}
// TestSearchFullTextWithEmptyData tests full text search with empty data
func TestSearchFullTextWithEmptyData(t *testing.T) {
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
mc := hp.CreateDefaultMilvusClient(ctx, t)
// Test cases for different empty percent
testCases := []struct {
name string
language string
analyzer string
query string
numRows int
topK int
emptyPercent int
}{
{
name: "English_Standard",
language: "english",
analyzer: "standard",
query: "what is information retrieval and its applications?",
numRows: 3000,
topK: 5,
emptyPercent: 50,
},
{
name: "Chinese_Jieba",
language: "chinese",
analyzer: "jieba",
query: "信息检索的应用",
numRows: 3000,
topK: 5,
emptyPercent: 80,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams).TWithIsPartitionKey(true)
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows).TWithTextEmptyPercent(tc.emptyPercent)
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
prepare.FlushData(ctx, t, mc, schema.CollectionName)
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
prepare.CreateIndex(ctx, t, mc, indexparams)
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
// search
queries := []string{tc.query}
vectors := make([]entity.Vector, 0, len(queries))
for _, query := range queries {
vectors = append(vectors, entity.Text(query))
}
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong))
common.CheckErr(t, err, true)
common.CheckSearchResult(t, resSearch, len(queries), tc.topK)
})
}
}