mirror of https://github.com/milvus-io/milvus.git
284 lines
11 KiB
Go
284 lines
11 KiB
Go
package testcases
|
|
|
|
import (
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/milvus-io/milvus/client/v2/entity"
|
|
"github.com/milvus-io/milvus/client/v2/index"
|
|
"github.com/milvus-io/milvus/client/v2/milvusclient"
|
|
"github.com/milvus-io/milvus/tests/go_client/common"
|
|
hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
|
|
)
|
|
|
|
func TestFullTextSearchDefault(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
analyzerParams := map[string]any{"tokenizer": "standard"}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang)
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// search
|
|
queries := hp.GenFullTextQuery(common.DefaultNq, common.DefaultTextLang)
|
|
vectors := make([]entity.Vector, 0, len(queries))
|
|
for _, query := range queries {
|
|
vectors = append(vectors, entity.Text(query))
|
|
}
|
|
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, common.DefaultLimit, vectors).WithConsistencyLevel(entity.ClStrong))
|
|
common.CheckErr(t, err, true)
|
|
common.CheckSearchResult(t, resSearch, common.DefaultNq, common.DefaultLimit)
|
|
}
|
|
|
|
// TestSearchFullTextBase tests basic full text search functionality with different languages
|
|
func TestSearchFullTextWithDiffLang(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// Test cases for different languages and analyzers
|
|
testCases := []struct {
|
|
name string
|
|
language string
|
|
analyzer string
|
|
query string
|
|
numRows int
|
|
topK int
|
|
}{
|
|
{
|
|
name: "English_Standard",
|
|
language: "english",
|
|
analyzer: "standard",
|
|
query: "what is information retrieval and its applications?",
|
|
numRows: 3000,
|
|
topK: 10,
|
|
},
|
|
{
|
|
name: "Chinese_Jieba",
|
|
language: "chinese",
|
|
analyzer: "jieba",
|
|
query: "信息检索的应用",
|
|
numRows: 3000,
|
|
topK: 10,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows)
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// search
|
|
queries := []string{tc.query}
|
|
vectors := make([]entity.Vector, 0, len(queries))
|
|
for _, query := range queries {
|
|
vectors = append(vectors, entity.Text(query))
|
|
}
|
|
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong))
|
|
common.CheckErr(t, err, true)
|
|
common.CheckSearchResult(t, resSearch, len(queries), tc.topK)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestSearchFullTextWithDynamicField tests full text search with dynamic field enabled
|
|
func TestSearchFullTextWithDynamicField(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
// Test cases for different languages and analyzers
|
|
testCases := []struct {
|
|
name string
|
|
language string
|
|
analyzer string
|
|
query string
|
|
numRows int
|
|
topK int
|
|
}{
|
|
{
|
|
name: "English_Standard",
|
|
language: "english",
|
|
analyzer: "standard",
|
|
query: "what is information retrieval and its applications?",
|
|
numRows: 1000,
|
|
topK: 5,
|
|
},
|
|
{
|
|
name: "Chinese_Jieba",
|
|
language: "chinese",
|
|
analyzer: "jieba",
|
|
query: "信息检索的应用",
|
|
numRows: 1000,
|
|
topK: 5,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function).TWithEnableDynamicField(true)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows)
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// search
|
|
queries := []string{tc.query}
|
|
vectors := make([]entity.Vector, 0, len(queries))
|
|
for _, query := range queries {
|
|
vectors = append(vectors, entity.Text(query))
|
|
}
|
|
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong))
|
|
common.CheckErr(t, err, true)
|
|
common.CheckSearchResult(t, resSearch, len(queries), tc.topK)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestSearchFullTextWithPartitionKey tests full text search with partition key
|
|
func TestSearchFullTextWithPartitionKey(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// Test cases for different languages and analyzers
|
|
testCases := []struct {
|
|
name string
|
|
language string
|
|
analyzer string
|
|
query string
|
|
numRows int
|
|
topK int
|
|
}{
|
|
{
|
|
name: "English_Standard",
|
|
language: "english",
|
|
analyzer: "standard",
|
|
query: "what is information retrieval and its applications?",
|
|
numRows: 1000,
|
|
topK: 5,
|
|
},
|
|
{
|
|
name: "Chinese_Jieba",
|
|
language: "chinese",
|
|
analyzer: "jieba",
|
|
query: "信息检索的应用",
|
|
numRows: 1000,
|
|
topK: 5,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams).TWithIsPartitionKey(true)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows)
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// search
|
|
queries := []string{tc.query}
|
|
vectors := make([]entity.Vector, 0, len(queries))
|
|
for _, query := range queries {
|
|
vectors = append(vectors, entity.Text(query))
|
|
}
|
|
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong))
|
|
common.CheckErr(t, err, true)
|
|
common.CheckSearchResult(t, resSearch, len(queries), tc.topK)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestSearchFullTextWithEmptyData tests full text search with empty data
|
|
func TestSearchFullTextWithEmptyData(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// Test cases for different empty percent
|
|
testCases := []struct {
|
|
name string
|
|
language string
|
|
analyzer string
|
|
query string
|
|
numRows int
|
|
topK int
|
|
emptyPercent int
|
|
}{
|
|
{
|
|
name: "English_Standard",
|
|
language: "english",
|
|
analyzer: "standard",
|
|
query: "what is information retrieval and its applications?",
|
|
numRows: 3000,
|
|
topK: 5,
|
|
emptyPercent: 50,
|
|
},
|
|
{
|
|
name: "Chinese_Jieba",
|
|
language: "chinese",
|
|
analyzer: "jieba",
|
|
query: "信息检索的应用",
|
|
numRows: 3000,
|
|
topK: 5,
|
|
emptyPercent: 80,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams).TWithIsPartitionKey(true)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithNb(tc.numRows).TWithTextEmptyPercent(tc.emptyPercent)
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// search
|
|
queries := []string{tc.query}
|
|
vectors := make([]entity.Vector, 0, len(queries))
|
|
for _, query := range queries {
|
|
vectors = append(vectors, entity.Text(query))
|
|
}
|
|
resSearch, err := mc.Search(ctx, milvusclient.NewSearchOption(schema.CollectionName, tc.topK, vectors).WithConsistencyLevel(entity.ClStrong))
|
|
common.CheckErr(t, err, true)
|
|
common.CheckSearchResult(t, resSearch, len(queries), tc.topK)
|
|
})
|
|
}
|
|
}
|