milvus/internal/util/function/vertexai_embedding_provider...

277 lines
8.9 KiB
Go

/*
* # Licensed to the LF AI & Data foundation under one
* # or more contributor license agreements. See the NOTICE file
* # distributed with this work for additional information
* # regarding copyright ownership. The ASF licenses this file
* # to you under the Apache License, Version 2.0 (the
* # "License"); you may not use this file except in compliance
* # with the License. You may obtain a copy of the License at
* #
* # http://www.apache.org/licenses/LICENSE-2.0
* #
* # Unless required by applicable law or agreed to in writing, software
* # distributed under the License is distributed on an "AS IS" BASIS,
* # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* # See the License for the specific language governing permissions and
* # limitations under the License.
*/
package function
import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/util/function/models/vertexai"
)
func TestVertexAITextEmbeddingProvider(t *testing.T) {
suite.Run(t, new(VertexAITextEmbeddingProviderSuite))
}
type VertexAITextEmbeddingProviderSuite struct {
suite.Suite
schema *schemapb.CollectionSchema
providers []string
}
func (s *VertexAITextEmbeddingProviderSuite) SetupTest() {
s.schema = &schemapb.CollectionSchema{
Name: "test",
Fields: []*schemapb.FieldSchema{
{FieldID: 100, Name: "int64", DataType: schemapb.DataType_Int64},
{FieldID: 101, Name: "text", DataType: schemapb.DataType_VarChar},
{
FieldID: 102, Name: "vector", DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: "dim", Value: "4"},
},
},
},
}
}
func createVertexAIProvider(url string, schema *schemapb.FieldSchema) (textEmbeddingProvider, error) {
functionSchema := &schemapb.FunctionSchema{
Name: "test",
Type: schemapb.FunctionType_Unknown,
InputFieldNames: []string{"text"},
OutputFieldNames: []string{"vector"},
InputFieldIds: []int64{101},
OutputFieldIds: []int64{102},
Params: []*commonpb.KeyValuePair{
{Key: modelNameParamKey, Value: textEmbedding005},
{Key: locationParamKey, Value: "mock_local"},
{Key: projectIDParamKey, Value: "mock_id"},
{Key: taskTypeParamKey, Value: vertexAICodeRetrival},
{Key: embeddingURLParamKey, Value: url},
{Key: dimParamKey, Value: "4"},
},
}
mockClient := vertexai.NewVertexAIEmbedding(url, []byte{1, 2, 3}, "mock scope", "mock token")
return NewVertexAIEmbeddingProvider(schema, functionSchema, mockClient)
}
func (s *VertexAITextEmbeddingProviderSuite) TestEmbedding() {
ts := CreateVertexAIEmbeddingServer()
defer ts.Close()
provder, err := createVertexAIProvider(ts.URL, s.schema.Fields[2])
s.NoError(err)
{
data := []string{"sentence"}
ret, err2 := provder.CallEmbedding(data, InsertMode)
s.NoError(err2)
s.Equal(1, len(ret))
s.Equal(4, len(ret[0]))
s.Equal([]float32{0.0, 0.1, 0.2, 0.3}, ret[0])
}
{
data := []string{"sentence 1", "sentence 2", "sentence 3"}
ret, _ := provder.CallEmbedding(data, SearchMode)
s.Equal([][]float32{{0.0, 0.1, 0.2, 0.3}, {1.0, 1.1, 1.2, 1.3}, {2.0, 2.1, 2.2, 2.3}}, ret)
}
}
func (s *VertexAITextEmbeddingProviderSuite) TestEmbeddingDimNotMatch() {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var res vertexai.EmbeddingResponse
res.Predictions = append(res.Predictions, vertexai.Prediction{
Embeddings: vertexai.Embeddings{
Statistics: vertexai.Statistics{
Truncated: false,
TokenCount: 10,
},
Values: []float32{1.0, 1.0, 1.0, 1.0},
},
})
res.Predictions = append(res.Predictions, vertexai.Prediction{
Embeddings: vertexai.Embeddings{
Statistics: vertexai.Statistics{
Truncated: false,
TokenCount: 10,
},
Values: []float32{1.0, 1.0},
},
})
res.Metadata = vertexai.Metadata{
BillableCharacterCount: 100,
}
w.WriteHeader(http.StatusOK)
data, _ := json.Marshal(res)
w.Write(data)
}))
defer ts.Close()
provder, err := createVertexAIProvider(ts.URL, s.schema.Fields[2])
s.NoError(err)
// embedding dim not match
data := []string{"sentence", "sentence"}
_, err2 := provder.CallEmbedding(data, InsertMode)
s.Error(err2)
}
func (s *VertexAITextEmbeddingProviderSuite) TestEmbeddingNubmerNotMatch() {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var res vertexai.EmbeddingResponse
res.Predictions = append(res.Predictions, vertexai.Prediction{
Embeddings: vertexai.Embeddings{
Statistics: vertexai.Statistics{
Truncated: false,
TokenCount: 10,
},
Values: []float32{1.0, 1.0, 1.0, 1.0},
},
})
res.Metadata = vertexai.Metadata{
BillableCharacterCount: 100,
}
w.WriteHeader(http.StatusOK)
data, _ := json.Marshal(res)
w.Write(data)
}))
defer ts.Close()
provder, err := createVertexAIProvider(ts.URL, s.schema.Fields[2])
s.NoError(err)
// embedding dim not match
data := []string{"sentence", "sentence2"}
_, err2 := provder.CallEmbedding(data, InsertMode)
s.Error(err2)
}
func (s *VertexAITextEmbeddingProviderSuite) TestCheckVertexAITask() {
err := checkTask(textMultilingualEmbedding002, "UnkownTask")
s.Error(err)
// textMultilingualEmbedding002 not support vertexAICodeRetrival task
err = checkTask(textMultilingualEmbedding002, vertexAICodeRetrival)
s.Error(err)
err = checkTask(textEmbedding005, vertexAICodeRetrival)
s.NoError(err)
err = checkTask(textMultilingualEmbedding002, vertexAISTS)
s.NoError(err)
}
func (s *VertexAITextEmbeddingProviderSuite) TestGetVertexAIJsonKey() {
os.Setenv(vertexServiceAccountJSONEnv, "ErrorPath")
defer os.Unsetenv(vertexServiceAccountJSONEnv)
_, err := getVertexAIJsonKey()
s.Error(err)
}
func (s *VertexAITextEmbeddingProviderSuite) TestGetTaskType() {
functionSchema := &schemapb.FunctionSchema{
Name: "test",
Type: schemapb.FunctionType_Unknown,
InputFieldNames: []string{"text"},
OutputFieldNames: []string{"vector"},
InputFieldIds: []int64{101},
OutputFieldIds: []int64{102},
Params: []*commonpb.KeyValuePair{
{Key: modelNameParamKey, Value: textEmbedding005},
{Key: projectIDParamKey, Value: "mock_id"},
{Key: dimParamKey, Value: "4"},
},
}
mockClient := vertexai.NewVertexAIEmbedding("mock_url", []byte{1, 2, 3}, "mock scope", "mock token")
{
provider, err := NewVertexAIEmbeddingProvider(s.schema.Fields[2], functionSchema, mockClient)
s.NoError(err)
s.Equal(provider.getTaskType(InsertMode), "RETRIEVAL_DOCUMENT")
s.Equal(provider.getTaskType(SearchMode), "RETRIEVAL_QUERY")
}
{
functionSchema.Params = append(functionSchema.Params, &commonpb.KeyValuePair{Key: taskTypeParamKey, Value: vertexAICodeRetrival})
provider, err := NewVertexAIEmbeddingProvider(s.schema.Fields[2], functionSchema, mockClient)
s.NoError(err)
s.Equal(provider.getTaskType(InsertMode), "RETRIEVAL_DOCUMENT")
s.Equal(provider.getTaskType(SearchMode), "CODE_RETRIEVAL_QUERY")
}
{
functionSchema.Params[3] = &commonpb.KeyValuePair{Key: taskTypeParamKey, Value: vertexAISTS}
provider, err := NewVertexAIEmbeddingProvider(s.schema.Fields[2], functionSchema, mockClient)
s.NoError(err)
s.Equal(provider.getTaskType(InsertMode), "SEMANTIC_SIMILARITY")
s.Equal(provider.getTaskType(SearchMode), "SEMANTIC_SIMILARITY")
}
// invalid task
{
functionSchema.Params[3] = &commonpb.KeyValuePair{Key: taskTypeParamKey, Value: "UnkownTask"}
_, err := NewVertexAIEmbeddingProvider(s.schema.Fields[2], functionSchema, mockClient)
s.Error(err)
}
}
func (s *VertexAITextEmbeddingProviderSuite) TestCreateVertexAIEmbeddingClient() {
os.Setenv(vertexServiceAccountJSONEnv, "ErrorPath")
defer os.Unsetenv(vertexServiceAccountJSONEnv)
_, err := createVertexAIEmbeddingClient("https://mock_url.com")
s.Error(err)
}
func (s *VertexAITextEmbeddingProviderSuite) TestNewVertexAIEmbeddingProvider() {
functionSchema := &schemapb.FunctionSchema{
Name: "test",
Type: schemapb.FunctionType_Unknown,
InputFieldNames: []string{"text"},
OutputFieldNames: []string{"vector"},
InputFieldIds: []int64{101},
OutputFieldIds: []int64{102},
Params: []*commonpb.KeyValuePair{
{Key: modelNameParamKey, Value: textEmbedding005},
{Key: projectIDParamKey, Value: "mock_id"},
{Key: dimParamKey, Value: "4"},
},
}
mockClient := vertexai.NewVertexAIEmbedding("mock_url", []byte{1, 2, 3}, "mock scope", "mock token")
provider, err := NewVertexAIEmbeddingProvider(s.schema.Fields[2], functionSchema, mockClient)
s.NoError(err)
s.True(provider.MaxBatch() > 0)
s.Equal(provider.FieldDim(), int64(4))
// check model name
functionSchema.Params[0] = &commonpb.KeyValuePair{Key: modelNameParamKey, Value: "UnkownModel"}
_, err = NewVertexAIEmbeddingProvider(s.schema.Fields[2], functionSchema, mockClient)
s.Error(err)
}