Remove field name in query node and segCore

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
pull/4973/head^2
bigsheeper 2021-02-03 10:10:07 +08:00 committed by yefu.chen
parent 45b99c0cf3
commit 5e781b9370
17 changed files with 112 additions and 76 deletions

View File

@ -16,7 +16,6 @@
#include "knowhere/index/vector_index/VecIndex.h"
struct LoadIndexInfo {
std::string field_name;
int64_t field_id;
std::map<std::string, std::string> index_params;
milvus::knowhere::VecIndexPtr index;

View File

@ -59,11 +59,9 @@ AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* c_index_key, cons
}
CStatus
AppendFieldInfo(CLoadIndexInfo c_load_index_info, const char* c_field_name, int64_t field_id) {
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id) {
try {
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
std::string field_name(c_field_name);
load_index_info->field_name = field_name;
load_index_info->field_id = field_id;
auto status = CStatus();
@ -97,7 +95,6 @@ AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
load_index_info->index =
milvus::knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_params["index_type"], mode);
load_index_info->index->Load(*binary_set);
auto status = CStatus();
status.error_code = Success;
status.error_msg = "";

View File

@ -33,7 +33,7 @@ CStatus
AppendIndexParam(CLoadIndexInfo c_load_index_info, const char* index_key, const char* index_value);
CStatus
AppendFieldInfo(CLoadIndexInfo c_load_index_info, const char* field_name, int64_t field_id);
AppendFieldInfo(CLoadIndexInfo c_load_index_info, int64_t field_id);
CStatus
AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set);

View File

@ -781,7 +781,7 @@ TEST(CApiTest, LoadIndexInfo) {
status = AppendIndexParam(c_load_index_info, index_param_key2.data(), index_param_value2.data());
assert(status.error_code == Success);
std::string field_name = "field0";
status = AppendFieldInfo(c_load_index_info, field_name.data(), 0);
status = AppendFieldInfo(c_load_index_info, 0);
assert(status.error_code == Success);
status = AppendIndex(c_load_index_info, c_binary_set);
assert(status.error_code == Success);
@ -937,7 +937,7 @@ TEST(CApiTest, UpdateSegmentIndex_Without_Predicate) {
AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(c_load_index_info, "fakevec", 100);
AppendFieldInfo(c_load_index_info, 100);
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
status = UpdateSegmentIndex(segment, c_load_index_info);
@ -1074,7 +1074,7 @@ TEST(CApiTest, UpdateSegmentIndex_With_float_Predicate_Range) {
AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(c_load_index_info, "fakevec", 100);
AppendFieldInfo(c_load_index_info, 100);
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
status = UpdateSegmentIndex(segment, c_load_index_info);
@ -1211,7 +1211,7 @@ TEST(CApiTest, UpdateSegmentIndex_With_float_Predicate_Term) {
AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(c_load_index_info, "fakevec", 100);
AppendFieldInfo(c_load_index_info, 100);
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
status = UpdateSegmentIndex(segment, c_load_index_info);
@ -1350,7 +1350,7 @@ TEST(CApiTest, UpdateSegmentIndex_With_binary_Predicate_Range) {
AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(c_load_index_info, "fakevec", 100);
AppendFieldInfo(c_load_index_info, 100);
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
status = UpdateSegmentIndex(segment, c_load_index_info);
@ -1488,7 +1488,7 @@ TEST(CApiTest, UpdateSegmentIndex_With_binary_Predicate_Term) {
AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(c_load_index_info, "fakevec", 100);
AppendFieldInfo(c_load_index_info, 100);
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
status = UpdateSegmentIndex(segment, c_load_index_info);
@ -1665,7 +1665,7 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(c_load_index_info, "fakevec", 100);
AppendFieldInfo(c_load_index_info, 100);
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
auto load_index_info = (LoadIndexInfo*)c_load_index_info;

View File

@ -105,7 +105,6 @@ TEST(Sealed, without_predicate) {
auto ref_result = QueryResultToJson(qr);
LoadIndexInfo load_info;
load_info.field_name = "fakevec";
load_info.field_id = fake_id.get();
load_info.index = indexing;
load_info.index_params["metric_type"] = "L2";
@ -198,7 +197,6 @@ TEST(Sealed, with_predicate) {
auto result = indexing->Query(query_dataset, conf, nullptr);
LoadIndexInfo load_info;
load_info.field_name = "fakevec";
load_info.field_id = fake_id.get();
load_info.index = indexing;
load_info.index_params["metric_type"] = "L2";
@ -312,7 +310,6 @@ TEST(Sealed, LoadFieldData) {
LoadIndexInfo vec_info;
vec_info.field_id = fakevec_id.get();
vec_info.field_name = "fakevec";
vec_info.index = indexing;
vec_info.index_params["metric_type"] = milvus::knowhere::Metric::L2;
segment->LoadIndex(vec_info);

View File

@ -98,7 +98,7 @@ func TestGrpcService(t *testing.T) {
var binlogLock sync.Mutex
binlogPathArray := make([]string, 0, 16)
core.BuildIndexReq = func(binlog []string, typeParams []*commonpb.KeyValuePair, indexParams []*commonpb.KeyValuePair) (typeutil.UniqueID, error) {
core.BuildIndexReq = func(binlog []string, typeParams []*commonpb.KeyValuePair, indexParams []*commonpb.KeyValuePair, indexID typeutil.UniqueID, indexName string) (typeutil.UniqueID, error) {
binlogLock.Lock()
defer binlogLock.Unlock()
binlogPathArray = append(binlogPathArray, binlog...)

View File

@ -247,7 +247,7 @@ func (it *IndexBuildTask) Execute() error {
}
var indexCodec storage.IndexCodec
serializedIndexBlobs, err := indexCodec.Serialize(getStorageBlobs(indexBlobs), indexParams, it.cmd.Req.IndexName, it.cmd.Req.IndexID)
serializedIndexBlobs, err := indexCodec.Serialize(getStorageBlobs(indexBlobs), indexParams)
if err != nil {
return err
}

View File

@ -152,7 +152,7 @@ type Core struct {
GetBinlogFilePathsFromDataServiceReq func(segID typeutil.UniqueID, fieldID typeutil.UniqueID) ([]string, error)
//TODO, call index builder's client to build index, return build id
BuildIndexReq func(binlog []string, typeParams []*commonpb.KeyValuePair, indexParams []*commonpb.KeyValuePair) (typeutil.UniqueID, error)
BuildIndexReq func(binlog []string, typeParams []*commonpb.KeyValuePair, indexParams []*commonpb.KeyValuePair, indexID typeutil.UniqueID, indexName string) (typeutil.UniqueID, error)
//TODO, proxy service interface, notify proxy service to drop collection
InvalidateCollectionMetaCache func(ts typeutil.Timestamp, dbName string, collectionName string) error
@ -671,11 +671,13 @@ func (c *Core) SetDataService(s DataServiceInterface) error {
}
func (c *Core) SetIndexService(s IndexServiceInterface) error {
c.BuildIndexReq = func(binlog []string, typeParams []*commonpb.KeyValuePair, indexParams []*commonpb.KeyValuePair) (typeutil.UniqueID, error) {
c.BuildIndexReq = func(binlog []string, typeParams []*commonpb.KeyValuePair, indexParams []*commonpb.KeyValuePair, indexID typeutil.UniqueID, indexName string) (typeutil.UniqueID, error) {
rsp, err := s.BuildIndex(&indexpb.BuildIndexRequest{
DataPaths: binlog,
TypeParams: typeParams,
IndexParams: indexParams,
IndexID: indexID,
IndexName: indexName,
})
if err != nil {
return 0, err

View File

@ -628,7 +628,7 @@ func (t *CreateIndexTask) BuildIndex() error {
})
}
}
bldID, err = t.core.BuildIndexReq(binlogs, t.fieldSchema.TypeParams, t.indexParams)
bldID, err = t.core.BuildIndexReq(binlogs, t.fieldSchema.TypeParams, t.indexParams, idxID, t.indexName)
if err != nil {
return err
}

View File

@ -41,7 +41,7 @@ type collectionReplica interface {
getCollectionByID(collectionID UniqueID) (*Collection, error)
getCollectionByName(collectionName string) (*Collection, error)
hasCollection(collectionID UniqueID) bool
getVecFieldsByCollectionID(collectionID UniqueID) (map[int64]string, error)
getVecFieldsByCollectionID(collectionID UniqueID) ([]int64, error)
// partition
// Partition tags in different collections are not unique,
@ -175,7 +175,7 @@ func (colReplica *collectionReplicaImpl) hasCollection(collectionID UniqueID) bo
return false
}
func (colReplica *collectionReplicaImpl) getVecFieldsByCollectionID(collectionID UniqueID) (map[int64]string, error) {
func (colReplica *collectionReplicaImpl) getVecFieldsByCollectionID(collectionID UniqueID) ([]int64, error) {
colReplica.mu.RLock()
defer colReplica.mu.RUnlock()
@ -184,10 +184,10 @@ func (colReplica *collectionReplicaImpl) getVecFieldsByCollectionID(collectionID
return nil, err
}
vecFields := make(map[int64]string)
vecFields := make([]int64, 0)
for _, field := range col.Schema().Fields {
if field.DataType == schemapb.DataType_VECTOR_BINARY || field.DataType == schemapb.DataType_VECTOR_FLOAT {
vecFields[field.FieldID] = field.Name
vecFields = append(vecFields, field.FieldID)
}
}

View File

@ -29,7 +29,7 @@ func (s *Segment) buildIndex(collection *Collection) commonpb.Status {
return commonpb.Status{ErrorCode: commonpb.ErrorCode_SUCCESS}
}
func (s *Segment) dropIndex(fieldName string) commonpb.Status {
func (s *Segment) dropIndex(fieldID int64) commonpb.Status {
// WARN: Not support yet
return commonpb.Status{ErrorCode: commonpb.ErrorCode_SUCCESS}

View File

@ -51,10 +51,9 @@ func (li *LoadIndexInfo) appendIndexParam(indexKey string, indexValue string) er
return nil
}
func (li *LoadIndexInfo) appendFieldInfo(fieldName string, fieldID int64) error {
cFieldName := C.CString(fieldName)
func (li *LoadIndexInfo) appendFieldInfo(fieldID int64) error {
cFieldID := C.long(fieldID)
status := C.AppendFieldInfo(li.cLoadIndexInfo, cFieldName, cFieldID)
status := C.AppendFieldInfo(li.cLoadIndexInfo, cFieldID)
errorCode := status.error_code
if errorCode != 0 {

View File

@ -1,12 +1,64 @@
package querynode
import (
"strconv"
"testing"
"github.com/stretchr/testify/assert"
"github.com/zilliztech/milvus-distributed/internal/indexnode"
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
)
// genIndexBinarySet builds a small in-memory IVF_PQ float-vector index through
// indexnode.NewCIndex, serializes it, and returns the raw bytes of each blob in
// the resulting binary set. It serves as fixture data for load-index tests.
// Returns a non-nil error if index creation, building, or serialization fails.
func genIndexBinarySet() ([][]byte, error) {
	const (
		msgLength = 1000 // number of generated row vectors
		DIM       = 16   // vector dimensionality
	)

	// Build parameters for a CPU IVF_PQ index.
	// NOTE(review): indexParams["dim"] is hard-coded to "16" while typeParams
	// derives its dim from the DIM constant below — keep the two in sync.
	indexParams := make(map[string]string)
	indexParams["index_type"] = "IVF_PQ"
	indexParams["index_mode"] = "cpu"
	indexParams["dim"] = "16"
	indexParams["k"] = "10"
	indexParams["nlist"] = "100"
	indexParams["nprobe"] = "10"
	indexParams["m"] = "4"
	indexParams["nbits"] = "8"
	indexParams["metric_type"] = "L2"
	indexParams["SLICE_SIZE"] = "4"

	typeParams := make(map[string]string)
	typeParams["dim"] = strconv.Itoa(DIM)

	// Deterministic synthetic vectors: row n holds [0, n, 2n, ..., (DIM-1)*n].
	var indexRowData []float32
	for n := 0; n < msgLength; n++ {
		for i := 0; i < DIM; i++ {
			indexRowData = append(indexRowData, float32(n*i))
		}
	}

	index, err := indexnode.NewCIndex(typeParams, indexParams)
	if err != nil {
		return nil, err
	}
	err = index.BuildFloatVecIndexWithoutIds(indexRowData)
	if err != nil {
		return nil, err
	}

	// Serialize the built index into a binary set (in memory only — nothing is
	// persisted to object storage here).
	binarySet, err := index.Serialize()
	if err != nil {
		return nil, err
	}

	// Flatten the blob set to plain byte slices for the caller.
	bytesSet := make([][]byte, 0)
	for i := range binarySet {
		bytesSet = append(bytesSet, binarySet[i].Value)
	}
	return bytesSet, nil
}
func TestLoadIndexInfo(t *testing.T) {
indexParams := make([]*commonpb.KeyValuePair, 0)
indexParams = append(indexParams, &commonpb.KeyValuePair{
@ -18,19 +70,21 @@ func TestLoadIndexInfo(t *testing.T) {
Value: "cpu",
})
indexBytes := make([][]byte, 0)
indexValue := make([]byte, 10)
indexBytes = append(indexBytes, indexValue)
indexBytes, err := genIndexBinarySet()
assert.NoError(t, err)
indexPaths := make([]string, 0)
indexPaths = append(indexPaths, "index-0")
indexPaths = append(indexPaths, "IVF")
loadIndexInfo, err := newLoadIndexInfo()
assert.Nil(t, err)
for _, indexParam := range indexParams {
loadIndexInfo.appendIndexParam(indexParam.Key, indexParam.Value)
err = loadIndexInfo.appendIndexParam(indexParam.Key, indexParam.Value)
assert.NoError(t, err)
}
loadIndexInfo.appendFieldInfo("field0", 0)
loadIndexInfo.appendIndex(indexBytes, indexPaths)
err = loadIndexInfo.appendFieldInfo(0)
assert.NoError(t, err)
err = loadIndexInfo.appendIndex(indexBytes, indexPaths)
assert.NoError(t, err)
deleteLoadIndexInfo(loadIndexInfo)
}

View File

@ -47,7 +47,6 @@ type loadService struct {
type loadIndex struct {
segmentID UniqueID
fieldID int64
fieldName string
indexPaths []string
}
@ -231,7 +230,7 @@ func (s *loadService) loadIndex(indexPath []string) ([][]byte, indexParam, error
// get index params when detecting indexParamPrefix
if path.Base(p) == storage.IndexParamsFile {
indexCodec := storage.NewIndexCodec()
_, indexParams, _, _, err = indexCodec.Deserialize([]*storage.Blob{
_, indexParams, err = indexCodec.Deserialize([]*storage.Blob{
{
Key: storage.IndexParamsFile,
Value: []byte(indexPiece),
@ -262,7 +261,7 @@ func (s *loadService) updateSegmentIndex(indexParams indexParam, bytesIndex [][]
if err != nil {
return err
}
err = loadIndexInfo.appendFieldInfo(l.fieldName, l.fieldID)
err = loadIndexInfo.appendFieldInfo(l.fieldID)
if err != nil {
return err
}
@ -422,10 +421,9 @@ func (s *loadService) loadIndexImmediate(segment *Segment, indexPaths []string)
if err != nil {
return err
}
for id, name := range vecFieldIDs {
for _, id := range vecFieldIDs {
l := &loadIndex{
segmentID: segment.ID(),
fieldName: name,
fieldID: id,
indexPaths: indexPaths,
}
@ -449,10 +447,9 @@ func (s *loadService) loadIndexDelayed(collectionID, segmentID UniqueID, indexPa
if err != nil {
return err
}
for id, name := range vecFieldIDs {
for _, id := range vecFieldIDs {
l := &loadIndex{
segmentID: segmentID,
fieldName: name,
fieldID: id,
indexPaths: indexPaths,
}
@ -487,10 +484,18 @@ func (s *loadService) getInsertBinlogPaths(segmentID UniqueID) ([]*internalpb2.S
return pathResponse.Paths, pathResponse.FieldIDs, nil
}
func (s *loadService) filterOutVectorFields(fieldIDs []int64, vectorFields map[int64]string) []int64 {
func (s *loadService) filterOutVectorFields(fieldIDs []int64, vectorFields []int64) []int64 {
containsFunc := func(s []int64, e int64) bool {
for _, a := range s {
if a == e {
return true
}
}
return false
}
targetFields := make([]int64, 0)
for _, id := range fieldIDs {
if _, ok := vectorFields[id]; !ok {
if !containsFunc(vectorFields, id) {
targetFields = append(targetFields, id)
}
}

View File

@ -853,7 +853,7 @@ func generateIndex(segmentID UniqueID) ([]string, error) {
// serialize index params
var indexCodec storage.IndexCodec
serializedIndexBlobs, err := indexCodec.Serialize(binarySet, indexParams, "index_test_name", 1234)
serializedIndexBlobs, err := indexCodec.Serialize(binarySet, indexParams)
if err != nil {
return nil, err
}

View File

@ -635,16 +635,8 @@ func NewIndexCodec() *IndexCodec {
return &IndexCodec{}
}
func (indexCodec *IndexCodec) Serialize(blobs []*Blob, params map[string]string, indexName string, indexID UniqueID) ([]*Blob, error) {
paramsBytes, err := json.Marshal(struct {
Params map[string]string
IndexName string
IndexID UniqueID
}{
Params: params,
IndexName: indexName,
IndexID: indexID,
})
func (indexCodec *IndexCodec) Serialize(blobs []*Blob, params map[string]string) ([]*Blob, error) {
paramsBytes, err := json.Marshal(params)
if err != nil {
return nil, err
}
@ -652,27 +644,20 @@ func (indexCodec *IndexCodec) Serialize(blobs []*Blob, params map[string]string,
return blobs, nil
}
func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, map[string]string, string, UniqueID, error) {
var file *Blob
func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, map[string]string, error) {
var params map[string]string
for i := 0; i < len(blobs); i++ {
if blobs[i].Key != IndexParamsFile {
continue
}
file = blobs[i]
if err := json.Unmarshal(blobs[i].Value, &params); err != nil {
return nil, nil, err
}
blobs = append(blobs[:i], blobs[i+1:]...)
break
}
if file == nil {
return nil, nil, "", -1, errors.New("can not find params blob")
if params == nil {
return nil, nil, errors.New("can not find params blob")
}
info := struct {
Params map[string]string
IndexName string
IndexID UniqueID
}{}
if err := json.Unmarshal(file.Value, &info); err != nil {
return nil, nil, "", -1, errors.New("json unmarshal error: " + err.Error())
}
return blobs, info.Params, info.IndexName, info.IndexID, nil
return blobs, params, nil
}

View File

@ -310,17 +310,15 @@ func TestIndexCodec(t *testing.T) {
indexParams := map[string]string{
"k1": "v1", "k2": "v2",
}
blobsInput, err := indexCodec.Serialize(blobs, indexParams, "index_test_name", 1234)
blobsInput, err := indexCodec.Serialize(blobs, indexParams)
assert.Nil(t, err)
assert.EqualValues(t, 4, len(blobsInput))
assert.EqualValues(t, IndexParamsFile, blobsInput[3].Key)
blobsOutput, indexParamsOutput, indexName, indexID, err := indexCodec.Deserialize(blobsInput)
assert.EqualValues(t, IndexParamsFile, blobsInput[3])
blobsOutput, indexParamsOutput, err := indexCodec.Deserialize(blobsInput)
assert.Nil(t, err)
assert.EqualValues(t, 3, len(blobsOutput))
for i := 0; i < 3; i++ {
assert.EqualValues(t, blobs[i], blobsOutput[i])
}
assert.EqualValues(t, indexParams, indexParamsOutput)
assert.EqualValues(t, "index_test_name", indexName)
assert.EqualValues(t, 1234, indexID)
}