Add cgo wrapper to index builder

Signed-off-by: dragondriver <jiquan.long@zilliz.com>
pull/4973/head^2
dragondriver 2020-12-18 15:44:27 +08:00 committed by yefu.chen
parent 475d36a2c9
commit da9ea7d2b9
9 changed files with 342 additions and 0 deletions

View File

@ -205,3 +205,12 @@ install(
# Install the segcore shared library into the lib directory of the install prefix.
install(FILES ${CMAKE_BINARY_DIR}/src/segcore/libmilvus_segcore.so
DESTINATION lib)
# Install the indexbuilder C API headers: only files under src/indexbuilder/ whose
# name matches *_c.h are copied to include/indexbuilder/.
install(
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/indexbuilder/
DESTINATION include/indexbuilder/
FILES_MATCHING PATTERN "*_c.h"
)
# Install the indexbuilder shared library next to the segcore one.
install(FILES ${CMAKE_BINARY_DIR}/src/indexbuilder/libmilvus_indexbuilder.so
DESTINATION lib)

View File

@ -30,3 +30,4 @@ add_subdirectory( segcore )
# Sub-projects built as part of this directory; indexbuilder is the newest entry.
add_subdirectory( cache )
add_subdirectory( query )
add_subdirectory( common )
add_subdirectory( indexbuilder )

View File

@ -0,0 +1,15 @@
# Sources of the index builder: the C++ wrapper class and its C API.
set(INDEXBUILDER_FILES
IndexWrapper.cpp
index_c.cpp)
# Built as a shared library (libmilvus_indexbuilder.so).
add_library(milvus_indexbuilder SHARED
${INDEXBUILDER_FILES}
)
# Libraries the index builder links against.
target_link_libraries(milvus_indexbuilder
tbb milvus_utils pthread knowhere log milvus_proto
dl backtrace
milvus_common
milvus_query
)

View File

@ -0,0 +1,69 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "knowhere/index/vector_index/VecIndexFactory.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "IndexWrapper.h"
namespace milvus {
namespace indexbuilder {
// Creates the underlying knowhere vector index from two JSON-encoded parameter strings.
//
// type_params_str and index_params_str must be non-null, NUL-terminated strings; both are copied
// and then parsed by parse(). index_params must contain a string "index_type". "index_mode" is
// optional: when the key is absent the index is created in CPU mode, "CPU" selects CPU mode and
// any other value selects GPU mode.
//
// Exceptions raised while parsing or reading the parameters propagate to the caller.
IndexWrapper::IndexWrapper(const char* type_params_str, const char* index_params_str)
    : type_params_(type_params_str), index_params_(index_params_str) {
    parse();

    const auto index_type = index_config_["index_type"].get<std::string>();

    // Look the mode up with a default instead of operator[]: reading a missing key as a string
    // made construction fail even though the mode is not a mandatory parameter.
    // NOTE(review): relies on milvus::Json offering nlohmann::json's value(key, default) - confirm.
    const auto index_mode = index_config_.value("index_mode", std::string("CPU"));
    const auto mode = index_mode == "CPU" ? knowhere::IndexMode::MODE_CPU : knowhere::IndexMode::MODE_GPU;

    index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type, mode);
}
// Parses the stored parameter strings into JSON and rebuilds the knowhere build config.
//
// type_params_ is parsed first, then index_params_. The knowhere config does not depend on
// either of them yet: it is filled with fixed placeholder values.
void
IndexWrapper::parse() {
    type_config_ = milvus::Json::parse(type_params_);
    index_config_ = milvus::Json::parse(index_params_);

    // TODO: parse from type_params & index_params
    const int placeholder_dim = 128;

    config_ = knowhere::Config{
        {knowhere::meta::DIM, placeholder_dim},
        {knowhere::IndexParams::nlist, 100},
        {knowhere::IndexParams::nprobe, 4},
        {knowhere::IndexParams::m, 4},
        {knowhere::IndexParams::nbits, 8},
        {knowhere::Metric::TYPE, knowhere::Metric::L2},
        {knowhere::meta::DEVICEID, 0},
    };
}
// Returns the vector dimension used when building the index.
// The value is a fixed placeholder for now; it is not read from the parsed parameters.
int64_t
IndexWrapper::dim() {
    // TODO: get from config
    constexpr int64_t kPlaceholderDim = 128;
    return kPlaceholderDim;
}
// Builds the index from `dataset`: trains it first, then adds the vectors without explicit ids.
// Both steps use the config assembled by parse().
void
IndexWrapper::BuildWithoutIds(const knowhere::DatasetPtr& dataset) {
    auto& index = *index_;
    index.Train(dataset, config_);
    index.AddWithoutIds(dataset, config_);
}
// Placeholder serializer: dumping the real index is not implemented yet.
//
// Returns a one-byte heap buffer holding an empty, NUL-terminated C string, or nullptr when the
// allocation fails. The buffer comes from the C allocator, so it is released with free().
char*
IndexWrapper::Serialize() {
    // calloc zero-fills the byte. The previous malloc(1) left it uninitialized, so a consumer
    // that reads the result as a C string could run past the end of the one-byte allocation.
    return static_cast<char*>(calloc(1, sizeof(char)));
}
// Placeholder loader: restoring an index from a dumped buffer is not implemented yet.
// The buffer is ignored and the wrapper is left untouched.
void
IndexWrapper::Load(const char* dumped_blob_buffer) {
    static_cast<void>(dumped_blob_buffer);
}
} // namespace indexbuilder
} // namespace milvus

View File

@ -0,0 +1,48 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <string>
#include "knowhere/index/vector_index/VecIndex.h"
namespace milvus {
namespace indexbuilder {
// Wraps a knowhere vector index together with the parameters it was created from.
// Instances are created and driven through the C API in index_c.cpp.
class IndexWrapper {
public:
// Copies both parameter strings, parses them as JSON and creates the knowhere index
// named by "index_type" in the index parameters.
explicit IndexWrapper(const char* type_params_str, const char* index_params_str);
// Vector dimension used for building; currently a fixed placeholder value.
int64_t
dim();
// Trains the index on `dataset` and then adds its vectors without explicit ids.
void
BuildWithoutIds(const knowhere::DatasetPtr& dataset);
// Returns a heap-allocated buffer; real serialization is not implemented yet.
char*
Serialize();
// Currently a no-op; the buffer is ignored.
void
Load(const char* dumped_blob_buffer);
private:
// Parses type_params_/index_params_ into JSON and fills config_.
void
parse();
private:
// The wrapped knowhere index; set by the constructor.
knowhere::VecIndexPtr index_ = nullptr;
// Raw parameter strings as received from the caller.
std::string type_params_;
std::string index_params_;
// JSON parsed from the raw parameter strings.
milvus::Json type_config_;
milvus::Json index_config_;
// Config handed to knowhere when training and adding vectors.
knowhere::Config config_;
};
} // namespace indexbuilder
} // namespace milvus

View File

@ -0,0 +1,48 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <string>
#include "index/knowhere/knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "indexbuilder/IndexWrapper.h"
#include "indexbuilder/index_c.h"
// Creates an IndexWrapper from the two parameter strings and returns it as an opaque handle.
// Ownership passes to the caller, who releases the handle with DeleteIndex.
CIndex
CreateIndex(const char* type_params_str, const char* index_params_str) {
    using milvus::indexbuilder::IndexWrapper;

    std::unique_ptr<IndexWrapper> wrapper = std::make_unique<IndexWrapper>(type_params_str, index_params_str);
    IndexWrapper* raw_wrapper = wrapper.release();
    return static_cast<void*>(raw_wrapper);
}
// Destroys the IndexWrapper behind `index`. The handle must not be used afterwards.
void
DeleteIndex(CIndex index) {
    delete static_cast<milvus::indexbuilder::IndexWrapper*>(index);
}
// Builds the index behind `index` from `row_nums` float vectors stored at `vectors`.
// The per-vector dimension is taken from the wrapper itself.
void
BuildFloatVecIndex(CIndex index, int64_t row_nums, const float* vectors) {
    auto* wrapper = static_cast<milvus::indexbuilder::IndexWrapper*>(index);
    const auto dataset = milvus::knowhere::GenDataset(row_nums, wrapper->dim(), vectors);
    wrapper->BuildWithoutIds(dataset);
}
// Forwards to IndexWrapper::Serialize and returns its buffer unchanged.
char*
SerializeToSlicedBuffer(CIndex index) {
    auto* wrapper = static_cast<milvus::indexbuilder::IndexWrapper*>(index);
    char* dumped_buffer = wrapper->Serialize();
    return dumped_buffer;
}
// Forwards `dumped_blob_buffer` to IndexWrapper::Load on the wrapper behind `index`.
void
LoadFromSlicedBuffer(CIndex index, const char* dumped_blob_buffer) {
    static_cast<milvus::indexbuilder::IndexWrapper*>(index)->Load(dumped_blob_buffer);
}

View File

@ -0,0 +1,52 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "segcore/collection_c.h"
// Opaque handle to an index created by CreateIndex and released by DeleteIndex.
typedef void* CIndex;

// TODO: how could we pass map between go and c++ more efficiently?

// Creates an index from two NUL-terminated parameter strings, both of which are parsed as JSON.
// The returned handle is owned by the caller and must be released with DeleteIndex.
CIndex
CreateIndex(const char* type_params_str, const char* index_params_str);

// Destroys the index behind `index`; the handle must not be used afterwards.
void
DeleteIndex(CIndex index);

// Builds the index from `row_nums` float vectors stored at `vectors`.
// The parameter list matches the definition in index_c.cpp; the earlier two-argument
// declaration named a C symbol that was never defined.
void
BuildFloatVecIndex(CIndex index, int64_t row_nums, const float* vectors);

// Returns the serialized index as a NUL-terminated buffer.
// NOTE(review): the buffer is allocated by the C++ side with the C allocator, so the caller
// presumably releases it with free() - confirm once real serialization lands.
char*
SerializeToSlicedBuffer(CIndex index);

// Restores the index behind `index` from a buffer produced by SerializeToSlicedBuffer.
void
LoadFromSlicedBuffer(CIndex index, const char* dumped_blob_buffer);
#ifdef __cplusplus
};
#endif

View File

@ -0,0 +1,89 @@
package indexbuilder
/*
#cgo CFLAGS: -I${SRCDIR}/../core/output/include
#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_indexbuilder -Wl,-rpath=${SRCDIR}/../core/output/lib
#include <stdlib.h>
#include "segcore/collection_c.h"
#include "indexbuilder/index_c.h"
*/
import "C"
import (
	"encoding/json"
	"unsafe"

	"github.com/zilliztech/milvus-distributed/internal/errors"
)
// Blob is one named piece of a serialized index. CIndex.Serialize produces one Blob per
// entry of the dumped JSON object: Key is the entry's key, Value the bytes of its string value.
// TODO: use storage.Blob instead later
// type Blob = storage.Blob
type Blob struct {
Key string
Value []byte
}
// Index is the Go-side view of an index builder.
type Index interface {
// Serialize dumps the index as a list of blobs.
Serialize() ([]*Blob, error)
// Load restores the index from blobs.
Load([]*Blob) error
// BuildFloatVecIndex builds the index from float vectors.
BuildFloatVecIndex(vectors []float32) error
// Delete releases the index.
Delete() error
}
// CIndex implements Index on top of the C index builder library.
type CIndex struct {
// indexPtr is the opaque handle returned by C.CreateIndex.
indexPtr C.CIndex
}
// Serialize dumps the index through the C API and splits the result into blobs.
//
// The C side returns a NUL-terminated JSON object whose values are strings; every entry
// becomes one Blob (key -> Key, value bytes -> Value). Blobs follow Go map iteration
// order, so their order is not stable between calls.
//
// An error is returned when the buffer is not a JSON object or when a value is not a string.
func (index *CIndex) Serialize() ([]*Blob, error) {
	cDumpedSlicedBuffer := C.SerializeToSlicedBuffer(index.indexPtr)
	// The buffer is allocated with the C allocator on the C++ side and handed over to us;
	// C.GoString copies it, so it has to be freed here or it leaks on every call.
	defer C.free(unsafe.Pointer(cDumpedSlicedBuffer))

	dumpedSlicedBuffer := C.GoString(cDumpedSlicedBuffer)

	var data map[string]interface{}
	if err := json.Unmarshal([]byte(dumpedSlicedBuffer), &data); err != nil {
		return nil, errors.New("unmarshal sliced buffer failed")
	}

	ret := make([]*Blob, 0, len(data))
	for key, value := range data {
		valueString, ok := value.(string)
		if !ok {
			return nil, errors.New("unexpected data type of dumped sliced buffer")
		}
		ret = append(ret, &Blob{key, []byte(valueString)})
	}
	return ret, nil
}
// Load is a placeholder: it ignores the given blobs, does not touch the C index and
// always returns nil.
func (index *CIndex) Load([]*Blob) error {
return nil
}
// BuildFloatVecIndex is a placeholder: it ignores vectors, does not call into the C
// library and always returns nil.
func (index *CIndex) BuildFloatVecIndex(vectors []float32) error {
return nil
}
// Delete releases the C++ index behind this CIndex. It always returns nil.
//
// The handle is cleared afterwards so that calling Delete again is harmless: the C side
// then receives a null handle, for which its delete is a no-op, instead of a dangling one.
// The CIndex must not be used for anything else after Delete.
func (index *CIndex) Delete() error {
	C.DeleteIndex(index.indexPtr)
	index.indexPtr = nil
	return nil
}
// NewCIndex creates an index in the C library from the given parameters.
//
// Both maps are marshalled to JSON and passed to C.CreateIndex as C strings. An error is
// returned only when marshalling fails. The returned Index owns a C handle and should be
// released with Delete.
func NewCIndex(typeParams, indexParams map[string]string) (Index, error) {
	dumpedTypeParamsStr, err := json.Marshal(typeParams)
	if err != nil {
		return nil, err
	}

	dumpedIndexParamsStr, err := json.Marshal(indexParams)
	if err != nil {
		return nil, err
	}

	// C.CString allocates with the C allocator. The C++ constructor copies both strings,
	// so they are freed once CreateIndex has returned instead of being leaked.
	cDumpedTypeParamsStr := C.CString(string(dumpedTypeParamsStr))
	defer C.free(unsafe.Pointer(cDumpedTypeParamsStr))
	cDumpedIndexParamsStr := C.CString(string(dumpedIndexParamsStr))
	defer C.free(unsafe.Pointer(cDumpedIndexParamsStr))

	return &CIndex{
		indexPtr: C.CreateIndex(cDumpedTypeParamsStr, cDumpedIndexParamsStr),
	}, nil
}

View File

@ -540,6 +540,17 @@ func (dataDefinitionCodec *DataDefinitionCodec) Close() error {
return nil
}
//type IndexCodec struct {
// Base
// readerCloseFunc []func() error
//}
//
////func (builder *IndexBuilder) Build(fieldData FieldData, typeParams map[string]string, indexParams map[string]string) ([]*Blob, error) {}
//func (indexCodec *IndexCodec) Serialize(indexSlices []*Blob) ([]*Blob, error) {}
//
//// TODO: describe inputs and return
//func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, error) {}
// IndexCodec embeds Base and declares no fields of its own.
// NOTE(review): its Serialize/Deserialize methods appear here only as the commented-out
// draft above - confirm whether they are defined elsewhere.
type IndexCodec struct {
Base
}