mirror of https://github.com/milvus-io/milvus.git
Fix the codec bug of cgo wrapper of index builder
Signed-off-by: dragondriver <jiquan.long@zilliz.com>pull/4973/head^2
parent
5b9e8c4f60
commit
f67aae9596
|
@ -21,9 +21,14 @@
|
|||
namespace milvus {
|
||||
namespace indexbuilder {
|
||||
|
||||
IndexWrapper::IndexWrapper(const char* serialized_type_params, const char* serialized_index_params) {
|
||||
type_params_ = std::string(serialized_type_params);
|
||||
index_params_ = std::string(serialized_index_params);
|
||||
IndexWrapper::IndexWrapper(const char* serialized_type_params,
|
||||
int64_t type_params_size,
|
||||
const char* serialized_index_params,
|
||||
int64_t index_params_size) {
|
||||
type_params_ = std::string(serialized_type_params, type_params_size);
|
||||
index_params_ = std::string(serialized_index_params, index_params_size);
|
||||
// std::cout << "type_params_.size(): " << type_params_.size() << std::endl;
|
||||
// std::cout << "index_params_.size(): " << index_params_.size() << std::endl;
|
||||
|
||||
parse();
|
||||
|
||||
|
@ -35,20 +40,21 @@ IndexWrapper::IndexWrapper(const char* serialized_type_params, const char* seria
|
|||
auto index_mode = mode.has_value() ? mode_map[mode.value()] : knowhere::IndexMode::MODE_CPU;
|
||||
|
||||
index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type, index_mode);
|
||||
Assert(index_ != nullptr);
|
||||
}
|
||||
|
||||
void
|
||||
IndexWrapper::parse() {
|
||||
namespace indexcgo = milvus::proto::indexcgo;
|
||||
bool serialized_success;
|
||||
bool deserialized_success;
|
||||
|
||||
indexcgo::TypeParams type_config;
|
||||
serialized_success = type_config.ParseFromString(type_params_);
|
||||
Assert(serialized_success);
|
||||
deserialized_success = type_config.ParseFromString(type_params_);
|
||||
Assert(deserialized_success);
|
||||
|
||||
indexcgo::IndexParams index_config;
|
||||
serialized_success = index_config.ParseFromString(index_params_);
|
||||
Assert(serialized_success);
|
||||
deserialized_success = index_config.ParseFromString(index_params_);
|
||||
Assert(deserialized_success);
|
||||
|
||||
for (auto i = 0; i < type_config.params_size(); ++i) {
|
||||
auto type_param = type_config.params(i);
|
||||
|
|
|
@ -18,7 +18,10 @@ namespace indexbuilder {
|
|||
|
||||
class IndexWrapper {
|
||||
public:
|
||||
explicit IndexWrapper(const char* serialized_type_params, const char* serialized_index_params);
|
||||
explicit IndexWrapper(const char* serialized_type_params,
|
||||
int64_t type_params_size,
|
||||
const char* serialized_index_params,
|
||||
int64_t index_params_size);
|
||||
|
||||
int64_t
|
||||
dim();
|
||||
|
|
|
@ -14,9 +14,32 @@
|
|||
#include "indexbuilder/IndexWrapper.h"
|
||||
#include "indexbuilder/index_c.h"
|
||||
|
||||
class CGODebugUtils {
|
||||
public:
|
||||
static int64_t
|
||||
Strlen(const char* str, int64_t size) {
|
||||
if (size == 0) {
|
||||
return size;
|
||||
} else {
|
||||
return strlen(str);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
CIndex
|
||||
CreateIndex(const char* serialized_type_params, const char* serialized_index_params) {
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(serialized_type_params, serialized_index_params);
|
||||
CreateIndex(const char* serialized_type_params,
|
||||
int64_t type_params_size,
|
||||
const char* serialized_index_params,
|
||||
int64_t index_params_size) {
|
||||
// std::cout << "strlen(serialized_type_params): " << CGODebugUtils::Strlen(serialized_type_params,
|
||||
// type_params_size)
|
||||
// << std::endl;
|
||||
// std::cout << "type_params_size: " << type_params_size << std::endl;
|
||||
// std::cout << "strlen(serialized_index_params): "
|
||||
// << CGODebugUtils::Strlen(serialized_index_params, index_params_size) << std::endl;
|
||||
// std::cout << "index_params_size: " << index_params_size << std::endl;
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(serialized_type_params, type_params_size,
|
||||
serialized_index_params, index_params_size);
|
||||
|
||||
return index.release();
|
||||
}
|
||||
|
|
|
@ -35,7 +35,10 @@ typedef void* CIndex;
|
|||
// Solution: using protobuf instead of json, this way significantly increase programming efficiency
|
||||
|
||||
CIndex
|
||||
CreateIndex(const char* serialized_type_params, const char* serialized_index_params);
|
||||
CreateIndex(const char* serialized_type_params,
|
||||
int64_t type_params_size,
|
||||
const char* serialized_index_params,
|
||||
int64_t index_params_size);
|
||||
|
||||
void
|
||||
DeleteIndex(CIndex index);
|
||||
|
|
|
@ -36,7 +36,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
|
|||
if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
|
||||
return milvus::knowhere::Config{
|
||||
{milvus::knowhere::meta::DIM, DIM},
|
||||
{milvus::knowhere::meta::TOPK, K},
|
||||
// {milvus::knowhere::meta::TOPK, K},
|
||||
{milvus::knowhere::IndexParams::nlist, 100},
|
||||
// {milvus::knowhere::IndexParams::nprobe, 4},
|
||||
{milvus::knowhere::IndexParams::m, 4},
|
||||
|
@ -47,7 +47,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
|
|||
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
|
||||
return milvus::knowhere::Config{
|
||||
{milvus::knowhere::meta::DIM, DIM},
|
||||
{milvus::knowhere::meta::TOPK, K},
|
||||
// {milvus::knowhere::meta::TOPK, K},
|
||||
{milvus::knowhere::IndexParams::nlist, 100},
|
||||
// {milvus::knowhere::IndexParams::nprobe, 4},
|
||||
{milvus::knowhere::IndexParams::m, 4},
|
||||
|
@ -58,7 +58,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
|
|||
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) {
|
||||
return milvus::knowhere::Config{
|
||||
{milvus::knowhere::meta::DIM, DIM},
|
||||
{milvus::knowhere::meta::TOPK, K},
|
||||
// {milvus::knowhere::meta::TOPK, K},
|
||||
{milvus::knowhere::Metric::TYPE, metric_type},
|
||||
};
|
||||
}
|
||||
|
@ -86,14 +86,14 @@ generate_params(const milvus::knowhere::IndexType& index_type, const milvus::kno
|
|||
}
|
||||
|
||||
auto
|
||||
GenDataset(int64_t N, milvus::knowhere::MetricType metric_type, bool is_binary) {
|
||||
GenDataset(int64_t N, milvus::knowhere::MetricType metric_type, bool is_binary, int64_t dim = DIM) {
|
||||
auto schema = std::make_shared<milvus::Schema>();
|
||||
auto faiss_metric_type = milvus::knowhere::GetMetricType(metric_type);
|
||||
if (!is_binary) {
|
||||
schema->AddField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, DIM, faiss_metric_type);
|
||||
schema->AddField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, dim, faiss_metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
} else {
|
||||
schema->AddField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, DIM, faiss_metric_type);
|
||||
schema->AddField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, dim, faiss_metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
}
|
||||
}
|
||||
|
@ -205,11 +205,39 @@ TEST(PQWrapper, Build) {
|
|||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
|
||||
type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
|
||||
ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
}
|
||||
|
||||
TEST(PQCGO, Params) {
|
||||
std::vector<char> type_params;
|
||||
std::vector<char> index_params{10, 10, 10, 5, 110, 98, 105, 116, 115, 18, 1, 56, 10, 17, 10, 11, 109,
|
||||
101, 116, 114, 105, 99, 95, 116, 121, 112, 101, 18, 2, 76, 50, 10, 20, 10,
|
||||
10, 105, 110, 100, 101, 120, 95, 116, 121, 112, 101, 18, 6, 73, 86, 70, 95,
|
||||
80, 81, 10, 8, 10, 3, 100, 105, 109, 18, 1, 56, 10, 12, 10, 5, 110,
|
||||
108, 105, 115, 116, 18, 3, 49, 48, 48, 10, 6, 10, 1, 109, 18, 1, 52};
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params.data(), type_params.size(),
|
||||
index_params.data(), index_params.size());
|
||||
|
||||
auto dim = index->dim();
|
||||
auto dataset = GenDataset(NB, METRIC_TYPE, false, dim);
|
||||
auto xb_data = dataset.get_col<float>(0);
|
||||
auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
}
|
||||
|
||||
TEST(PQCGOWrapper, Params) {
|
||||
std::vector<char> type_params;
|
||||
std::vector<char> index_params{10, 10, 10, 5, 110, 98, 105, 116, 115, 18, 1, 56, 10, 17, 10, 11, 109,
|
||||
101, 116, 114, 105, 99, 95, 116, 121, 112, 101, 18, 2, 76, 50, 10, 20, 10,
|
||||
10, 105, 110, 100, 101, 120, 95, 116, 121, 112, 101, 18, 6, 73, 86, 70, 95,
|
||||
80, 81, 10, 8, 10, 3, 100, 105, 109, 18, 1, 56, 10, 12, 10, 5, 110,
|
||||
108, 105, 115, 116, 18, 3, 49, 48, 48, 10, 6, 10, 1, 109, 18, 1, 52};
|
||||
auto index = CreateIndex(type_params.data(), type_params.size(), index_params.data(), index_params.size());
|
||||
DeleteIndex(index);
|
||||
}
|
||||
|
||||
TEST(BinFlatWrapper, Build) {
|
||||
auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
|
||||
auto metric_type = milvus::knowhere::Metric::JACCARD;
|
||||
|
@ -227,8 +255,8 @@ TEST(BinFlatWrapper, Build) {
|
|||
std::vector<milvus::knowhere::IDType> ids(NB, 0);
|
||||
std::iota(ids.begin(), ids.end(), 0);
|
||||
auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data());
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
|
||||
type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
|
||||
ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
|
||||
}
|
||||
|
@ -250,8 +278,8 @@ TEST(BinIdMapWrapper, Build) {
|
|||
std::vector<milvus::knowhere::IDType> ids(NB, 0);
|
||||
std::iota(ids.begin(), ids.end(), 0);
|
||||
auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data());
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
|
||||
type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
|
||||
ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
|
||||
}
|
||||
|
@ -266,20 +294,20 @@ INSTANTIATE_TEST_CASE_P(IndexTypeParameters,
|
|||
milvus::knowhere::Metric::JACCARD)));
|
||||
|
||||
TEST_P(IndexWrapperTest, Constructor) {
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
|
||||
type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
|
||||
}
|
||||
|
||||
TEST_P(IndexWrapperTest, Dim) {
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
|
||||
type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
|
||||
|
||||
ASSERT_EQ(index->dim(), DIM);
|
||||
}
|
||||
|
||||
TEST_P(IndexWrapperTest, BuildWithoutIds) {
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
|
||||
type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
|
||||
|
||||
if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
|
||||
ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
|
@ -289,8 +317,8 @@ TEST_P(IndexWrapperTest, BuildWithoutIds) {
|
|||
}
|
||||
|
||||
TEST_P(IndexWrapperTest, Codec) {
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
|
||||
type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
|
||||
|
||||
if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
|
||||
ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
|
@ -300,8 +328,8 @@ TEST_P(IndexWrapperTest, Codec) {
|
|||
}
|
||||
|
||||
auto binary = index->Serialize();
|
||||
auto copy_index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
auto copy_index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
|
||||
type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
|
||||
ASSERT_NO_THROW(copy_index->Load(binary.data, binary.size));
|
||||
ASSERT_EQ(copy_index->dim(), copy_index->dim());
|
||||
auto copy_binary = copy_index->Serialize();
|
||||
|
|
|
@ -129,11 +129,43 @@ func NewCIndex(typeParams, indexParams map[string]string) (Index, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
//print := func(param []byte) {
|
||||
// for i, c := range param {
|
||||
// fmt.Print(c)
|
||||
// fmt.Print(", ")
|
||||
// if i % 25 == 0 {
|
||||
// fmt.Println()
|
||||
// }
|
||||
// }
|
||||
// fmt.Println()
|
||||
//}
|
||||
//print(typeParamsStr)
|
||||
//fmt.Println("len(typeParamsStr): ", len(typeParamsStr))
|
||||
//print(indexParamsStr)
|
||||
//fmt.Println("len(indexParamsStr): ", len(indexParamsStr))
|
||||
|
||||
var typeParamsPointer unsafe.Pointer
|
||||
var indexParamsPointer unsafe.Pointer
|
||||
|
||||
if len(typeParamsStr) > 0 {
|
||||
typeParamsPointer = unsafe.Pointer(&typeParamsStr[0])
|
||||
} else {
|
||||
typeParamsPointer = nil
|
||||
}
|
||||
if len(indexParamsStr) > 0 {
|
||||
indexParamsPointer = unsafe.Pointer(&indexParamsStr[0])
|
||||
} else {
|
||||
indexParamsPointer = nil
|
||||
}
|
||||
|
||||
/*
|
||||
CIndex
|
||||
CreateIndex(const char* serialized_type_params, const char* serialized_index_params);
|
||||
CreateIndex(const char* serialized_type_params,
|
||||
int64_t type_params_size,
|
||||
const char* serialized_index_params
|
||||
int64_t index_params_size);
|
||||
*/
|
||||
return &CIndex{
|
||||
indexPtr: C.CreateIndex((*C.char)(unsafe.Pointer(&typeParamsStr[0])), (*C.char)(unsafe.Pointer(&indexParamsStr[0]))),
|
||||
indexPtr: C.CreateIndex((*C.char)(typeParamsPointer), (C.int64_t)(len(typeParamsStr)), (*C.char)(indexParamsPointer), (C.int64_t)(len(indexParamsStr))),
|
||||
}, nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
package indexbuilder
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const (
|
||||
indexType = "IVF_PQ"
|
||||
dim = 8
|
||||
nlist = 100
|
||||
m = 4
|
||||
nbits = 8
|
||||
metricType = "L2"
|
||||
)
|
||||
|
||||
func TestIndex_New(t *testing.T) {
|
||||
typeParams := make(map[string]string)
|
||||
indexParams := make(map[string]string)
|
||||
indexParams["index_type"] = indexType
|
||||
indexParams["dim"] = strconv.Itoa(dim)
|
||||
indexParams["nlist"] = strconv.Itoa(nlist)
|
||||
indexParams["m"] = strconv.Itoa(m)
|
||||
indexParams["nbits"] = strconv.Itoa(nbits)
|
||||
indexParams["metric_type"] = metricType
|
||||
|
||||
index, err := NewCIndex(typeParams, indexParams)
|
||||
assert.Equal(t, err, nil)
|
||||
assert.NotEqual(t, index, nil)
|
||||
|
||||
err = index.Delete()
|
||||
assert.Equal(t, err, nil)
|
||||
}
|
Loading…
Reference in New Issue