Add unit test cases used in regression to the index builder

Signed-off-by: dragondriver <jiquan.long@zilliz.com>
pull/4973/head^2
dragondriver 2020-12-29 16:31:03 +08:00 committed by yefu.chen
parent 1e3b7cd55e
commit 243346247b
6 changed files with 260 additions and 178 deletions

View File

@ -25,8 +25,6 @@ namespace indexbuilder {
IndexWrapper::IndexWrapper(const char* serialized_type_params, const char* serialized_index_params) {
type_params_ = std::string(serialized_type_params);
index_params_ = std::string(serialized_index_params);
// std::cout << "type_params_.size(): " << type_params_.size() << std::endl;
// std::cout << "index_params_.size(): " << index_params_.size() << std::endl;
parse();

View File

@ -28,13 +28,6 @@ class CGODebugUtils {
CIndex
CreateIndex(const char* serialized_type_params, const char* serialized_index_params) {
// std::cout << "strlen(serialized_type_params): " << CGODebugUtils::Strlen(serialized_type_params,
// type_params_size)
// << std::endl;
// std::cout << "type_params_size: " << type_params_size << std::endl;
// std::cout << "strlen(serialized_index_params): "
// << CGODebugUtils::Strlen(serialized_index_params, index_params_size) << std::endl;
// std::cout << "index_params_size: " << index_params_size << std::endl;
auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(serialized_type_params, serialized_index_params);
return index.release();

View File

@ -12,6 +12,7 @@
#include <tuple>
#include <map>
#include <gtest/gtest.h>
#include <google/protobuf/text_format.h>
#include "pb/index_cgo_msg.pb.h"
#include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h"
@ -116,9 +117,9 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
is_binary = is_binary_map[index_type];
bool ok;
ok = type_params.SerializeToString(&type_params_str);
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
assert(ok);
ok = index_params.SerializeToString(&index_params_str);
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
assert(ok);
auto dataset = GenDataset(NB, metric_type, is_binary);
@ -190,99 +191,71 @@ TEST(BINIDMAP, Build) {
ASSERT_NO_THROW(index->BuildAll(xb_dataset, conf));
}
// TEST(PQWrapper, Build) {
// auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
// auto metric_type = milvus::knowhere::Metric::L2;
// indexcgo::TypeParams type_params;
// indexcgo::IndexParams index_params;
// std::tie(type_params, index_params) = generate_params(index_type, metric_type);
// std::string type_params_str, index_params_str;
// bool ok;
// ok = type_params.SerializeToString(&type_params_str);
// assert(ok);
// ok = index_params.SerializeToString(&index_params_str);
// assert(ok);
// auto dataset = GenDataset(NB, metric_type, false);
// auto xb_data = dataset.get_col<float>(0);
// auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
// auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
//}
// Builds an INDEX_FAISS_IVFPQ index with the L2 metric through IndexWrapper and
// expects BuildWithoutIds to complete without throwing.
TEST(PQWrapper, Build) {
    auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
    auto metric_type = milvus::knowhere::Metric::L2;
    indexcgo::TypeParams type_params;
    indexcgo::IndexParams index_params;
    std::tie(type_params, index_params) = generate_params(index_type, metric_type);

    // The wrapper is constructed from plain `const char*` arguments, so the
    // parameters are rendered in protobuf text format.
    // ASSERT_TRUE is used instead of assert(): assert() is compiled out when
    // NDEBUG is defined, which would silently drop the check.
    std::string type_params_str, index_params_str;
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(type_params, &type_params_str));
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(index_params, &index_params_str));

    // xb_data must stay alive while xb_dataset refers to its buffer.
    auto dataset = GenDataset(NB, metric_type, false);
    auto xb_data = dataset.get_col<float>(0);
    auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());

    auto index =
        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
    ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
}
// TEST(PQCGO, Params) {
// std::vector<char> type_params;
// std::vector<char> index_params{10, 10, 10, 5, 110, 98, 105, 116, 115, 18, 1, 56, 10, 17, 10, 11, 109,
// 101, 116, 114, 105, 99, 95, 116, 121, 112, 101, 18, 2, 76, 50, 10, 20, 10,
// 10, 105, 110, 100, 101, 120, 95, 116, 121, 112, 101, 18, 6, 73, 86, 70, 95,
// 80, 81, 10, 8, 10, 3, 100, 105, 109, 18, 1, 56, 10, 12, 10, 5, 110,
// 108, 105, 115, 116, 18, 3, 49, 48, 48, 10, 6, 10, 1, 109, 18, 1, 52};
// auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params.data(), type_params.size(),
// index_params.data(), index_params.size());
//
// auto dim = index->dim();
// auto dataset = GenDataset(NB, METRIC_TYPE, false, dim);
// auto xb_data = dataset.get_col<float>(0);
// auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
//}
// Builds an INDEX_FAISS_BIN_IVFFLAT index with the JACCARD metric through
// IndexWrapper. Building without ids is expected to throw; building with ids
// is expected to succeed.
TEST(BinFlatWrapper, Build) {
    auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
    auto metric_type = milvus::knowhere::Metric::JACCARD;
    indexcgo::TypeParams type_params;
    indexcgo::IndexParams index_params;
    std::tie(type_params, index_params) = generate_params(index_type, metric_type);

    // The wrapper is constructed from plain `const char*` arguments, so the
    // parameters are rendered in protobuf text format.
    // ASSERT_TRUE is used instead of assert(): assert() is compiled out when
    // NDEBUG is defined, which would silently drop the check.
    std::string type_params_str, index_params_str;
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(type_params, &type_params_str));
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(index_params, &index_params_str));

    // xb_data and ids must stay alive while xb_dataset refers to their buffers.
    auto dataset = GenDataset(NB, metric_type, true);
    auto xb_data = dataset.get_col<uint8_t>(0);
    std::vector<milvus::knowhere::IDType> ids(NB, 0);
    std::iota(ids.begin(), ids.end(), 0);  // ids are 0, 1, ..., NB - 1
    auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data());

    auto index =
        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
    ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
    ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
}
// TEST(PQCGOWrapper, Params) {
// std::vector<char> type_params;
// std::vector<char> index_params{10, 10, 10, 5, 110, 98, 105, 116, 115, 18, 1, 56, 10, 17, 10, 11, 109,
// 101, 116, 114, 105, 99, 95, 116, 121, 112, 101, 18, 2, 76, 50, 10, 20, 10,
// 10, 105, 110, 100, 101, 120, 95, 116, 121, 112, 101, 18, 6, 73, 86, 70, 95,
// 80, 81, 10, 8, 10, 3, 100, 105, 109, 18, 1, 56, 10, 12, 10, 5, 110,
// 108, 105, 115, 116, 18, 3, 49, 48, 48, 10, 6, 10, 1, 109, 18, 1, 52};
// auto index = CreateIndex(type_params.data(), type_params.size(), index_params.data(), index_params.size());
// DeleteIndex(index);
//}
// TEST(BinFlatWrapper, Build) {
// auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
// auto metric_type = milvus::knowhere::Metric::JACCARD;
// indexcgo::TypeParams type_params;
// indexcgo::IndexParams index_params;
// std::tie(type_params, index_params) = generate_params(index_type, metric_type);
// std::string type_params_str, index_params_str;
// bool ok;
// ok = type_params.SerializeToString(&type_params_str);
// assert(ok);
// ok = index_params.SerializeToString(&index_params_str);
// assert(ok);
// auto dataset = GenDataset(NB, metric_type, true);
// auto xb_data = dataset.get_col<uint8_t>(0);
// std::vector<milvus::knowhere::IDType> ids(NB, 0);
// std::iota(ids.begin(), ids.end(), 0);
// auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data());
// auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
// ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
// ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
//}
// TEST(BinIdMapWrapper, Build) {
// auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP;
// auto metric_type = milvus::knowhere::Metric::JACCARD;
// indexcgo::TypeParams type_params;
// indexcgo::IndexParams index_params;
// std::tie(type_params, index_params) = generate_params(index_type, metric_type);
// std::string type_params_str, index_params_str;
// bool ok;
// ok = type_params.SerializeToString(&type_params_str);
// assert(ok);
// ok = index_params.SerializeToString(&index_params_str);
// assert(ok);
// auto dataset = GenDataset(NB, metric_type, true);
// auto xb_data = dataset.get_col<uint8_t>(0);
// std::vector<milvus::knowhere::IDType> ids(NB, 0);
// std::iota(ids.begin(), ids.end(), 0);
// auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data());
// auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
// ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
//}
// Builds an INDEX_FAISS_BIN_IDMAP index with the JACCARD metric through
// IndexWrapper. Both BuildWithoutIds and BuildWithIds are expected to succeed.
TEST(BinIdMapWrapper, Build) {
    auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP;
    auto metric_type = milvus::knowhere::Metric::JACCARD;
    indexcgo::TypeParams type_params;
    indexcgo::IndexParams index_params;
    std::tie(type_params, index_params) = generate_params(index_type, metric_type);

    // The wrapper is constructed from plain `const char*` arguments, so the
    // parameters are rendered in protobuf text format.
    // ASSERT_TRUE is used instead of assert(): assert() is compiled out when
    // NDEBUG is defined, which would silently drop the check.
    std::string type_params_str, index_params_str;
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(type_params, &type_params_str));
    ASSERT_TRUE(google::protobuf::TextFormat::PrintToString(index_params, &index_params_str));

    // xb_data and ids must stay alive while xb_dataset refers to their buffers.
    auto dataset = GenDataset(NB, metric_type, true);
    auto xb_data = dataset.get_col<uint8_t>(0);
    std::vector<milvus::knowhere::IDType> ids(NB, 0);
    std::iota(ids.begin(), ids.end(), 0);  // ids are 0, 1, ..., NB - 1
    auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data());

    auto index =
        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
    ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
    ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
}
INSTANTIATE_TEST_CASE_P(IndexTypeParameters,
IndexWrapperTest,
@ -293,46 +266,46 @@ INSTANTIATE_TEST_CASE_P(IndexTypeParameters,
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
milvus::knowhere::Metric::JACCARD)));
// TEST_P(IndexWrapperTest, Constructor) {
// auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
//}
// Smoke test: constructing an IndexWrapper from the fixture's serialized
// parameter strings is the whole test; nothing is asserted afterwards.
TEST_P(IndexWrapperTest, Constructor) {
    using milvus::indexbuilder::IndexWrapper;
    const char* serialized_type_params = type_params_str.c_str();
    const char* serialized_index_params = index_params_str.c_str();
    auto wrapper = std::make_unique<IndexWrapper>(serialized_type_params, serialized_index_params);
}
// TEST_P(IndexWrapperTest, Dim) {
// auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
//
// ASSERT_EQ(index->dim(), DIM);
//}
// Expects dim() of a wrapper built from the fixture's parameter strings to
// equal DIM.
TEST_P(IndexWrapperTest, Dim) {
    using milvus::indexbuilder::IndexWrapper;
    auto wrapper = std::make_unique<IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());

    ASSERT_EQ(wrapper->dim(), DIM);
}
// TEST_P(IndexWrapperTest, Codec) {
// auto index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
//
// if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
// ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
// ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
// } else {
// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
// }
//
// auto binary = index->Serialize();
// auto copy_index = std::make_unique<milvus::indexbuilder::IndexWrapper>(
// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size());
// ASSERT_NO_THROW(copy_index->Load(binary.data, binary.size));
// ASSERT_EQ(copy_index->dim(), copy_index->dim());
// auto copy_binary = copy_index->Serialize();
// ASSERT_EQ(binary.size, copy_binary.size);
// ASSERT_EQ(strcmp(binary.data, copy_binary.data), 0);
//}
// BuildWithoutIds is expected to throw for INDEX_FAISS_BIN_IVFFLAT and to
// succeed for every other parameterized index type.
TEST_P(IndexWrapperTest, BuildWithoutIds) {
    using milvus::indexbuilder::IndexWrapper;
    auto wrapper = std::make_unique<IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());

    const bool is_bin_ivfflat = (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT);
    if (!is_bin_ivfflat) {
        ASSERT_NO_THROW(wrapper->BuildWithoutIds(xb_dataset));
        return;
    }
    ASSERT_ANY_THROW(wrapper->BuildWithoutIds(xb_dataset));
}
// Round-trip test: build an index, serialize it, load the bytes into a second
// wrapper, serialize that one, and expect both serialized forms to match.
TEST_P(IndexWrapperTest, Codec) {
    auto index =
        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());

    // INDEX_FAISS_BIN_IVFFLAT is expected to reject a build without ids, so it
    // is built with ids instead.
    if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
        ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
        ASSERT_NO_THROW(index->BuildWithIds(xb_dataset));
    } else {
        ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
    }

    auto binary = index->Serialize();
    auto copy_index =
        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
    ASSERT_NO_THROW(copy_index->Load(binary.data, binary.size));

    // Compare the original against the copy; comparing the copy with itself
    // could never fail.
    ASSERT_EQ(index->dim(), copy_index->dim());

    auto copy_binary = copy_index->Serialize();
    ASSERT_EQ(binary.size, copy_binary.size);
    // The payload is binary, so compare all `size` bytes; strcmp would stop at
    // the first NUL byte and ignore everything after it.
    ASSERT_EQ(memcmp(binary.data, copy_binary.data, static_cast<size_t>(binary.size)), 0);
}

View File

@ -123,30 +123,13 @@ func NewCIndex(typeParams, indexParams map[string]string) (Index, error) {
}
indexParamsStr := proto.MarshalTextString(protoIndexParams)
//print := func(param []byte) {
// for i, c := range param {
// fmt.Print(c)
// fmt.Print(", ")
// if i % 25 == 0 {
// fmt.Println()
// }
// }
// fmt.Println()
//}
//print(typeParamsStr)
//fmt.Println("len(typeParamsStr): ", len(typeParamsStr))
//print(indexParamsStr)
//fmt.Println("len(indexParamsStr): ", len(indexParamsStr))
typeParamsPointer := C.CString(typeParamsStr)
indexParamsPointer := C.CString(indexParamsStr)
/*
CIndex
CreateIndex(const char* serialized_type_params,
const char* serialized_index_params);
*/
return &CIndex{
indexPtr: C.CreateIndex(typeParamsPointer, indexParamsPointer),

View File

@ -2,33 +2,168 @@ package indexbuilder
import (
"github.com/stretchr/testify/assert"
"math/rand"
"strconv"
"testing"
)
const (
indexType = "IVF_PQ"
dim = 8
nlist = 100
m = 4
nbits = 8
metricType = "L2"
IvfPq = "IVF_PQ"
BinFlat = "BIN_FLAT"
dim = 8
nlist = 100
m = 4
nbits = 8
L2 = "L2"
Jaccard = "JACCARD"
nb = 8 * 10000
)
func TestIndex_New(t *testing.T) {
// testCase describes one index configuration exercised by the tests in this
// file.
type testCase struct {
	// indexType and metricType are handed to generateParams by the tests.
	indexType, metricType string
	// isBinary selects the binary-vector build path in TestCIndex_Codec.
	isBinary bool
}
// generateFloatVectorTestCases returns the float-vector configurations: a
// single IvfPq case using the L2 metric.
func generateFloatVectorTestCases() []testCase {
	floatCases := []testCase{
		{indexType: IvfPq, metricType: L2, isBinary: false},
	}
	return floatCases
}
// generateBinaryVectorTestCases returns the binary-vector configurations: a
// single BinFlat case using the Jaccard metric.
func generateBinaryVectorTestCases() []testCase {
	binaryCases := []testCase{
		{indexType: BinFlat, metricType: Jaccard, isBinary: true},
	}
	return binaryCases
}
// generateTestCases returns every configuration: the float-vector cases
// followed by the binary-vector cases.
func generateTestCases() []testCase {
	all := generateFloatVectorTestCases()
	all = append(all, generateBinaryVectorTestCases()...)
	return all
}
// generateParams returns two string maps, typeParams and indexParams, for the
// given index type and metric type. typeParams is returned empty; indexParams
// carries "index_type", "metric_type" and the numeric settings below, each
// formatted with strconv.Itoa.
//
// NOTE(review): this span appears to interleave two revisions of the code (the
// page is a rendered diff without +/- markers). Confirm which lines belong to
// the current function before relying on it.
func generateParams(indexType, metricType string) (map[string]string, map[string]string) {
typeParams := make(map[string]string)
indexParams := make(map[string]string)
indexParams["index_type"] = indexType
// NOTE(review): these four unconditional assignments are repeated inside the
// per-index-type branches below; presumably they are leftovers. TODO confirm.
indexParams["dim"] = strconv.Itoa(dim)
indexParams["nlist"] = strconv.Itoa(nlist)
indexParams["m"] = strconv.Itoa(m)
indexParams["nbits"] = strconv.Itoa(nbits)
indexParams["metric_type"] = metricType
// IvfPq sets dim, nlist, m and nbits; BinFlat sets only dim.
if indexType == IvfPq {
indexParams["dim"] = strconv.Itoa(dim)
indexParams["nlist"] = strconv.Itoa(nlist)
indexParams["m"] = strconv.Itoa(m)
indexParams["nbits"] = strconv.Itoa(nbits)
} else if indexType == BinFlat {
indexParams["dim"] = strconv.Itoa(dim)
}
// NOTE(review): the statements below use `t`, which is not a parameter of this
// function; they look like the body of a test function rather than part of a
// parameter generator. TODO confirm.
index, err := NewCIndex(typeParams, indexParams)
assert.Equal(t, err, nil)
assert.NotEqual(t, index, nil)
err = index.Delete()
assert.Equal(t, err, nil)
return typeParams, indexParams
}
// generateFloatVectors returns nb values drawn one after another from
// rand.Float32.
func generateFloatVectors() []float32 {
	values := make([]float32, nb)
	for pos := range values {
		values[pos] = rand.Float32()
	}
	return values
}
// generateBinaryVectors returns nb/8 bytes, each set to byte(rand.Intn(8)).
//
// NOTE(review): rand.Intn(8) only yields 0..7, so just the low three bits of
// each byte ever vary; confirm whether rand.Intn(256) was intended.
func generateBinaryVectors() []byte {
	packed := make([]byte, nb/8)
	for pos := range packed {
		packed[pos] = byte(rand.Intn(8))
	}
	return packed
}
// TestCIndex_New creates an index for every generated test case, checks that
// creation reports no error and a non-nil index, then deletes the index.
func TestCIndex_New(t *testing.T) {
	cases := generateTestCases()
	for i := range cases {
		tp, ip := generateParams(cases[i].indexType, cases[i].metricType)

		idx, createErr := NewCIndex(tp, ip)
		assert.Equal(t, createErr, nil)
		assert.NotEqual(t, idx, nil)

		deleteErr := idx.Delete()
		assert.Equal(t, deleteErr, nil)
	}
}
// TestCIndex_BuildFloatVecIndexWithoutIds builds each float-vector test case
// from freshly generated float data and expects create, build and delete to
// report no error.
func TestCIndex_BuildFloatVecIndexWithoutIds(t *testing.T) {
	cases := generateFloatVectorTestCases()
	for i := range cases {
		tp, ip := generateParams(cases[i].indexType, cases[i].metricType)

		idx, createErr := NewCIndex(tp, ip)
		assert.Equal(t, createErr, nil)
		assert.NotEqual(t, idx, nil)

		buildErr := idx.BuildFloatVecIndexWithoutIds(generateFloatVectors())
		assert.Equal(t, buildErr, nil)

		deleteErr := idx.Delete()
		assert.Equal(t, deleteErr, nil)
	}
}
// TestCIndex_BuildBinaryVecIndexWithoutIds builds each binary-vector test case
// from freshly generated binary data and expects create, build and delete to
// report no error.
func TestCIndex_BuildBinaryVecIndexWithoutIds(t *testing.T) {
	cases := generateBinaryVectorTestCases()
	for i := range cases {
		tp, ip := generateParams(cases[i].indexType, cases[i].metricType)

		idx, createErr := NewCIndex(tp, ip)
		assert.Equal(t, createErr, nil)
		assert.NotEqual(t, idx, nil)

		buildErr := idx.BuildBinaryVecIndexWithoutIds(generateBinaryVectors())
		assert.Equal(t, buildErr, nil)

		deleteErr := idx.Delete()
		assert.Equal(t, deleteErr, nil)
	}
}
// TestCIndex_Codec round-trips every test case: build an index, serialize it,
// load the blobs into a second index, serialize that one, and expect the same
// number of blobs. Both indexes are deleted at the end of each iteration.
func TestCIndex_Codec(t *testing.T) {
	for _, c := range generateTestCases() {
		typeParams, indexParams := generateParams(c.indexType, c.metricType)

		index, err := NewCIndex(typeParams, indexParams)
		assert.Equal(t, err, nil)
		assert.NotEqual(t, index, nil)

		// Float and binary cases go through different build entry points.
		if !c.isBinary {
			vectors := generateFloatVectors()
			err = index.BuildFloatVecIndexWithoutIds(vectors)
			assert.Equal(t, err, nil)
		} else {
			vectors := generateBinaryVectors()
			err = index.BuildBinaryVecIndexWithoutIds(vectors)
			assert.Equal(t, err, nil)
		}

		blobs, err := index.Serialize()
		assert.Equal(t, err, nil)

		// Check the creation error before it is overwritten by Load.
		copyIndex, err := NewCIndex(typeParams, indexParams)
		assert.Equal(t, err, nil)
		assert.NotEqual(t, copyIndex, nil)

		err = copyIndex.Load(blobs)
		assert.Equal(t, err, nil)

		copyBlobs, err := copyIndex.Serialize()
		assert.Equal(t, err, nil)
		assert.Equal(t, len(blobs), len(copyBlobs))
		// TODO: check key, value and more

		err = index.Delete()
		assert.Equal(t, err, nil)

		// The copy is a separate index and must be released as well.
		err = copyIndex.Delete()
		assert.Equal(t, err, nil)
	}
}
// TestCIndex_Delete creates an index for every generated test case and expects
// Delete to report no error.
func TestCIndex_Delete(t *testing.T) {
	cases := generateTestCases()
	for i := range cases {
		tp, ip := generateParams(cases[i].indexType, cases[i].metricType)

		idx, createErr := NewCIndex(tp, ip)
		assert.Equal(t, createErr, nil)
		assert.NotEqual(t, idx, nil)

		deleteErr := idx.Delete()
		assert.Equal(t, deleteErr, nil)
	}
}

View File

@ -130,7 +130,7 @@ func ValidateDimension(dim int64, isBinary bool) error {
}
func ValidateVectorFieldMetricType(field *schemapb.FieldSchema) error {
if (field.DataType != schemapb.DataType_VECTOR_FLOAT) && (field.DataType != schemapb.DataType_VECTOR_BINARY) {
if field.DataType != schemapb.DataType_VECTOR_FLOAT {
return nil
}
for _, params := range field.IndexParams {