milvus/internal/core/unittest/test_c_api.cpp

5306 lines
209 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <array>
#include <boost/format.hpp>
#include <chrono>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <unordered_set>
#include "boost/container/vector.hpp"
#include "common/EasyAssert.h"
#include "common/LoadInfo.h"
#include "common/Types.h"
#include "common/type_c.h"
#include "index/IndexFactory.h"
#include "knowhere/comp/index_param.h"
#include "pb/plan.pb.h"
#include "query/ExprImpl.h"
#include "segcore/Collection.h"
#include "segcore/Reduce.h"
#include "segcore/reduce_c.h"
#include "segcore/segment_c.h"
#include "test_utils/DataGen.h"
#include "test_utils/PbHelper.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "test_utils/storage_test_utils.h"
#include "query/generated/ExecExprVisitor.h"
#include "expr/ITypeExpr.h"
#include "plan/PlanNode.h"
#include "exec/expression/Expr.h"
#include "segcore/load_index_c.h"
namespace chrono = std::chrono;
using namespace milvus;
using namespace milvus::index;
using namespace milvus::segcore;
using namespace milvus::tracer;
using namespace knowhere;
using milvus::index::VectorIndex;
using milvus::segcore::LoadIndexInfo;
namespace {
// const int DIM = 16;
const int64_t ROW_COUNT = 10 * 1000;
const int64_t BIAS = 4200;
CStatus
CSearch(CSegmentInterface c_segment,
CSearchPlan c_plan,
CPlaceholderGroup c_placeholder_group,
uint64_t timestamp,
CSearchResult* result) {
return Search(
{}, c_segment, c_plan, c_placeholder_group, timestamp, result);
}
CStatus
CRetrieve(CSegmentInterface c_segment,
CRetrievePlan c_plan,
uint64_t timestamp,
CRetrieveResult* result) {
return Retrieve(
{}, c_segment, c_plan, timestamp, result, DEFAULT_MAX_OUTPUT_SIZE);
}
const char*
get_default_schema_config() {
static std::string conf = R"(name: "default-collection"
fields: <
fieldID: 100
name: "fakevec"
data_type: FloatVector
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "metric_type"
value: "L2"
>
>
fields: <
fieldID: 101
name: "age"
data_type: Int64
is_primary_key: true
>)";
static std::string fake_conf = "";
return conf.c_str();
}
const char*
get_float16_schema_config() {
static std::string conf = R"(name: "float16-collection"
fields: <
fieldID: 100
name: "fakevec"
data_type: Float16Vector
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "metric_type"
value: "L2"
>
>
fields: <
fieldID: 101
name: "age"
data_type: Int64
is_primary_key: true
>)";
static std::string fake_conf = "";
return conf.c_str();
}
const char*
get_bfloat16_schema_config() {
static std::string conf = R"(name: "bfloat16-collection"
fields: <
fieldID: 100
name: "fakevec"
data_type: BFloat16Vector
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "metric_type"
value: "L2"
>
>
fields: <
fieldID: 101
name: "age"
data_type: Int64
is_primary_key: true
>)";
static std::string fake_conf = "";
return conf.c_str();
}
const char*
get_default_index_meta() {
static std::string conf = R"(maxIndexRowCount: 1000
index_metas: <
fieldID: 100
collectionID: 1001
index_name: "test-index"
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "index_type"
value: "IVF_FLAT"
>
index_params: <
key: "metric_type"
value: "L2"
>
index_params: <
key: "nlist"
value: "128"
>
>)";
return conf.c_str();
}
auto
generate_data(int N) {
std::vector<char> raw_data;
std::vector<uint64_t> timestamps;
std::vector<int64_t> uids;
std::default_random_engine e(42);
std::normal_distribution<> dis(0.0, 1.0);
for (int i = 0; i < N; ++i) {
uids.push_back(10 * N + i);
timestamps.push_back(0);
float vec[DIM];
for (auto& x : vec) {
x = dis(e);
}
raw_data.insert(raw_data.end(),
(const char*)std::begin(vec),
(const char*)std::end(vec));
int age = e() % 100;
raw_data.insert(raw_data.end(),
(const char*)&age,
((const char*)&age) + sizeof(age));
}
return std::make_tuple(raw_data, timestamps, uids);
}
std::string
generate_max_float_query_data(int all_nq, int max_float_nq) {
assert(max_float_nq <= all_nq);
namespace ser = milvus::proto::common;
int dim = DIM;
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
value->set_tag("$0");
value->set_type(ser::PlaceholderType::FloatVector);
for (int i = 0; i < all_nq; ++i) {
std::vector<float> vec;
if (i < max_float_nq) {
for (int d = 0; d < dim; ++d) {
vec.push_back(std::numeric_limits<float>::max());
}
} else {
for (int d = 0; d < dim; ++d) {
vec.push_back(1);
}
}
value->add_values(vec.data(), vec.size() * sizeof(float));
}
auto blob = raw_group.SerializeAsString();
return blob;
}
std::string
generate_query_data(int nq) {
namespace ser = milvus::proto::common;
std::default_random_engine e(67);
int dim = DIM;
std::normal_distribution<double> dis(0.0, 1.0);
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
value->set_tag("$0");
value->set_type(ser::PlaceholderType::FloatVector);
for (int i = 0; i < nq; ++i) {
std::vector<float> vec;
for (int d = 0; d < dim; ++d) {
vec.push_back(dis(e));
}
value->add_values(vec.data(), vec.size() * sizeof(float));
}
auto blob = raw_group.SerializeAsString();
return blob;
}
std::string
generate_query_data_float16(int nq) {
namespace ser = milvus::proto::common;
std::default_random_engine e(67);
int dim = DIM;
std::normal_distribution<double> dis(0.0, 1.0);
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
value->set_tag("$0");
value->set_type(ser::PlaceholderType::Float16Vector);
for (int i = 0; i < nq; ++i) {
std::vector<float16> vec;
for (int d = 0; d < dim; ++d) {
vec.push_back(float16(dis(e)));
}
value->add_values(vec.data(), vec.size() * sizeof(float16));
}
auto blob = raw_group.SerializeAsString();
return blob;
}
std::string
generate_query_data_bfloat16(int nq) {
namespace ser = milvus::proto::common;
std::default_random_engine e(67);
int dim = DIM;
std::normal_distribution<double> dis(0.0, 1.0);
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
value->set_tag("$0");
value->set_type(ser::PlaceholderType::BFloat16Vector);
for (int i = 0; i < nq; ++i) {
std::vector<bfloat16> vec;
for (int d = 0; d < dim; ++d) {
vec.push_back(bfloat16(dis(e)));
}
value->add_values(vec.data(), vec.size() * sizeof(bfloat16));
}
auto blob = raw_group.SerializeAsString();
return blob;
}
// 创建枚举包含schema::DataType::BinaryVectorschema::DataType::FloatVector
enum VectorType {
BinaryVector = 0,
FloatVector = 1,
Float16Vector = 2,
BFloat16Vector = 3,
};
std::string
generate_collection_schema(std::string metric_type,
int dim,
VectorType vector_type) {
namespace schema = milvus::proto::schema;
schema::CollectionSchema collection_schema;
collection_schema.set_name("collection_test");
auto vec_field_schema = collection_schema.add_fields();
vec_field_schema->set_name("fakevec");
vec_field_schema->set_fieldid(100);
if (vector_type == VectorType::BinaryVector) {
vec_field_schema->set_data_type(schema::DataType::BinaryVector);
} else if (vector_type == VectorType::Float16Vector) {
vec_field_schema->set_data_type(schema::DataType::Float16Vector);
} else if (vector_type == VectorType::BFloat16Vector) {
vec_field_schema->set_data_type(schema::DataType::BFloat16Vector);
} else {
vec_field_schema->set_data_type(schema::DataType::FloatVector);
}
auto metric_type_param = vec_field_schema->add_index_params();
metric_type_param->set_key("metric_type");
metric_type_param->set_value(metric_type);
auto dim_param = vec_field_schema->add_type_params();
dim_param->set_key("dim");
dim_param->set_value(std::to_string(dim));
auto other_field_schema = collection_schema.add_fields();
other_field_schema->set_name("counter");
other_field_schema->set_fieldid(101);
other_field_schema->set_data_type(schema::DataType::Int64);
other_field_schema->set_is_primary_key(true);
auto other_field_schema2 = collection_schema.add_fields();
other_field_schema2->set_name("doubleField");
other_field_schema2->set_fieldid(102);
other_field_schema2->set_data_type(schema::DataType::Double);
std::string schema_string;
auto marshal = google::protobuf::TextFormat::PrintToString(
collection_schema, &schema_string);
assert(marshal);
return schema_string;
}
// VecIndexPtr
// generate_index(
// void* raw_data, knowhere::Config conf, int64_t dim, int64_t topK, int64_t N, knowhere::IndexType index_type) {
// auto indexing = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
//
// auto database = knowhere::GenDataset(N, dim, raw_data);
// indexing->Train(database, conf);
// indexing->AddWithoutIds(database, conf);
// EXPECT_EQ(indexing->Count(), N);
// EXPECT_EQ(indexing->Dim(), dim);
//
// EXPECT_EQ(indexing->Count(), N);
// EXPECT_EQ(indexing->Dim(), dim);
// return indexing;
//}
//} // namespace
IndexBasePtr
generate_index(void* raw_data,
DataType field_type,
MetricType metric_type,
IndexType index_type,
int64_t dim,
int64_t N) {
auto engine_version =
knowhere::Version::GetCurrentVersion().VersionNumber();
CreateIndexInfo create_index_info{
field_type, index_type, metric_type, engine_version};
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, milvus::storage::FileManagerContext());
auto database = knowhere::GenDataSet(N, dim, raw_data);
auto build_config = generate_build_conf(index_type, metric_type);
indexing->BuildWithDataset(database, build_config);
auto vec_indexing = dynamic_cast<VectorIndex*>(indexing.get());
EXPECT_EQ(vec_indexing->Count(), N);
EXPECT_EQ(vec_indexing->GetDim(), dim);
return indexing;
}
} // namespace
TEST(CApiTest, CollectionTest) {
auto collection = NewCollection(get_default_schema_config());
DeleteCollection(collection);
}
TEST(CApiTest, LoadInfoTest) {
auto load_info = std::make_shared<LoadFieldDataInfo>();
auto c_load_info = reinterpret_cast<CLoadFieldDataInfo*>(load_info.get());
AppendLoadFieldInfo(c_load_info, 100, 100);
EnableMmap(c_load_info, 100, true);
EXPECT_TRUE(load_info->field_infos.at(100).enable_mmap);
}
TEST(CApiTest, SetIndexMetaTest) {
auto collection = NewCollection(get_default_schema_config());
milvus::proto::segcore::CollectionIndexMeta indexMeta;
indexMeta.ParseFromString(get_default_index_meta());
char buffer[indexMeta.ByteSizeLong()];
indexMeta.SerializeToArray(buffer, indexMeta.ByteSizeLong());
SetIndexMeta(collection, buffer, indexMeta.ByteSizeLong());
DeleteCollection(collection);
}
TEST(CApiTest, GetCollectionNameTest) {
auto collection = NewCollection(get_default_schema_config());
auto name = GetCollectionName(collection);
ASSERT_EQ(strcmp(name, "default-collection"), 0);
DeleteCollection(collection);
free((void*)(name));
}
TEST(CApiTest, SegmentTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
CSegmentInterface a_segment;
status = NewSegment(collection, Invalid, -1, &a_segment);
ASSERT_NE(status.error_code, Success);
DeleteCollection(collection);
DeleteSegment(segment);
free((char*)status.error_msg);
}
TEST(CApiTest, CPlan) {
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection = NewCollection(schema_string.c_str());
// const char* dsl_string = R"(
// {
// "bool": {
// "vector": {
// "fakevec": {
// "metric_type": "L2",
// "params": {
// "nprobe": 10
// },
// "query": "$0",
// "topk": 10,
// "round_decimal": 3
// }
// }
// }
// })";
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::BinaryVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
void* plan = nullptr;
auto status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
int64_t field_id = -1;
status = GetFieldID(plan, &field_id);
ASSERT_EQ(status.error_code, Success);
auto col = static_cast<Collection*>(collection);
for (auto& [target_field_id, field_meta] :
col->get_schema()->get_fields()) {
if (field_meta.is_vector()) {
ASSERT_EQ(field_id, target_field_id.get());
}
}
ASSERT_NE(field_id, -1);
DeleteSearchPlan(plan);
DeleteCollection(collection);
}
TEST(CApiTest, CApiCPlan_float16) {
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, 16, VectorType::Float16Vector);
auto collection = NewCollection(schema_string.c_str());
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(
milvus::proto::plan::VectorType::Float16Vector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
void* plan = nullptr;
auto status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
int64_t field_id = -1;
status = GetFieldID(plan, &field_id);
ASSERT_EQ(status.error_code, Success);
auto col = static_cast<Collection*>(collection);
for (auto& [target_field_id, field_meta] :
col->get_schema()->get_fields()) {
if (field_meta.is_vector()) {
ASSERT_EQ(field_id, target_field_id.get());
}
}
ASSERT_NE(field_id, -1);
DeleteSearchPlan(plan);
DeleteCollection(collection);
}
TEST(CApiTest, CApiCPlan_bfloat16) {
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, 16, VectorType::BFloat16Vector);
auto collection = NewCollection(schema_string.c_str());
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(
milvus::proto::plan::VectorType::BFloat16Vector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
void* plan = nullptr;
auto status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
int64_t field_id = -1;
status = GetFieldID(plan, &field_id);
ASSERT_EQ(status.error_code, Success);
auto col = static_cast<Collection*>(collection);
for (auto& [target_field_id, field_meta] :
col->get_schema()->get_fields()) {
if (field_meta.is_vector()) {
ASSERT_EQ(field_id, target_field_id.get());
}
}
ASSERT_NE(field_id, -1);
DeleteSearchPlan(plan);
DeleteCollection(collection);
}
TEST(CApiTest, InsertTest) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, DeleteTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
std::vector<int64_t> delete_row_ids = {100000, 100001, 100002};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
uint64_t delete_timestamps[] = {0, 0, 0};
auto offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps);
ASSERT_EQ(del_res.error_code, Success);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, MultiDeleteGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
auto insert_data = serialize(dataset.raw_);
// insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
int64_t offset;
PreInsert(segment, N, &offset);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// delete data pks = {1}
std::vector<int64_t> delete_pks = {1};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(),
delete_pks.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(1, dataset.timestamps_[N - 1]);
offset = 0;
auto del_res = Delete(segment,
offset,
1,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks = {1}
std::vector<proto::plan::GenericValue> retrive_pks;
{
proto::plan::GenericValue value;
value.set_int64_val(1);
retrive_pks.push_back(value);
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_pks);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
auto max_ts = dataset.timestamps_[N - 1] + 10;
CRetrieveResult retrieve_result;
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrieveResult(&retrieve_result);
// retrieve pks = {2}
{
proto::plan::GenericValue value;
value.set_int64_val(2);
retrive_pks.push_back(value);
}
term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_pks);
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 1);
DeleteRetrieveResult(&retrieve_result);
// delete pks = {2}
delete_pks = {2};
ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(),
delete_pks.end());
delete_data = serialize(ids.get());
delete_timestamps[0]++;
offset = 0;
del_res = Delete(segment,
offset,
1,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks in {2}
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, MultiDeleteSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
auto segment_interface = reinterpret_cast<SegmentInterface*>(segment);
auto sealed_segment = dynamic_cast<SegmentSealed*>(segment_interface);
SealedLoadFieldData(dataset, *sealed_segment);
// delete data pks = {1}
std::vector<int64_t> delete_pks = {1};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(),
delete_pks.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(1, dataset.timestamps_[N - 1]);
auto offset = 0;
auto del_res = Delete(segment,
offset,
1,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks = {1}
std::vector<proto::plan::GenericValue> retrive_pks;
{
proto::plan::GenericValue value;
value.set_int64_val(1);
retrive_pks.push_back(value);
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_pks);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
auto max_ts = dataset.timestamps_[N - 1] + 10;
CRetrieveResult retrieve_result;
auto res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrieveResult(&retrieve_result);
// retrieve pks = {2}
{
proto::plan::GenericValue value;
value.set_int64_val(2);
retrive_pks.push_back(value);
}
term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_pks);
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 1);
DeleteRetrieveResult(&retrieve_result);
// delete pks = {2}
delete_pks = {2};
ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(),
delete_pks.end());
delete_data = serialize(ids.get());
delete_timestamps[0]++;
offset = 0;
del_res = Delete(segment,
offset,
1,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks in {2}
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
auto insert_data = serialize(dataset.raw_);
// first insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
int64_t offset;
PreInsert(segment, N, &offset);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// second insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
PreInsert(segment, N, &offset);
res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// create retrieve plan pks in {1, 2, 3}
std::vector<proto::plan::GenericValue> retrive_row_ids;
{
for (auto v : {1, 2, 3}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
retrive_row_ids.push_back(val);
}
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 6);
DeleteRetrieveResult(&retrieve_result);
// delete data pks = {1, 2, 3}
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[N - 1]);
offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks in {1, 2, 3}
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
query_result = std::make_unique<proto::segcore::RetrieveResults>();
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 20;
auto dataset = DataGen(col->get_schema(), N, 42, 0, 2);
auto segment_interface = reinterpret_cast<SegmentInterface*>(segment);
auto sealed_segment = dynamic_cast<SegmentSealed*>(segment_interface);
SealedLoadFieldData(dataset, *sealed_segment);
std::vector<proto::plan::GenericValue> retrive_row_ids;
// create retrieve plan pks in {1, 2, 3}
{
for (auto v : {1, 2, 3}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
retrive_row_ids.push_back(val);
}
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
auto res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 6);
DeleteRetrieveResult(&retrieve_result);
// delete data pks = {1, 2, 3}
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[N - 1]);
auto offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks in {1, 2, 3}
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
query_result = std::make_unique<proto::segcore::RetrieveResults>();
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, InsertSamePkAfterDeleteOnGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
auto insert_data = serialize(dataset.raw_);
// first insert data
// insert data with pks = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} , timestamps = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
int64_t offset;
PreInsert(segment, N, &offset);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// delete data pks = {1, 2, 3}, timestamps = {9, 9, 9}
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[N - 1]);
offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// create retrieve plan pks in {1, 2, 3}, timestamp = 9
std::vector<proto::plan::GenericValue> retrive_row_ids;
{
for (auto v : {1, 2, 3}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
retrive_row_ids.push_back(val);
}
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrieveResult(&retrieve_result);
// second insert data
// insert data with pks = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} , timestamps = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}
dataset = DataGen(col->get_schema(), N, 42, N);
insert_data = serialize(dataset.raw_);
PreInsert(segment, N, &offset);
res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// retrieve pks in {1, 2, 3}, timestamp = 19
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
query_result = std::make_unique<proto::segcore::RetrieveResults>();
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 3);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, InsertSamePkAfterDeleteOnSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N, 42, 0, 2);
// insert data with pks = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4} , timestamps = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
auto segment_interface = reinterpret_cast<SegmentInterface*>(segment);
auto sealed_segment = dynamic_cast<SegmentSealed*>(segment_interface);
SealedLoadFieldData(dataset, *sealed_segment);
// delete data pks = {1, 2, 3}, timestamps = {4, 4, 4}
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[4]);
auto offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// create retrieve plan pks in {1, 2, 3}, timestamp = 9
std::vector<proto::plan::GenericValue> retrive_row_ids;
{
for (auto v : {1, 2, 3}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
retrive_row_ids.push_back(val);
}
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
auto res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 4);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SearchTest) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
CSearchResult search_result2;
auto res2 =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result2);
ASSERT_EQ(res2.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteSearchResult(search_result2);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, SearchTestWithExpr) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
c_collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
dataset.timestamps_.clear();
dataset.timestamps_.push_back(1);
CSearchResult search_result;
auto res = CSearch(segment,
plan,
placeholderGroup,
dataset.timestamps_[0],
&search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RetrieveTestWithExpr) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
// create retrieve plan "age in [0]"
std::vector<proto::plan::GenericValue> values;
{
for (auto v : {1, 0}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
values.push_back(val);
}
}
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
auto res = CRetrieve(
segment, plan.get(), dataset.timestamps_[0], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, GetMemoryUsageInBytesTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto old_memory_usage_size = GetMemoryUsageInBytes(segment);
// std::cout << "old_memory_usage_size = " << old_memory_usage_size << std::endl;
ASSERT_EQ(old_memory_usage_size, 0);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, GetDeletedCountTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
std::vector<int64_t> delete_row_ids = {100000, 100001, 100002};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
uint64_t delete_timestamps[] = {0, 0, 0};
auto offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps);
ASSERT_EQ(del_res.error_code, Success);
// TODO: assert(deleted_count == len(delete_row_ids))
auto deleted_count = GetDeletedCount(segment);
ASSERT_EQ(deleted_count, 0);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, GetRowCountTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
auto row_count = GetRowCount(segment);
ASSERT_EQ(row_count, N);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, GetRealCount) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
auto pks = dataset.get_col<int64_t>(schema->get_primary_field_id().value());
std::vector<int64_t> delete_row_ids(pks.begin(), pks.begin() + 3);
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
uint64_t delete_timestamps[] = {dataset.timestamps_[N - 1] + 1,
dataset.timestamps_[N - 1] + 2,
dataset.timestamps_[N - 1] + 3};
auto del_offset = 0;
auto del_res = Delete(segment,
del_offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps);
ASSERT_EQ(del_res.error_code, Success);
auto real_count = GetRealCount(segment);
ASSERT_EQ(real_count, N - delete_row_ids.size());
DeleteCollection(collection);
DeleteSegment(segment);
}
void
CheckSearchResultDuplicate(const std::vector<CSearchResult>& results) {
auto nq = ((SearchResult*)results[0])->total_nq_;
std::unordered_set<PkType> pk_set;
for (int qi = 0; qi < nq; qi++) {
pk_set.clear();
for (size_t i = 0; i < results.size(); i++) {
auto search_result = (SearchResult*)results[i];
ASSERT_EQ(nq, search_result->total_nq_);
auto topk_beg = search_result->topk_per_nq_prefix_sum_[qi];
auto topk_end = search_result->topk_per_nq_prefix_sum_[qi + 1];
for (size_t ki = topk_beg; ki < topk_end; ki++) {
ASSERT_NE(search_result->seg_offsets_[ki], INVALID_SEG_OFFSET);
auto ret = pk_set.insert(search_result->primary_keys_[ki]);
ASSERT_TRUE(ret.second);
}
}
}
}
TEST(CApiTest, ReduceNullResult) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
int num_queries = 10;
auto blob = generate_max_float_query_data(num_queries, num_queries / 2);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
dataset.timestamps_.clear();
dataset.timestamps_.push_back(1);
{
auto slice_nqs = std::vector<int64_t>{10};
auto slice_topKs = std::vector<int64_t>{1};
std::vector<CSearchResult> results;
CSearchResult res;
status = CSearch(segment, plan, placeholderGroup, 1L << 63, &res);
ASSERT_EQ(status.error_code, Success);
results.push_back(res);
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
auto search_result = (SearchResult*)results[0];
auto size = search_result->result_offsets_.size();
EXPECT_EQ(size, num_queries / 2);
DeleteSearchResult(res);
DeleteSearchResultDataBlobs(cSearchResultData);
}
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, ReduceRemoveDuplicates) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
int num_queries = 10;
int topK = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
dataset.timestamps_.clear();
dataset.timestamps_.push_back(1);
{
auto slice_nqs = std::vector<int64_t>{num_queries / 2, num_queries / 2};
auto slice_topKs = std::vector<int64_t>{topK / 2, topK};
std::vector<CSearchResult> results;
CSearchResult res1, res2;
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res1);
ASSERT_EQ(status.error_code, Success);
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res2);
ASSERT_EQ(status.error_code, Success);
results.push_back(res1);
results.push_back(res2);
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
// TODO:: insert no duplicate pks and check reduce results
CheckSearchResultDuplicate(results);
DeleteSearchResult(res1);
DeleteSearchResult(res2);
DeleteSearchResultDataBlobs(cSearchResultData);
}
{
int nq1 = num_queries / 3;
int nq2 = num_queries / 3;
int nq3 = num_queries - nq1 - nq2;
auto slice_nqs = std::vector<int64_t>{nq1, nq2, nq3};
auto slice_topKs = std::vector<int64_t>{topK / 2, topK, topK};
std::vector<CSearchResult> results;
CSearchResult res1, res2, res3;
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res1);
ASSERT_EQ(status.error_code, Success);
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res2);
ASSERT_EQ(status.error_code, Success);
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res3);
ASSERT_EQ(status.error_code, Success);
results.push_back(res1);
results.push_back(res2);
results.push_back(res3);
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
// TODO:: insert no duplicate pks and check reduce results
CheckSearchResultDuplicate(results);
DeleteSearchResult(res1);
DeleteSearchResult(res2);
DeleteSearchResult(res3);
DeleteSearchResultDataBlobs(cSearchResultData);
}
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteCollection(collection);
DeleteSegment(segment);
}
template <typename VecType = float>
void
testReduceSearchWithExpr(int N,
int topK,
int num_queries,
bool filter_all = false) {
std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", "
<< num_queries << ")" << std::endl;
std::function<const char*()> schema_fun;
std::function<std::string(int)> query_gen_fun;
if constexpr (std::is_same_v<VecType, float>) {
schema_fun = get_default_schema_config;
query_gen_fun = generate_query_data;
} else if constexpr (std::is_same_v<VecType, float16>) {
schema_fun = get_float16_schema_config;
query_gen_fun = generate_query_data_float16;
} else if constexpr (std::is_same_v<VecType, bfloat16>) {
schema_fun = get_bfloat16_schema_config;
query_gen_fun = generate_query_data_bfloat16;
}
auto collection = NewCollection(schema_fun());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
auto fmt = boost::format(R"(vector_anns: <
field_id: 100
query_info: <
topk: %1%
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0">
output_field_ids: 100)") %
topK;
// construct the predicate that filter out all data
if (filter_all) {
fmt = boost::format(R"(vector_anns: <
field_id: 100
predicates: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterThan
value: <
int64_val: %2%
>
>
>
query_info: <
topk: %1%
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0">
output_field_ids: 100)") %
topK % N;
}
auto serialized_expr_plan = fmt.str();
auto blob = query_gen_fun(num_queries);
void* plan = nullptr;
auto binary_plan =
translate_text_plan_to_binary_plan(serialized_expr_plan.data());
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
dataset.timestamps_.clear();
dataset.timestamps_.push_back(1);
std::vector<CSearchResult> results;
CSearchResult res1;
CSearchResult res2;
auto res = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[N - 1], &res1);
ASSERT_EQ(res.error_code, Success);
res = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[N - 1], &res2);
ASSERT_EQ(res.error_code, Success);
results.push_back(res1);
results.push_back(res2);
auto slice_nqs = std::vector<int64_t>{num_queries / 2, num_queries / 2};
if (num_queries == 1) {
slice_nqs = std::vector<int64_t>{num_queries};
}
auto slice_topKs = std::vector<int64_t>{topK / 2, topK};
if (topK == 1) {
slice_topKs = std::vector<int64_t>{topK, topK};
}
// 1. reduce
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
auto search_result_data_blobs =
reinterpret_cast<milvus::segcore::SearchResultDataBlobs*>(
cSearchResultData);
// check result
for (size_t i = 0; i < slice_nqs.size(); i++) {
milvus::proto::schema::SearchResultData search_result_data;
auto suc = search_result_data.ParseFromArray(
search_result_data_blobs->blobs[i].data(),
search_result_data_blobs->blobs[i].size());
ASSERT_TRUE(suc);
ASSERT_EQ(search_result_data.num_queries(), slice_nqs[i]);
ASSERT_EQ(search_result_data.top_k(), slice_topKs[i]);
ASSERT_EQ(search_result_data.ids().int_id().data_size(),
search_result_data.topks().at(0) * slice_nqs[i]);
ASSERT_EQ(search_result_data.scores().size(),
search_result_data.topks().at(0) * slice_nqs[i]);
// check real topks
ASSERT_EQ(search_result_data.topks().size(), slice_nqs[i]);
for (auto real_topk : search_result_data.topks()) {
ASSERT_LE(real_topk, slice_topKs[i]);
if (filter_all) {
ASSERT_EQ(real_topk, 0);
}
}
}
DeleteSearchResultDataBlobs(cSearchResultData);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(res1);
DeleteSearchResult(res2);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, ReduceSearchWithExpr) {
//float32
testReduceSearchWithExpr(2, 1, 1);
testReduceSearchWithExpr(2, 10, 10);
testReduceSearchWithExpr(100, 1, 1);
testReduceSearchWithExpr(100, 10, 10);
testReduceSearchWithExpr(10000, 1, 1);
testReduceSearchWithExpr(10000, 10, 10);
//float16
testReduceSearchWithExpr(2, 10, 10, false);
testReduceSearchWithExpr(100, 10, 10, false);
//bfloat16
testReduceSearchWithExpr(2, 10, 10, false);
testReduceSearchWithExpr(100, 10, 10, false);
}
TEST(CApiTest, ReduceSearchWithExprFilterAll) {
//float32
testReduceSearchWithExpr(2, 1, 1, true);
testReduceSearchWithExpr(2, 10, 10, true);
//float16
testReduceSearchWithExpr(2, 1, 1, true);
//bfloat16
testReduceSearchWithExpr(2, 1, 1, true);
}
TEST(CApiTest, LoadIndexInfo) {
// generator index
constexpr auto TOPK = 10;
auto N = 1024 * 10;
auto [raw_data, timestamps, uids] = generate_data(N);
auto get_index_obj = knowhere::IndexFactory::Instance().Create<float>(
knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
knowhere::Version::GetCurrentVersion().VersionNumber());
auto indexing = get_index_obj.value();
auto conf =
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, DIM},
{knowhere::meta::TOPK, TOPK},
{knowhere::indexparam::NLIST, 100},
{knowhere::indexparam::NPROBE, 4}};
auto database = knowhere::GenDataSet(N, DIM, raw_data.data());
indexing.Train(*database, conf);
indexing.Add(*database, conf);
EXPECT_EQ(indexing.Count(), N);
EXPECT_EQ(indexing.Dim(), DIM);
knowhere::BinarySet binary_set;
indexing.Serialize(binary_set);
CBinarySet c_binary_set = (CBinarySet)&binary_set;
void* c_load_index_info = nullptr;
auto status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_param_key1 = "index_type";
std::string index_param_value1 = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8;
status = AppendIndexParam(
c_load_index_info, index_param_key1.data(), index_param_value1.data());
std::string index_param_key2 = knowhere::meta::METRIC_TYPE;
std::string index_param_value2 = knowhere::metric::L2;
status = AppendIndexParam(
c_load_index_info, index_param_key2.data(), index_param_value2.data());
ASSERT_EQ(status.error_code, Success);
std::string field_name = "field0";
status = AppendFieldInfo(
c_load_index_info, 0, 0, 0, 0, CDataType::FloatVector, false, "");
ASSERT_EQ(status.error_code, Success);
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
status = AppendIndex(c_load_index_info, c_binary_set);
ASSERT_EQ(status.error_code, Success);
DeleteLoadIndexInfo(c_load_index_info);
}
TEST(CApiTest, LoadIndexSearch) {
// generator index
constexpr auto TOPK = 10;
auto N = 1024 * 10;
auto num_query = 100;
auto [raw_data, timestamps, uids] = generate_data(N);
auto get_index_obj = knowhere::IndexFactory::Instance().Create<float>(
knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
knowhere::Version::GetCurrentVersion().VersionNumber());
auto indexing = get_index_obj.value();
auto conf =
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, DIM},
{knowhere::meta::TOPK, TOPK},
{knowhere::indexparam::NLIST, 100},
{knowhere::indexparam::NPROBE, 4}};
auto database = knowhere::GenDataSet(N, DIM, raw_data.data());
indexing.Train(*database, conf);
indexing.Add(*database, conf);
EXPECT_EQ(indexing.Count(), N);
EXPECT_EQ(indexing.Dim(), DIM);
// serializ index to binarySet
knowhere::BinarySet binary_set;
indexing.Serialize(binary_set);
// fill loadIndexInfo
milvus::segcore::LoadIndexInfo load_index_info;
auto& index_params = load_index_info.index_params;
index_params["index_type"] = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8;
load_index_info.index = std::make_unique<VectorMemIndex<float>>(
index_params["index_type"],
knowhere::metric::L2,
knowhere::Version::GetCurrentVersion().VersionNumber());
load_index_info.index->Load(binary_set);
// search
auto query_dataset =
knowhere::GenDataSet(num_query, DIM, raw_data.data() + BIAS * DIM);
auto result = indexing.Search(*query_dataset, conf, nullptr);
}
TEST(CApiTest, Indexing_Without_Predicate) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(5);
query_info->set_round_decimal(-1);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestmap = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestmap,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestmap,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_raw_index_json =
SearchResultToJson(*search_result_on_raw_index);
auto search_result_on_bigIndex_json =
SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex));
ASSERT_EQ(search_result_on_raw_index_json.dump(1),
search_result_on_bigIndex_json.dump(1));
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Expr_Without_Predicate) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_raw_index_json =
SearchResultToJson(*search_result_on_raw_index);
auto search_result_on_bigIndex_json =
SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex));
ASSERT_EQ(search_result_on_raw_index_json.dump(1),
search_result_on_bigIndex_json.dump(1));
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_With_float_Predicate_Range) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 10;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = 1000 * 10;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
{
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
}
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 10;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_With_float_Predicate_Term) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
term_expr: <
column_info: <
field_id: 101
data_type: Int64
>
values: <
int64_val: 4200
>
values: <
int64_val: 4201
>
values: <
int64_val: 4202
>
values: <
int64_val: 4203
>
values: <
int64_val: 4204
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = 1000 * 10;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(
vector_anns: <
field_id: 100
predicates: <
term_expr: <
column_info: <
field_id: 101
data_type: Int64
>
values: <
int64_val: 4200
>
values: <
int64_val: 4201
>
values: <
int64_val: 4202
>
values: <
int64_val: 4203
>
values: <
int64_val: 4204
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection =
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "JACCARD"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BINARY,
knowhere::metric::JACCARD,
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::JACCARD;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection =
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "JACCARD"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_TRUE(res_before_load_index.error_code == Success)
<< res_before_load_index.error_msg;
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BINARY,
knowhere::metric::JACCARD,
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::JACCARD;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection =
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
term_expr: <
column_info: <
field_id: 101
data_type: Int64
>
values: <
int64_val: 4200
>
values: <
int64_val: 4201
>
values: <
int64_val: 4202
>
values: <
int64_val: 4203
>
values: <
int64_val: 4204
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "JACCARD"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 5;
int topK = 5;
auto raw_group =
CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BINARY,
knowhere::metric::JACCARD,
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::JACCARD;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
std::vector<CSearchResult> results;
results.push_back(c_search_result_on_bigIndex);
auto slice_nqs = std::vector<int64_t>{num_queries};
auto slice_topKs = std::vector<int64_t>{topK};
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
// status = ReduceSearchResultsAndFillData(plan, results.data(), results.size());
// ASSERT_EQ(status.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
ASSERT_EQ(search_result_on_bigIndex->topk_per_nq_prefix_sum_.size(),
search_result_on_bigIndex->total_nq_ + 1);
auto offset = search_result_on_bigIndex->topk_per_nq_prefix_sum_[i];
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[i * TOPK]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
DeleteSearchResultDataBlobs(cSearchResultData);
}
TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection =
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
predicates: <
term_expr: <
column_info: <
field_id: 101
data_type: Int64
>
values: <
int64_val: 4200
>
values: <
int64_val: 4201
>
values: <
int64_val: 4202
>
values: <
int64_val: 4203
>
values: <
int64_val: 4204
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "JACCARD"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 5;
int topK = 5;
auto raw_group =
CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BINARY,
knowhere::metric::JACCARD,
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::JACCARD;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
std::vector<CSearchResult> results;
results.push_back(c_search_result_on_bigIndex);
auto slice_nqs = std::vector<int64_t>{num_queries};
auto slice_topKs = std::vector<int64_t>{topK};
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
ASSERT_EQ(search_result_on_bigIndex->topk_per_nq_prefix_sum_.size(),
search_result_on_bigIndex->total_nq_ + 1);
auto offset = search_result_on_bigIndex->topk_per_nq_prefix_sum_[i];
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[i * TOPK]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
DeleteSearchResultDataBlobs(cSearchResultData);
}
TEST(CApiTest, SealedSegmentTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
int N = 1000;
std::default_random_engine e(67);
auto ages = std::vector<int64_t>(N);
for (auto& age : ages) {
age = e() % 2000;
}
auto res = LoadFieldRawData(segment, 101, ages.data(), N);
ASSERT_EQ(res.error_code, Success);
auto count = GetRowCount(segment);
ASSERT_EQ(count, N);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 10;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
SearchInfo search_info;
search_info.topk_ = TOPK;
search_info.metric_type_ = knowhere::metric::L2;
search_info.search_params_ = generate_search_conf(
IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::metric::L2);
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
EXPECT_EQ(result_on_index.distances_.size(), num_queries * TOPK);
status = LoadFieldRawData(segment, 101, counter_col.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 0, dataset.row_ids_.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 1, dataset.timestamps_.data(), N);
ASSERT_EQ(status.error_code, Success);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SealedSegment_search_without_predicates) {
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
uint64_t ts_offset = 1000;
auto dataset = DataGen(schema, N, ts_offset);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
auto vec_array = dataset.get_col(FieldId(100));
auto vec_data = serialize(vec_array.get());
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
status = LoadFieldRawData(segment, 100, vec_data.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 101, counter_col.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 0, dataset.row_ids_.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 1, dataset.timestamps_.data(), N);
ASSERT_EQ(status.error_code, Success);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, N + ts_offset, &search_result);
std::cout << res.error_msg << std::endl;
ASSERT_EQ(res.error_code, Success);
CSearchResult search_result2;
auto res2 = CSearch(
segment, plan, placeholderGroup, N + ts_offset, &search_result2);
ASSERT_EQ(res2.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteSearchResult(search_result2);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 10;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load vec index
status = UpdateSealedSegmentIndex(segment, c_load_index_info);
ASSERT_EQ(status.error_code, Success);
// load raw data
status = LoadFieldRawData(segment, 101, counter_col.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 0, dataset.row_ids_.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 1, dataset.timestamps_.data(), N);
ASSERT_EQ(status.error_code, Success);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SealedSegment_Update_Field_Size) {
auto schema = std::make_shared<Schema>();
auto str_fid = schema->AddDebugField("string", DataType::VARCHAR);
auto vec_fid = schema->AddDebugField(
"vector_float", DataType::VECTOR_FLOAT, DIM, "L2");
schema->set_primary_field_id(str_fid);
auto segment = CreateSealedSegment(schema).release();
auto N = ROW_COUNT;
int row_size = 10;
// update row_size =10 with n rows
auto status =
UpdateFieldRawDataSize(segment, str_fid.get(), N, N * row_size);
ASSERT_EQ(status.error_code, Success);
ASSERT_EQ(segment->get_field_avg_size(str_fid), row_size);
// load data and update avg field size
std::vector<std::string> str_datas;
int64_t total_size = 0;
for (int i = 0; i < N; ++i) {
auto str = "string_data_" + std::to_string(i);
total_size += str.size();
str_datas.emplace_back(str);
}
auto res = LoadFieldRawData(segment, str_fid.get(), str_datas.data(), N);
ASSERT_EQ(res.error_code, Success);
ASSERT_EQ(segment->get_field_avg_size(str_fid),
(row_size * N + total_size) / (2 * N));
DeleteSegment(segment);
}
TEST(CApiTest, GrowingSegment_Load_Field_Data) {
auto schema = std::make_shared<Schema>();
schema->AddField(FieldName("RowID"), FieldId(0), DataType::INT64);
schema->AddField(FieldName("Timestamp"), FieldId(1), DataType::INT64);
auto str_fid = schema->AddDebugField("string", DataType::VARCHAR);
auto vec_fid = schema->AddDebugField(
"vector_float", DataType::VECTOR_FLOAT, DIM, "L2");
schema->set_primary_field_id(str_fid);
auto segment = CreateGrowingSegment(schema, empty_index_meta).release();
int N = ROW_COUNT;
auto raw_data = DataGen(schema, N);
auto storage_config = get_default_local_storage_config();
auto cm = storage::CreateChunkManager(storage_config);
auto load_info =
PrepareInsertBinlog(1,
2,
3,
storage_config.root_path + "/" + "test_load_sealed",
raw_data,
cm);
auto status = LoadFieldData(segment, &load_info);
ASSERT_EQ(status.error_code, Success);
ASSERT_EQ(segment->get_real_count(), ROW_COUNT);
ASSERT_NE(segment->get_field_avg_size(str_fid), 0);
DeleteSegment(segment);
}
TEST(CApiTest, RetriveScalarFieldFromSealedSegmentWithIndex) {
auto schema = std::make_shared<Schema>();
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
auto float_fid = schema->AddDebugField("age_float", DataType::FLOAT);
auto double_fid = schema->AddDebugField("age_double", DataType::DOUBLE);
schema->set_primary_field_id(i64_fid);
auto segment = CreateSealedSegment(schema).release();
int N = ROW_COUNT;
auto raw_data = DataGen(schema, N);
LoadIndexInfo load_index_info;
// load timestamp field
auto res = LoadFieldRawData(
segment, TimestampFieldID.get(), raw_data.timestamps_.data(), N);
ASSERT_EQ(res.error_code, Success);
auto count = GetRowCount(segment);
ASSERT_EQ(count, N);
// load rowid field
res = LoadFieldRawData(
segment, RowFieldID.get(), raw_data.row_ids_.data(), N);
ASSERT_EQ(res.error_code, Success);
count = GetRowCount(segment);
ASSERT_EQ(count, N);
// load index for int8 field
auto age8_col = raw_data.get_col<int8_t>(i8_fid);
GenScalarIndexing(N, age8_col.data());
auto age8_index = milvus::index::CreateScalarIndexSort<int8_t>();
age8_index->Build(N, age8_col.data());
load_index_info.field_id = i8_fid.get();
load_index_info.field_type = DataType::INT8;
load_index_info.index = std::move(age8_index);
segment->LoadIndex(load_index_info);
// load index for 16 field
auto age16_col = raw_data.get_col<int16_t>(i16_fid);
GenScalarIndexing(N, age16_col.data());
auto age16_index = milvus::index::CreateScalarIndexSort<int16_t>();
age16_index->Build(N, age16_col.data());
load_index_info.field_id = i16_fid.get();
load_index_info.field_type = DataType::INT16;
load_index_info.index = std::move(age16_index);
segment->LoadIndex(load_index_info);
// load index for int32 field
auto age32_col = raw_data.get_col<int32_t>(i32_fid);
GenScalarIndexing(N, age32_col.data());
auto age32_index = milvus::index::CreateScalarIndexSort<int32_t>();
age32_index->Build(N, age32_col.data());
load_index_info.field_id = i32_fid.get();
load_index_info.field_type = DataType::INT32;
load_index_info.index = std::move(age32_index);
segment->LoadIndex(load_index_info);
// load index for int64 field
auto age64_col = raw_data.get_col<int64_t>(i64_fid);
GenScalarIndexing(N, age64_col.data());
auto age64_index = milvus::index::CreateScalarIndexSort<int64_t>();
age64_index->Build(N, age64_col.data());
load_index_info.field_id = i64_fid.get();
load_index_info.field_type = DataType::INT64;
load_index_info.index = std::move(age64_index);
segment->LoadIndex(load_index_info);
// load index for float field
auto age_float_col = raw_data.get_col<float>(float_fid);
GenScalarIndexing(N, age_float_col.data());
auto age_float_index = milvus::index::CreateScalarIndexSort<float>();
age_float_index->Build(N, age_float_col.data());
load_index_info.field_id = float_fid.get();
load_index_info.field_type = DataType::FLOAT;
load_index_info.index = std::move(age_float_index);
segment->LoadIndex(load_index_info);
// load index for double field
auto age_double_col = raw_data.get_col<double>(double_fid);
GenScalarIndexing(N, age_double_col.data());
auto age_double_index = milvus::index::CreateScalarIndexSort<double>();
age_double_index->Build(N, age_double_col.data());
load_index_info.field_id = double_fid.get();
load_index_info.field_type = DataType::FLOAT;
load_index_info.index = std::move(age_double_index);
segment->LoadIndex(load_index_info);
// create retrieve plan
auto plan = std::make_unique<query::RetrievePlan>(*schema);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
std::vector<proto::plan::GenericValue> retrive_row_ids;
proto::plan::GenericValue val;
val.set_int64_val(age64_col[0]);
retrive_row_ids.push_back(val);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
i64_fid, DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids;
// retrieve value
target_field_ids = {
i8_fid, i16_fid, i32_fid, i64_fid, float_fid, double_fid};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
res = CRetrieve(
segment, plan.get(), raw_data.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->fields_data().size(), 6);
auto fields_data = query_result->fields_data();
for (auto iter = fields_data.begin(); iter < fields_data.end(); ++iter) {
switch (iter->type()) {
case proto::schema::DataType::Int8: {
ASSERT_EQ(iter->scalars().int_data().data(0), age8_col[0]);
break;
}
case proto::schema::DataType::Int16: {
ASSERT_EQ(iter->scalars().int_data().data(0), age16_col[0]);
break;
}
case proto::schema::DataType::Int32: {
ASSERT_EQ(iter->scalars().int_data().data(0), age32_col[0]);
break;
}
case proto::schema::DataType::Int64: {
ASSERT_EQ(iter->scalars().long_data().data(0), age64_col[0]);
break;
}
case proto::schema::DataType::Float: {
ASSERT_EQ(iter->scalars().float_data().data(0),
age_float_col[0]);
break;
}
case proto::schema::DataType::Double: {
ASSERT_EQ(iter->scalars().double_data().data(0),
age_double_col[0]);
break;
}
default: {
PanicInfo(DataTypeInvalid, "not supported type");
}
}
}
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_IP) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "IP"
search_params: "{\"nprobe\": 10,\"radius\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP) {
auto c_collection =
NewCollection(get_default_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "IP"
search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_L2) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "L2"
search_params: "{\"nprobe\": 10,\"radius\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_L2) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "L2"
search_params: "{\"nprobe\": 10,\"radius\": 20, \"range_filter\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, AssembeChunkTest) {
TargetBitmap chunk(1000);
for (size_t i = 0; i < 1000; ++i) {
chunk[i] = (i % 2 == 0);
}
BitsetType result;
milvus::query::AppendOneChunk(result, chunk);
// std::string s;
// boost::to_string(result, s);
// std::cout << s << std::endl;
int index = 0;
for (size_t i = 0; i < 1000; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
chunk = TargetBitmap(934);
for (int i = 0; i < 934; ++i) {
chunk[i] = (i % 2 == 0);
}
milvus::query::AppendOneChunk(result, chunk);
for (size_t i = 0; i < 934; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
chunk = TargetBitmap(62);
for (int i = 0; i < 62; ++i) {
chunk[i] = (i % 2 == 0);
}
milvus::query::AppendOneChunk(result, chunk);
for (size_t i = 0; i < 62; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
chunk = TargetBitmap(105);
for (int i = 0; i < 105; ++i) {
chunk[i] = (i % 2 == 0);
}
milvus::query::AppendOneChunk(result, chunk);
for (size_t i = 0; i < 105; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
}
std::vector<SegOffset>
search_id(const BitsetType& bitset,
Timestamp* timestamps,
Timestamp timestamp,
bool use_find) {
std::vector<SegOffset> dst_offset;
if (use_find) {
auto i = bitset.find_first();
while (i.has_value()) {
auto offset = SegOffset(i.value());
if (timestamps[offset.get()] <= timestamp) {
dst_offset.push_back(offset);
}
i = bitset.find_next(i.value());
}
return dst_offset;
} else {
for (int i = 0; i < bitset.size(); i++) {
if (bitset[i]) {
auto offset = SegOffset(i);
if (timestamps[offset.get()] <= timestamp) {
dst_offset.push_back(offset);
}
}
}
}
return dst_offset;
}
TEST(CApiTest, SearchIdTest) {
// using BitsetType = boost::dynamic_bitset<>;
auto test = [&](int NT) {
BitsetType bitset(1000000);
Timestamp* timestamps = new Timestamp[1000000];
srand(time(NULL));
for (int i = 0; i < 1000000; i++) {
timestamps[i] = i;
bitset[i] = false;
}
for (int i = 0; i < NT; i++) {
bitset[1000000 * ((double)rand() / RAND_MAX)] = true;
}
auto start = std::chrono::steady_clock::now();
auto res1 = search_id(bitset, timestamps, 1000000, true);
std::cout << "search id cost:"
<< std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - start)
.count()
<< "us" << std::endl;
start = std::chrono::steady_clock::now();
auto res2 = search_id(bitset, timestamps, 1000000, false);
std::cout << "search id origin cost:"
<< std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - start)
.count()
<< "us" << std::endl;
ASSERT_EQ(res1.size(), res2.size());
for (int i = 0; i < res1.size(); i++) {
if (res1[i].get() != res2[i].get()) {
std::cout << "error:" << i;
}
}
start = std::chrono::steady_clock::now();
bitset.flip();
std::cout << "bit set flip cost:"
<< std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - start)
.count()
<< "us" << std::endl;
delete[] timestamps;
};
int test_nt[] = {10, 50, 100};
for (auto nt : test_nt) {
test(nt);
}
}
TEST(CApiTest, AssembeChunkPerfTest) {
TargetBitmap chunk(100000000);
for (size_t i = 0; i < 100000000; ++i) {
chunk[i] = (i % 2 == 0);
}
BitsetType result;
// while (true) {
std::cout << "start test" << std::endl;
auto start = std::chrono::steady_clock::now();
milvus::query::AppendOneChunk(result, chunk);
std::cout << "cost: "
<< std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - start)
.count()
<< "us" << std::endl;
int index = 0;
for (size_t i = 0; i < 1000; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
// }
// std::string s;
// boost::to_string(result, s);
// std::cout << s << std::endl;
}
TEST(CApiTest, Indexing_Without_Predicate_float16) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::Float16Vector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float16>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(
milvus::proto::plan::VectorType::Float16Vector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(5);
query_info->set_round_decimal(-1);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreateFloat16PlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestmap = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestmap,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT16,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IDMAP,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IDMAP;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::Float16Vector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestmap,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_raw_index_json =
SearchResultToJson(*search_result_on_raw_index);
auto search_result_on_bigIndex_json =
SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex));
ASSERT_EQ(search_result_on_raw_index_json.dump(1),
search_result_on_bigIndex_json.dump(1));
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Without_Predicate_bfloat16) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::BFloat16Vector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<bfloat16>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(
milvus::proto::plan::VectorType::BFloat16Vector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(5);
query_info->set_round_decimal(-1);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreateBFloat16PlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestmap = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestmap,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BFLOAT16,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IDMAP,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IDMAP;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BFloat16Vector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestmap,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_raw_index_json =
SearchResultToJson(*search_result_on_raw_index);
auto search_result_on_bigIndex_json =
SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex));
ASSERT_EQ(search_result_on_raw_index_json.dump(1),
search_result_on_bigIndex_json.dump(1));
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_FLOAT16) {
auto c_collection =
NewCollection(get_float16_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "IP"
search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data_float16(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_BFLOAT16) {
auto c_collection =
NewCollection(get_bfloat16_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "IP"
search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data_bfloat16(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, IsLoadWithDisk) {
ASSERT_TRUE(IsLoadWithDisk(INVERTED_INDEX_TYPE, 0));
}