// Copyright (C) 2019-2020 Zilliz. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License #include #include #include #include #include #include #include #include #include "common/LoadInfo.h" #include "knowhere/index/VecIndexFactory.h" #include "knowhere/index/vector_index/IndexIVFPQ.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h" #include "knowhere/index/vector_index/adapter/VectorAdapter.h" #include "pb/plan.pb.h" #include "query/ExprImpl.h" #include "segcore/Collection.h" #include "segcore/reduce_c.h" #include "segcore/Reduce.h" #include "test_utils/DataGen.h" #include "index/IndexFactory.h" #include "test_utils/indexbuilder_test_utils.h" #include "test_utils/PbHelper.h" namespace chrono = std::chrono; using namespace milvus; using namespace milvus::segcore; using namespace milvus::index; using namespace knowhere; using milvus::index::VectorIndex; using milvus::segcore::LoadIndexInfo; namespace { // const int DIM = 16; const int64_t ROW_COUNT = 100 * 1000; const CStorageConfig c_storage_config = get_default_cstorage_config(); const char* get_default_schema_config() { static std::string conf = R"(name: "default-collection" fields: < fieldID: 100 name: "fakevec" data_type: FloatVector type_params: < key: "dim" value: "16" > index_params: < key: "metric_type" value: "L2" > > fields: < fieldID: 101 name: "age" data_type: Int64 is_primary_key: true >)"; static std::string fake_conf = ""; return conf.c_str(); } auto generate_data(int N) { std::vector 
raw_data; std::vector timestamps; std::vector uids; std::default_random_engine e(42); std::normal_distribution<> dis(0.0, 1.0); for (int i = 0; i < N; ++i) { uids.push_back(10 * N + i); timestamps.push_back(0); float vec[DIM]; for (auto& x : vec) { x = dis(e); } raw_data.insert(raw_data.end(), (const char*)std::begin(vec), (const char*)std::end(vec)); int age = e() % 100; raw_data.insert(raw_data.end(), (const char*)&age, ((const char*)&age) + sizeof(age)); } return std::make_tuple(raw_data, timestamps, uids); } std::string generate_max_float_query_data(int all_nq, int max_float_nq) { assert(max_float_nq <= all_nq); namespace ser = milvus::proto::common; int dim = DIM; ser::PlaceholderGroup raw_group; auto value = raw_group.add_placeholders(); value->set_tag("$0"); value->set_type(ser::PlaceholderType::FloatVector); for (int i = 0; i < all_nq; ++i) { std::vector vec; if (i < max_float_nq) { for (int d = 0; d < dim; ++d) { vec.push_back(std::numeric_limits::max()); } } else { for (int d = 0; d < dim; ++d) { vec.push_back(1); } } value->add_values(vec.data(), vec.size() * sizeof(float)); } auto blob = raw_group.SerializeAsString(); return blob; } std::string generate_query_data(int nq) { namespace ser = milvus::proto::common; std::default_random_engine e(67); int dim = DIM; std::normal_distribution dis(0.0, 1.0); ser::PlaceholderGroup raw_group; auto value = raw_group.add_placeholders(); value->set_tag("$0"); value->set_type(ser::PlaceholderType::FloatVector); for (int i = 0; i < nq; ++i) { std::vector vec; for (int d = 0; d < dim; ++d) { vec.push_back(dis(e)); } value->add_values(vec.data(), vec.size() * sizeof(float)); } auto blob = raw_group.SerializeAsString(); return blob; } std::string generate_collection_schema(std::string metric_type, int dim, bool is_binary) { namespace schema = milvus::proto::schema; schema::CollectionSchema collection_schema; collection_schema.set_name("collection_test"); auto vec_field_schema = collection_schema.add_fields(); 
vec_field_schema->set_name("fakevec"); vec_field_schema->set_fieldid(100); if (is_binary) { vec_field_schema->set_data_type(schema::DataType::BinaryVector); } else { vec_field_schema->set_data_type(schema::DataType::FloatVector); } auto metric_type_param = vec_field_schema->add_index_params(); metric_type_param->set_key("metric_type"); metric_type_param->set_value(metric_type); auto dim_param = vec_field_schema->add_type_params(); dim_param->set_key("dim"); dim_param->set_value(std::to_string(dim)); auto other_field_schema = collection_schema.add_fields(); other_field_schema->set_name("counter"); other_field_schema->set_fieldid(101); other_field_schema->set_data_type(schema::DataType::Int64); other_field_schema->set_is_primary_key(true); auto other_field_schema2 = collection_schema.add_fields(); other_field_schema2->set_name("doubleField"); other_field_schema2->set_fieldid(102); other_field_schema2->set_data_type(schema::DataType::Double); std::string schema_string; auto marshal = google::protobuf::TextFormat::PrintToString(collection_schema, &schema_string); assert(marshal == true); return schema_string; } // VecIndexPtr // generate_index( // void* raw_data, knowhere::Config conf, int64_t dim, int64_t topK, int64_t N, knowhere::IndexType index_type) { // auto indexing = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type, // knowhere::IndexMode::MODE_CPU); // // auto database = knowhere::GenDataset(N, dim, raw_data); // indexing->Train(database, conf); // indexing->AddWithoutIds(database, conf); // EXPECT_EQ(indexing->Count(), N); // EXPECT_EQ(indexing->Dim(), dim); // // EXPECT_EQ(indexing->Count(), N); // EXPECT_EQ(indexing->Dim(), dim); // return indexing; //} //} // namespace IndexBasePtr generate_index( void* raw_data, DataType field_type, MetricType metric_type, IndexType index_type, int64_t dim, int64_t N) { CreateIndexInfo create_index_info{field_type, index_type, metric_type}; auto indexing = 
milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr); auto database = knowhere::GenDataset(N, dim, raw_data); auto build_config = generate_build_conf(index_type, metric_type); indexing->BuildWithDataset(database, build_config); auto vec_indexing = dynamic_cast(indexing.get()); EXPECT_EQ(vec_indexing->Count(), N); EXPECT_EQ(vec_indexing->GetDim(), dim); return indexing; } } // namespace TEST(CApiTest, CollectionTest) { auto collection = NewCollection(get_default_schema_config()); DeleteCollection(collection); } TEST(CApiTest, GetCollectionNameTest) { auto collection = NewCollection(get_default_schema_config()); auto name = GetCollectionName(collection); assert(strcmp(name, "default-collection") == 0); DeleteCollection(collection); free((void*)(name)); } TEST(CApiTest, SegmentTest) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, CPlan) { std::string schema_string = generate_collection_schema("JACCARD", DIM, true); auto collection = NewCollection(schema_string.c_str()); const char* dsl_string = R"( { "bool": { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 10, "round_decimal": 3 } } } })"; void* plan = nullptr; auto status = CreateSearchPlan(collection, dsl_string, &plan); assert(status.error_code == Success); int64_t field_id = -1; status = GetFieldID(plan, &field_id); assert(status.error_code == Success); auto col = static_cast(collection); for (auto& [target_field_id, field_meta] : col->get_schema()->get_fields()) { if (field_meta.is_vector()) { assert(field_id == target_field_id.get()); } } assert(field_id != -1); DeleteSearchPlan(plan); DeleteCollection(collection); } TEST(CApiTest, InsertTest) { auto c_collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(c_collection, Growing, -1); auto col = 
(milvus::segcore::Collection*)c_collection; int N = 10000; auto dataset = DataGen(col->get_schema(), N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); DeleteCollection(c_collection); DeleteSegment(segment); } TEST(CApiTest, DeleteTest) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); std::vector delete_row_ids = {100000, 100001, 100002}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); auto delete_data = serialize(ids.get()); uint64_t delete_timestamps[] = {0, 0, 0}; auto offset = PreDelete(segment, 3); auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps); assert(del_res.error_code == Success); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, MultiDeleteGrowingSegment) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto col = (milvus::segcore::Collection*)collection; int N = 10; auto dataset = DataGen(col->get_schema(), N); auto insert_data = serialize(dataset.raw_); // insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} int64_t offset; PreInsert(segment, N, &offset); auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); // delete data pks = {1} std::vector delete_pks = {1}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(), delete_pks.end()); auto delete_data = serialize(ids.get()); std::vector delete_timestamps(1, dataset.timestamps_[N - 1]); offset = PreDelete(segment, 1); auto del_res = Delete(segment, offset, 1, delete_data.data(), 
delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // retrieve pks = {1} std::vector retrive_pks = {1}; auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); auto plan = std::make_unique(*schema); auto term_expr = std::make_unique>(FieldId(101), DataType::INT64, retrive_pks); plan->plan_node_ = std::make_unique(); plan->plan_node_->predicate_ = std::move(term_expr); std::vector target_field_ids{FieldId(100), FieldId(101)}; plan->field_ids_ = target_field_ids; CRetrieveResult retrieve_result; res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); auto query_result = std::make_unique(); auto suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 0); DeleteRetrieveResult(&retrieve_result); // retrieve pks = {2} retrive_pks = {2}; term_expr = std::make_unique>(FieldId(101), DataType::INT64, retrive_pks); plan->plan_node_->predicate_ = std::move(term_expr); res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 1); DeleteRetrieveResult(&retrieve_result); // delete pks = {2} delete_pks = {2}; ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(), delete_pks.end()); delete_data = serialize(ids.get()); offset = PreDelete(segment, 1); del_res = Delete(segment, offset, 1, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // retrieve pks in {2} res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); suc = query_result->ParseFromArray(retrieve_result.proto_blob, 
retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 0); DeleteRetrievePlan(plan.release()); DeleteRetrieveResult(&retrieve_result); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, MultiDeleteSealedSegment) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Sealed, -1); auto col = (milvus::segcore::Collection*)collection; int N = 10; auto dataset = DataGen(col->get_schema(), N); // load field data for (auto& [field_id, field_meta] : col->get_schema()->get_fields()) { auto array = dataset.get_col(field_id); auto data = serialize(array.get()); auto load_info = CLoadFieldDataInfo{field_id.get(), data.data(), data.size(), N}; auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); auto count = GetRowCount(segment); assert(count == N); } // load timestamps FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64); auto ts_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta); auto ts_data = serialize(ts_array.get()); auto load_info = CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.data(), ts_data.size(), N}; auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); auto count = GetRowCount(segment); assert(count == N); // load rowID FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64); auto row_id_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta); auto row_id_data = serialize(row_id_array.get()); load_info = CLoadFieldDataInfo{RowFieldID.get(), row_id_data.data(), row_id_data.size(), N}; res = LoadFieldData(segment, load_info); assert(res.error_code == Success); count = GetRowCount(segment); assert(count == N); // delete data pks = {1} std::vector delete_pks = {1}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(), delete_pks.end()); auto delete_data = 
serialize(ids.get()); std::vector delete_timestamps(1, dataset.timestamps_[N - 1]); auto offset = PreDelete(segment, 1); auto del_res = Delete(segment, offset, 1, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // retrieve pks = {1} std::vector retrive_pks = {1}; auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); auto plan = std::make_unique(*schema); auto term_expr = std::make_unique>(FieldId(101), DataType::INT64, retrive_pks); plan->plan_node_ = std::make_unique(); plan->plan_node_->predicate_ = std::move(term_expr); std::vector target_field_ids{FieldId(100), FieldId(101)}; plan->field_ids_ = target_field_ids; CRetrieveResult retrieve_result; res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); auto query_result = std::make_unique(); auto suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 0); DeleteRetrieveResult(&retrieve_result); // retrieve pks = {2} retrive_pks = {2}; term_expr = std::make_unique>(FieldId(101), DataType::INT64, retrive_pks); plan->plan_node_->predicate_ = std::move(term_expr); res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 1); DeleteRetrieveResult(&retrieve_result); // delete pks = {2} delete_pks = {2}; ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(), delete_pks.end()); delete_data = serialize(ids.get()); offset = PreDelete(segment, 1); del_res = Delete(segment, offset, 1, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // retrieve pks in {2} res = 
Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 0); DeleteRetrievePlan(plan.release()); DeleteRetrieveResult(&retrieve_result); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto col = (milvus::segcore::Collection*)collection; int N = 10; auto dataset = DataGen(col->get_schema(), N); auto insert_data = serialize(dataset.raw_); // first insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} int64_t offset; PreInsert(segment, N, &offset); auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); // second insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} PreInsert(segment, N, &offset); res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); // create retrieve plan pks in {1, 2, 3} std::vector retrive_row_ids = {1, 2, 3}; auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); auto plan = std::make_unique(*schema); auto term_expr = std::make_unique>(FieldId(101), DataType::INT64, retrive_row_ids); plan->plan_node_ = std::make_unique(); plan->plan_node_->predicate_ = std::move(term_expr); std::vector target_field_ids{FieldId(100), FieldId(101)}; plan->field_ids_ = target_field_ids; CRetrieveResult retrieve_result; res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); auto query_result = std::make_unique(); auto suc = query_result->ParseFromArray(retrieve_result.proto_blob, 
retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 6); DeleteRetrieveResult(&retrieve_result); // delete data pks = {1, 2, 3} std::vector delete_row_ids = {1, 2, 3}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); auto delete_data = serialize(ids.get()); std::vector delete_timestamps(3, dataset.timestamps_[N - 1]); offset = PreDelete(segment, 3); auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // retrieve pks in {1, 2, 3} res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); query_result = std::make_unique(); suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 0); DeleteRetrievePlan(plan.release()); DeleteRetrieveResult(&retrieve_result); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Sealed, -1); auto col = (milvus::segcore::Collection*)collection; int N = 20; auto dataset = DataGen(col->get_schema(), N, 42, 0, 2); for (auto& [field_id, field_meta] : col->get_schema()->get_fields()) { auto array = dataset.get_col(field_id); auto data = serialize(array.get()); auto load_info = CLoadFieldDataInfo{field_id.get(), data.data(), data.size(), N}; auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); auto count = GetRowCount(segment); assert(count == N); } FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64); auto ts_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta); auto ts_data = serialize(ts_array.get()); auto load_info = 
CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.data(), ts_data.size(), N}; auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); auto count = GetRowCount(segment); assert(count == N); FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64); auto row_id_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta); auto row_id_data = serialize(row_id_array.get()); load_info = CLoadFieldDataInfo{RowFieldID.get(), row_id_data.data(), row_id_data.size(), N}; res = LoadFieldData(segment, load_info); assert(res.error_code == Success); count = GetRowCount(segment); assert(count == N); // create retrieve plan pks in {1, 2, 3} std::vector retrive_row_ids = {1, 2, 3}; auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); auto plan = std::make_unique(*schema); auto term_expr = std::make_unique>(FieldId(101), DataType::INT64, retrive_row_ids); plan->plan_node_ = std::make_unique(); plan->plan_node_->predicate_ = std::move(term_expr); std::vector target_field_ids{FieldId(100), FieldId(101)}; plan->field_ids_ = target_field_ids; CRetrieveResult retrieve_result; res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); auto query_result = std::make_unique(); auto suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 6); DeleteRetrieveResult(&retrieve_result); // delete data pks = {1, 2, 3} std::vector delete_row_ids = {1, 2, 3}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); auto delete_data = serialize(ids.get()); std::vector delete_timestamps(3, dataset.timestamps_[N - 1]); auto offset = PreDelete(segment, 3); auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == 
Success); // retrieve pks in {1, 2, 3} res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); query_result = std::make_unique(); suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 0); DeleteRetrievePlan(plan.release()); DeleteRetrieveResult(&retrieve_result); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, InsertSamePkAfterDeleteOnGrowingSegment) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto col = (milvus::segcore::Collection*)collection; int N = 10; auto dataset = DataGen(col->get_schema(), N); auto insert_data = serialize(dataset.raw_); // first insert data // insert data with pks = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} , timestamps = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} int64_t offset; PreInsert(segment, N, &offset); auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); // delete data pks = {1, 2, 3}, timestamps = {9, 9, 9} std::vector delete_row_ids = {1, 2, 3}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); auto delete_data = serialize(ids.get()); std::vector delete_timestamps(3, dataset.timestamps_[N - 1]); offset = PreDelete(segment, 3); auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // create retrieve plan pks in {1, 2, 3}, timestamp = 9 std::vector retrive_row_ids = {1, 2, 3}; auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); auto plan = std::make_unique(*schema); auto term_expr = std::make_unique>(FieldId(101), DataType::INT64, retrive_row_ids); plan->plan_node_ = std::make_unique(); 
plan->plan_node_->predicate_ = std::move(term_expr); std::vector target_field_ids{FieldId(100), FieldId(101)}; plan->field_ids_ = target_field_ids; CRetrieveResult retrieve_result; res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); auto query_result = std::make_unique(); auto suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 0); DeleteRetrieveResult(&retrieve_result); // second insert data // insert data with pks = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} , timestamps = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19} dataset = DataGen(col->get_schema(), N, 42, N); insert_data = serialize(dataset.raw_); PreInsert(segment, N, &offset); res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); // retrieve pks in {1, 2, 3}, timestamp = 19 res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); query_result = std::make_unique(); suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 3); DeleteRetrievePlan(plan.release()); DeleteRetrieveResult(&retrieve_result); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, InsertSamePkAfterDeleteOnSealedSegment) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Sealed, -1); auto col = (milvus::segcore::Collection*)collection; int N = 10; auto dataset = DataGen(col->get_schema(), N, 42, 0, 2); // insert data with pks = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5} , timestamps = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} for (auto& [field_id, field_meta] : col->get_schema()->get_fields()) { auto array = dataset.get_col(field_id); auto data = 
serialize(array.get()); auto load_info = CLoadFieldDataInfo{field_id.get(), data.data(), data.size(), N}; auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); auto count = GetRowCount(segment); assert(count == N); } FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64); auto ts_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta); auto ts_data = serialize(ts_array.get()); auto load_info = CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.data(), ts_data.size(), N}; auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); auto count = GetRowCount(segment); assert(count == N); FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64); auto row_id_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta); auto row_id_data = serialize(row_id_array.get()); load_info = CLoadFieldDataInfo{RowFieldID.get(), row_id_data.data(), row_id_data.size(), N}; res = LoadFieldData(segment, load_info); assert(res.error_code == Success); count = GetRowCount(segment); assert(count == N); // delete data pks = {1, 2, 3}, timestamps = {4, 4, 4} std::vector delete_row_ids = {1, 2, 3}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); auto delete_data = serialize(ids.get()); std::vector delete_timestamps(3, dataset.timestamps_[4]); auto offset = PreDelete(segment, 3); auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // create retrieve plan pks in {1, 2, 3}, timestamp = 9 std::vector retrive_row_ids = {1, 2, 3}; auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); auto plan = std::make_unique(*schema); auto term_expr = std::make_unique>(FieldId(101), DataType::INT64, retrive_row_ids); plan->plan_node_ = std::make_unique(); plan->plan_node_->predicate_ = 
std::move(term_expr); std::vector target_field_ids{FieldId(100), FieldId(101)}; plan->field_ids_ = target_field_ids; CRetrieveResult retrieve_result; res = Retrieve(segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); auto query_result = std::make_unique(); auto suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->ids().int_id().data().size(), 3); DeleteRetrievePlan(plan.release()); DeleteRetrieveResult(&retrieve_result); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, SearchTest) { auto c_collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(c_collection, Growing, -1); auto col = (milvus::segcore::Collection*)c_collection; int N = 10000; auto dataset = DataGen(col->get_schema(), N); int64_t ts_offset = 1000; int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); ASSERT_EQ(ins_res.error_code, Success); const char* dsl_string = R"( { "bool": { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 10, "round_decimal": 3 } } } })"; int num_queries = 10; auto blob = generate_query_data(num_queries); void* plan = nullptr; auto status = CreateSearchPlan(c_collection, dsl_string, &plan); ASSERT_EQ(status.error_code, Success); void* placeholderGroup = nullptr; status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); ASSERT_EQ(status.error_code, Success); std::vector placeholderGroups; placeholderGroups.push_back(placeholderGroup); CSearchResult search_result; auto res = Search(segment, plan, placeholderGroup, N + ts_offset, &search_result); ASSERT_EQ(res.error_code, Success); CSearchResult search_result2; auto res2 = Search(segment, plan, 
placeholderGroup, ts_offset, &search_result2); ASSERT_EQ(res2.error_code, Success); DeleteSearchPlan(plan); DeletePlaceholderGroup(placeholderGroup); DeleteSearchResult(search_result); DeleteSearchResult(search_result2); DeleteCollection(c_collection); DeleteSegment(segment); } TEST(CApiTest, SearchTestWithExpr) { auto c_collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(c_collection, Growing, -1); auto col = (milvus::segcore::Collection*)c_collection; int N = 10000; auto dataset = DataGen(col->get_schema(), N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); ASSERT_EQ(ins_res.error_code, Success); const char* serialized_expr_plan = R"(vector_anns: < field_id: 100 query_info: < topk: 10 metric_type: "L2" search_params: "{\"nprobe\": 10}" > placeholder_tag: "$0" >)"; int num_queries = 10; auto blob = generate_query_data(num_queries); void* plan = nullptr; auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan); auto status = CreateSearchPlanByExpr(c_collection, binary_plan.data(), binary_plan.size(), &plan); ASSERT_EQ(status.error_code, Success); void* placeholderGroup = nullptr; status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); ASSERT_EQ(status.error_code, Success); std::vector placeholderGroups; placeholderGroups.push_back(placeholderGroup); dataset.timestamps_.clear(); dataset.timestamps_.push_back(1); CSearchResult search_result; auto res = Search(segment, plan, placeholderGroup, dataset.timestamps_[0], &search_result); ASSERT_EQ(res.error_code, Success); DeleteSearchPlan(plan); DeletePlaceholderGroup(placeholderGroup); DeleteSearchResult(search_result); DeleteCollection(c_collection); DeleteSegment(segment); } TEST(CApiTest, RetrieveTestWithExpr) { auto collection = 
NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); auto plan = std::make_unique(*schema); int N = 10000; auto dataset = DataGen(schema, N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); ASSERT_EQ(ins_res.error_code, Success); // create retrieve plan "age in [0]" std::vector values(1, 0); auto term_expr = std::make_unique>(FieldId(101), DataType::INT64, values); plan->plan_node_ = std::make_unique(); plan->plan_node_->predicate_ = std::move(term_expr); std::vector target_field_ids{FieldId(100), FieldId(101)}; plan->field_ids_ = target_field_ids; CRetrieveResult retrieve_result; auto res = Retrieve(segment, plan.get(), dataset.timestamps_[0], &retrieve_result); ASSERT_EQ(res.error_code, Success); DeleteRetrievePlan(plan.release()); DeleteRetrieveResult(&retrieve_result); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, GetMemoryUsageInBytesTest) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto old_memory_usage_size = GetMemoryUsageInBytes(segment); // std::cout << "old_memory_usage_size = " << old_memory_usage_size << std::endl; assert(old_memory_usage_size == 0); auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); int N = 10000; auto dataset = DataGen(schema, N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); auto memory_usage_size = GetMemoryUsageInBytes(segment); // std::cout << "new_memory_usage_size = " << memory_usage_size << std::endl; // TODO:: 
assert // assert(memory_usage_size == 2785280); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, GetDeletedCountTest) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); std::vector delete_row_ids = {100000, 100001, 100002}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); auto delete_data = serialize(ids.get()); uint64_t delete_timestamps[] = {0, 0, 0}; auto offset = PreDelete(segment, 3); auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps); assert(del_res.error_code == Success); // TODO: assert(deleted_count == len(delete_row_ids)) auto deleted_count = GetDeletedCount(segment); assert(deleted_count == delete_row_ids.size()); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, GetRowCountTest) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); int N = 10000; auto dataset = DataGen(schema, N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); auto row_count = GetRowCount(segment); assert(row_count == N); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, GetRealCount) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); int N = 10000; auto dataset = DataGen(schema, N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), 
insert_data.data(), insert_data.size()); assert(res.error_code == Success); auto pks = dataset.get_col(schema->get_primary_field_id().value()); std::vector delete_row_ids(pks.begin(), pks.begin() + 3); auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); auto delete_data = serialize(ids.get()); uint64_t delete_timestamps[] = {dataset.timestamps_[N - 1] + 1, dataset.timestamps_[N - 1] + 2, dataset.timestamps_[N - 1] + 3}; auto del_offset = PreDelete(segment, 3); auto del_res = Delete(segment, del_offset, 3, delete_data.data(), delete_data.size(), delete_timestamps); assert(del_res.error_code == Success); auto real_count = GetRealCount(segment); assert(real_count == N - delete_row_ids.size()); DeleteCollection(collection); DeleteSegment(segment); } // TEST(CApiTest, SchemaTest) { // std::string schema_string = // "id: 6873737669791618215\nname: \"collection0\"\nschema: \u003c\n " // "field_metas: \u003c\n field_name: \"age\"\n type: INT32\n dim: 1\n \u003e\n " // "field_metas: \u003c\n field_name: \"field_1\"\n type: VECTOR_FLOAT\n dim: 16\n \u003e\n" // "\u003e\ncreate_time: 1600416765\nsegment_ids: 6873737669791618215\npartition_tags: \"default\"\n"; // // auto collection = NewCollection(schema_string.data()); // auto segment = NewSegment(collection, Growing, -1); // DeleteCollection(collection); // DeleteSegment(segment); //} void CheckSearchResultDuplicate(const std::vector& results) { auto nq = ((SearchResult*)results[0])->total_nq_; std::unordered_set pk_set; for (int qi = 0; qi < nq; qi++) { pk_set.clear(); for (int i = 0; i < results.size(); i++) { auto search_result = (SearchResult*)results[i]; ASSERT_EQ(nq, search_result->total_nq_); auto topk_beg = search_result->topk_per_nq_prefix_sum_[qi]; auto topk_end = search_result->topk_per_nq_prefix_sum_[qi + 1]; for (int ki = topk_beg; ki < topk_end; ki++) { ASSERT_NE(search_result->seg_offsets_[ki], INVALID_SEG_OFFSET); auto ret = 
pk_set.insert(search_result->primary_keys_[ki]); ASSERT_TRUE(ret.second); } } } } TEST(CApiTest, ReudceNullResult) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); int N = 10000; auto dataset = DataGen(schema, N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"( { "bool": { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 10, "round_decimal": 3 } } } })"; int num_queries = 10; int topK = 10; auto blob = generate_max_float_query_data(num_queries, num_queries / 2); void* plan = nullptr; auto status = CreateSearchPlan(collection, dsl_string, &plan); assert(status.error_code == Success); void* placeholderGroup = nullptr; status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); assert(status.error_code == Success); std::vector placeholderGroups; placeholderGroups.push_back(placeholderGroup); dataset.timestamps_.clear(); dataset.timestamps_.push_back(1); { auto slice_nqs = std::vector{10}; auto slice_topKs = std::vector{1}; std::vector results; CSearchResult res; status = Search(segment, plan, placeholderGroup, dataset.timestamps_[0], &res); assert(status.error_code == Success); results.push_back(res); CSearchResultDataBlobs cSearchResultData; status = ReduceSearchResultsAndFillData(&cSearchResultData, plan, results.data(), results.size(), slice_nqs.data(), slice_topKs.data(), slice_nqs.size()); assert(status.error_code == Success); auto search_result = (SearchResult*)results[0]; auto size = search_result->result_offsets_.size(); EXPECT_EQ(size, num_queries / 2); DeleteSearchResult(res); 
DeleteSearchResultDataBlobs(cSearchResultData); } DeleteSearchPlan(plan); DeletePlaceholderGroup(placeholderGroup); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, ReduceRemoveDuplicates) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); int N = 10000; auto dataset = DataGen(schema, N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"( { "bool": { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 10, "round_decimal": 3 } } } })"; int num_queries = 10; int topK = 10; auto blob = generate_query_data(num_queries); void* plan = nullptr; auto status = CreateSearchPlan(collection, dsl_string, &plan); assert(status.error_code == Success); void* placeholderGroup = nullptr; status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); assert(status.error_code == Success); std::vector placeholderGroups; placeholderGroups.push_back(placeholderGroup); dataset.timestamps_.clear(); dataset.timestamps_.push_back(1); { auto slice_nqs = std::vector{num_queries / 2, num_queries / 2}; auto slice_topKs = std::vector{topK / 2, topK}; std::vector results; CSearchResult res1, res2; status = Search(segment, plan, placeholderGroup, dataset.timestamps_[0], &res1); assert(status.error_code == Success); status = Search(segment, plan, placeholderGroup, dataset.timestamps_[0], &res2); assert(status.error_code == Success); results.push_back(res1); results.push_back(res2); CSearchResultDataBlobs cSearchResultData; status = ReduceSearchResultsAndFillData(&cSearchResultData, plan, results.data(), results.size(), slice_nqs.data(), 
slice_topKs.data(), slice_nqs.size()); assert(status.error_code == Success); // TODO:: insert no duplicate pks and check reduce results CheckSearchResultDuplicate(results); DeleteSearchResult(res1); DeleteSearchResult(res2); DeleteSearchResultDataBlobs(cSearchResultData); } { int nq1 = num_queries / 3; int nq2 = num_queries / 3; int nq3 = num_queries - nq1 - nq2; auto slice_nqs = std::vector{nq1, nq2, nq3}; auto slice_topKs = std::vector{topK / 2, topK, topK}; std::vector results; CSearchResult res1, res2, res3; status = Search(segment, plan, placeholderGroup, dataset.timestamps_[0], &res1); assert(status.error_code == Success); status = Search(segment, plan, placeholderGroup, dataset.timestamps_[0], &res2); assert(status.error_code == Success); status = Search(segment, plan, placeholderGroup, dataset.timestamps_[0], &res3); assert(status.error_code == Success); results.push_back(res1); results.push_back(res2); results.push_back(res3); CSearchResultDataBlobs cSearchResultData; status = ReduceSearchResultsAndFillData(&cSearchResultData, plan, results.data(), results.size(), slice_nqs.data(), slice_topKs.data(), slice_nqs.size()); assert(status.error_code == Success); // TODO:: insert no duplicate pks and check reduce results CheckSearchResultDuplicate(results); DeleteSearchResult(res1); DeleteSearchResult(res2); DeleteSearchResult(res3); DeleteSearchResultDataBlobs(cSearchResultData); } DeleteSearchPlan(plan); DeletePlaceholderGroup(placeholderGroup); DeleteCollection(collection); DeleteSegment(segment); } void testReduceSearchWithExpr(int N, int topK, int num_queries) { auto collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(collection, Growing, -1); auto schema = ((milvus::segcore::Collection*)collection)->get_schema(); auto dataset = DataGen(schema, N); int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), 
dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); auto fmt = boost::format(R"(vector_anns: < field_id: 100 query_info: < topk: %1% metric_type: "L2" search_params: "{\"nprobe\": 10}" > placeholder_tag: "$0"> output_field_ids: 100)") % topK; auto serialized_expr_plan = fmt.str(); auto blob = generate_query_data(num_queries); void* plan = nullptr; auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan.data()); auto status = CreateSearchPlanByExpr(collection, binary_plan.data(), binary_plan.size(), &plan); assert(status.error_code == Success); void* placeholderGroup = nullptr; status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); assert(status.error_code == Success); std::vector placeholderGroups; placeholderGroups.push_back(placeholderGroup); dataset.timestamps_.clear(); dataset.timestamps_.push_back(1); std::vector results; CSearchResult res1; CSearchResult res2; auto res = Search(segment, plan, placeholderGroup, dataset.timestamps_[N - 1], &res1); assert(res.error_code == Success); res = Search(segment, plan, placeholderGroup, dataset.timestamps_[N - 1], &res2); assert(res.error_code == Success); results.push_back(res1); results.push_back(res2); auto slice_nqs = std::vector{num_queries / 2, num_queries / 2}; if (num_queries == 1) { slice_nqs = std::vector{num_queries}; } auto slice_topKs = std::vector{topK / 2, topK}; if (topK == 1) { slice_topKs = std::vector{topK, topK}; } // 1. 
reduce CSearchResultDataBlobs cSearchResultData; status = ReduceSearchResultsAndFillData(&cSearchResultData, plan, results.data(), results.size(), slice_nqs.data(), slice_topKs.data(), slice_nqs.size()); assert(status.error_code == Success); auto search_result_data_blobs = reinterpret_cast(cSearchResultData); // check result for (int i = 0; i < slice_nqs.size(); i++) { milvus::proto::schema::SearchResultData search_result_data; auto suc = search_result_data.ParseFromArray(search_result_data_blobs->blobs[i].data(), search_result_data_blobs->blobs[i].size()); assert(suc); assert(search_result_data.num_queries() == slice_nqs[i]); assert(search_result_data.top_k() == slice_topKs[i]); assert(search_result_data.scores().size() == search_result_data.topks().at(0) * slice_nqs[i]); assert(search_result_data.ids().int_id().data_size() == search_result_data.topks().at(0) * slice_nqs[i]); // check real topks assert(search_result_data.topks().size() == slice_nqs[i]); for (auto real_topk : search_result_data.topks()) { assert(real_topk <= slice_topKs[i]); } } DeleteSearchResultDataBlobs(cSearchResultData); DeleteSearchPlan(plan); DeletePlaceholderGroup(placeholderGroup); DeleteSearchResult(res1); DeleteSearchResult(res2); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, ReduceSearchWithExpr) { testReduceSearchWithExpr(2, 1, 1); testReduceSearchWithExpr(2, 10, 10); testReduceSearchWithExpr(100, 1, 1); testReduceSearchWithExpr(100, 10, 10); testReduceSearchWithExpr(10000, 1, 1); testReduceSearchWithExpr(10000, 10, 10); } TEST(CApiTest, LoadIndexInfo) { // generator index constexpr auto TOPK = 10; auto N = 1024 * 10; auto [raw_data, timestamps, uids] = generate_data(N); auto indexing = std::make_shared(); auto conf = knowhere::Config{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}, {knowhere::meta::DIM, DIM}, {knowhere::meta::TOPK, TOPK}, {knowhere::indexparam::NLIST, 100}, {knowhere::indexparam::NPROBE, 4}, {knowhere::indexparam::M, 4}, 
{knowhere::indexparam::NBITS, 8}, {knowhere::meta::DEVICE_ID, 0}}; auto database = knowhere::GenDataset(N, DIM, raw_data.data()); indexing->Train(database, conf); indexing->AddWithoutIds(database, conf); EXPECT_EQ(indexing->Count(), N); EXPECT_EQ(indexing->Dim(), DIM); auto binary_set = indexing->Serialize(conf); CBinarySet c_binary_set = (CBinarySet)&binary_set; void* c_load_index_info = nullptr; auto status = NewLoadIndexInfo(&c_load_index_info, c_storage_config); assert(status.error_code == Success); std::string index_param_key1 = "index_type"; std::string index_param_value1 = "IVF_PQ"; status = AppendIndexParam(c_load_index_info, index_param_key1.data(), index_param_value1.data()); std::string index_param_key2 = "index_mode"; std::string index_param_value2 = "CPU"; status = AppendIndexParam(c_load_index_info, index_param_key2.data(), index_param_value2.data()); assert(status.error_code == Success); std::string index_param_key3 = knowhere::meta::METRIC_TYPE; std::string index_param_value3 = knowhere::metric::L2; status = AppendIndexParam(c_load_index_info, index_param_key3.data(), index_param_value3.data()); assert(status.error_code == Success); std::string field_name = "field0"; status = AppendFieldInfo(c_load_index_info, 0, 0, 0, 0, CDataType::FloatVector); assert(status.error_code == Success); status = AppendIndex(c_load_index_info, c_binary_set); assert(status.error_code == Success); DeleteLoadIndexInfo(c_load_index_info); } TEST(CApiTest, LoadIndex_Search) { // generator index constexpr auto TOPK = 10; auto N = 1024 * 1024; auto num_query = 100; auto [raw_data, timestamps, uids] = generate_data(N); auto indexing = std::make_shared(); auto conf = knowhere::Config{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}, {knowhere::meta::DIM, DIM}, {knowhere::meta::TOPK, TOPK}, {knowhere::indexparam::NLIST, 100}, {knowhere::indexparam::NPROBE, 4}, {knowhere::indexparam::M, 4}, {knowhere::indexparam::NBITS, 8}, {knowhere::meta::DEVICE_ID, 0}}; auto database = 
knowhere::GenDataset(N, DIM, raw_data.data()); indexing->Train(database, conf); indexing->AddWithoutIds(database, conf); EXPECT_EQ(indexing->Count(), N); EXPECT_EQ(indexing->Dim(), DIM); // serializ index to binarySet auto binary_set = indexing->Serialize(conf); // fill loadIndexInfo milvus::segcore::LoadIndexInfo load_index_info; auto& index_params = load_index_info.index_params; index_params["index_type"] = "IVF_PQ"; index_params["index_mode"] = "CPU"; auto mode = knowhere::IndexMode::MODE_CPU; load_index_info.index = std::make_unique(index_params["index_type"], knowhere::metric::L2, mode); load_index_info.index->Load(binary_set); // search auto query_dataset = knowhere::GenDataset(num_query, DIM, raw_data.data() + DIM * 4200); auto result = indexing->Query(query_dataset, conf, nullptr); auto ids = knowhere::GetDatasetIDs(result); auto dis = knowhere::GetDatasetDistance(result); // for (int i = 0; i < std::min(num_query * K, 100); ++i) { // std::cout << ids[i] << "->" << dis[i] << std::endl; //} } TEST(CApiTest, Indexing_Without_Predicate) { // insert data to segment constexpr auto TOPK = 5; std::string schema_string = generate_collection_schema("L2", DIM, false); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); auto segment = NewSegment(collection, Growing, -1); auto N = ROW_COUNT; auto dataset = DataGen(schema, N); auto vec_col = dataset.get_col(FieldId(100)); auto query_ptr = vec_col.data() + 42000 * DIM; int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"( { "bool": { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 5, "round_decimal": -1 } } } })"; // create place_holder_group int num_queries = 5; auto 
raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index void* plan = nullptr; auto status = CreateSearchPlan(collection, dsl_string, &plan); assert(status.error_code == Success); void* placeholderGroup = nullptr; status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); assert(status.error_code == Success); std::vector placeholderGroups; placeholderGroups.push_back(placeholderGroup); Timestamp time = 10000000; CSearchResult c_search_result_on_smallIndex; auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex); assert(res_before_load_index.error_code == Success); // load index to segment auto indexing = generate_index(vec_col.data(), DataType::VECTOR_FLOAT, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFPQ, DIM, N); // gen query dataset auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr); auto vec_index = dynamic_cast(indexing.get()); auto search_plan = reinterpret_cast(plan); SearchInfo search_info = search_plan->plan_node_->search_info_; auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr); auto ids = result_on_index->seg_offsets_.data(); auto dis = result_on_index->distances_.data(); std::vector vec_ids(ids, ids + TOPK * num_queries); std::vector vec_dis; for (int j = 0; j < TOPK * num_queries; ++j) { vec_dis.push_back(dis[j] * -1); } auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex; search_result_on_raw_index->seg_offsets_ = vec_ids; search_result_on_raw_index->distances_ = vec_dis; auto binary_set = indexing->Serialize(milvus::Config{}); void* c_load_index_info = nullptr; status = NewLoadIndexInfo(&c_load_index_info, c_storage_config); assert(status.error_code == Success); std::string index_type_key = "index_type"; std::string index_type_value = "IVF_PQ"; std::string index_mode_key = "index_mode"; std::string 
index_mode_value = "CPU"; std::string metric_type_key = "metric_type"; std::string metric_type_value = "L2"; AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str()); AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str()); AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector); AppendIndex(c_load_index_info, (CBinarySet)&binary_set); // load index for vec field, load raw data for scalar filed auto sealed_segment = SealedCreator(schema, dataset); sealed_segment->DropFieldData(FieldId(100)); sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info); CSearchResult c_search_result_on_bigIndex; auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex); assert(res_after_load_index.error_code == Success); auto search_result_on_raw_index_json = SearchResultToJson(*search_result_on_raw_index); auto search_result_on_bigIndex_json = SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex)); // std::cout << search_result_on_raw_index_json.dump(1) << std::endl; // std::cout << search_result_on_bigIndex_json.dump(1) << std::endl; ASSERT_EQ(search_result_on_raw_index_json.dump(1), search_result_on_bigIndex_json.dump(1)); DeleteLoadIndexInfo(c_load_index_info); DeleteSearchPlan(plan); DeletePlaceholderGroup(placeholderGroup); DeleteSearchResult(c_search_result_on_smallIndex); DeleteSearchResult(c_search_result_on_bigIndex); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, Indexing_Expr_Without_Predicate) { // insert data to segment constexpr auto TOPK = 5; std::string schema_string = generate_collection_schema("L2", DIM, false); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); auto segment = NewSegment(collection, Growing, -1); auto N = ROW_COUNT; auto 
dataset = DataGen(schema, N); auto vec_col = dataset.get_col(FieldId(100)); auto query_ptr = vec_col.data() + 42000 * DIM; int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* serialized_expr_plan = R"(vector_anns: < field_id: 100 query_info: < topk: 5 round_decimal: -1 metric_type: "L2" search_params: "{\"nprobe\": 10}" > placeholder_tag: "$0" >)"; // create place_holder_group int num_queries = 5; auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index void* plan = nullptr; auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan); auto status = CreateSearchPlanByExpr(collection, binary_plan.data(), binary_plan.size(), &plan); assert(status.error_code == Success); void* placeholderGroup = nullptr; status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); assert(status.error_code == Success); std::vector placeholderGroups; placeholderGroups.push_back(placeholderGroup); Timestamp time = 10000000; CSearchResult c_search_result_on_smallIndex; auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex); assert(res_before_load_index.error_code == Success); // load index to segment auto indexing = generate_index(vec_col.data(), DataType::VECTOR_FLOAT, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFPQ, DIM, N); // gen query dataset auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr); auto vec_index = dynamic_cast(indexing.get()); auto search_plan = reinterpret_cast(plan); SearchInfo search_info = search_plan->plan_node_->search_info_; auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr); auto ids = 
result_on_index->seg_offsets_.data(); auto dis = result_on_index->distances_.data(); std::vector vec_ids(ids, ids + TOPK * num_queries); std::vector vec_dis; for (int j = 0; j < TOPK * num_queries; ++j) { vec_dis.push_back(dis[j] * -1); } auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex; search_result_on_raw_index->seg_offsets_ = vec_ids; search_result_on_raw_index->distances_ = vec_dis; auto binary_set = indexing->Serialize(milvus::Config{}); void* c_load_index_info = nullptr; status = NewLoadIndexInfo(&c_load_index_info, c_storage_config); assert(status.error_code == Success); std::string index_type_key = "index_type"; std::string index_type_value = "IVF_PQ"; std::string index_mode_key = "index_mode"; std::string index_mode_value = "CPU"; std::string metric_type_key = "metric_type"; std::string metric_type_value = "L2"; AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str()); AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str()); AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector); AppendIndex(c_load_index_info, (CBinarySet)&binary_set); // load index for vec field, load raw data for scalar filed auto sealed_segment = SealedCreator(schema, dataset); sealed_segment->DropFieldData(FieldId(100)); sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info); CSearchResult c_search_result_on_bigIndex; auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex); assert(res_after_load_index.error_code == Success); auto search_result_on_raw_index_json = SearchResultToJson(*search_result_on_raw_index); auto search_result_on_bigIndex_json = SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex)); // std::cout << search_result_on_raw_index_json.dump(1) << std::endl; // std::cout << 
search_result_on_bigIndex_json.dump(1) << std::endl; ASSERT_EQ(search_result_on_raw_index_json.dump(1), search_result_on_bigIndex_json.dump(1)); DeleteLoadIndexInfo(c_load_index_info); DeleteSearchPlan(plan); DeletePlaceholderGroup(placeholderGroup); DeleteSearchResult(c_search_result_on_smallIndex); DeleteSearchResult(c_search_result_on_bigIndex); DeleteCollection(collection); DeleteSegment(segment); } TEST(CApiTest, Indexing_With_float_Predicate_Range) { // insert data to segment constexpr auto TOPK = 5; std::string schema_string = generate_collection_schema("L2", DIM, false); auto collection = NewCollection(schema_string.c_str()); auto schema = ((segcore::Collection*)collection)->get_schema(); auto segment = NewSegment(collection, Growing, -1); auto N = ROW_COUNT; auto dataset = DataGen(schema, N); auto vec_col = dataset.get_col(FieldId(100)); auto query_ptr = vec_col.data() + 42000 * DIM; int64_t offset; PreInsert(segment, N, &offset); auto insert_data = serialize(dataset.raw_); auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"({ "bool": { "must": [ { "range": { "counter": { "GE": 42000, "LT": 42010 } } }, { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 5, "round_decimal": -1 } } } ] } })"; // create place_holder_group int num_queries = 10; auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index void* plan = nullptr; auto status = CreateSearchPlan(collection, dsl_string, &plan); assert(status.error_code == Success); void* placeholderGroup = nullptr; status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); assert(status.error_code == Success); std::vector placeholderGroups; placeholderGroups.push_back(placeholderGroup); Timestamp time = 
10000000; CSearchResult c_search_result_on_smallIndex; auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex); assert(res_before_load_index.error_code == Success); // load index to segment auto indexing = generate_index(vec_col.data(), DataType::VECTOR_FLOAT, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFPQ, DIM, N); // gen query dataset auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr); auto vec_index = dynamic_cast(indexing.get()); auto search_plan = reinterpret_cast(plan); SearchInfo search_info = search_plan->plan_node_->search_info_; auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr); auto ids = result_on_index->seg_offsets_.data(); auto dis = result_on_index->distances_.data(); std::vector vec_ids(ids, ids + TOPK * num_queries); std::vector vec_dis; for (int j = 0; j < TOPK * num_queries; ++j) { vec_dis.push_back(dis[j] * -1); } auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex; search_result_on_raw_index->seg_offsets_ = vec_ids; search_result_on_raw_index->distances_ = vec_dis; auto binary_set = indexing->Serialize(milvus::Config{}); void* c_load_index_info = nullptr; status = NewLoadIndexInfo(&c_load_index_info, c_storage_config); assert(status.error_code == Success); std::string index_type_key = "index_type"; std::string index_type_value = "IVF_PQ"; std::string index_mode_key = "index_mode"; std::string index_mode_value = "CPU"; std::string metric_type_key = "metric_type"; std::string metric_type_value = "L2"; AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str()); AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str()); AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector); AppendIndex(c_load_index_info, (CBinarySet)&binary_set); // load index for vec field, 
// load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

// search the sealed segment through the loaded big index
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
assert(res_after_load_index.error_code == Success);

auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
    auto offset = i * TOPK;
    // query i was copied from row 42000 + i, so that row must be the top hit,
    // and its distance must match the raw (small-index) search result
    ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 42000 + i);
    ASSERT_EQ(search_result_on_bigIndex->distances_[offset], search_result_on_raw_index->distances_[offset]);
}

// release all C-API owned resources
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}

// Expression-plan variant of the float range test: predicate
// 420000 <= counter < 420010 on a 1M-row growing segment; verifies that a
// sealed segment with an IVF_PQ index returns the same top hits/distances as
// the raw search.
TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) {
    // insert data to segment
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("L2", DIM, false);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Growing, -1);

    auto N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    // queries are copies of the vectors starting at row 420000
    auto query_ptr = vec_col.data() + 420000 * DIM;

    {
        int64_t offset;
        PreInsert(segment, N, &offset);
        auto insert_data = serialize(dataset.raw_);
        auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
        assert(ins_res.error_code == Success);
    }

    // textual plan: vector search on field 100 AND range filter on field 101
    const char* serialized_expr_plan = R"(vector_anns: < field_id: 100 predicates: < binary_expr: < op: LogicalAnd left: < unary_range_expr: < column_info: < field_id: 101 data_type: Int64 > op: GreaterEqual value: < int64_val: 420000 > > > right: < unary_range_expr: < column_info: < field_id: 101 data_type: Int64 > op: LessThan value: < int64_val: 420010 > > > > > query_info: < topk: 5 round_decimal: -1 metric_type: "L2" search_params: "{\"nprobe\": 10}" > placeholder_tag: "$0" >)";

    // create place_holder_group
    int num_queries = 10;
    auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan = nullptr;
    auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
    auto status = CreateSearchPlanByExpr(collection, binary_plan.data(), binary_plan.size(), &plan);
    assert(status.error_code == Success);
    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);
    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    CSearchResult c_search_result_on_smallIndex;
    auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex);
    assert(res_before_load_index.error_code == Success);

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_FLOAT, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);

    // gen query dataset
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    auto vec_index = dynamic_cast(indexing.get());
    auto search_plan = reinterpret_cast(plan);
    SearchInfo search_info = search_plan->plan_node_->search_info_;
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    auto ids = result_on_index->seg_offsets_.data();
    auto dis = result_on_index->distances_.data();
    std::vector vec_ids(ids, ids + TOPK * num_queries);
    std::vector vec_dis;
    // distances from the standalone index query are negated before comparison
    // (sign convention of the index result — TODO(review) confirm)
    for (int j = 0; j < TOPK * num_queries; ++j) {
        vec_dis.push_back(dis[j] * -1);
    }

    // overwrite the small-index result with the standalone-index reference
    auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex;
    search_result_on_raw_index->seg_offsets_ = vec_ids;
    search_result_on_raw_index->distances_ = vec_dis;

    // serialize the index and stage it into a LoadIndexInfo
    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "IVF_PQ";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "L2";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(FieldId(100));
    sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

    CSearchResult c_search_result_on_bigIndex;
    auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        auto offset = i * TOPK;
        // query i was copied from row 420000 + i
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 420000 + i);
        ASSERT_EQ(search_result_on_bigIndex->distances_[offset], search_result_on_raw_index->distances_[offset]);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_smallIndex);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
    DeleteSegment(segment);
}

TEST(CApiTest,
Indexing_With_float_Predicate_Term) {
    // DSL term-predicate variant: counter IN [42000..42004] on a float-vector
    // segment; compares sealed-segment IVF_PQ search against the raw search.
    // insert data to segment
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("L2", DIM, false);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Growing, -1);

    auto N = ROW_COUNT;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    // queries are copies of the vectors starting at row 42000
    auto query_ptr = vec_col.data() + 42000 * DIM;

    int64_t offset;
    PreInsert(segment, N, &offset);
    auto insert_data = serialize(dataset.raw_);
    auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
    assert(ins_res.error_code == Success);

    const char* dsl_string = R"({ "bool": { "must": [ { "term": { "counter": { "values": [42000, 42001, 42002, 42003, 42004] } } }, { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 5, "round_decimal": -1 } } } ] } })";

    // create place_holder_group
    int num_queries = 5;
    auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan = nullptr;
    auto status = CreateSearchPlan(collection, dsl_string, &plan);
    assert(status.error_code == Success);
    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);
    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    CSearchResult c_search_result_on_smallIndex;
    auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex);
    assert(res_before_load_index.error_code == Success);

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_FLOAT, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);

    // gen query dataset
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    auto vec_index = dynamic_cast(indexing.get());
    auto search_plan = reinterpret_cast(plan);
    SearchInfo search_info = search_plan->plan_node_->search_info_;
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    auto ids = result_on_index->seg_offsets_.data();
    auto dis = result_on_index->distances_.data();
    std::vector vec_ids(ids, ids + TOPK * num_queries);
    std::vector vec_dis;
    // negate standalone-index distances (result sign convention)
    for (int j = 0; j < TOPK * num_queries; ++j) {
        vec_dis.push_back(dis[j] * -1);
    }

    auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex;
    search_result_on_raw_index->seg_offsets_ = vec_ids;
    search_result_on_raw_index->distances_ = vec_dis;

    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "IVF_PQ";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "L2";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(FieldId(100));
    sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

    CSearchResult c_search_result_on_bigIndex;
    auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        auto offset = i * TOPK;
        // query i was copied from row 42000 + i
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 42000 + i);
        ASSERT_EQ(search_result_on_bigIndex->distances_[offset], search_result_on_raw_index->distances_[offset]);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_smallIndex);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
    DeleteSegment(segment);
}

// Expression-plan variant of the term test: counter IN [420000..420004] on a
// 1M-row segment.
TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
    // insert data to segment
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("L2", DIM, false);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Growing, -1);

    auto N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    auto query_ptr = vec_col.data() + 420000 * DIM;

    int64_t offset;
    PreInsert(segment, N, &offset);
    auto insert_data = serialize(dataset.raw_);
    auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
    assert(ins_res.error_code == Success);

    const char* serialized_expr_plan = R"( vector_anns: < field_id: 100 predicates: < term_expr: < column_info: < field_id: 101 data_type: Int64 > values: < int64_val: 420000 > values: < int64_val: 420001 > values: < int64_val: 420002 > values: < int64_val: 420003 > values: < int64_val: 420004 > > > query_info: < topk: 5 round_decimal: -1 metric_type: "L2" search_params: "{\"nprobe\": 10}" > placeholder_tag: "$0" >)";

    // create place_holder_group
    int num_queries = 5;
    auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan
= nullptr;
    auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
    auto status = CreateSearchPlanByExpr(collection, binary_plan.data(), binary_plan.size(), &plan);
    assert(status.error_code == Success);
    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);
    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    CSearchResult c_search_result_on_smallIndex;
    auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex);
    assert(res_before_load_index.error_code == Success);

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_FLOAT, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);

    // gen query dataset
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    auto vec_index = dynamic_cast(indexing.get());
    auto search_plan = reinterpret_cast(plan);
    SearchInfo search_info = search_plan->plan_node_->search_info_;
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    auto ids = result_on_index->seg_offsets_.data();
    auto dis = result_on_index->distances_.data();
    std::vector vec_ids(ids, ids + TOPK * num_queries);
    std::vector vec_dis;
    // negate standalone-index distances (result sign convention)
    for (int j = 0; j < TOPK * num_queries; ++j) {
        vec_dis.push_back(dis[j] * -1);
    }

    auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex;
    search_result_on_raw_index->seg_offsets_ = vec_ids;
    search_result_on_raw_index->distances_ = vec_dis;

    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "IVF_PQ";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "L2";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(FieldId(100));
    sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

    CSearchResult c_search_result_on_bigIndex;
    auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        auto offset = i * TOPK;
        // query i was copied from row 420000 + i
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 420000 + i);
        ASSERT_EQ(search_result_on_bigIndex->distances_[offset], search_result_on_raw_index->distances_[offset]);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_smallIndex);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
    DeleteSegment(segment);
}

// Binary-vector (JACCARD metric) variant of the DSL range-predicate test,
// using a BIN_IVF_FLAT index on a 1M-row segment.
TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
    // insert data to segment
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("JACCARD", DIM, true);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Growing, -1);

    auto N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    // binary vectors pack DIM bits into DIM/8 bytes per row
    auto query_ptr = vec_col.data() + 420000 * DIM / 8;
    int64_t offset;
PreInsert(segment, N, &offset);
    auto insert_data = serialize(dataset.raw_);
    auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
    assert(ins_res.error_code == Success);

    const char* dsl_string = R"({ "bool": { "must": [ { "range": { "counter": { "GE": 420000, "LT": 420010 } } }, { "vector": { "fakevec": { "metric_type": "JACCARD", "params": { "nprobe": 10 }, "query": "$0", "topk": 5, "round_decimal": -1 } } } ] } })";

    // create place_holder_group
    int num_queries = 5;
    auto raw_group = CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan = nullptr;
    auto status = CreateSearchPlan(collection, dsl_string, &plan);
    assert(status.error_code == Success);
    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);
    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    CSearchResult c_search_result_on_smallIndex;
    auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex);
    assert(res_before_load_index.error_code == Success);

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_BINARY, knowhere::metric::JACCARD, IndexEnum::INDEX_FAISS_BIN_IVFFLAT, DIM, N);

    // gen query dataset
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    auto vec_index = dynamic_cast(indexing.get());
    auto search_plan = reinterpret_cast(plan);
    SearchInfo search_info = search_plan->plan_node_->search_info_;
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    auto ids = result_on_index->seg_offsets_.data();
    auto dis = result_on_index->distances_.data();
    std::vector vec_ids(ids, ids + TOPK * num_queries);
    std::vector vec_dis;
    // negate standalone-index distances (result sign convention)
    for (int j = 0; j < TOPK * num_queries; ++j) {
        vec_dis.push_back(dis[j] * -1);
    }

    auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex;
    search_result_on_raw_index->seg_offsets_ = vec_ids;
    search_result_on_raw_index->distances_ = vec_dis;

    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "BIN_IVF_FLAT";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "JACCARD";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(FieldId(100));
    sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

    CSearchResult c_search_result_on_bigIndex;
    auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        auto offset = i * TOPK;
        // query i was copied from row 420000 + i
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 420000 + i);
        ASSERT_EQ(search_result_on_bigIndex->distances_[offset], search_result_on_raw_index->distances_[offset]);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_smallIndex);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
    DeleteSegment(segment);
}

// Expression-plan variant of the binary range test: 42000 <= counter < 42010
// with JACCARD metric on ROW_COUNT rows.
TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
    // insert data to segment
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("JACCARD", DIM, true);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Growing, -1);

    auto N = ROW_COUNT;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    auto query_ptr = vec_col.data() + 42000 * DIM / 8;

    int64_t offset;
    PreInsert(segment, N, &offset);
    auto insert_data = serialize(dataset.raw_);
    auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
    assert(ins_res.error_code == Success);

    const char* serialized_expr_plan = R"(vector_anns: < field_id: 100 predicates: < binary_expr: < op: LogicalAnd left: < unary_range_expr: < column_info: < field_id: 101 data_type: Int64 > op: GreaterEqual value: < int64_val: 42000 > > > right: < unary_range_expr: < column_info: < field_id: 101 data_type: Int64 > op: LessThan value: < int64_val: 42010 > > > > > query_info: < topk: 5 round_decimal: -1 metric_type: "JACCARD" search_params: "{\"nprobe\": 10}" > placeholder_tag: "$0" >)";

    // create place_holder_group
    int num_queries = 5;
    auto raw_group = CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan = nullptr;
    auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
    auto status = CreateSearchPlanByExpr(collection, binary_plan.data(), binary_plan.size(), &plan);
    assert(status.error_code == Success);
    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);
    std::vector
placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    CSearchResult c_search_result_on_smallIndex;
    auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex);
    // unlike the sibling tests this one reports the error message on failure
    ASSERT_TRUE(res_before_load_index.error_code == Success) << res_before_load_index.error_msg;

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_BINARY, knowhere::metric::JACCARD, IndexEnum::INDEX_FAISS_BIN_IVFFLAT, DIM, N);

    // gen query dataset
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    auto vec_index = dynamic_cast(indexing.get());
    auto search_plan = reinterpret_cast(plan);
    SearchInfo search_info = search_plan->plan_node_->search_info_;
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    auto ids = result_on_index->seg_offsets_.data();
    auto dis = result_on_index->distances_.data();
    std::vector vec_ids(ids, ids + TOPK * num_queries);
    std::vector vec_dis;
    // negate standalone-index distances (result sign convention)
    for (int j = 0; j < TOPK * num_queries; ++j) {
        vec_dis.push_back(dis[j] * -1);
    }

    auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex;
    search_result_on_raw_index->seg_offsets_ = vec_ids;
    search_result_on_raw_index->distances_ = vec_dis;

    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "BIN_IVF_FLAT";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "JACCARD";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(FieldId(100));
    sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

    CSearchResult c_search_result_on_bigIndex;
    auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        auto offset = i * TOPK;
        // query i was copied from row 42000 + i
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 42000 + i);
        ASSERT_EQ(search_result_on_bigIndex->distances_[offset], search_result_on_raw_index->distances_[offset]);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_smallIndex);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
    DeleteSegment(segment);
}

// Binary-vector term-predicate test; additionally exercises the reduce path
// (ReduceSearchResultsAndFillData) before checking per-query hits.
TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
    // insert data to segment
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("JACCARD", DIM, true);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Growing, -1);

    auto N = ROW_COUNT;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    auto query_ptr = vec_col.data() + 42000 * DIM / 8;

    int64_t offset;
    PreInsert(segment, N, &offset);
    auto insert_data = serialize(dataset.raw_);
    auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
    assert(ins_res.error_code == Success);

    const char* dsl_string = R"({ "bool": { "must": [ { "term": { "counter": {
"values": [42000, 42001, 42002, 42003, 42004] } } }, { "vector": { "fakevec": { "metric_type": "JACCARD", "params": { "nprobe": 10 }, "query": "$0", "topk": 5, "round_decimal": -1 } } } ] } })";

    // create place_holder_group
    int num_queries = 5;
    int topK = 5;
    auto raw_group = CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan = nullptr;
    auto status = CreateSearchPlan(collection, dsl_string, &plan);
    assert(status.error_code == Success);
    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);
    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    CSearchResult c_search_result_on_smallIndex;
    auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex);
    assert(res_before_load_index.error_code == Success);

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_BINARY, knowhere::metric::JACCARD, IndexEnum::INDEX_FAISS_BIN_IVFFLAT, DIM, N);

    // gen query dataset
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    auto vec_index = dynamic_cast(indexing.get());
    auto search_plan = reinterpret_cast(plan);
    SearchInfo search_info = search_plan->plan_node_->search_info_;
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    auto ids = result_on_index->seg_offsets_.data();
    auto dis = result_on_index->distances_.data();
    std::vector vec_ids(ids, ids + TOPK * num_queries);
    std::vector vec_dis;
    // negate standalone-index distances (result sign convention)
    for (int j = 0; j < TOPK * num_queries; ++j) {
        vec_dis.push_back(dis[j] * -1);
    }

    auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex;
    search_result_on_raw_index->seg_offsets_ = vec_ids;
    search_result_on_raw_index->distances_ = vec_dis;

    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "BIN_IVF_FLAT";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "JACCARD";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(FieldId(100));
    sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

    CSearchResult c_search_result_on_bigIndex;
    auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    // run the C-API reduce over the single result blob
    std::vector results;
    results.push_back(c_search_result_on_bigIndex);
    auto slice_nqs = std::vector{num_queries};
    auto slice_topKs = std::vector{topK};
    CSearchResultDataBlobs cSearchResultData;
    status = ReduceSearchResultsAndFillData(&cSearchResultData, plan, results.data(), results.size(), slice_nqs.data(), slice_topKs.data(), slice_nqs.size());
    assert(status.error_code == Success);
    // status = ReduceSearchResultsAndFillData(plan, results.data(), results.size());
    // assert(status.error_code == Success);

    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        // after reduce, per-query offsets come from the prefix-sum table
        ASSERT_EQ(search_result_on_bigIndex->topk_per_nq_prefix_sum_.size(), search_result_on_bigIndex->total_nq_ + 1);
        auto offset = search_result_on_bigIndex->topk_per_nq_prefix_sum_[i];
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 42000 + i);
        ASSERT_EQ(search_result_on_bigIndex->distances_[offset], search_result_on_raw_index->distances_[i * TOPK]);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_smallIndex);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
    DeleteSegment(segment);
    DeleteSearchResultDataBlobs(cSearchResultData);
}

// Expression-plan variant of the binary term test (counter IN [42000..42004],
// JACCARD), also exercising the reduce path.
TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
    // insert data to segment
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("JACCARD", DIM, true);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Growing, -1);

    auto N = ROW_COUNT;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    auto query_ptr = vec_col.data() + 42000 * DIM / 8;

    int64_t offset;
    PreInsert(segment, N, &offset);
    auto insert_data = serialize(dataset.raw_);
    auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
    assert(ins_res.error_code == Success);

    const char* serialized_expr_plan = R"(vector_anns: < field_id: 100 predicates: < term_expr: < column_info: < field_id: 101 data_type: Int64 > values: < int64_val: 42000 > values: < int64_val: 42001 > values: < int64_val: 42002 > values: < int64_val: 42003 > values: < int64_val: 42004 > > > query_info: < topk: 5 round_decimal: -1 metric_type: "JACCARD" search_params: "{\"nprobe\": 10}" > placeholder_tag: "$0" >)";

    // create place_holder_group
    int num_queries = 5;
    int topK = 5;
    auto raw_group = CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan = nullptr;
    auto
binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
    auto status = CreateSearchPlanByExpr(collection, binary_plan.data(), binary_plan.size(), &plan);
    assert(status.error_code == Success);
    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);
    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    CSearchResult c_search_result_on_smallIndex;
    auto res_before_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_smallIndex);
    assert(res_before_load_index.error_code == Success);

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_BINARY, knowhere::metric::JACCARD, IndexEnum::INDEX_FAISS_BIN_IVFFLAT, DIM, N);

    // gen query dataset
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    auto vec_index = dynamic_cast(indexing.get());
    auto search_plan = reinterpret_cast(plan);
    SearchInfo search_info = search_plan->plan_node_->search_info_;
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    auto ids = result_on_index->seg_offsets_.data();
    auto dis = result_on_index->distances_.data();
    std::vector vec_ids(ids, ids + TOPK * num_queries);
    std::vector vec_dis;
    // negate standalone-index distances (result sign convention)
    for (int j = 0; j < TOPK * num_queries; ++j) {
        vec_dis.push_back(dis[j] * -1);
    }

    auto search_result_on_raw_index = (SearchResult*)c_search_result_on_smallIndex;
    search_result_on_raw_index->seg_offsets_ = vec_ids;
    search_result_on_raw_index->distances_ = vec_dis;

    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "BIN_IVF_FLAT";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "JACCARD";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(FieldId(100));
    sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

    CSearchResult c_search_result_on_bigIndex;
    auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    // run the C-API reduce over the single result blob
    std::vector results;
    results.push_back(c_search_result_on_bigIndex);
    auto slice_nqs = std::vector{num_queries};
    auto slice_topKs = std::vector{topK};
    CSearchResultDataBlobs cSearchResultData;
    status = ReduceSearchResultsAndFillData(&cSearchResultData, plan, results.data(), results.size(), slice_nqs.data(), slice_topKs.data(), slice_nqs.size());
    assert(status.error_code == Success);
    // status = ReduceSearchResultsAndFillData(plan, results.data(), results.size());
    // assert(status.error_code == Success);

    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        // after reduce, per-query offsets come from the prefix-sum table
        ASSERT_EQ(search_result_on_bigIndex->topk_per_nq_prefix_sum_.size(), search_result_on_bigIndex->total_nq_ + 1);
        auto offset = search_result_on_bigIndex->topk_per_nq_prefix_sum_[i];
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 42000 + i);
        ASSERT_EQ(search_result_on_bigIndex->distances_[offset], search_result_on_raw_index->distances_[i * TOPK]);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_smallIndex);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
    DeleteSegment(segment);
    DeleteSearchResultDataBlobs(cSearchResultData);
}

// Loads a scalar field into a sealed segment through the C API and checks the
// reported row count.
TEST(CApiTest, SealedSegmentTest) {
    auto collection = NewCollection(get_default_schema_config());
    auto segment = NewSegment(collection, Sealed, -1);

    int N = 10000;
    std::default_random_engine e(67);
    auto ages = std::vector(N);
    for (auto& age : ages) {
        age = e() % 2000;
    }
    // NOTE(review): blob is not used below; load_info serializes age_data instead
    auto blob = (void*)(&ages[0]);
    FieldMeta field_meta(FieldName("age"), FieldId(101), DataType::INT64);
    auto array = CreateScalarDataArrayFrom(ages.data(), N, field_meta);
    auto age_data = serialize(array.get());
    auto load_info = CLoadFieldDataInfo{101, age_data.data(), age_data.size(), N};

    auto res = LoadFieldData(segment, load_info);
    assert(res.error_code == Success);
    auto count = GetRowCount(segment);
    assert(count == N);

    DeleteCollection(collection);
    DeleteSegment(segment);
}

// Sealed-segment search with a float range predicate: loads scalar/system
// fields as raw data and the vector field as an IVF_PQ index.
TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("L2", DIM, false);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Sealed, -1);

    auto N = ROW_COUNT;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    auto query_ptr = vec_col.data() + 42000 * DIM;

    // serialize the scalar "counter" column and the RowID/Timestamp system columns
    auto counter_col = dataset.get_col(FieldId(101));
    FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
    auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
    auto counter_data = serialize(count_array.get());
    FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
    auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
    auto row_ids_data = serialize(row_ids_array.get());
    FieldMeta timestamp_field_meta(FieldName("Timestamp"),
TimestampFieldID, DataType::INT64);
    auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
    auto timestamps_data = serialize(timestamps_array.get());

    // DSL: range predicate on "counter" AND an L2 ANN clause on "fakevec".
    const char* dsl_string = R"({ "bool": { "must": [ { "range": { "counter": { "GE": 42000, "LT": 42010 } } }, { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 5, "round_decimal": -1 } } } ] } })";

    // create place_holder_group
    int num_queries = 10;
    auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan = nullptr;
    auto status = CreateSearchPlan(collection, dsl_string, &plan);
    assert(status.error_code == Success);

    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);

    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_FLOAT, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);

    // Register the serialized IVF_PQ index with a LoadIndexInfo handle.
    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "IVF_PQ";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "L2";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // Sanity-check the index directly: querying it must return topk hits
    // per query before it is handed to the segment.
    auto load_index_info = (LoadIndexInfo*)c_load_index_info;
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    // NOTE(review): dynamic_cast target appears stripped by formatting
    // (presumably VectorIndex*) — confirm against upstream file.
    auto vec_index = dynamic_cast(indexing.get());
    SearchInfo search_info;
    search_info.topk_ = TOPK;
    search_info.metric_type_ = knowhere::metric::L2;
    search_info.search_params_ = generate_search_conf(IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2);
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    EXPECT_EQ(result_on_index->distances_.size(), num_queries * TOPK);

    // Load raw scalar/system columns into the C-API segment.
    auto c_counter_field_data = CLoadFieldDataInfo{
        101,
        counter_data.data(),
        counter_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_counter_field_data);
    assert(status.error_code == Success);

    auto c_id_field_data = CLoadFieldDataInfo{
        0,
        row_ids_data.data(),
        row_ids_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_id_field_data);
    assert(status.error_code == Success);

    auto c_ts_field_data = CLoadFieldDataInfo{
        1,
        timestamps_data.data(),
        timestamps_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_ts_field_data);
    assert(status.error_code == Success);

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(FieldId(100));
    sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);

    CSearchResult c_search_result_on_bigIndex;
    auto res_after_load_index = Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    // Each query's top hit must be its source row (42000 + i).
    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        auto offset = i * TOPK;
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 42000 + i);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
DeleteSegment(segment);
}

// Search a sealed segment with a pure ANN plan (no scalar predicate),
// exercising two timestamps: one past every row (N + ts_offset) and one at
// ts_offset, i.e. before most generated rows. Both searches must succeed.
TEST(CApiTest, SealedSegment_search_without_predicates) {
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("L2", DIM, false);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Sealed, -1);

    auto N = ROW_COUNT;
    uint64_t ts_offset = 1000;
    auto dataset = DataGen(schema, N, ts_offset);
    auto vec_col = dataset.get_col(FieldId(100));
    auto query_ptr = vec_col.data() + 42000 * DIM;

    // This test loads the raw vector column too (no vector index is used).
    auto vec_array = dataset.get_col(FieldId(100));
    auto vec_data = serialize(vec_array.get());

    auto counter_col = dataset.get_col(FieldId(101));
    FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
    auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
    auto counter_data = serialize(count_array.get());

    FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
    auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
    auto row_ids_data = serialize(row_ids_array.get());

    FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
    auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
    auto timestamps_data = serialize(timestamps_array.get());

    // DSL: vector-only search, no scalar predicate.
    const char* dsl_string = R"( { "bool": { "vector": { "fakevec": { "metric_type": "L2", "params": { "nprobe": 10 }, "query": "$0", "topk": 5, "round_decimal": -1 } } } })";

    // Load raw vector, counter, RowID and Timestamp columns.
    auto c_vec_field_data = CLoadFieldDataInfo{
        100,
        vec_data.data(),
        vec_data.size(),
        N,
    };
    auto status = LoadFieldData(segment, c_vec_field_data);
    assert(status.error_code == Success);

    auto c_counter_field_data = CLoadFieldDataInfo{
        101,
        counter_data.data(),
        counter_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_counter_field_data);
    assert(status.error_code == Success);

    auto c_id_field_data = CLoadFieldDataInfo{
        0,
        row_ids_data.data(),
        row_ids_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_id_field_data);
    assert(status.error_code == Success);

    auto c_ts_field_data = CLoadFieldDataInfo{
        1,
        timestamps_data.data(),
        timestamps_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_ts_field_data);
    assert(status.error_code == Success);

    int num_queries = 10;
    auto blob = generate_query_data(num_queries);

    void* plan = nullptr;
    status = CreateSearchPlan(collection, dsl_string, &plan);
    ASSERT_EQ(status.error_code, Success);

    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    ASSERT_EQ(status.error_code, Success);

    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);

    // Search at a timestamp beyond all rows — must see the data.
    CSearchResult search_result;
    auto res = Search(segment, plan, placeholderGroup, N + ts_offset, &search_result);
    std::cout << res.error_msg << std::endl;
    ASSERT_EQ(res.error_code, Success);

    // Search at the base offset timestamp — still expected to succeed.
    CSearchResult search_result2;
    auto res2 = Search(segment, plan, placeholderGroup, ts_offset, &search_result2);
    ASSERT_EQ(res2.error_code, Success);

    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(search_result);
    DeleteSearchResult(search_result2);
    DeleteCollection(collection);
    DeleteSegment(segment);
}

// Same scenario as SealedSegment_search_float_Predicate_Range, but the
// predicate is expressed as a serialized expression plan (protobuf text)
// instead of the JSON DSL.
TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
    constexpr auto TOPK = 5;
    std::string schema_string = generate_collection_schema("L2", DIM, false);
    auto collection = NewCollection(schema_string.c_str());
    auto schema = ((segcore::Collection*)collection)->get_schema();
    auto segment = NewSegment(collection, Sealed, -1);

    auto N = ROW_COUNT;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col(FieldId(100));
    auto query_ptr = vec_col.data() + 42000 * DIM;

    auto counter_col = dataset.get_col(FieldId(101));
    FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
    auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
    auto counter_data =
serialize(count_array.get());

    FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
    auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
    auto row_ids_data = serialize(row_ids_array.get());

    FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
    auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
    auto timestamps_data = serialize(timestamps_array.get());

    // Text-format expression plan: (counter >= 42000 AND counter < 42010)
    // plus an L2 ANN query on field 100 — mirrors the DSL-based test above.
    const char* serialized_expr_plan = R"(vector_anns: < field_id: 100 predicates: < binary_expr: < op: LogicalAnd left: < unary_range_expr: < column_info: < field_id: 101 data_type: Int64 > op: GreaterEqual value: < int64_val: 42000 > > > right: < unary_range_expr: < column_info: < field_id: 101 data_type: Int64 > op: LessThan value: < int64_val: 42010 > > > > > query_info: < topk: 5 round_decimal: -1 metric_type: "L2" search_params: "{\"nprobe\": 10}" > placeholder_tag: "$0" >)";

    // create place_holder_group
    int num_queries = 10;
    auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
    auto blob = raw_group.SerializeAsString();

    // search on segment's small index
    void* plan = nullptr;
    auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
    auto status = CreateSearchPlanByExpr(collection, binary_plan.data(), binary_plan.size(), &plan);
    assert(status.error_code == Success);

    void* placeholderGroup = nullptr;
    status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
    assert(status.error_code == Success);

    std::vector placeholderGroups;
    placeholderGroups.push_back(placeholderGroup);
    Timestamp time = 10000000;

    // load index to segment
    auto indexing = generate_index(vec_col.data(), DataType::VECTOR_FLOAT, knowhere::metric::L2, IndexEnum::INDEX_FAISS_IVFPQ, DIM, N);

    // Register the serialized IVF_PQ index with a LoadIndexInfo handle.
    auto binary_set = indexing->Serialize(milvus::Config{});
    void* c_load_index_info = nullptr;
    status = NewLoadIndexInfo(&c_load_index_info, c_storage_config);
    assert(status.error_code == Success);
    std::string index_type_key = "index_type";
    std::string index_type_value = "IVF_PQ";
    std::string index_mode_key = "index_mode";
    std::string index_mode_value = "CPU";
    std::string metric_type_key = "metric_type";
    std::string metric_type_value = "L2";
    AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
    AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str());
    AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
    AppendFieldInfo(c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector);
    AppendIndex(c_load_index_info, (CBinarySet)&binary_set);

    // load vec index
    status = UpdateSealedSegmentIndex(segment, c_load_index_info);
    assert(status.error_code == Success);

    // load raw data
    auto c_counter_field_data = CLoadFieldDataInfo{
        101,
        counter_data.data(),
        counter_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_counter_field_data);
    assert(status.error_code == Success);

    auto c_id_field_data = CLoadFieldDataInfo{
        0,
        row_ids_data.data(),
        row_ids_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_id_field_data);
    assert(status.error_code == Success);

    auto c_ts_field_data = CLoadFieldDataInfo{
        1,
        timestamps_data.data(),
        timestamps_data.size(),
        N,
    };
    status = LoadFieldData(segment, c_ts_field_data);
    assert(status.error_code == Success);

    // gen query dataset
    auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
    // NOTE(review): the cast targets appear stripped by formatting
    // (presumably VectorIndex* / query::Plan*) — confirm against upstream file.
    auto vec_index = dynamic_cast(indexing.get());
    auto search_plan = reinterpret_cast(plan);
    SearchInfo search_info = search_plan->plan_node_->search_info_;
    auto result_on_index = vec_index->Query(query_dataset, search_info, nullptr);
    auto ids = result_on_index->seg_offsets_.data();
    auto dis = result_on_index->distances_.data();
    std::vector vec_ids(ids, ids + TOPK * num_queries);
    // Distances are negated here; vec_dis is not asserted on below —
    // presumably kept for debugging/reference.
    std::vector vec_dis;
    for (int j = 0; j < TOPK * num_queries; ++j) {
        vec_dis.push_back(dis[j] * -1);
    }

    CSearchResult
c_search_result_on_bigIndex;
    auto res_after_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_bigIndex);
    assert(res_after_load_index.error_code == Success);

    // Each query's top hit must be its source row (42000 + i).
    auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
    for (int i = 0; i < num_queries; ++i) {
        auto offset = i * TOPK;
        ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], 42000 + i);
    }

    DeleteLoadIndexInfo(c_load_index_info);
    DeleteSearchPlan(plan);
    DeletePlaceholderGroup(placeholderGroup);
    DeleteSearchResult(c_search_result_on_bigIndex);
    DeleteCollection(collection);
    DeleteSegment(segment);
}

// Build sorted scalar indexes for six numeric fields of a sealed segment,
// then Retrieve one row (matched by its int64 primary key) and verify each
// returned field value against the generated source data.
TEST(CApiTest, RetriveScalarFieldFromSealedSegmentWithIndex) {
    // NOTE(review): several template argument lists in this test appear
    // stripped by formatting (make_shared, get_col, CreateScalarIndexSort) —
    // confirm against upstream file.
    auto schema = std::make_shared();
    auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
    auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
    auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
    auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
    auto float_fid = schema->AddDebugField("age_float", DataType::FLOAT);
    auto double_fid = schema->AddDebugField("age_double", DataType::DOUBLE);
    schema->set_primary_field_id(i64_fid);

    auto segment = CreateSealedSegment(schema).release();
    int N = ROW_COUNT;
    auto raw_data = DataGen(schema, N);
    LoadIndexInfo load_index_info;

    // load timestamp field
    FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
    auto ts_array = CreateScalarDataArrayFrom(raw_data.timestamps_.data(), N, ts_field_meta);
    auto ts_data = serialize(ts_array.get());
    auto load_info = CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.data(), ts_data.size(), N};
    auto res = LoadFieldData(segment, load_info);
    assert(res.error_code == Success);
    auto count = GetRowCount(segment);
    assert(count == N);

    // load rowid field
    FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
    auto row_id_array = CreateScalarDataArrayFrom(raw_data.row_ids_.data(), N, row_id_field_meta);
    auto row_id_data = serialize(row_id_array.get());
    load_info = CLoadFieldDataInfo{RowFieldID.get(), row_id_data.data(), row_id_data.size(), N};
    res = LoadFieldData(segment, load_info);
    assert(res.error_code == Success);
    count = GetRowCount(segment);
    assert(count == N);

    // load index for int8 field
    auto age8_col = raw_data.get_col(i8_fid);
    GenScalarIndexing(N, age8_col.data());
    auto age8_index = milvus::index::CreateScalarIndexSort();
    age8_index->Build(N, age8_col.data());
    load_index_info.field_id = i8_fid.get();
    load_index_info.field_type = DataType::INT8;
    load_index_info.index = std::move(age8_index);
    segment->LoadIndex(load_index_info);

    // load index for int16 field
    auto age16_col = raw_data.get_col(i16_fid);
    GenScalarIndexing(N, age16_col.data());
    auto age16_index = milvus::index::CreateScalarIndexSort();
    age16_index->Build(N, age16_col.data());
    load_index_info.field_id = i16_fid.get();
    load_index_info.field_type = DataType::INT16;
    load_index_info.index = std::move(age16_index);
    segment->LoadIndex(load_index_info);

    // load index for int32 field
    auto age32_col = raw_data.get_col(i32_fid);
    GenScalarIndexing(N, age32_col.data());
    auto age32_index = milvus::index::CreateScalarIndexSort();
    age32_index->Build(N, age32_col.data());
    load_index_info.field_id = i32_fid.get();
    load_index_info.field_type = DataType::INT32;
    load_index_info.index = std::move(age32_index);
    segment->LoadIndex(load_index_info);

    // load index for int64 field
    auto age64_col = raw_data.get_col(i64_fid);
    GenScalarIndexing(N, age64_col.data());
    auto age64_index = milvus::index::CreateScalarIndexSort();
    age64_index->Build(N, age64_col.data());
    load_index_info.field_id = i64_fid.get();
    load_index_info.field_type = DataType::INT64;
    load_index_info.index = std::move(age64_index);
    segment->LoadIndex(load_index_info);

    // load index for float field
    auto age_float_col = raw_data.get_col(float_fid);
    GenScalarIndexing(N, age_float_col.data());
    auto age_float_index = milvus::index::CreateScalarIndexSort();
    age_float_index->Build(N, age_float_col.data());
load_index_info.field_id = float_fid.get(); load_index_info.field_type = DataType::FLOAT; load_index_info.index = std::move(age_float_index); segment->LoadIndex(load_index_info); // load index for double field auto age_double_col = raw_data.get_col(double_fid); GenScalarIndexing(N, age_double_col.data()); auto age_double_index = milvus::index::CreateScalarIndexSort(); age_double_index->Build(N, age_double_col.data()); load_index_info.field_id = double_fid.get(); load_index_info.field_type = DataType::FLOAT; load_index_info.index = std::move(age_double_index); segment->LoadIndex(load_index_info); // create retrieve plan auto plan = std::make_unique(*schema); plan->plan_node_ = std::make_unique(); std::vector retrive_row_ids = {age64_col[0]}; auto term_expr = std::make_unique>(i64_fid, DataType::INT64, retrive_row_ids); plan->plan_node_->predicate_ = std::move(term_expr); std::vector target_field_ids; // retrieve value target_field_ids = {i8_fid, i16_fid, i32_fid, i64_fid, float_fid, double_fid}; plan->field_ids_ = target_field_ids; CRetrieveResult retrieve_result; res = Retrieve(segment, plan.get(), raw_data.timestamps_[N - 1], &retrieve_result); ASSERT_EQ(res.error_code, Success); auto query_result = std::make_unique(); auto suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size); ASSERT_TRUE(suc); ASSERT_EQ(query_result->fields_data().size(), 6); auto fields_data = query_result->fields_data(); for (auto iter = fields_data.begin(); iter < fields_data.end(); ++iter) { switch (iter->type()) { case proto::schema::DataType::Int8: { ASSERT_EQ(iter->scalars().int_data().data(0), age8_col[0]); break; } case proto::schema::DataType::Int16: { ASSERT_EQ(iter->scalars().int_data().data(0), age16_col[0]); break; } case proto::schema::DataType::Int32: { ASSERT_EQ(iter->scalars().int_data().data(0), age32_col[0]); break; } case proto::schema::DataType::Int64: { ASSERT_EQ(iter->scalars().long_data().data(0), age64_col[0]); break; } case 
proto::schema::DataType::Float: { ASSERT_EQ(iter->scalars().float_data().data(0), age_float_col[0]); break; } case proto::schema::DataType::Double: { ASSERT_EQ(iter->scalars().double_data().data(0), age_double_col[0]); break; } default: { PanicInfo("not supported type"); } } } DeleteRetrievePlan(plan.release()); DeleteRetrieveResult(&retrieve_result); DeleteSegment(segment); }