Fix cpplint (#22657)

Signed-off-by: longjiquan <jiquan.long@zilliz.com>
pull/22681/head
Jiquan Long 2023-03-10 09:47:54 +08:00 committed by GitHub
parent bcd316a44e
commit a36fefb009
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
42 changed files with 2571 additions and 1081 deletions

View File

@ -32,8 +32,8 @@ class BitsetView : public knowhere::BitsetView {
BitsetView() = default;
~BitsetView() = default;
BitsetView(const std::nullptr_t value)
: knowhere::BitsetView(value) { // NOLINT
BitsetView(const std::nullptr_t value) // NOLINT
: knowhere::BitsetView(value) { // NOLINT
}
BitsetView(const uint8_t* data, size_t num_bits)

View File

@ -11,10 +11,12 @@
#include <queue>
#include <vector>
#include "common/Utils.h"
#include <functional>
#include <iostream>
#include "common/Utils.h"
#include "common/RangeSearchHelper.h"
namespace milvus {
namespace {
using ResultPair = std::pair<float, int64_t>;

View File

@ -17,8 +17,10 @@
#include <sys/mman.h>
#include <filesystem>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
#include "common/Consts.h"
#include "common/FieldMeta.h"

View File

@ -111,7 +111,7 @@ get_thread_starttime() {
snprintf(filename,
sizeof(filename),
"/proc/%lld/task/%lld/stat",
(long long)pid,
(long long)pid, // NOLINT, TODO: How to solve this?
(long long)tid); // NOLINT
int64_t val = 0;

View File

@ -176,8 +176,9 @@ class ConcurrentVectorImpl : public VectorBase {
auto& chunk = get_chunk(chunk_id);
if constexpr (is_scalar) {
return Span<TraitType>(chunk.data(), chunk.size());
} else if constexpr (std::is_same_v<Type, int64_t> ||
} else if constexpr (std::is_same_v<Type, int64_t> || // NOLINT
std::is_same_v<Type, int>) {
// TODO: where should the braces be placed?
// only for testing
PanicInfo("unimplemented");
} else {

View File

@ -20,7 +20,7 @@
#include "TimestampIndex.h"
#include "common/Schema.h"
#include "easylogging++.h"
#include "easyloggingpp/easylogging++.h"
#include "segcore/AckResponder.h"
#include "segcore/ConcurrentVector.h"
#include "segcore/Record.h"

View File

@ -38,7 +38,7 @@ class SegmentGrowing : public SegmentInternalInterface {
const Timestamp* timestamps,
const InsertData* insert_data) = 0;
virtual SegmentType
SegmentType
type() const override {
return SegmentType::Growing;
}

View File

@ -34,7 +34,7 @@ class SegmentSealed : public SegmentInternalInterface {
virtual void
DropFieldData(const FieldId field_id) = 0;
virtual SegmentType
SegmentType
type() const override {
return SegmentType::Sealed;
}

View File

@ -1,9 +1,22 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include <sys/mman.h>
#include <string_view>
#include <vector>
#include <string>
#include <utility>
#include "common/LoadInfo.h"

View File

@ -23,9 +23,21 @@
#include "velox/exec/tests/utils/TempDirectoryPath.h"
#include "velox/vector/BaseVector.h"
using namespace facebook::velox;
using namespace facebook::velox::dwio::common;
using namespace facebook::velox::dwrf;
namespace filesystems = facebook::velox::filesystems;
namespace dwio = facebook::velox::dwio;
namespace dwrf = facebook::velox::dwrf;
using facebook::velox::LocalReadFile;
using facebook::velox::RowVector;
using facebook::velox::vector_size_t;
using facebook::velox::VectorPtr;
using dwio::common::BufferedInput;
using dwio::common::FileFormat;
using dwio::common::ReaderOptions;
using dwio::common::RowReaderOptions;
using dwrf::DwrfReader;
// A temporary program that reads from an ORC file and prints its content.
// Used to compare the ORC data read by DWRFReader against the apache-orc repo.

View File

@ -47,13 +47,16 @@ IndexBuilder_build(benchmark::State& state) {
indexcgo::TypeParams type_params;
indexcgo::IndexParams index_params;
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
std::tie(type_params, index_params) =
generate_params(index_type, metric_type);
std::string type_params_str, index_params_str;
bool ok;
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
ok = google::protobuf::TextFormat::PrintToString(type_params,
&type_params_str);
assert(ok);
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto is_binary = state.range(2);
@ -63,7 +66,9 @@ IndexBuilder_build(benchmark::State& state) {
for (auto _ : state) {
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
milvus::DataType::VECTOR_FLOAT, type_params_str.c_str(), index_params_str.c_str(),
milvus::DataType::VECTOR_FLOAT,
type_params_str.c_str(),
index_params_str.c_str(),
get_default_storage_config());
index->Build(xb_dataset);
}
@ -77,13 +82,16 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
indexcgo::TypeParams type_params;
indexcgo::IndexParams index_params;
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
std::tie(type_params, index_params) =
generate_params(index_type, metric_type);
std::string type_params_str, index_params_str;
bool ok;
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
ok = google::protobuf::TextFormat::PrintToString(type_params,
&type_params_str);
assert(ok);
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto is_binary = state.range(2);
@ -93,7 +101,9 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
for (auto _ : state) {
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
milvus::DataType::VECTOR_FLOAT, type_params_str.c_str(), index_params_str.c_str(),
milvus::DataType::VECTOR_FLOAT,
type_params_str.c_str(),
index_params_str.c_str(),
get_default_storage_config());
index->Build(xb_dataset);

View File

@ -24,7 +24,8 @@ static int dim = 768;
const auto schema = []() {
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
schema->set_primary_field_id(i64_fid);
return schema;
@ -56,7 +57,8 @@ const auto plan = [] {
// Placeholder group built once by an immediately-invoked lambda.
// It creates a raw placeholder group for 10 queries of dimension `dim`,
// serializes it to a string, and parses that string against `plan`.
// NOTE(review): the meaning of the third CreatePlaceholderGroup argument
// (1024) is not visible here -- confirm against its declaration.
// NOTE(review): the ParsePlaceholderGroup statement is listed twice below,
// once on one line and once wrapped; both spellings have the same tokens.
auto ph_group = [] {
auto num_queries = 10;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
return ph_group;
}();
@ -79,7 +81,11 @@ Search_SmallIndex(benchmark::State& state) {
segment->disable_small_index();
}
segment->PreInsert(N);
segment->Insert(0, N, dataset_.row_ids_.data(), dataset_.timestamps_.data(), dataset_.raw_);
segment->Insert(0,
N,
dataset_.row_ids_.data(),
dataset_.timestamps_.data(),
dataset_.raw_);
Timestamp time = 10000000;
@ -88,7 +94,9 @@ Search_SmallIndex(benchmark::State& state) {
}
}
BENCHMARK(Search_SmallIndex)->MinTime(5)->ArgsProduct({{true, false}, {8, 16, 32}});
BENCHMARK(Search_SmallIndex)
->MinTime(5)
->ArgsProduct({{true, false}, {8, 16, 32}});
static void
Search_Sealed(benchmark::State& state) {

View File

@ -25,9 +25,13 @@ using namespace milvus::query;
namespace {
// Generates the float data of a single VECTOR_FLOAT column.
//
// Builds a fresh Schema containing one debug field named "fvec" with the
// requested dimension and metric, asks DataGen for `n` rows using `seed`,
// and returns that field's column via get_col<float>.
//
// dim    - forwarded to AddDebugField as the vector dimension argument.
// n      - number of rows requested from DataGen.
// metric - metric type attached to the "fvec" field.
// seed   - forwarded to DataGen; defaults to 42.
//
// NOTE(review): the signature and the AddDebugField statement are each
// listed twice below (single-line and wrapped); the tokens are the same.
auto
GenFloatVecs(int dim, int n, const knowhere::MetricType& metric, int seed = 42) {
GenFloatVecs(int dim,
int n,
const knowhere::MetricType& metric,
int seed = 42) {
auto schema = std::make_shared<Schema>();
auto fvec = schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, dim, metric);
auto fvec =
schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, dim, metric);
auto dataset = DataGen(schema, n, seed);
return dataset.get_col<float>(fvec);
}
@ -98,7 +102,8 @@ AssertMatch(const std::vector<int>& ref, const int64_t* ans) {
// Returns true when milvus::IsMetricType matches `metric` against either
// knowhere::metric::L2 or knowhere::metric::IP, and false otherwise.
// NOTE(review): the return statement is listed twice below (single-line and
// wrapped); both spellings have the same tokens.
bool
is_supported_float_metric(const std::string& metric) {
return milvus::IsMetricType(metric, knowhere::metric::L2) || milvus::IsMetricType(metric, knowhere::metric::IP);
return milvus::IsMetricType(metric, knowhere::metric::L2) ||
milvus::IsMetricType(metric, knowhere::metric::IP);
}
} // namespace
@ -106,7 +111,11 @@ is_supported_float_metric(const std::string& metric) {
class TestFloatSearchBruteForce : public ::testing::Test {
public:
void
Run(int nb, int nq, int topk, int dim, const knowhere::MetricType& metric_type) {
Run(int nb,
int nq,
int topk,
int dim,
const knowhere::MetricType& metric_type) {
auto bitset = std::make_shared<BitsetType>();
bitset->resize(nb);
auto bitset_view = BitsetView(*bitset);
@ -114,15 +123,22 @@ class TestFloatSearchBruteForce : public ::testing::Test {
auto base = GenFloatVecs(dim, nb, metric_type);
auto query = GenFloatVecs(dim, nq, metric_type);
dataset::SearchDataset dataset{metric_type, nq, topk, -1, dim, query.data()};
dataset::SearchDataset dataset{
metric_type, nq, topk, -1, dim, query.data()};
if (!is_supported_float_metric(metric_type)) {
// Memory leak in knowhere.
// ASSERT_ANY_THROW(BruteForceSearch(dataset, base.data(), nb, bitset_view));
return;
}
auto result = BruteForceSearch(dataset, base.data(), nb, knowhere::Json(), bitset_view);
auto result = BruteForceSearch(
dataset, base.data(), nb, knowhere::Json(), bitset_view);
for (int i = 0; i < nq; i++) {
auto ref = Ref(base.data(), query.data() + i * dim, nb, dim, topk, metric_type);
auto ref = Ref(base.data(),
query.data() + i * dim,
nb,
dim,
topk,
metric_type);
auto ans = result.get_seg_offsets() + i * topk;
AssertMatch(ref, ans);
}

View File

@ -18,11 +18,16 @@ using namespace milvus::segcore;
// Inserts N = 100000 generated rows into a growing segment.
// The schema has a VECTOR_BINARY field "vecbin" (JACCARD metric) and an
// INT64 field "age" that is set as the primary key.
// The test body contains no assertions; it only exercises the
// DataGen / PreInsert / Insert calls.
// NOTE(review): 128 is presumably the vector dimension and 10 the DataGen
// seed -- confirm against AddDebugField and DataGen.
// NOTE(review): the AddDebugField("vecbin", ...) and Insert statements are
// each listed twice below (single-line and wrapped); the tokens are the same.
TEST(Binary, Insert) {
int64_t N = 100000;
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, knowhere::metric::JACCARD);
auto vec_fid = schema->AddDebugField(
"vecbin", DataType::VECTOR_BINARY, 128, knowhere::metric::JACCARD);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
schema->set_primary_field_id(i64_fid);
auto dataset = DataGen(schema, N, 10);
auto segment = CreateGrowingSegment(schema);
// PreInsert returns the offset that is then handed to Insert.
auto offset = segment->PreInsert(N);
segment->Insert(offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
}

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,9 @@ using namespace milvus;
TEST(storage, InsertDataFloat) {
std::vector<float> data = {1, 2, 3, 4, 5};
storage::Payload payload{storage::DataType::FLOAT, reinterpret_cast<const uint8_t*>(data.data()), int(data.size())};
storage::Payload payload{storage::DataType::FLOAT,
reinterpret_cast<const uint8_t*>(data.data()),
int(data.size())};
auto field_data = std::make_shared<storage::FieldData>(payload);
storage::InsertData insert_data(field_data);
@ -35,23 +37,29 @@ TEST(storage, InsertDataFloat) {
insert_data.SetTimestamps(0, 100);
auto serialized_bytes = insert_data.Serialize(storage::StorageType::Remote);
auto new_insert_data = storage::DeserializeFileData(reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
serialized_bytes.size());
auto new_insert_data = storage::DeserializeFileData(
reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
serialized_bytes.size());
ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType);
ASSERT_EQ(new_insert_data->GetTimeRage(), std::make_pair(Timestamp(0), Timestamp(100)));
ASSERT_EQ(new_insert_data->GetTimeRage(),
std::make_pair(Timestamp(0), Timestamp(100)));
auto new_payload = new_insert_data->GetPayload();
ASSERT_EQ(new_payload->data_type, storage::DataType::FLOAT);
ASSERT_EQ(new_payload->rows, data.size());
std::vector<float> new_data(data.size());
memcpy(new_data.data(), new_payload->raw_data, new_payload->rows * sizeof(float));
memcpy(new_data.data(),
new_payload->raw_data,
new_payload->rows * sizeof(float));
ASSERT_EQ(data, new_data);
}
TEST(storage, InsertDataVectorFloat) {
std::vector<float> data = {1, 2, 3, 4, 5, 6, 7, 8};
int DIM = 2;
storage::Payload payload{storage::DataType::VECTOR_FLOAT, reinterpret_cast<const uint8_t*>(data.data()),
int(data.size()) / DIM, DIM};
storage::Payload payload{storage::DataType::VECTOR_FLOAT,
reinterpret_cast<const uint8_t*>(data.data()),
int(data.size()) / DIM,
DIM};
auto field_data = std::make_shared<storage::FieldData>(payload);
storage::InsertData insert_data(field_data);
@ -60,56 +68,72 @@ TEST(storage, InsertDataVectorFloat) {
insert_data.SetTimestamps(0, 100);
auto serialized_bytes = insert_data.Serialize(storage::StorageType::Remote);
auto new_insert_data = storage::DeserializeFileData(reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
serialized_bytes.size());
auto new_insert_data = storage::DeserializeFileData(
reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
serialized_bytes.size());
ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType);
ASSERT_EQ(new_insert_data->GetTimeRage(), std::make_pair(Timestamp(0), Timestamp(100)));
ASSERT_EQ(new_insert_data->GetTimeRage(),
std::make_pair(Timestamp(0), Timestamp(100)));
auto new_payload = new_insert_data->GetPayload();
ASSERT_EQ(new_payload->data_type, storage::DataType::VECTOR_FLOAT);
ASSERT_EQ(new_payload->rows, data.size() / DIM);
std::vector<float> new_data(data.size());
memcpy(new_data.data(), new_payload->raw_data, new_payload->rows * sizeof(float) * DIM);
memcpy(new_data.data(),
new_payload->raw_data,
new_payload->rows * sizeof(float) * DIM);
ASSERT_EQ(data, new_data);
}
// Round-trip test for InsertData using StorageType::LocalDisk.
// Wraps eight floats as a VECTOR_FLOAT payload with DIM = 2 (so
// data.size() / DIM = 4 rows), serializes it, deserializes the bytes with
// DeserializeLocalInsertFileData, and asserts that the codec type, data
// type, row count and raw float values all match the input.
// NOTE(review): several statements below are listed twice (single-line and
// wrapped); both spellings have the same tokens.
TEST(storage, LocalInsertDataVectorFloat) {
std::vector<float> data = {1, 2, 3, 4, 5, 6, 7, 8};
int DIM = 2;
// The payload points at data's buffer rather than holding its own copy here.
storage::Payload payload{storage::DataType::VECTOR_FLOAT, reinterpret_cast<const uint8_t*>(data.data()),
int(data.size()) / DIM, DIM};
storage::Payload payload{storage::DataType::VECTOR_FLOAT,
reinterpret_cast<const uint8_t*>(data.data()),
int(data.size()) / DIM,
DIM};
auto field_data = std::make_shared<storage::FieldData>(payload);
storage::InsertData insert_data(field_data);
storage::FieldDataMeta field_data_meta{100, 101, 102, 103};
insert_data.SetFieldDataMeta(field_data_meta);
auto serialized_bytes = insert_data.Serialize(storage::StorageType::LocalDisk);
auto new_insert_data =
storage::DeserializeLocalInsertFileData(reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
serialized_bytes.size(), storage::DataType::VECTOR_FLOAT);
auto serialized_bytes =
insert_data.Serialize(storage::StorageType::LocalDisk);
auto new_insert_data = storage::DeserializeLocalInsertFileData(
reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
serialized_bytes.size(),
storage::DataType::VECTOR_FLOAT);
ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType);
auto new_payload = new_insert_data->GetPayload();
ASSERT_EQ(new_payload->data_type, storage::DataType::VECTOR_FLOAT);
ASSERT_EQ(new_payload->rows, data.size() / DIM);
// Copy rows * DIM floats back out so the values can be compared with the input.
std::vector<float> new_data(data.size());
memcpy(new_data.data(), new_payload->raw_data, new_payload->rows * sizeof(float) * DIM);
memcpy(new_data.data(),
new_payload->raw_data,
new_payload->rows * sizeof(float) * DIM);
ASSERT_EQ(data, new_data);
}
// Round-trip test for IndexData using StorageType::LocalDisk.
// Wraps eight bytes as an INT8 payload, serializes it, deserializes the
// bytes with DeserializeLocalIndexFileData, and asserts that the codec type,
// data type, row count and raw byte values all match the input.
// NOTE(review): several statements below are listed twice (single-line and
// wrapped); both spellings have the same tokens.
TEST(storage, LocalIndexData) {
std::vector<uint8_t> data = {1, 2, 3, 4, 5, 6, 7, 8};
storage::Payload payload{storage::DataType::INT8, reinterpret_cast<const uint8_t*>(data.data()), int(data.size())};
storage::Payload payload{storage::DataType::INT8,
reinterpret_cast<const uint8_t*>(data.data()),
int(data.size())};
auto field_data = std::make_shared<storage::FieldData>(payload);
storage::IndexData indexData_data(field_data);
auto serialized_bytes = indexData_data.Serialize(storage::StorageType::LocalDisk);
auto serialized_bytes =
indexData_data.Serialize(storage::StorageType::LocalDisk);
auto new_index_data = storage::DeserializeLocalIndexFileData(
reinterpret_cast<const uint8_t*>(serialized_bytes.data()), serialized_bytes.size());
reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
serialized_bytes.size());
ASSERT_EQ(new_index_data->GetCodecType(), storage::IndexDataType);
auto new_payload = new_index_data->GetPayload();
ASSERT_EQ(new_payload->data_type, storage::DataType::INT8);
ASSERT_EQ(new_payload->rows, data.size());
// Copy `rows` bytes back out so the values can be compared with the input.
std::vector<uint8_t> new_data(data.size());
memcpy(new_data.data(), new_payload->raw_data, new_payload->rows * sizeof(uint8_t));
memcpy(new_data.data(),
new_payload->raw_data,
new_payload->rows * sizeof(uint8_t));
ASSERT_EQ(data, new_data);
}

View File

@ -71,13 +71,15 @@ TEST_F(DiskAnnFileManagerTest, AddFilePositive) {
IndexMeta index_meta = {3, 100, 1000, 1, "index"};
int64_t slice_size = milvus::index_file_slice_size << 20;
auto diskAnnFileManager = std::make_shared<DiskFileManagerImpl>(filed_data_meta, index_meta, storage_config_);
auto diskAnnFileManager = std::make_shared<DiskFileManagerImpl>(
filed_data_meta, index_meta, storage_config_);
auto ok = diskAnnFileManager->AddFile(indexFilePath);
EXPECT_EQ(ok, true);
auto remote_files_to_size = diskAnnFileManager->GetRemotePathsToFileSize();
auto num_slice = index_size / slice_size;
EXPECT_EQ(remote_files_to_size.size(), index_size % slice_size == 0 ? num_slice : num_slice + 1);
EXPECT_EQ(remote_files_to_size.size(),
index_size % slice_size == 0 ? num_slice : num_slice + 1);
std::vector<std::string> remote_files;
for (auto& file2size : remote_files_to_size) {
@ -125,13 +127,15 @@ TEST_F(DiskAnnFileManagerTest, AddFilePositiveParallel) {
IndexMeta index_meta = {3, 100, 1000, 1, "index"};
int64_t slice_size = milvus::index_file_slice_size << 20;
auto diskAnnFileManager = std::make_shared<DiskFileManagerImpl>(filed_data_meta, index_meta, storage_config_);
auto diskAnnFileManager = std::make_shared<DiskFileManagerImpl>(
filed_data_meta, index_meta, storage_config_);
auto ok = diskAnnFileManager->AddFile(indexFilePath);
EXPECT_EQ(ok, true);
auto remote_files_to_size = diskAnnFileManager->GetRemotePathsToFileSize();
auto num_slice = index_size / slice_size;
EXPECT_EQ(remote_files_to_size.size(), index_size % slice_size == 0 ? num_slice : num_slice + 1);
EXPECT_EQ(remote_files_to_size.size(),
index_size % slice_size == 0 ? num_slice : num_slice + 1);
std::vector<std::string> remote_files;
for (auto& file2size : remote_files_to_size) {
@ -169,14 +173,16 @@ TEST_F(DiskAnnFileManagerTest, TestThreadPool) {
std::vector<std::future<int>> futures;
auto start = chrono::system_clock::now();
for (int i = 0; i < 100; i++) {
futures.push_back(thread_pool->Submit(test_worker, "test_id" + std::to_string(i)));
futures.push_back(
thread_pool->Submit(test_worker, "test_id" + std::to_string(i)));
}
for (auto& future : futures) {
EXPECT_EQ(future.get(), 1);
}
auto end = chrono::system_clock::now();
auto duration = chrono::duration_cast<chrono::microseconds>(end - start);
auto second = double(duration.count()) * chrono::microseconds::period::num / chrono::microseconds::period::den;
auto second = double(duration.count()) * chrono::microseconds::period::num /
chrono::microseconds::period::den;
EXPECT_LT(second, 4 * 100);
}
@ -193,7 +199,8 @@ TEST_F(DiskAnnFileManagerTest, TestThreadPoolException) {
auto thread_pool = new milvus::ThreadPool(50);
std::vector<std::future<int>> futures;
for (int i = 0; i < 100; i++) {
futures.push_back(thread_pool->Submit(test_exception, "test_id" + std::to_string(i)));
futures.push_back(thread_pool->Submit(
test_exception, "test_id" + std::to_string(i)));
}
for (auto& future : futures) {
future.get();

View File

@ -97,11 +97,13 @@ TEST(Expr, Range) {
}
})";
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::INT32);
auto plan = CreatePlan(*schema, dsl_string);
ShowPlanNodeVisitor shower;
Assert(plan->tag2field_.at("$0") == schema->get_field_id(FieldName("fakevec")));
Assert(plan->tag2field_.at("$0") ==
schema->get_field_id(FieldName("fakevec")));
auto out = shower.call_child(*plan->plan_node_);
std::cout << out.dump(4);
}
@ -139,11 +141,13 @@ TEST(Expr, RangeBinary) {
}
})";
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
schema->AddDebugField(
"fakevec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
schema->AddDebugField("age", DataType::INT32);
auto plan = CreatePlan(*schema, dsl_string);
ShowPlanNodeVisitor shower;
Assert(plan->tag2field_.at("$0") == schema->get_field_id(FieldName("fakevec")));
Assert(plan->tag2field_.at("$0") ==
schema->get_field_id(FieldName("fakevec")));
auto out = shower.call_child(*plan->plan_node_);
std::cout << out.dump(4);
}
@ -181,7 +185,8 @@ TEST(Expr, InvalidRange) {
}
})";
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::INT32);
ASSERT_ANY_THROW(CreatePlan(*schema, dsl_string));
}
@ -219,7 +224,8 @@ TEST(Expr, InvalidDSL) {
})";
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::INT32);
ASSERT_ANY_THROW(CreatePlan(*schema, dsl_string));
}
@ -230,7 +236,8 @@ TEST(Expr, ShowExecutor) {
auto node = std::make_unique<FloatVectorANNS>();
auto schema = std::make_shared<Schema>();
auto metric_type = knowhere::metric::L2;
auto field_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, metric_type);
auto field_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, metric_type);
int64_t num_queries = 100L;
auto raw_data = DataGen(schema, num_queries);
auto& info = node->search_info_;
@ -251,10 +258,14 @@ TEST(Expr, TestRange) {
using namespace milvus::query;
using namespace milvus::segcore;
std::vector<std::tuple<std::string, std::function<bool(int)>>> testcases = {
{R"("GT": 2000, "LT": 3000)", [](int v) { return 2000 < v && v < 3000; }},
{R"("GE": 2000, "LT": 3000)", [](int v) { return 2000 <= v && v < 3000; }},
{R"("GT": 2000, "LE": 3000)", [](int v) { return 2000 < v && v <= 3000; }},
{R"("GE": 2000, "LE": 3000)", [](int v) { return 2000 <= v && v <= 3000; }},
{R"("GT": 2000, "LT": 3000)",
[](int v) { return 2000 < v && v < 3000; }},
{R"("GE": 2000, "LT": 3000)",
[](int v) { return 2000 <= v && v < 3000; }},
{R"("GT": 2000, "LE": 3000)",
[](int v) { return 2000 < v && v <= 3000; }},
{R"("GE": 2000, "LE": 3000)",
[](int v) { return 2000 <= v && v <= 3000; }},
{R"("GE": 2000)", [](int v) { return v >= 2000; }},
{R"("GT": 2000)", [](int v) { return v > 2000; }},
{R"("LE": 2000)", [](int v) { return v <= 2000; }},
@ -290,7 +301,8 @@ TEST(Expr, TestRange) {
}
})";
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -303,11 +315,16 @@ TEST(Expr, TestRange) {
auto new_age_col = raw_data.get_col<int>(i64_fid);
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (auto [clause, ref_func] : testcases) {
auto loc = dsl_string_tmp.find("@@@@");
auto dsl_string = dsl_string_tmp;
@ -373,7 +390,8 @@ TEST(Expr, TestTerm) {
}
})";
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -386,11 +404,16 @@ TEST(Expr, TestTerm) {
auto new_age_col = raw_data.get_col<int>(i64_fid);
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (auto [clause, ref_func] : testcases) {
auto loc = dsl_string_tmp.find("@@@@");
auto dsl_string = dsl_string_tmp;
@ -445,36 +468,45 @@ TEST(Expr, TestSimpleDsl) {
std::vector<std::tuple<Json, std::function<bool(int)>>> testcases;
{
Json dsl;
dsl["must"] = Json::array({vec_dsl, get_item(0), get_item(1), get_item(2, 0), get_item(3)});
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
dsl["must"] = Json::array(
{vec_dsl, get_item(0), get_item(1), get_item(2, 0), get_item(3)});
testcases.emplace_back(
dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
}
{
Json dsl;
Json sub_dsl;
sub_dsl["must"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
sub_dsl["must"] = Json::array(
{get_item(0), get_item(1), get_item(2, 0), get_item(3)});
dsl["must"] = Json::array({sub_dsl, vec_dsl});
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
testcases.emplace_back(
dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
}
{
Json dsl;
Json sub_dsl;
sub_dsl["should"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
sub_dsl["should"] = Json::array(
{get_item(0), get_item(1), get_item(2, 0), get_item(3)});
dsl["must"] = Json::array({sub_dsl, vec_dsl});
testcases.emplace_back(dsl, [](int64_t x) { return !!((x & 0b1111) ^ 0b0100); });
testcases.emplace_back(
dsl, [](int64_t x) { return !!((x & 0b1111) ^ 0b0100); });
}
{
Json dsl;
Json sub_dsl;
sub_dsl["must_not"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
sub_dsl["must_not"] = Json::array(
{get_item(0), get_item(1), get_item(2, 0), get_item(3)});
dsl["must"] = Json::array({sub_dsl, vec_dsl});
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) != 0b1011; });
testcases.emplace_back(
dsl, [](int64_t x) { return (x & 0b1111) != 0b1011; });
}
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -486,11 +518,16 @@ TEST(Expr, TestSimpleDsl) {
auto new_age_col = raw_data.get_col<int64_t>(i64_fid);
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (auto [clause, ref_func] : testcases) {
Json dsl;
dsl["bool"] = clause;
@ -511,11 +548,15 @@ TEST(Expr, TestSimpleDsl) {
TEST(Expr, TestCompare) {
using namespace milvus::query;
using namespace milvus::segcore;
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>> testcases = {
{R"("LT")", [](int a, int64_t b) { return a < b; }}, {R"("LE")", [](int a, int64_t b) { return a <= b; }},
{R"("GT")", [](int a, int64_t b) { return a > b; }}, {R"("GE")", [](int a, int64_t b) { return a >= b; }},
{R"("EQ")", [](int a, int64_t b) { return a == b; }}, {R"("NE")", [](int a, int64_t b) { return a != b; }},
};
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>>
testcases = {
{R"("LT")", [](int a, int64_t b) { return a < b; }},
{R"("LE")", [](int a, int64_t b) { return a <= b; }},
{R"("GT")", [](int a, int64_t b) { return a > b; }},
{R"("GE")", [](int a, int64_t b) { return a >= b; }},
{R"("EQ")", [](int a, int64_t b) { return a == b; }},
{R"("NE")", [](int a, int64_t b) { return a != b; }},
};
std::string dsl_string_tpl = R"({
"bool": {
@ -545,7 +586,8 @@ TEST(Expr, TestCompare) {
}
})";
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i32_fid = schema->AddDebugField("age1", DataType::INT32);
auto i64_fid = schema->AddDebugField("age2", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -559,14 +601,21 @@ TEST(Expr, TestCompare) {
auto raw_data = DataGen(schema, N, iter);
auto new_age1_col = raw_data.get_col<int>(i32_fid);
auto new_age2_col = raw_data.get_col<int64_t>(i64_fid);
age1_col.insert(age1_col.end(), new_age1_col.begin(), new_age1_col.end());
age2_col.insert(age2_col.end(), new_age2_col.begin(), new_age2_col.end());
age1_col.insert(
age1_col.end(), new_age1_col.begin(), new_age1_col.end());
age2_col.insert(
age2_col.end(), new_age2_col.begin(), new_age2_col.end());
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (auto [clause, ref_func] : testcases) {
auto dsl_string = boost::str(boost::format(dsl_string_tpl) % clause);
auto plan = CreatePlan(*schema, dsl_string);
@ -580,7 +629,8 @@ TEST(Expr, TestCompare) {
auto val1 = age1_col[i];
auto val2 = age2_col[i];
auto ref = ref_func(val1, val2);
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2;
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!"
<< boost::format("[%1%, %2%]") % val1 % val2;
}
}
}
@ -588,14 +638,15 @@ TEST(Expr, TestCompare) {
TEST(Expr, TestCompareWithScalarIndex) {
using namespace milvus::query;
using namespace milvus::segcore;
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>> testcases = {
{R"(LessThan)", [](int a, int64_t b) { return a < b; }},
{R"(LessEqual)", [](int a, int64_t b) { return a <= b; }},
{R"(GreaterThan)", [](int a, int64_t b) { return a > b; }},
{R"(GreaterEqual)", [](int a, int64_t b) { return a >= b; }},
{R"(Equal)", [](int a, int64_t b) { return a == b; }},
{R"(NotEqual)", [](int a, int64_t b) { return a != b; }},
};
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>>
testcases = {
{R"(LessThan)", [](int a, int64_t b) { return a < b; }},
{R"(LessEqual)", [](int a, int64_t b) { return a <= b; }},
{R"(GreaterThan)", [](int a, int64_t b) { return a > b; }},
{R"(GreaterEqual)", [](int a, int64_t b) { return a >= b; }},
{R"(Equal)", [](int a, int64_t b) { return a == b; }},
{R"(NotEqual)", [](int a, int64_t b) { return a != b; }},
};
std::string serialized_expr_plan = R"(vector_anns: <
field_id: %1%
@ -622,7 +673,8 @@ TEST(Expr, TestCompareWithScalarIndex) {
>)";
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -656,11 +708,14 @@ TEST(Expr, TestCompareWithScalarIndex) {
ExecExprVisitor visitor(*seg, seg->get_row_count(), MAX_TIMESTAMP);
for (auto [clause, ref_func] : testcases) {
auto dsl_string = boost::format(serialized_expr_plan) % vec_fid.get() % clause % i32_fid.get() %
proto::schema::DataType_Name(int(DataType::INT32)) % i64_fid.get() %
proto::schema::DataType_Name(int(DataType::INT64));
auto binary_plan = translate_text_plan_to_binary_plan(dsl_string.str().data());
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
auto dsl_string =
boost::format(serialized_expr_plan) % vec_fid.get() % clause %
i32_fid.get() % proto::schema::DataType_Name(int(DataType::INT32)) %
i64_fid.get() % proto::schema::DataType_Name(int(DataType::INT64));
auto binary_plan =
translate_text_plan_to_binary_plan(dsl_string.str().data());
auto plan = CreateSearchPlanByExpr(
*schema, binary_plan.data(), binary_plan.size());
// std::cout << ShowPlanNodeVisitor().call_child(*plan->plan_node_) << std::endl;
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
EXPECT_EQ(final.size(), N);
@ -670,7 +725,8 @@ TEST(Expr, TestCompareWithScalarIndex) {
auto val1 = age32_col[i];
auto val2 = age64_col[i];
auto ref = ref_func(val1, val2);
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2;
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!"
<< boost::format("[%1%, %2%]") % val1 % val2;
}
}
}
@ -678,14 +734,22 @@ TEST(Expr, TestCompareWithScalarIndex) {
TEST(Expr, TestCompareWithScalarIndexMaris) {
using namespace milvus::query;
using namespace milvus::segcore;
std::vector<std::tuple<std::string, std::function<bool(std::string, std::string)>>> testcases = {
{R"(LessThan)", [](std::string a, std::string b) { return a.compare(b) < 0; }},
{R"(LessEqual)", [](std::string a, std::string b) { return a.compare(b) <= 0; }},
{R"(GreaterThan)", [](std::string a, std::string b) { return a.compare(b) > 0; }},
{R"(GreaterEqual)", [](std::string a, std::string b) { return a.compare(b) >= 0; }},
{R"(Equal)", [](std::string a, std::string b) { return a.compare(b) == 0; }},
{R"(NotEqual)", [](std::string a, std::string b) { return a.compare(b) != 0; }},
};
std::vector<
std::tuple<std::string, std::function<bool(std::string, std::string)>>>
testcases = {
{R"(LessThan)",
[](std::string a, std::string b) { return a.compare(b) < 0; }},
{R"(LessEqual)",
[](std::string a, std::string b) { return a.compare(b) <= 0; }},
{R"(GreaterThan)",
[](std::string a, std::string b) { return a.compare(b) > 0; }},
{R"(GreaterEqual)",
[](std::string a, std::string b) { return a.compare(b) >= 0; }},
{R"(Equal)",
[](std::string a, std::string b) { return a.compare(b) == 0; }},
{R"(NotEqual)",
[](std::string a, std::string b) { return a.compare(b) != 0; }},
};
const char* serialized_expr_plan = R"(vector_anns: <
field_id: %1%
@ -712,7 +776,8 @@ TEST(Expr, TestCompareWithScalarIndexMaris) {
>)";
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto str1_fid = schema->AddDebugField("string1", DataType::VARCHAR);
auto str2_fid = schema->AddDebugField("string2", DataType::VARCHAR);
schema->set_primary_field_id(str1_fid);
@ -744,10 +809,12 @@ TEST(Expr, TestCompareWithScalarIndexMaris) {
ExecExprVisitor visitor(*seg, seg->get_row_count(), MAX_TIMESTAMP);
for (auto [clause, ref_func] : testcases) {
auto dsl_string =
boost::format(serialized_expr_plan) % vec_fid.get() % clause % str1_fid.get() % str2_fid.get();
auto binary_plan = translate_text_plan_to_binary_plan(dsl_string.str().data());
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
auto dsl_string = boost::format(serialized_expr_plan) % vec_fid.get() %
clause % str1_fid.get() % str2_fid.get();
auto binary_plan =
translate_text_plan_to_binary_plan(dsl_string.str().data());
auto plan = CreateSearchPlanByExpr(
*schema, binary_plan.data(), binary_plan.size());
// std::cout << ShowPlanNodeVisitor().call_child(*plan->plan_node_) << std::endl;
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
EXPECT_EQ(final.size(), N);
@ -757,7 +824,8 @@ TEST(Expr, TestCompareWithScalarIndexMaris) {
auto val1 = str1_col[i];
auto val2 = str2_col[i];
auto ref = ref_func(val1, val2);
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2;
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!"
<< boost::format("[%1%, %2%]") % val1 % val2;
}
}
}
@ -765,101 +833,115 @@ TEST(Expr, TestCompareWithScalarIndexMaris) {
TEST(Expr, TestBinaryArithOpEvalRange) {
using namespace milvus::query;
using namespace milvus::segcore;
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>> testcases = {
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
{R"("EQ": {
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>>
testcases = {
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
{R"("EQ": {
"ADD": {
"right_operand": 4,
"value": 8
}
})",
[](int8_t v) { return (v + 4) == 8; }, DataType::INT8},
{R"("EQ": {
[](int8_t v) { return (v + 4) == 8; },
DataType::INT8},
{R"("EQ": {
"SUB": {
"right_operand": 500,
"value": 1500
}
})",
[](int16_t v) { return (v - 500) == 1500; }, DataType::INT16},
{R"("EQ": {
[](int16_t v) { return (v - 500) == 1500; },
DataType::INT16},
{R"("EQ": {
"MUL": {
"right_operand": 2,
"value": 4000
}
})",
[](int32_t v) { return (v * 2) == 4000; }, DataType::INT32},
{R"("EQ": {
[](int32_t v) { return (v * 2) == 4000; },
DataType::INT32},
{R"("EQ": {
"DIV": {
"right_operand": 2,
"value": 1000
}
})",
[](int64_t v) { return (v / 2) == 1000; }, DataType::INT64},
{R"("EQ": {
[](int64_t v) { return (v / 2) == 1000; },
DataType::INT64},
{R"("EQ": {
"MOD": {
"right_operand": 100,
"value": 0
}
})",
[](int32_t v) { return (v % 100) == 0; }, DataType::INT32},
{R"("EQ": {
[](int32_t v) { return (v % 100) == 0; },
DataType::INT32},
{R"("EQ": {
"ADD": {
"right_operand": 500,
"value": 2500
}
})",
[](float v) { return (v + 500) == 2500; }, DataType::FLOAT},
{R"("EQ": {
[](float v) { return (v + 500) == 2500; },
DataType::FLOAT},
{R"("EQ": {
"ADD": {
"right_operand": 500,
"value": 2500
}
})",
[](double v) { return (v + 500) == 2500; }, DataType::DOUBLE},
// Add test cases for BinaryArithOpEvalRangeExpr NE of various data types
{R"("NE": {
[](double v) { return (v + 500) == 2500; },
DataType::DOUBLE},
// Add test cases for BinaryArithOpEvalRangeExpr NE of various data types
{R"("NE": {
"ADD": {
"right_operand": 500,
"value": 2500
}
})",
[](float v) { return (v + 500) != 2500; }, DataType::FLOAT},
{R"("NE": {
[](float v) { return (v + 500) != 2500; },
DataType::FLOAT},
{R"("NE": {
"SUB": {
"right_operand": 500,
"value": 2500
}
})",
[](double v) { return (v - 500) != 2500; }, DataType::DOUBLE},
{R"("NE": {
[](double v) { return (v - 500) != 2500; },
DataType::DOUBLE},
{R"("NE": {
"MUL": {
"right_operand": 2,
"value": 2
}
})",
[](int8_t v) { return (v * 2) != 2; }, DataType::INT8},
{R"("NE": {
[](int8_t v) { return (v * 2) != 2; },
DataType::INT8},
{R"("NE": {
"DIV": {
"right_operand": 2,
"value": 1000
}
})",
[](int16_t v) { return (v / 2) != 1000; }, DataType::INT16},
{R"("NE": {
[](int16_t v) { return (v / 2) != 1000; },
DataType::INT16},
{R"("NE": {
"MOD": {
"right_operand": 100,
"value": 0
}
})",
[](int32_t v) { return (v % 100) != 0; }, DataType::INT32},
{R"("NE": {
[](int32_t v) { return (v % 100) != 0; },
DataType::INT32},
{R"("NE": {
"ADD": {
"right_operand": 500,
"value": 2500
}
})",
[](int64_t v) { return (v + 500) != 2500; }, DataType::INT64},
};
[](int64_t v) { return (v + 500) != 2500; },
DataType::INT64},
};
std::string dsl_string_tmp = R"({
"bool": {
@ -917,7 +999,8 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
})";
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
@ -945,19 +1028,32 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
auto new_age_float_col = raw_data.get_col<float>(float_fid);
auto new_age_double_col = raw_data.get_col<double>(double_fid);
age8_col.insert(age8_col.end(), new_age8_col.begin(), new_age8_col.end());
age16_col.insert(age16_col.end(), new_age16_col.begin(), new_age16_col.end());
age32_col.insert(age32_col.end(), new_age32_col.begin(), new_age32_col.end());
age64_col.insert(age64_col.end(), new_age64_col.begin(), new_age64_col.end());
age_float_col.insert(age_float_col.end(), new_age_float_col.begin(), new_age_float_col.end());
age_double_col.insert(age_double_col.end(), new_age_double_col.begin(), new_age_double_col.end());
age8_col.insert(
age8_col.end(), new_age8_col.begin(), new_age8_col.end());
age16_col.insert(
age16_col.end(), new_age16_col.begin(), new_age16_col.end());
age32_col.insert(
age32_col.end(), new_age32_col.begin(), new_age32_col.end());
age64_col.insert(
age64_col.end(), new_age64_col.begin(), new_age64_col.end());
age_float_col.insert(age_float_col.end(),
new_age_float_col.begin(),
new_age_float_col.end());
age_double_col.insert(age_double_col.end(),
new_age_double_col.begin(),
new_age_double_col.end());
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (auto [clause, ref_func, dtype] : testcases) {
auto loc = dsl_string_tmp.find("@@@@@");
auto dsl_string = dsl_string_tmp;
@ -1026,28 +1122,32 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
"value": 2500.00
}
})",
"Assert \"(value.is_number_integer())\"", DataType::INT32},
"Assert \"(value.is_number_integer())\"",
DataType::INT32},
{R"("EQ": {
"ADD": {
"right_operand": 500.0,
"value": 2500
}
})",
"Assert \"(right_operand.is_number_integer())\"", DataType::INT32},
"Assert \"(right_operand.is_number_integer())\"",
DataType::INT32},
{R"("EQ": {
"ADD": {
"right_operand": 500.0,
"value": true
}
})",
"Assert \"(value.is_number())\"", DataType::FLOAT},
"Assert \"(value.is_number())\"",
DataType::FLOAT},
{R"("EQ": {
"ADD": {
"right_operand": "500",
"value": 2500.0
}
})",
"Assert \"(right_operand.is_number())\"", DataType::FLOAT},
"Assert \"(right_operand.is_number())\"",
DataType::FLOAT},
// Check unsupported arithmetic operator type
{R"("EQ": {
"EXP": {
@ -1055,7 +1155,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
"value": 2500
}
})",
"arith op(exp) not found", DataType::INT32},
"arith op(exp) not found",
DataType::INT32},
// Check unsupported data type
{R"("EQ": {
"ADD": {
@ -1063,7 +1164,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
"value": false
}
})",
"bool type is not supported", DataType::BOOL},
"bool type is not supported",
DataType::BOOL},
};
std::string dsl_string_tmp = R"({
@ -1107,7 +1209,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
})";
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::INT32);
schema->AddDebugField("FloatN", DataType::FLOAT);
schema->AddDebugField("BoolField", DataType::BOOL);
@ -1130,12 +1233,14 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
try {
auto plan = CreatePlan(*schema, dsl_string);
FAIL() << "Expected AssertionError: " << assert_info << " not thrown";
FAIL() << "Expected AssertionError: " << assert_info
<< " not thrown";
} catch (const std::exception& err) {
std::string err_msg = err.what();
ASSERT_TRUE(err_msg.find(assert_info) != std::string::npos);
} catch (...) {
FAIL() << "Expected AssertionError: " << assert_info << " not thrown";
FAIL() << "Expected AssertionError: " << assert_info
<< " not thrown";
}
}
}
@ -1143,9 +1248,10 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
using namespace milvus::query;
using namespace milvus::segcore;
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>> testcases = {
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
{R"(arith_op: Add
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>>
testcases = {
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
{R"(arith_op: Add
right_operand: <
int64_val: 4
>
@ -1153,8 +1259,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 8
>)",
[](int8_t v) { return (v + 4) == 8; }, DataType::INT8},
{R"(arith_op: Sub
[](int8_t v) { return (v + 4) == 8; },
DataType::INT8},
{R"(arith_op: Sub
right_operand: <
int64_val: 500
>
@ -1162,8 +1269,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 1500
>)",
[](int16_t v) { return (v - 500) == 1500; }, DataType::INT16},
{R"(arith_op: Mul
[](int16_t v) { return (v - 500) == 1500; },
DataType::INT16},
{R"(arith_op: Mul
right_operand: <
int64_val: 2
>
@ -1171,8 +1279,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 4000
>)",
[](int32_t v) { return (v * 2) == 4000; }, DataType::INT32},
{R"(arith_op: Div
[](int32_t v) { return (v * 2) == 4000; },
DataType::INT32},
{R"(arith_op: Div
right_operand: <
int64_val: 2
>
@ -1180,8 +1289,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 1000
>)",
[](int64_t v) { return (v / 2) == 1000; }, DataType::INT64},
{R"(arith_op: Mod
[](int64_t v) { return (v / 2) == 1000; },
DataType::INT64},
{R"(arith_op: Mod
right_operand: <
int64_val: 100
>
@ -1189,8 +1299,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 0
>)",
[](int32_t v) { return (v % 100) == 0; }, DataType::INT32},
{R"(arith_op: Add
[](int32_t v) { return (v % 100) == 0; },
DataType::INT32},
{R"(arith_op: Add
right_operand: <
float_val: 500
>
@ -1198,8 +1309,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
float_val: 2500
>)",
[](float v) { return (v + 500) == 2500; }, DataType::FLOAT},
{R"(arith_op: Add
[](float v) { return (v + 500) == 2500; },
DataType::FLOAT},
{R"(arith_op: Add
right_operand: <
float_val: 500
>
@ -1207,8 +1319,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
float_val: 2500
>)",
[](double v) { return (v + 500) == 2500; }, DataType::DOUBLE},
{R"(arith_op: Add
[](double v) { return (v + 500) == 2500; },
DataType::DOUBLE},
{R"(arith_op: Add
right_operand: <
float_val: 500
>
@ -1216,8 +1329,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
float_val: 2000
>)",
[](float v) { return (v + 500) != 2000; }, DataType::FLOAT},
{R"(arith_op: Sub
[](float v) { return (v + 500) != 2000; },
DataType::FLOAT},
{R"(arith_op: Sub
right_operand: <
float_val: 500
>
@ -1225,8 +1339,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
float_val: 2500
>)",
[](double v) { return (v - 500) != 2000; }, DataType::DOUBLE},
{R"(arith_op: Mul
[](double v) { return (v - 500) != 2000; },
DataType::DOUBLE},
{R"(arith_op: Mul
right_operand: <
int64_val: 2
>
@ -1234,8 +1349,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 2
>)",
[](int8_t v) { return (v * 2) != 2; }, DataType::INT8},
{R"(arith_op: Div
[](int8_t v) { return (v * 2) != 2; },
DataType::INT8},
{R"(arith_op: Div
right_operand: <
int64_val: 2
>
@ -1243,8 +1359,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 2000
>)",
[](int16_t v) { return (v / 2) != 2000; }, DataType::INT16},
{R"(arith_op: Mod
[](int16_t v) { return (v / 2) != 2000; },
DataType::INT16},
{R"(arith_op: Mod
right_operand: <
int64_val: 100
>
@ -1252,8 +1369,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 1
>)",
[](int32_t v) { return (v % 100) != 1; }, DataType::INT32},
{R"(arith_op: Add
[](int32_t v) { return (v % 100) != 1; },
DataType::INT32},
{R"(arith_op: Add
right_operand: <
int64_val: 500
>
@ -1261,8 +1379,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
value: <
int64_val: 2000
>)",
[](int64_t v) { return (v + 500) != 2000; }, DataType::INT64},
};
[](int64_t v) { return (v + 500) != 2000; },
DataType::INT64},
};
std::string serialized_expr_plan = R"(vector_anns: <
field_id: %1%
@ -1288,7 +1407,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
@@@@)";
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
@ -1369,7 +1489,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
seg->LoadIndex(load_index_info);
auto seg_promote = dynamic_cast<SegmentSealedImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
int offset = 0;
for (auto [clause, ref_func, dtype] : testcases) {
auto loc = serialized_expr_plan.find("@@@@@");
@ -1400,8 +1521,10 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
ASSERT_TRUE(false) << "No test case defined for this data type";
}
auto binary_plan = translate_text_plan_to_binary_plan(expr.str().data());
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
auto binary_plan =
translate_text_plan_to_binary_plan(expr.str().data());
auto plan = CreateSearchPlanByExpr(
*schema, binary_plan.data(), binary_plan.size());
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
EXPECT_EQ(final.size(), N);

View File

@ -51,7 +51,11 @@ TEST(Growing, RealCount) {
ASSERT_EQ(offset, 0);
auto dataset = DataGen(schema, c);
auto pks = dataset.get_col<int64_t>(pk);
segment->Insert(offset, c, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(offset,
c,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
// no delete.
ASSERT_EQ(c, segment->get_real_count());
@ -62,7 +66,8 @@ TEST(Growing, RealCount) {
ASSERT_EQ(del_offset1, 0);
auto del_ids1 = GenPKs(pks.begin(), pks.begin() + half);
auto del_tss1 = GenTss(half, c);
auto status = segment->Delete(del_offset1, half, del_ids1.get(), del_tss1.data());
auto status =
segment->Delete(del_offset1, half, del_ids1.get(), del_tss1.data());
ASSERT_TRUE(status.ok());
ASSERT_EQ(c - half, segment->get_real_count());
@ -70,7 +75,8 @@ TEST(Growing, RealCount) {
auto del_offset2 = segment->PreDelete(half);
ASSERT_EQ(del_offset2, half);
auto del_tss2 = GenTss(half, c + half);
status = segment->Delete(del_offset2, half, del_ids1.get(), del_tss2.data());
status =
segment->Delete(del_offset2, half, del_ids1.get(), del_tss2.data());
ASSERT_TRUE(status.ok());
ASSERT_EQ(c - half, segment->get_real_count());

View File

@ -27,12 +27,15 @@ TEST(FloatVecIndex, All) {
auto metric_type = knowhere::metric::L2;
indexcgo::TypeParams type_params;
indexcgo::IndexParams index_params;
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
std::tie(type_params, index_params) =
generate_params(index_type, metric_type);
std::string type_params_str, index_params_str;
bool ok;
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
ok = google::protobuf::TextFormat::PrintToString(type_params,
&type_params_str);
assert(ok);
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDataset(NB, metric_type, false);
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
@ -44,7 +47,11 @@ TEST(FloatVecIndex, All) {
CIndex copy_index;
{
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
@ -56,7 +63,11 @@ TEST(FloatVecIndex, All) {
ASSERT_EQ(Success, status.error_code);
}
{
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&copy_index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
@ -79,12 +90,15 @@ TEST(BinaryVecIndex, All) {
auto metric_type = knowhere::metric::JACCARD;
indexcgo::TypeParams type_params;
indexcgo::IndexParams index_params;
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
std::tie(type_params, index_params) =
generate_params(index_type, metric_type);
std::string type_params_str, index_params_str;
bool ok;
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
ok = google::protobuf::TextFormat::PrintToString(type_params,
&type_params_str);
assert(ok);
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDataset(NB, metric_type, true);
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
@ -96,7 +110,11 @@ TEST(BinaryVecIndex, All) {
CIndex copy_index;
{
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
@ -108,7 +126,11 @@ TEST(BinaryVecIndex, All) {
ASSERT_EQ(Success, status.error_code);
}
{
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&copy_index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
@ -149,11 +171,16 @@ TEST(CBoolIndexTest, All) {
CIndex copy_index;
{
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
status = BuildScalarIndex(index, half_ds->GetRows(), half_ds->GetTensor());
status = BuildScalarIndex(
index, half_ds->GetRows(), half_ds->GetTensor());
ASSERT_EQ(Success, status.error_code);
}
{
@ -161,8 +188,11 @@ TEST(CBoolIndexTest, All) {
ASSERT_EQ(Success, status.error_code);
}
{
status =
CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&copy_index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
@ -201,7 +231,11 @@ TEST(CInt64IndexTest, All) {
CIndex copy_index;
{
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
@ -213,8 +247,11 @@ TEST(CInt64IndexTest, All) {
ASSERT_EQ(Success, status.error_code);
}
{
status =
CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&copy_index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
@ -255,11 +292,16 @@ TEST(CStringIndexTest, All) {
CIndex copy_index;
{
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{
status = BuildScalarIndex(index, (str_ds->GetRows()), (str_ds->GetTensor()));
status = BuildScalarIndex(
index, (str_ds->GetRows()), (str_ds->GetTensor()));
ASSERT_EQ(Success, status.error_code);
}
{
@ -267,8 +309,11 @@ TEST(CStringIndexTest, All) {
ASSERT_EQ(Success, status.error_code);
}
{
status =
CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
status = CreateIndex(dtype,
type_params_str.c_str(),
index_params_str.c_str(),
&copy_index,
c_storage_config);
ASSERT_EQ(Success, status.error_code);
}
{

View File

@ -34,11 +34,14 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
auto param = GetParam();
index_type = param.first;
metric_type = param.second;
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
std::tie(type_params, index_params) =
generate_params(index_type, metric_type);
bool ok;
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
ok = google::protobuf::TextFormat::PrintToString(type_params,
&type_params_str);
assert(ok);
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
search_conf = generate_search_conf(index_type, metric_type);
@ -65,11 +68,13 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
if (!is_binary) {
xb_data = dataset.get_col<float>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
xq_dataset = knowhere::GenDataSet(
NQ, DIM, xb_data.data() + DIM * query_offset);
} else {
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data());
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_bin_data.data() + DIM * query_offset);
xq_dataset = knowhere::GenDataSet(
NQ, DIM, xb_bin_data.data() + DIM * query_offset);
}
}
@ -97,19 +102,28 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
INSTANTIATE_TEST_CASE_P(
IndexTypeParameters,
IndexWrapperTest,
::testing::Values(std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::TANIMOTO),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, knowhere::metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::metric::L2)));
::testing::Values(
std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
knowhere::metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
knowhere::metric::TANIMOTO),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
knowhere::metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::metric::L2)));
TEST_P(IndexWrapperTest, BuildAndQuery) {
auto index = milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
vec_field_data_type, type_params_str.c_str(), index_params_str.c_str(), storage_config_);
vec_field_data_type,
type_params_str.c_str(),
index_params_str.c_str(),
storage_config_);
auto dataset = GenDataset(NB, metric_type, is_binary);
knowhere::DataSetPtr xb_dataset;
@ -125,9 +139,14 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
ASSERT_NO_THROW(index->Build(xb_dataset));
auto binary_set = index->Serialize();
auto copy_index = milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
vec_field_data_type, type_params_str.c_str(), index_params_str.c_str(), storage_config_);
auto vec_index = static_cast<milvus::indexbuilder::VecIndexCreator*>(copy_index.get());
auto copy_index =
milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
vec_field_data_type,
type_params_str.c_str(),
index_params_str.c_str(),
storage_config_);
auto vec_index =
static_cast<milvus::indexbuilder::VecIndexCreator*>(copy_index.get());
ASSERT_EQ(vec_index->dim(), DIM);
ASSERT_NO_THROW(vec_index->Load(binary_set));

View File

@ -149,7 +149,8 @@ TEST(Indexing, BinaryBruteForce) {
auto metric_type = knowhere::metric::JACCARD;
auto result_count = topk * num_queries;
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, dim, metric_type);
auto vec_fid = schema->AddDebugField(
"vecbin", DataType::VECTOR_BINARY, dim, metric_type);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
auto dataset = DataGen(schema, N, 10);
auto bin_vec = dataset.get_col<uint8_t>(vec_fid);
@ -163,7 +164,8 @@ TEST(Indexing, BinaryBruteForce) {
query_data //
};
auto sub_result = query::BruteForceSearch(search_dataset, bin_vec.data(), N, knowhere::Json(), nullptr);
auto sub_result = query::BruteForceSearch(
search_dataset, bin_vec.data(), N, knowhere::Json(), nullptr);
SearchResult sr;
sr.total_nq_ = num_queries;
@ -222,7 +224,8 @@ TEST(Indexing, Naive) {
create_index_info.field_type = DataType::VECTOR_FLOAT;
create_index_info.metric_type = knowhere::metric::L2;
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, nullptr);
auto build_conf = knowhere::Json{
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
@ -276,7 +279,8 @@ TEST(Indexing, Naive) {
if (result->seg_offsets_[i] < N / 2) {
std::cout << "WRONG: ";
}
std::cout << result->seg_offsets_[i] << "->" << result->distances_[i] << std::endl;
std::cout << result->seg_offsets_[i] << "->" << result->distances_[i]
<< std::endl;
}
}
@ -319,11 +323,13 @@ class IndexTest : public ::testing::TestWithParam<Param> {
if (!is_binary) {
xb_data = dataset.get_col<float>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
xq_dataset = knowhere::GenDataSet(
NQ, DIM, xb_data.data() + DIM * query_offset);
} else {
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data());
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_bin_data.data() + DIM * query_offset);
xq_dataset = knowhere::GenDataSet(
NQ, DIM, xb_bin_data.data() + DIM * query_offset);
}
}
@ -351,19 +357,25 @@ class IndexTest : public ::testing::TestWithParam<Param> {
// Instantiates the value-parameterized IndexTest suite under the prefix
// IndexTypeParameters, once per (index type, metric type) pair passed to
// ::testing::Values.
// NOTE(review): the Values(...) argument appears twice in this listing, first
// in its long-line layout and then re-wrapped; both list the same pairs in the
// same order.
INSTANTIATE_TEST_CASE_P(
IndexTypeParameters,
IndexTest,
::testing::Values(std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::TANIMOTO),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, knowhere::metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2),
// The CI unit-test run does not start MinIO, so the DiskANN index case is
// disabled for now.
// #ifdef BUILD_DISK_ANN
// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2),
// #endif
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::metric::L2)));
::testing::Values(
std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
knowhere::metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
knowhere::metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
knowhere::metric::TANIMOTO),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
knowhere::metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2),
// The CI unit-test run does not start MinIO, so the DiskANN index case is
// disabled for now.
// #ifdef BUILD_DISK_ANN
// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2),
// #endif
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::metric::L2)));
TEST_P(IndexTest, BuildAndQuery) {
milvus::index::CreateIndexInfo create_index_info;
@ -376,11 +388,14 @@ TEST_P(IndexTest, BuildAndQuery) {
milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
milvus::storage::IndexMeta index_meta{3, 100, 1000, 1};
auto file_manager =
std::make_shared<milvus::storage::DiskFileManagerImpl>(field_data_meta, index_meta, storage_config_);
index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, file_manager);
std::make_shared<milvus::storage::DiskFileManagerImpl>(
field_data_meta, index_meta, storage_config_);
index = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, file_manager);
#endif
} else {
index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
index = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, nullptr);
}
ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));
milvus::index::IndexBasePtr new_index;
@ -394,8 +409,10 @@ TEST_P(IndexTest, BuildAndQuery) {
milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
milvus::storage::IndexMeta index_meta{3, 100, 1000, 1};
auto file_manager =
std::make_shared<milvus::storage::DiskFileManagerImpl>(field_data_meta, index_meta, storage_config_);
new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, file_manager);
std::make_shared<milvus::storage::DiskFileManagerImpl>(
field_data_meta, index_meta, storage_config_);
new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, file_manager);
vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());

View File

@ -30,7 +30,8 @@ class MinioChunkManagerTest : public testing::Test {
// Fixture SetUp(): (re)creates chunk_manager_ as a MinioChunkManager built from
// get_default_storage_config().
// NOTE(review): the assignment appears twice in this listing (single-line and
// wrapped layout of the same statement).
virtual void
SetUp() {
chunk_manager_ = std::make_unique<MinioChunkManager>(get_default_storage_config());
chunk_manager_ =
std::make_unique<MinioChunkManager>(get_default_storage_config());
}
protected:
@ -58,7 +59,8 @@ TEST_F(MinioChunkManagerTest, BucketNegtive) {
try {
chunk_manager_->CreateBucket(testBucketName);
} catch (S3ErrorException& e) {
EXPECT_TRUE(std::string(e.what()).find("BucketAlreadyOwnedByYou") != string::npos);
EXPECT_TRUE(std::string(e.what()).find("BucketAlreadyOwnedByYou") !=
string::npos);
}
chunk_manager_->DeleteBucket(testBucketName);
}

View File

@ -25,7 +25,8 @@ namespace wrapper = milvus::storage;
// Dumps a payload buffer to disk: opens /tmp/wrapper_test_data.dat in binary
// mode, writes cb.length bytes starting at cb.data, then closes the stream.
// No stream-state check is performed after the write.
// NOTE(review): the data_file declaration appears twice in this listing
// (single-line and wrapped layout of the same statement).
static void
WriteToFile(CBuffer cb) {
auto data_file = std::ofstream("/tmp/wrapper_test_data.dat", std::ios::binary);
auto data_file =
std::ofstream("/tmp/wrapper_test_data.dat", std::ios::binary);
data_file.write(cb.data, cb.length);
data_file.close();
}
@ -40,7 +41,8 @@ ReadFromFile() {
std::shared_ptr<arrow::Table> table;
std::unique_ptr<parquet::arrow::FileReader> reader;
auto st = parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader);
auto st =
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader);
if (!st.ok())
return nullptr;
st = reader->ReadTable(&table);
@ -64,12 +66,14 @@ TEST(storage, inoutstream) {
ASSERT_NE(table, nullptr);
auto os = std::make_shared<milvus::storage::PayloadOutputStream>();
st = parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), os, 1024);
st = parquet::arrow::WriteTable(
*table, arrow::default_memory_pool(), os, 1024);
ASSERT_TRUE(st.ok());
const uint8_t* buf = os->Buffer().data();
int64_t buf_size = os->Buffer().size();
auto is = std::make_shared<milvus::storage::PayloadInputStream>(buf, buf_size);
auto is =
std::make_shared<milvus::storage::PayloadInputStream>(buf, buf_size);
std::shared_ptr<arrow::Table> intable;
std::unique_ptr<parquet::arrow::FileReader> reader;
@ -104,7 +108,8 @@ TEST(storage, boolean) {
auto nums = GetPayloadLengthFromWriter(payload);
ASSERT_EQ(nums, 4);
auto reader = NewPayloadReader(int(milvus::DataType::BOOL), (uint8_t*)cb.data, cb.length);
auto reader = NewPayloadReader(
int(milvus::DataType::BOOL), (uint8_t*)cb.data, cb.length);
bool* values;
int length = GetPayloadLengthFromReader(reader);
ASSERT_EQ(length, 4);
@ -119,46 +124,77 @@ TEST(storage, boolean) {
ReleasePayloadReader(reader);
}
// NUMERIC_TEST(TEST_NAME, COLUMN_TYPE, DATA_TYPE, ADD_FUNC, GET_FUNC, ARRAY_TYPE)
// expands to TEST(wrapper, TEST_NAME), a write/read round-trip check:
//   1. create a payload writer for COLUMN_TYPE and add the four DATA_TYPE
//      values {-1, 1, -100, 100} with ADD_FUNC;
//   2. finish the writer and assert its buffer is non-empty and reports 4 rows;
//   3. create a payload reader over that buffer, fetch the values with
//      GET_FUNC, and assert the length is 4 and every element matches;
//   4. release both the writer and the reader.
// ARRAY_TYPE is accepted but not referenced anywhere in the macro body.
// NOTE(review): this listing contains the macro twice (original layout, then
// the re-wrapped layout); the closing brace on the last line is shared.
#define NUMERIC_TEST(TEST_NAME, COLUMN_TYPE, DATA_TYPE, ADD_FUNC, GET_FUNC, ARRAY_TYPE) \
TEST(wrapper, TEST_NAME) { \
auto payload = NewPayloadWriter(COLUMN_TYPE); \
DATA_TYPE data[] = {-1, 1, -100, 100}; \
\
auto st = ADD_FUNC(payload, data, 4); \
ASSERT_EQ(st.error_code, ErrorCode::Success); \
st = FinishPayloadWriter(payload); \
ASSERT_EQ(st.error_code, ErrorCode::Success); \
auto cb = GetPayloadBufferFromWriter(payload); \
ASSERT_GT(cb.length, 0); \
ASSERT_NE(cb.data, nullptr); \
auto nums = GetPayloadLengthFromWriter(payload); \
ASSERT_EQ(nums, 4); \
\
auto reader = NewPayloadReader(COLUMN_TYPE, (uint8_t*)cb.data, cb.length); \
DATA_TYPE* values; \
int length; \
st = GET_FUNC(reader, &values, &length); \
ASSERT_EQ(st.error_code, ErrorCode::Success); \
ASSERT_NE(values, nullptr); \
ASSERT_EQ(length, 4); \
length = GetPayloadLengthFromReader(reader); \
ASSERT_EQ(length, 4); \
\
for (int i = 0; i < length; i++) { \
ASSERT_EQ(data[i], values[i]); \
} \
\
ReleasePayloadWriter(payload); \
ReleasePayloadReader(reader); \
#define NUMERIC_TEST( \
TEST_NAME, COLUMN_TYPE, DATA_TYPE, ADD_FUNC, GET_FUNC, ARRAY_TYPE) \
TEST(wrapper, TEST_NAME) { \
auto payload = NewPayloadWriter(COLUMN_TYPE); \
DATA_TYPE data[] = {-1, 1, -100, 100}; \
\
auto st = ADD_FUNC(payload, data, 4); \
ASSERT_EQ(st.error_code, ErrorCode::Success); \
st = FinishPayloadWriter(payload); \
ASSERT_EQ(st.error_code, ErrorCode::Success); \
auto cb = GetPayloadBufferFromWriter(payload); \
ASSERT_GT(cb.length, 0); \
ASSERT_NE(cb.data, nullptr); \
auto nums = GetPayloadLengthFromWriter(payload); \
ASSERT_EQ(nums, 4); \
\
auto reader = \
NewPayloadReader(COLUMN_TYPE, (uint8_t*)cb.data, cb.length); \
DATA_TYPE* values; \
int length; \
st = GET_FUNC(reader, &values, &length); \
ASSERT_EQ(st.error_code, ErrorCode::Success); \
ASSERT_NE(values, nullptr); \
ASSERT_EQ(length, 4); \
length = GetPayloadLengthFromReader(reader); \
ASSERT_EQ(length, 4); \
\
for (int i = 0; i < length; i++) { \
ASSERT_EQ(data[i], values[i]); \
} \
\
ReleasePayloadWriter(payload); \
ReleasePayloadReader(reader); \
}
// Instantiate the NUMERIC_TEST round-trip case for each numeric payload type:
// int8, int16, int32, int64, float32 (float) and float64 (double), each paired
// with its Add*ToPayload / Get*FromPayload functions.
// NOTE(review): each instantiation appears twice in this listing, first in the
// long-line layout and then with one argument per line.
NUMERIC_TEST(int8, int(milvus::DataType::INT8), int8_t, AddInt8ToPayload, GetInt8FromPayload, arrow::Int8Array)
NUMERIC_TEST(int16, int(milvus::DataType::INT16), int16_t, AddInt16ToPayload, GetInt16FromPayload, arrow::Int16Array)
NUMERIC_TEST(int32, int(milvus::DataType::INT32), int32_t, AddInt32ToPayload, GetInt32FromPayload, arrow::Int32Array)
NUMERIC_TEST(int64, int(milvus::DataType::INT64), int64_t, AddInt64ToPayload, GetInt64FromPayload, arrow::Int64Array)
NUMERIC_TEST(float32, int(milvus::DataType::FLOAT), float, AddFloatToPayload, GetFloatFromPayload, arrow::FloatArray)
NUMERIC_TEST(
float64, int(milvus::DataType::DOUBLE), double, AddDoubleToPayload, GetDoubleFromPayload, arrow::DoubleArray)
NUMERIC_TEST(int8,
int(milvus::DataType::INT8),
int8_t,
AddInt8ToPayload,
GetInt8FromPayload,
arrow::Int8Array)
NUMERIC_TEST(int16,
int(milvus::DataType::INT16),
int16_t,
AddInt16ToPayload,
GetInt16FromPayload,
arrow::Int16Array)
NUMERIC_TEST(int32,
int(milvus::DataType::INT32),
int32_t,
AddInt32ToPayload,
GetInt32FromPayload,
arrow::Int32Array)
NUMERIC_TEST(int64,
int(milvus::DataType::INT64),
int64_t,
AddInt64ToPayload,
GetInt64FromPayload,
arrow::Int64Array)
NUMERIC_TEST(float32,
int(milvus::DataType::FLOAT),
float,
AddFloatToPayload,
GetFloatFromPayload,
arrow::FloatArray)
NUMERIC_TEST(float64,
int(milvus::DataType::DOUBLE),
double,
AddDoubleToPayload,
GetDoubleFromPayload,
arrow::DoubleArray)
TEST(storage, stringarray) {
auto payload = NewPayloadWriter(int(milvus::DataType::VARCHAR));
@ -179,7 +215,8 @@ TEST(storage, stringarray) {
auto nums = GetPayloadLengthFromWriter(payload);
ASSERT_EQ(nums, 3);
auto reader = NewPayloadReader(int(milvus::DataType::VARCHAR), (uint8_t*)cb.data, cb.length);
auto reader = NewPayloadReader(
int(milvus::DataType::VARCHAR), (uint8_t*)cb.data, cb.length);
int length = GetPayloadLengthFromReader(reader);
ASSERT_EQ(length, 3);
char *v0, *v1, *v2;
@ -214,7 +251,8 @@ TEST(storage, stringarray) {
TEST(storage, binary_vector) {
int DIM = 16;
auto payload = NewVectorPayloadWriter(int(milvus::DataType::VECTOR_BINARY), DIM);
auto payload =
NewVectorPayloadWriter(int(milvus::DataType::VECTOR_BINARY), DIM);
uint8_t data[] = {0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8};
auto st = AddBinaryVectorToPayload(payload, data, 16, 4);
@ -227,7 +265,8 @@ TEST(storage, binary_vector) {
auto nums = GetPayloadLengthFromWriter(payload);
ASSERT_EQ(nums, 4);
auto reader = NewPayloadReader(int(milvus::DataType::VECTOR_BINARY), (uint8_t*)cb.data, cb.length);
auto reader = NewPayloadReader(
int(milvus::DataType::VECTOR_BINARY), (uint8_t*)cb.data, cb.length);
uint8_t* values;
int length;
int dim;
@ -249,7 +288,8 @@ TEST(storage, binary_vector) {
TEST(storage, binary_vector_empty) {
int DIM = 16;
auto payload = NewVectorPayloadWriter(int(milvus::DataType::VECTOR_BINARY), DIM);
auto payload =
NewVectorPayloadWriter(int(milvus::DataType::VECTOR_BINARY), DIM);
auto st = FinishPayloadWriter(payload);
ASSERT_EQ(st.error_code, ErrorCode::Success);
auto cb = GetPayloadBufferFromWriter(payload);
@ -257,7 +297,8 @@ TEST(storage, binary_vector_empty) {
// ASSERT_EQ(cb.data, nullptr);
auto nums = GetPayloadLengthFromWriter(payload);
ASSERT_EQ(nums, 0);
auto reader = NewPayloadReader(int(milvus::DataType::VECTOR_BINARY), (uint8_t*)cb.data, cb.length);
auto reader = NewPayloadReader(
int(milvus::DataType::VECTOR_BINARY), (uint8_t*)cb.data, cb.length);
ASSERT_EQ(0, GetPayloadLengthFromReader(reader));
// ASSERT_EQ(reader, nullptr);
ReleasePayloadWriter(payload);
@ -266,7 +307,8 @@ TEST(storage, binary_vector_empty) {
TEST(storage, float_vector) {
int DIM = 2;
auto payload = NewVectorPayloadWriter(int(milvus::DataType::VECTOR_FLOAT), DIM);
auto payload =
NewVectorPayloadWriter(int(milvus::DataType::VECTOR_FLOAT), DIM);
float data[] = {1, 2, 3, 4, 5, 6, 7, 8};
auto st = AddFloatVectorToPayload(payload, data, DIM, 4);
@ -279,7 +321,8 @@ TEST(storage, float_vector) {
auto nums = GetPayloadLengthFromWriter(payload);
ASSERT_EQ(nums, 4);
auto reader = NewPayloadReader(int(milvus::DataType::VECTOR_FLOAT), (uint8_t*)cb.data, cb.length);
auto reader = NewPayloadReader(
int(milvus::DataType::VECTOR_FLOAT), (uint8_t*)cb.data, cb.length);
float* values;
int length;
int dim;
@ -301,7 +344,8 @@ TEST(storage, float_vector) {
TEST(storage, float_vector_empty) {
int DIM = 2;
auto payload = NewVectorPayloadWriter(int(milvus::DataType::VECTOR_FLOAT), DIM);
auto payload =
NewVectorPayloadWriter(int(milvus::DataType::VECTOR_FLOAT), DIM);
auto st = FinishPayloadWriter(payload);
ASSERT_EQ(st.error_code, ErrorCode::Success);
auto cb = GetPayloadBufferFromWriter(payload);
@ -309,7 +353,8 @@ TEST(storage, float_vector_empty) {
// ASSERT_EQ(cb.data, nullptr);
auto nums = GetPayloadLengthFromWriter(payload);
ASSERT_EQ(nums, 0);
auto reader = NewPayloadReader(int(milvus::DataType::VECTOR_FLOAT), (uint8_t*)cb.data, cb.length);
auto reader = NewPayloadReader(
int(milvus::DataType::VECTOR_FLOAT), (uint8_t*)cb.data, cb.length);
ASSERT_EQ(0, GetPayloadLengthFromReader(reader));
// ASSERT_EQ(reader, nullptr);
ReleasePayloadWriter(payload);

View File

@ -29,8 +29,12 @@ namespace spb = proto::schema;
static SchemaPtr
getStandardSchema() {
auto schema = std::make_shared<Schema>();
schema->AddDebugField("FloatVectorField", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("BinaryVectorField", DataType::VECTOR_BINARY, 16, knowhere::metric::JACCARD);
schema->AddDebugField(
"FloatVectorField", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("BinaryVectorField",
DataType::VECTOR_BINARY,
16,
knowhere::metric::JACCARD);
schema->AddDebugField("Int64Field", DataType::INT64);
schema->AddDebugField("Int32Field", DataType::INT32);
schema->AddDebugField("Int16Field", DataType::INT16);

View File

@ -37,7 +37,8 @@ TEST(Query, ShowExecutor) {
auto metric_type = knowhere::metric::L2;
auto node = std::make_unique<FloatVectorANNS>();
auto schema = std::make_shared<Schema>();
auto field_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, metric_type);
auto field_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, metric_type);
int64_t num_queries = 100L;
auto raw_data = DataGen(schema, num_queries);
auto& info = node->search_info_;
@ -79,7 +80,8 @@ TEST(Query, DSL) {
})";
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto plan = CreatePlan(*schema, dsl_string);
auto res = shower.call_child(*plan->plan_node_);
@ -126,7 +128,8 @@ TEST(Query, ParsePlaceholderGroup) {
})";
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto plan = CreatePlan(*schema, dsl_string);
int64_t num_queries = 100000;
int dim = 16;
@ -139,7 +142,8 @@ TEST(Query, ExecWithPredicateLoader) {
using namespace milvus::query;
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::FLOAT);
auto counter_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(counter_fid);
@ -174,12 +178,17 @@ TEST(Query, ExecWithPredicateLoader) {
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);
@ -217,7 +226,8 @@ TEST(Query, ExecWithPredicateSmallN) {
using namespace milvus::query;
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 7, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 7, knowhere::metric::L2);
schema->AddDebugField("age", DataType::FLOAT);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -252,12 +262,17 @@ TEST(Query, ExecWithPredicateSmallN) {
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 7, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);
@ -271,7 +286,8 @@ TEST(Query, ExecWithPredicate) {
using namespace milvus::query;
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::FLOAT);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -306,12 +322,17 @@ TEST(Query, ExecWithPredicate) {
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);
@ -349,7 +370,8 @@ TEST(Query, ExecTerm) {
using namespace milvus::query;
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::FLOAT);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -383,12 +405,17 @@ TEST(Query, ExecTerm) {
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 3;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);
@ -405,7 +432,8 @@ TEST(Query, ExecEmpty) {
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("age", DataType::FLOAT);
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
std::string dsl = R"({
"bool": {
"must": [
@ -430,7 +458,8 @@ TEST(Query, ExecEmpty) {
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);
@ -477,11 +506,16 @@ TEST(Query, ExecWithoutPredicateFlat) {
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);
@ -495,7 +529,8 @@ TEST(Query, ExecWithoutPredicate) {
using namespace milvus::query;
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::FLOAT);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -523,11 +558,16 @@ TEST(Query, ExecWithoutPredicate) {
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);
@ -587,18 +627,25 @@ TEST(Query, InnerProduct) {
]
}
})";
auto vec_fid = schema->AddDebugField("normalized", DataType::VECTOR_FLOAT, dim, knowhere::metric::IP);
auto vec_fid = schema->AddDebugField(
"normalized", DataType::VECTOR_FLOAT, dim, knowhere::metric::IP);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
schema->set_primary_field_id(i64_fid);
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
auto plan = CreatePlan(*schema, dsl);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto col = dataset.get_col<float>(vec_fid);
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, col.data());
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group_raw =
CreatePlaceholderGroupFromBlob(num_queries, 16, col.data());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp ts = N * 2;
auto sr = segment->Search(plan.get(), ph_group.get(), ts);
assert_order(*sr, "ip");
@ -651,15 +698,21 @@ TEST(Query, FillSegment) {
// dispatch here
int N = 100000;
auto dataset = DataGen(schema, N);
const auto std_vec = dataset.get_col<int64_t>(FieldId(101)); // ids field
const auto std_vfloat_vec = dataset.get_col<float>(FieldId(100)); // vector field
const auto std_i32_vec = dataset.get_col<int32_t>(FieldId(102)); // scalar field
const auto std_vec = dataset.get_col<int64_t>(FieldId(101)); // ids field
const auto std_vfloat_vec =
dataset.get_col<float>(FieldId(100)); // vector field
const auto std_i32_vec =
dataset.get_col<int32_t>(FieldId(102)); // scalar field
std::vector<std::unique_ptr<SegmentInternalInterface>> segments;
segments.emplace_back([&] {
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
return segment;
}());
segments.emplace_back([&] {
@ -708,8 +761,10 @@ TEST(Query, FillSegment) {
for (auto& segment : segments) {
plan->target_entries_.clear();
plan->target_entries_.push_back(schema->get_field_id(FieldName("fakevec")));
plan->target_entries_.push_back(schema->get_field_id(FieldName("the_value")));
plan->target_entries_.push_back(
schema->get_field_id(FieldName("fakevec")));
plan->target_entries_.push_back(
schema->get_field_id(FieldName("the_value")));
auto result = segment->Search(plan.get(), ph.get(), ts);
// std::cout << SearchResultToJson(result).dump(2);
result->result_offsets_.resize(topk * num_queries);
@ -723,11 +778,13 @@ TEST(Query, FillSegment) {
}
auto vec_field_id = schema->get_field_id(FieldName("fakevec"));
auto output_vec_field_data = fields_data.at(vec_field_id)->vectors().float_vector().data();
auto output_vec_field_data =
fields_data.at(vec_field_id)->vectors().float_vector().data();
ASSERT_EQ(output_vec_field_data.size(), topk * num_queries * dim);
auto i32_field_id = schema->get_field_id(FieldName("the_value"));
auto output_i32_field_data = fields_data.at(i32_field_id)->scalars().int_data().data();
auto output_i32_field_data =
fields_data.at(i32_field_id)->scalars().int_data().data();
ASSERT_EQ(output_i32_field_data.size(), topk * num_queries);
for (int i = 0; i < topk * num_queries; i++) {
@ -737,13 +794,17 @@ TEST(Query, FillSegment) {
auto std_val = std_vec[internal_offset];
auto std_i32 = std_i32_vec[internal_offset];
std::vector<float> std_vfloat(dim);
std::copy_n(std_vfloat_vec.begin() + dim * internal_offset, dim, std_vfloat.begin());
std::copy_n(std_vfloat_vec.begin() + dim * internal_offset,
dim,
std_vfloat.begin());
ASSERT_EQ(val, std_val) << "io:" << internal_offset;
if (val != -1) {
// check vector field
std::vector<float> vfloat(dim);
memcpy(vfloat.data(), &output_vec_field_data[i * dim], dim * sizeof(float));
memcpy(vfloat.data(),
&output_vec_field_data[i * dim],
dim * sizeof(float));
ASSERT_EQ(vfloat, std_vfloat);
// check int32 field
@ -759,7 +820,8 @@ TEST(Query, ExecWithPredicateBinary) {
using namespace milvus::query;
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -794,13 +856,19 @@ TEST(Query, ExecWithPredicateBinary) {
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto vec_ptr = dataset.get_col<uint8_t>(vec_fid);
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreateBinaryPlaceholderGroupFromBlob(num_queries, 512, vec_ptr.data() + 1024 * 512 / 8);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group_raw = CreateBinaryPlaceholderGroupFromBlob(
num_queries, 512, vec_ptr.data() + 1024 * 512 / 8);
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
auto sr = segment->Search(plan.get(), ph_group.get(), time);

View File

@ -32,7 +32,10 @@ cmp2(std::pair<float, int64_t> a, std::pair<float, int64_t> b) {
}
auto
RangeSearchSortResultBF(milvus::DatasetPtr data_set, int64_t topk, size_t nq, std::string metric_type) {
RangeSearchSortResultBF(milvus::DatasetPtr data_set,
int64_t topk,
size_t nq,
std::string metric_type) {
auto lims = milvus::GetDatasetLims(data_set);
auto id = milvus::GetDatasetIDs(data_set);
auto dist = milvus::GetDatasetDistance(data_set);
@ -69,7 +72,10 @@ RangeSearchSortResultBF(milvus::DatasetPtr data_set, int64_t topk, size_t nq, st
}
milvus::DatasetPtr
genResultDataset(const int64_t nq, const int64_t* ids, const float* distance, const size_t* lims) {
genResultDataset(const int64_t nq,
const int64_t* ids,
const float* distance,
const size_t* lims) {
auto ret_ds = std::make_shared<milvus::Dataset>();
ret_ds->SetRows(nq);
ret_ds->SetIds(ids);
@ -80,12 +86,17 @@ genResultDataset(const int64_t nq, const int64_t* ids, const float* distance, co
}
// Compares a sorted range-search result against reference arrays.
// Reads the id and distance arrays out of `dataset` and, for each i in [0, n),
// asserts via AssertInfo that id[i] == p_id[i] and dist[i] == p_dist[i].
// Distances are compared with exact floating-point equality.
//   p_id    - expected ids (at least n entries are read)
//   p_dist  - expected distances (at least n entries are read)
//   dataset - result dataset whose ids/distances are checked
//   n       - number of leading entries to compare
// NOTE(review): the signature and both AssertInfo calls appear twice in this
// listing (single-line and wrapped layout of the same code).
void
CheckRangeSearchSortResult(int64_t* p_id, float* p_dist, milvus::DatasetPtr dataset, int64_t n) {
CheckRangeSearchSortResult(int64_t* p_id,
float* p_dist,
milvus::DatasetPtr dataset,
int64_t n) {
auto id = milvus::GetDatasetIDs(dataset);
auto dist = milvus::GetDatasetDistance(dataset);
for (int i = 0; i < n; i++) {
AssertInfo(id[i] == p_id[i], "id of range search result are not the same");
AssertInfo(dist[i] == p_dist[i], "distance of range search result are not the same");
AssertInfo(id[i] == p_id[i],
"id of range search result are not the same");
AssertInfo(dist[i] == p_dist[i],
"distance of range search result are not the same");
}
}
@ -102,7 +113,8 @@ GenRangeSearchResult(int64_t* ids,
std::mt19937 e(seed);
std::uniform_int_distribution<> uniform_num(0, N);
std::uniform_int_distribution<> uniform_ids(id_min, id_max);
std::uniform_real_distribution<> uniform_distance(distance_min, distance_max);
std::uniform_real_distribution<> uniform_distance(distance_min,
distance_max);
lims = new size_t[N + 1];
// alloc max memory
@ -122,12 +134,14 @@ GenRangeSearchResult(int64_t* ids,
return genResultDataset(N, ids, distances, lims);
}
class RangeSearchSortTest : public ::testing::TestWithParam<knowhere::MetricType> {
class RangeSearchSortTest
: public ::testing::TestWithParam<knowhere::MetricType> {
protected:
void
SetUp() override {
metric_type = GetParam();
dataset = GenRangeSearchResult(ids, distances, lims, N, id_min, id_max, dist_min, dist_max);
dataset = GenRangeSearchResult(
ids, distances, lims, N, id_min, id_max, dist_min, dist_max);
}
void
@ -160,7 +174,8 @@ INSTANTIATE_TEST_CASE_P(RangeSearchSortParameters,
TEST_P(RangeSearchSortTest, CheckRangeSearchSort) {
auto res = milvus::SortRangeSearchResult(dataset, TOPK, N, metric_type);
auto [real_num, p_id, p_dist] = RangeSearchSortResultBF(dataset, TOPK, N, metric_type);
auto [real_num, p_id, p_dist] =
RangeSearchSortResultBF(dataset, TOPK, N, metric_type);
CheckRangeSearchSortResult(p_id, p_dist, res, real_num);
delete[] p_id;
delete[] p_dist;

View File

@ -31,7 +31,8 @@ GenSubSearchResult(const int64_t nq,
const int64_t round_decimal) {
constexpr int64_t limit = 1000000L;
bool is_ip = (metric_type == knowhere::metric::IP);
SubSearchResultUniq sub_result = std::make_unique<SubSearchResult>(nq, topk, metric_type, round_decimal);
SubSearchResultUniq sub_result =
std::make_unique<SubSearchResult>(nq, topk, metric_type, round_decimal);
std::vector<int64_t> ids;
std::vector<float> distances;
for (auto n = 0; n < nq; ++n) {
@ -41,11 +42,16 @@ GenSubSearchResult(const int64_t nq,
distances.push_back(gen_x);
}
if (is_ip) {
std::sort(ids.begin() + n * topk, ids.begin() + (n + 1) * topk, std::greater<int64_t>());
std::sort(distances.begin() + n * topk, distances.begin() + (n + 1) * topk, std::greater<float>());
std::sort(ids.begin() + n * topk,
ids.begin() + (n + 1) * topk,
std::greater<int64_t>());
std::sort(distances.begin() + n * topk,
distances.begin() + (n + 1) * topk,
std::greater<float>());
} else {
std::sort(ids.begin() + n * topk, ids.begin() + (n + 1) * topk);
std::sort(distances.begin() + n * topk, distances.begin() + (n + 1) * topk);
std::sort(distances.begin() + n * topk,
distances.begin() + (n + 1) * topk);
}
}
sub_result->mutable_distances() = std::move(distances);
@ -86,7 +92,8 @@ TestSubSearchResultMerge(const knowhere::MetricType& metric_type,
SubSearchResult final_result(nq, topk, metric_type, round_decimal);
for (int i = 0; i < iteration; ++i) {
SubSearchResultUniq sub_result = GenSubSearchResult(nq, topk, metric_type, round_decimal);
SubSearchResultUniq sub_result =
GenSubSearchResult(nq, topk, metric_type, round_decimal);
auto ids = sub_result->get_ids();
for (int n = 0; n < nq; ++n) {
for (int k = 0; k < topk; ++k) {
@ -103,8 +110,10 @@ TestSubSearchResultMerge(const knowhere::MetricType& metric_type,
}
TEST(Reduce, SubSearchResult) {
using queue_type_l2 = std::priority_queue<int64_t, std::vector<int64_t>, std::less<int64_t>>;
using queue_type_ip = std::priority_queue<int64_t, std::vector<int64_t>, std::greater<int64_t>>;
using queue_type_l2 =
std::priority_queue<int64_t, std::vector<int64_t>, std::less<int64_t>>;
using queue_type_ip = std::
priority_queue<int64_t, std::vector<int64_t>, std::greater<int64_t>>;
TestSubSearchResultMerge<queue_type_l2>(knowhere::metric::L2, 1, 1, 1);
TestSubSearchResultMerge<queue_type_l2>(knowhere::metric::L2, 1, 1, 10);

View File

@ -23,22 +23,36 @@ TEST(Relational, Basic) {
std::string s = "str4";
std::string another_s = "str5";
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(i64, another_i64), i64 == another_i64);
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, another_i64), i64 != another_i64);
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, another_i64), i64 >= another_i64);
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(i64, another_i64), i64 > another_i64);
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, another_i64), i64 <= another_i64);
ASSERT_EQ(Relational<decltype(std::less<>{})>()(i64, another_i64), i64 < another_i64);
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(i64, another_i64),
i64 == another_i64);
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, another_i64),
i64 != another_i64);
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, another_i64),
i64 >= another_i64);
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(i64, another_i64),
i64 > another_i64);
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, another_i64),
i64 <= another_i64);
ASSERT_EQ(Relational<decltype(std::less<>{})>()(i64, another_i64),
i64 < another_i64);
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(s, another_s), s == another_s);
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(s, another_s), s != another_s);
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(s, another_s), s >= another_s);
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(s, another_s), s > another_s);
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(s, another_s), s <= another_s);
ASSERT_EQ(Relational<decltype(std::less<>{})>()(s, another_s), s < another_s);
ASSERT_EQ(Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(s, another_s),
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(s, another_s),
s == another_s);
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(s, another_s),
s != another_s);
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(s, another_s),
s >= another_s);
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(s, another_s),
s > another_s);
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(s, another_s),
s <= another_s);
ASSERT_EQ(Relational<decltype(std::less<>{})>()(s, another_s),
s < another_s);
ASSERT_EQ(Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(
s, another_s),
milvus::PrefixMatch(s, another_s));
ASSERT_EQ(Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(s, another_s),
ASSERT_EQ(Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(
s, another_s),
milvus::PostfixMatch(s, another_s));
}
@ -49,10 +63,13 @@ TEST(Relational, DifferentFundamentalType) {
int64_t i64 = 4;
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(i64, i32), i64 == i32);
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, i32), i64 != i32);
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, i32), i64 >= i32);
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, i32),
i64 != i32);
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, i32),
i64 >= i32);
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(i64, i32), i64 > i32);
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, i32), i64 <= i32);
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, i32),
i64 <= i32);
ASSERT_EQ(Relational<decltype(std::less<>{})>()(i64, i32), i64 < i32);
}
@ -68,8 +85,11 @@ TEST(Relational, DifferentInCompatibleType) {
ASSERT_ANY_THROW(Relational<decltype(std::greater<>{})>()(s, i64));
ASSERT_ANY_THROW(Relational<decltype(std::less_equal<>{})>()(s, i64));
ASSERT_ANY_THROW(Relational<decltype(std::less<>{})>()(s, i64));
ASSERT_ANY_THROW(Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(s, i64));
ASSERT_ANY_THROW(Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(s, i64));
ASSERT_ANY_THROW(
Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(s, i64));
ASSERT_ANY_THROW(
Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(s,
i64));
ASSERT_ANY_THROW(Relational<decltype(std::equal_to<>{})>()(i64, s));
ASSERT_ANY_THROW(Relational<decltype(std::not_equal_to<>{})>()(i64, s));
@ -77,6 +97,9 @@ TEST(Relational, DifferentInCompatibleType) {
ASSERT_ANY_THROW(Relational<decltype(std::greater<>{})>()(i64, s));
ASSERT_ANY_THROW(Relational<decltype(std::less_equal<>{})>()(i64, s));
ASSERT_ANY_THROW(Relational<decltype(std::less<>{})>()(i64, s));
ASSERT_ANY_THROW(Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(i64, s));
ASSERT_ANY_THROW(Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(i64, s));
ASSERT_ANY_THROW(
Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(i64, s));
ASSERT_ANY_THROW(
Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(i64,
s));
}

View File

@ -48,7 +48,8 @@ TEST(Retrieve, AutoID) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
auto fid_vec = schema->AddDebugField(
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
schema->set_primary_field_id(fid_64);
int64_t N = 100;
@ -65,7 +66,8 @@ TEST(Retrieve, AutoID) {
for (int i = 0; i < req_size; ++i) {
values.emplace_back(i64_col[choose(i)]);
}
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
fid_64, DataType::INT64, values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->predicate_ = std::move(term_expr);
std::vector<FieldId> target_fields_id{fid_64, fid_vec};
@ -98,7 +100,8 @@ TEST(Retrieve, AutoID2) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
auto fid_vec = schema->AddDebugField(
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
schema->set_primary_field_id(fid_64);
int64_t N = 100;
@ -115,7 +118,8 @@ TEST(Retrieve, AutoID2) {
for (int i = 0; i < req_size; ++i) {
values.emplace_back(i64_col[choose(i)]);
}
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
fid_64, DataType::INT64, values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->predicate_ = std::move(term_expr);
std::vector<FieldId> target_offsets{fid_64, fid_vec};
@ -143,7 +147,8 @@ TEST(Retrieve, NotExist) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
auto fid_vec = schema->AddDebugField(
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
schema->set_primary_field_id(fid_64);
int64_t N = 100;
@ -163,7 +168,8 @@ TEST(Retrieve, NotExist) {
values.emplace_back(choose2(i));
}
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
fid_64, DataType::INT64, values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->predicate_ = std::move(term_expr);
std::vector<FieldId> target_offsets{fid_64, fid_vec};
@ -191,7 +197,8 @@ TEST(Retrieve, Empty) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
auto fid_vec = schema->AddDebugField(
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
schema->set_primary_field_id(fid_64);
int64_t N = 100;
@ -205,7 +212,8 @@ TEST(Retrieve, Empty) {
for (int i = 0; i < req_size; ++i) {
values.emplace_back(choose(i));
}
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
fid_64, DataType::INT64, values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->predicate_ = std::move(term_expr);
std::vector<FieldId> target_offsets{fid_64, fid_vec};
@ -226,7 +234,8 @@ TEST(Retrieve, LargeTimestamp) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
auto fid_vec = schema->AddDebugField(
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
schema->set_primary_field_id(fid_64);
int64_t N = 100;
@ -244,7 +253,8 @@ TEST(Retrieve, LargeTimestamp) {
for (int i = 0; i < req_size; ++i) {
values.emplace_back(i64_col[choose(i)]);
}
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
fid_64, DataType::INT64, values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->predicate_ = std::move(term_expr);
std::vector<FieldId> target_offsets{fid_64, fid_vec};
@ -253,7 +263,8 @@ TEST(Retrieve, LargeTimestamp) {
std::vector<int> filter_timestamps{-1, 0, 1, 10, 20};
filter_timestamps.push_back(N / 2);
for (const auto& f_ts : filter_timestamps) {
auto retrieve_results = segment->Retrieve(plan.get(), ts_offset + 1 + f_ts);
auto retrieve_results =
segment->Retrieve(plan.get(), ts_offset + 1 + f_ts);
Assert(retrieve_results->fields_data_size() == 2);
int target_num = (f_ts + choose_sep) / choose_sep;
@ -263,10 +274,12 @@ TEST(Retrieve, LargeTimestamp) {
for (auto field_data : retrieve_results->fields_data()) {
if (DataType(field_data.type()) == DataType::INT64) {
Assert(field_data.scalars().long_data().data_size() == target_num);
Assert(field_data.scalars().long_data().data_size() ==
target_num);
}
if (DataType(field_data.type()) == DataType::VECTOR_FLOAT) {
Assert(field_data.vectors().float_vector().data_size() == target_num * DIM);
Assert(field_data.vectors().float_vector().data_size() ==
target_num * DIM);
}
}
}
@ -276,7 +289,8 @@ TEST(Retrieve, Delete) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
auto fid_vec = schema->AddDebugField(
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
schema->set_primary_field_id(fid_64);
auto fid_ts = schema->AddDebugField("Timestamp", DataType::INT64);
@ -300,7 +314,8 @@ TEST(Retrieve, Delete) {
for (int i = 0; i < req_size; ++i) {
values.emplace_back(i64_col[choose(i)]);
}
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
fid_64, DataType::INT64, values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->predicate_ = std::move(term_expr);
std::vector<FieldId> target_offsets{fid_ts, fid_64, fid_vec};
@ -357,7 +372,10 @@ TEST(Retrieve, Delete) {
std::vector<idx_t> new_timestamps{10, 10, 10, 10, 10, 10};
auto reserved_offset = segment->PreDelete(new_count);
ASSERT_EQ(reserved_offset, row_count);
segment->Delete(reserved_offset, new_count, ids.get(), reinterpret_cast<const Timestamp*>(new_timestamps.data()));
segment->Delete(reserved_offset,
new_count,
ids.get(),
reinterpret_cast<const Timestamp*>(new_timestamps.data()));
{
auto retrieve_results = segment->Retrieve(plan.get(), 100);

View File

@ -49,7 +49,9 @@ TYPED_TEST_P(TypedScalarIndexTest, Constructor) {
milvus::index::CreateIndexInfo create_index_info;
create_index_info.field_type = milvus::DataType(dtype);
create_index_info.index_type = index_type;
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
auto index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info);
}
}
@ -61,8 +63,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Count) {
milvus::index::CreateIndexInfo create_index_info;
create_index_info.field_type = milvus::DataType(dtype);
create_index_info.index_type = index_type;
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info);
auto scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto arr = GenArr<T>(nb);
scalar_index->Build(nb, arr.data());
ASSERT_EQ(nb, scalar_index->Count());
@ -77,8 +82,11 @@ TYPED_TEST_P(TypedScalarIndexTest, In) {
milvus::index::CreateIndexInfo create_index_info;
create_index_info.field_type = milvus::DataType(dtype);
create_index_info.index_type = index_type;
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info);
auto scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto arr = GenArr<T>(nb);
scalar_index->Build(nb, arr.data());
assert_in<T>(scalar_index, arr);
@ -93,8 +101,11 @@ TYPED_TEST_P(TypedScalarIndexTest, NotIn) {
milvus::index::CreateIndexInfo create_index_info;
create_index_info.field_type = milvus::DataType(dtype);
create_index_info.index_type = index_type;
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info);
auto scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto arr = GenArr<T>(nb);
scalar_index->Build(nb, arr.data());
assert_not_in<T>(scalar_index, arr);
@ -109,8 +120,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Reverse) {
milvus::index::CreateIndexInfo create_index_info;
create_index_info.field_type = milvus::DataType(dtype);
create_index_info.index_type = index_type;
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info);
auto scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto arr = GenArr<T>(nb);
scalar_index->Build(nb, arr.data());
assert_reverse<T>(scalar_index, arr);
@ -125,8 +139,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Range) {
milvus::index::CreateIndexInfo create_index_info;
create_index_info.field_type = milvus::DataType(dtype);
create_index_info.index_type = index_type;
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info);
auto scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto arr = GenArr<T>(nb);
scalar_index->Build(nb, arr.data());
assert_range<T>(scalar_index, arr);
@ -141,16 +158,22 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) {
milvus::index::CreateIndexInfo create_index_info;
create_index_info.field_type = milvus::DataType(dtype);
create_index_info.index_type = index_type;
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info);
auto scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
auto arr = GenArr<T>(nb);
scalar_index->Build(nb, arr.data());
auto binary_set = index->Serialize(nullptr);
auto copy_index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
auto copy_index =
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
create_index_info);
copy_index->Load(binary_set);
auto copy_scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(copy_index.get());
auto copy_scalar_index =
dynamic_cast<milvus::index::ScalarIndex<T>*>(copy_index.get());
ASSERT_EQ(nb, copy_scalar_index->Count());
assert_in<T>(copy_scalar_index, arr);
assert_not_in<T>(copy_scalar_index, arr);
@ -159,8 +182,17 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) {
}
// TODO: it's easy to overflow for int8_t. Design more reasonable ut.
// Element types that the TypedScalarIndexTest suite is instantiated with
// (consumed by INSTANTIATE_TYPED_TEST_CASE_P at the end of this section).
// NOTE(review): this text looks like a rendered diff, so the alias and the
// REGISTER call each appear twice (pre- and post-reformat) -- confirm that
// only one copy of each exists in the real file.
using ScalarT = ::testing::Types<int8_t, int16_t, int32_t, int64_t, float, double>;
using ScalarT =
::testing::Types<int8_t, int16_t, int32_t, int64_t, float, double>;
// Register, by name, every type-parameterized test of TypedScalarIndexTest.
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest, Dummy, Constructor, Count, In, NotIn, Range, Codec, Reverse);
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest,
Dummy,
Constructor,
Count,
In,
NotIn,
Range,
Codec,
Reverse);
// Instantiate the registered tests for each type in ScalarT under the
// "ArithmeticCheck" prefix.
INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck, TypedScalarIndexTest, ScalarT);

View File

@ -31,7 +31,9 @@ using milvus::indexbuilder::ScalarIndexCreatorPtr;
using ScalarTestParams = std::pair<MapParams, MapParams>;
namespace {
template <typename T, typename = std::enable_if_t<std::is_arithmetic_v<T> | std::is_same_v<T, std::string>>>
template <typename T,
typename = std::enable_if_t<std::is_arithmetic_v<T> |
std::is_same_v<T, std::string>>>
inline void
build_index(const ScalarIndexCreatorPtr& creator, const std::vector<T>& arr) {
const int64_t dim = 8; // not important here
@ -41,7 +43,8 @@ build_index(const ScalarIndexCreatorPtr& creator, const std::vector<T>& arr) {
template <>
inline void
build_index(const ScalarIndexCreatorPtr& creator, const std::vector<bool>& arr) {
build_index(const ScalarIndexCreatorPtr& creator,
const std::vector<bool>& arr) {
schemapb::BoolArray pbarr;
for (auto b : arr) {
pbarr.add_data(b);
@ -50,19 +53,20 @@ build_index(const ScalarIndexCreatorPtr& creator, const std::vector<bool>& arr)
creator->Build(ds);
delete[](char*) (ds->GetTensor());
delete[](char*)(ds->GetTensor());
}
// Explicit specialization of build_index for std::string input: copies the
// strings into a schemapb::StringArray, converts it with GenDsFromPB, and
// passes the resulting dataset to creator->Build().
// NOTE(review): this text looks like a rendered diff, so the signature and
// the delete[] statement each appear twice (pre- and post-reformat) --
// confirm that only one copy of each exists in the real file.
template <>
inline void
build_index(const ScalarIndexCreatorPtr& creator, const std::vector<std::string>& arr) {
build_index(const ScalarIndexCreatorPtr& creator,
const std::vector<std::string>& arr) {
schemapb::StringArray pbarr;
*(pbarr.mutable_data()) = {arr.begin(), arr.end()};
auto ds = GenDsFromPB(pbarr);
creator->Build(ds);
// Free the tensor pointer held by the dataset, cast to char* for delete[].
// Presumably GenDsFromPB allocates it with new char[] -- TODO confirm.
delete[](char*) (ds->GetTensor());
delete[](char*)(ds->GetTensor());
}
} // namespace
@ -79,7 +83,8 @@ class TypedScalarIndexCreatorTest : public ::testing::Test {
// }
};
// Element types for the TypedScalarIndexCreatorTest suite.
// NOTE(review): this text looks like a rendered diff, so the alias appears
// twice (pre- and post-reformat) -- confirm only one copy exists in the file.
using ScalarT = ::testing::Types<bool, int8_t, int16_t, int32_t, int64_t, float, double, std::string>;
using ScalarT = ::testing::
Types<bool, int8_t, int16_t, int32_t, int64_t, float, double, std::string>;
// Declare TypedScalarIndexCreatorTest as a type-parameterized test suite.
TYPED_TEST_CASE_P(TypedScalarIndexCreatorTest);
@ -97,8 +102,10 @@ TYPED_TEST_P(TypedScalarIndexCreatorTest, Constructor) {
auto index_params = tp.second;
auto serialized_type_params = generate_type_params(type_params);
auto serialized_index_params = generate_index_params(index_params);
auto creator = milvus::indexbuilder::CreateScalarIndex(milvus::DataType(dtype), serialized_type_params.c_str(),
serialized_index_params.c_str());
auto creator = milvus::indexbuilder::CreateScalarIndex(
milvus::DataType(dtype),
serialized_type_params.c_str(),
serialized_index_params.c_str());
}
}
@ -110,17 +117,26 @@ TYPED_TEST_P(TypedScalarIndexCreatorTest, Codec) {
auto index_params = tp.second;
auto serialized_type_params = generate_type_params(type_params);
auto serialized_index_params = generate_index_params(index_params);
auto creator = milvus::indexbuilder::CreateScalarIndex(milvus::DataType(dtype), serialized_type_params.c_str(),
serialized_index_params.c_str());
auto creator = milvus::indexbuilder::CreateScalarIndex(
milvus::DataType(dtype),
serialized_type_params.c_str(),
serialized_index_params.c_str());
auto arr = GenArr<T>(nb);
build_index<T>(creator, arr);
auto binary_set = creator->Serialize();
auto copy_creator = milvus::indexbuilder::CreateScalarIndex(
milvus::DataType(dtype), serialized_type_params.c_str(), serialized_index_params.c_str());
milvus::DataType(dtype),
serialized_type_params.c_str(),
serialized_index_params.c_str());
copy_creator->Load(binary_set);
}
}
// Register, by name, every type-parameterized test of
// TypedScalarIndexCreatorTest.
// NOTE(review): this text looks like a rendered diff, so each macro call
// appears twice (pre- and post-reformat) -- confirm only one copy of each
// exists in the real file.
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexCreatorTest, Dummy, Constructor, Codec);
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexCreatorTest,
Dummy,
Constructor,
Codec);
// Instantiate the registered tests for each type in ScalarT under the
// "ArithmeticCheck" prefix.
INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck, TypedScalarIndexCreatorTest, ScalarT);
INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck,
TypedScalarIndexCreatorTest,
ScalarT);

View File

@ -30,7 +30,8 @@ TEST(Sealed, without_predicate) {
auto dim = 16;
auto topK = 5;
auto metric_type = knowhere::metric::L2;
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fake_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -64,12 +65,18 @@ TEST(Sealed, without_predicate) {
auto query_ptr = vec_col.data() + 4200 * dim;
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group_raw =
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 1000000;
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
@ -81,11 +88,13 @@ TEST(Sealed, without_predicate) {
create_index_info.metric_type = knowhere::metric::L2;
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, nullptr);
auto build_conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"}};
auto build_conf =
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"}};
auto search_conf = knowhere::Json{{knowhere::indexparam::NPROBE, 10}};
@ -134,7 +143,8 @@ TEST(Sealed, with_predicate) {
auto dim = 16;
auto topK = 5;
auto metric_type = knowhere::metric::L2;
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fake_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
std::string dsl = R"({
@ -172,12 +182,18 @@ TEST(Sealed, with_predicate) {
auto query_ptr = vec_col.data() + 42000 * dim;
auto segment = CreateGrowingSegment(schema);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group_raw =
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 10000000;
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
@ -187,11 +203,13 @@ TEST(Sealed, with_predicate) {
create_index_info.field_type = DataType::VECTOR_FLOAT;
create_index_info.metric_type = knowhere::metric::L2;
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, nullptr);
auto build_conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"}};
auto build_conf =
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"}};
auto database = knowhere::GenDataSet(N, dim, vec_col.data());
indexing->BuildWithDataset(database, build_conf);
@ -203,7 +221,8 @@ TEST(Sealed, with_predicate) {
auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr);
auto search_conf =
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}, {knowhere::indexparam::NPROBE, 10}};
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::indexparam::NPROBE, 10}};
milvus::SearchInfo searchInfo;
searchInfo.topk_ = topK;
searchInfo.metric_type_ = knowhere::metric::L2;
@ -237,7 +256,8 @@ TEST(Sealed, with_predicate_filter_all) {
auto topK = 5;
// auto metric_type = MetricType::METRIC_L2;
auto metric_type = knowhere::metric::L2;
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fake_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
std::string dsl = R"({
@ -275,8 +295,10 @@ TEST(Sealed, with_predicate_filter_all) {
auto query_ptr = vec_col.data() + 42000 * dim;
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group_raw =
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = 10000000;
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
@ -285,11 +307,13 @@ TEST(Sealed, with_predicate_filter_all) {
create_index_info.field_type = DataType::VECTOR_FLOAT;
create_index_info.metric_type = knowhere::metric::L2;
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
auto ivf_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto ivf_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, nullptr);
auto ivf_build_conf = knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"},
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
auto ivf_build_conf =
knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "100"},
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
auto database = knowhere::GenDataSet(N, dim, vec_col.data());
ivf_indexing->BuildWithDataset(database, ivf_build_conf);
@ -311,19 +335,22 @@ TEST(Sealed, with_predicate_filter_all) {
auto sr = ivf_sealed_segment->Search(plan.get(), ph_group.get(), time);
EXPECT_EQ(sr->get_total_result_count(), 0);
auto hnsw_conf = knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::HNSW_M, "16"},
{knowhere::indexparam::EFCONSTRUCTION, "200"},
{knowhere::indexparam::EF, "200"},
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
auto hnsw_conf =
knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::HNSW_M, "16"},
{knowhere::indexparam::EFCONSTRUCTION, "200"},
{knowhere::indexparam::EF, "200"},
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
create_index_info.field_type = DataType::VECTOR_FLOAT;
create_index_info.metric_type = knowhere::metric::L2;
create_index_info.index_type = knowhere::IndexEnum::INDEX_HNSW;
auto hnsw_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
auto hnsw_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, nullptr);
hnsw_indexing->BuildWithDataset(database, hnsw_conf);
auto hnsw_vec_index = dynamic_cast<index::VectorIndex*>(hnsw_indexing.get());
auto hnsw_vec_index =
dynamic_cast<index::VectorIndex*>(hnsw_indexing.get());
EXPECT_EQ(hnsw_vec_index->Count(), N);
EXPECT_EQ(hnsw_vec_index->GetDim(), dim);
@ -347,7 +374,8 @@ TEST(Sealed, LoadFieldData) {
auto N = ROW_COUNT;
auto metric_type = knowhere::metric::L2;
auto schema = std::make_shared<Schema>();
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fakevec_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
@ -396,7 +424,8 @@ TEST(Sealed, LoadFieldData) {
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
ASSERT_ANY_THROW(segment->Search(plan.get(), ph_group.get(), time));
@ -441,7 +470,8 @@ TEST(Sealed, LoadFieldDataMmap) {
auto N = ROW_COUNT;
auto metric_type = knowhere::metric::L2;
auto schema = std::make_shared<Schema>();
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fakevec_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
@ -490,7 +520,8 @@ TEST(Sealed, LoadFieldDataMmap) {
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
ASSERT_ANY_THROW(segment->Search(plan.get(), ph_group.get(), time));
@ -534,7 +565,8 @@ TEST(Sealed, LoadScalarIndex) {
auto N = ROW_COUNT;
auto metric_type = knowhere::metric::L2;
auto schema = std::make_shared<Schema>();
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fakevec_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
@ -579,19 +611,24 @@ TEST(Sealed, LoadScalarIndex) {
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
LoadFieldDataInfo row_id_info;
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
FieldMeta row_id_field_meta(
FieldName("RowID"), RowFieldID, DataType::INT64);
auto array = CreateScalarDataArrayFrom(
dataset.row_ids_.data(), N, row_id_field_meta);
row_id_info.field_data = array.get();
row_id_info.row_count = dataset.row_ids_.size();
row_id_info.field_id = RowFieldID.get(); // field id for RowId
segment->LoadFieldData(row_id_info);
LoadFieldDataInfo ts_info;
FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta);
FieldMeta ts_field_meta(
FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
array =
CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta);
ts_info.field_data = array.get();
ts_info.row_count = dataset.timestamps_.size();
ts_info.field_id = TimestampFieldID.get();
@ -609,7 +646,8 @@ TEST(Sealed, LoadScalarIndex) {
counter_index.field_type = DataType::INT64;
counter_index.index_params["index_type"] = "sort";
auto counter_data = dataset.get_col<int64_t>(counter_id);
counter_index.index = std::move(GenScalarIndexing<int64_t>(N, counter_data.data()));
counter_index.index =
std::move(GenScalarIndexing<int64_t>(N, counter_data.data()));
segment->LoadIndex(counter_index);
LoadIndexInfo double_index;
@ -617,7 +655,8 @@ TEST(Sealed, LoadScalarIndex) {
double_index.field_type = DataType::DOUBLE;
double_index.index_params["index_type"] = "sort";
auto double_data = dataset.get_col<double>(double_id);
double_index.index = std::move(GenScalarIndexing<double>(N, double_data.data()));
double_index.index =
std::move(GenScalarIndexing<double>(N, double_data.data()));
segment->LoadIndex(double_index);
LoadIndexInfo nothing_index;
@ -625,7 +664,8 @@ TEST(Sealed, LoadScalarIndex) {
nothing_index.field_type = DataType::INT32;
nothing_index.index_params["index_type"] = "sort";
auto nothing_data = dataset.get_col<int32_t>(nothing_id);
nothing_index.index = std::move(GenScalarIndexing<int32_t>(N, nothing_data.data()));
nothing_index.index =
std::move(GenScalarIndexing<int32_t>(N, nothing_data.data()));
segment->LoadIndex(nothing_index);
auto sr = segment->Search(plan.get(), ph_group.get(), time);
@ -639,7 +679,8 @@ TEST(Sealed, Delete) {
auto N = 10;
auto metric_type = knowhere::metric::L2;
auto schema = std::make_shared<Schema>();
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fakevec_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
@ -682,7 +723,8 @@ TEST(Sealed, Delete) {
auto plan = CreatePlan(*schema, dsl);
auto num_queries = 5;
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
ASSERT_ANY_THROW(segment->Search(plan.get(), ph_group.get(), time));
@ -705,11 +747,14 @@ TEST(Sealed, Delete) {
int64_t new_count = 3;
std::vector<idx_t> new_pks{6, 7, 8};
auto new_ids = std::make_unique<IdArray>();
new_ids->mutable_int_id()->mutable_data()->Add(new_pks.begin(), new_pks.end());
new_ids->mutable_int_id()->mutable_data()->Add(new_pks.begin(),
new_pks.end());
std::vector<idx_t> new_timestamps{10, 10, 10};
auto reserved_offset = segment->PreDelete(new_count);
ASSERT_EQ(reserved_offset, row_count);
segment->Delete(reserved_offset, new_count, new_ids.get(),
segment->Delete(reserved_offset,
new_count,
new_ids.get(),
reinterpret_cast<const Timestamp*>(new_timestamps.data()));
}
@ -730,7 +775,8 @@ GenRandomFloatVecs(int N, int dim) {
srand(time(NULL));
for (int i = 0; i < N; i++) {
for (int j = 0; j < dim; j++) {
vecs.push_back(static_cast<float>(rand()) / static_cast<float>(RAND_MAX));
vecs.push_back(static_cast<float>(rand()) /
static_cast<float>(RAND_MAX));
}
}
return vecs;
@ -750,7 +796,8 @@ GenQueryVecs(int N, int dim) {
// Copies `vecs` into the float-vector payload of a freshly allocated
// DataArray and returns the owning pointer.
//
// The payload is assigned exactly once; the previous text carried the same
// range-assignment twice (a long-line and a wrapped copy), which rebuilt the
// buffer a second time for no effect.
auto
transfer_to_fields_data(const std::vector<float>& vecs) {
    auto arr = std::make_unique<DataArray>();
    *(arr->mutable_vectors()->mutable_float_vector()->mutable_data()) = {
        vecs.begin(), vecs.end()};
    return arr;
}
@ -758,7 +805,8 @@ TEST(Sealed, BF) {
auto schema = std::make_shared<Schema>();
auto dim = 128;
auto metric_type = "L2";
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fake_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -788,13 +836,16 @@ TEST(Sealed, BF) {
output_field_ids: 101)") %
topK;
auto serialized_expr_plan = fmt.str();
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan.data());
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
auto binary_plan =
translate_text_plan_to_binary_plan(serialized_expr_plan.data());
auto plan =
CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
auto num_queries = 10;
auto query = GenQueryVecs(num_queries, dim);
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, query);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto result = segment->Search(plan.get(), ph_group.get(), MAX_TIMESTAMP);
auto ves = SearchResultToVector(*result);
@ -811,7 +862,8 @@ TEST(Sealed, BF_Overflow) {
auto schema = std::make_shared<Schema>();
auto dim = 128;
auto metric_type = "L2";
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto fake_id = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -839,13 +891,16 @@ TEST(Sealed, BF_Overflow) {
output_field_ids: 101)") %
topK;
auto serialized_expr_plan = fmt.str();
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan.data());
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
auto binary_plan =
translate_text_plan_to_binary_plan(serialized_expr_plan.data());
auto plan =
CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
auto num_queries = 10;
auto query = GenQueryVecs(num_queries, dim);
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, query);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto result = segment->Search(plan.get(), ph_group.get(), MAX_TIMESTAMP);
auto ves = SearchResultToVector(*result);
@ -894,7 +949,8 @@ TEST(Sealed, RealCount) {
ASSERT_EQ(del_offset1, 0);
auto del_ids1 = GenPKs(pks.begin(), pks.begin() + half);
auto del_tss1 = GenTss(half, c);
auto status = segment->Delete(del_offset1, half, del_ids1.get(), del_tss1.data());
auto status =
segment->Delete(del_offset1, half, del_ids1.get(), del_tss1.data());
ASSERT_TRUE(status.ok());
ASSERT_EQ(c - half, segment->get_real_count());
@ -902,7 +958,8 @@ TEST(Sealed, RealCount) {
auto del_offset2 = segment->PreDelete(half);
ASSERT_EQ(del_offset2, half);
auto del_tss2 = GenTss(half, c + half);
status = segment->Delete(del_offset2, half, del_ids1.get(), del_tss2.data());
status =
segment->Delete(del_offset2, half, del_ids1.get(), del_tss2.data());
ASSERT_TRUE(status.ok());
ASSERT_EQ(c - half, segment->get_real_count());

View File

@ -37,9 +37,13 @@ generate_data(int N) {
for (auto& x : vec) {
x = distribution(er);
}
raw_data.insert(raw_data.end(), (const char*)std::begin(vec), (const char*)std::end(vec));
raw_data.insert(raw_data.end(),
(const char*)std::begin(vec),
(const char*)std::end(vec));
int age = ei() % 100;
raw_data.insert(raw_data.end(), (const char*)&age, ((const char*)&age) + sizeof(age));
raw_data.insert(raw_data.end(),
(const char*)&age,
((const char*)&age) + sizeof(age));
}
return std::make_tuple(raw_data, timestamps, uids);
}
@ -48,7 +52,8 @@ generate_data(int N) {
TEST(SegmentCoreTest, NormalDistributionTest) {
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("age", DataType::INT32);
int N = 100 * 1000;
auto [raw_data, timestamps, uids] = generate_data(N);
@ -63,7 +68,8 @@ TEST(SegmentCoreTest, MockTest2) {
// schema
auto schema = std::make_shared<Schema>();
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -71,29 +77,37 @@ TEST(SegmentCoreTest, MockTest2) {
auto dataset = DataGen(schema, N);
auto segment = CreateGrowingSegment(schema);
auto reserved_begin = segment->PreInsert(N);
segment->Insert(reserved_begin, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(reserved_begin,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
}
// Builds a two-field schema: a 16-dim float vector ("fakevec", L2 metric) and
// an INT32 scalar ("age"). The test body makes no assertions.
//
// "fakevec" is registered once; the previous text repeated the AddDebugField
// call, registering the same field name twice.
TEST(SegmentCoreTest, SmallIndex) {
    using namespace milvus::segcore;
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    schema->AddDebugField("age", DataType::INT32);
}
// Round-trips INT64 primary keys through InsertRecord<false>: inserts keys
// 1..N with an offset equal to the key, then looks every key up and checks
// that the first returned offset matches.
//
// NOTE(review): `false` presumably selects the growing (non-sealed) variant,
// going by the test name -- confirm against InsertRecord's declaration.
TEST(InsertRecordTest, growing_int64_t) {
    using namespace milvus::segcore;
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto i64_fid = schema->AddDebugField("age", DataType::INT64);
    schema->set_primary_field_id(i64_fid);
    auto record = milvus::segcore::InsertRecord<false>(*schema, int64_t(32));
    const int N = 100000;

    // Each key is inserted exactly once (the loop used to appear twice).
    for (int i = 1; i <= N; i++) {
        record.insert_pk(PkType(int64_t(i)), int64_t(i));
    }

    for (int i = 1; i <= N; i++) {
        // Second argument is N + 1, i.e. larger than every inserted offset.
        // NOTE(review): its exact meaning is defined by search_pk -- confirm.
        std::vector<SegOffset> offset =
            record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
        ASSERT_EQ(offset[0].get(), int64_t(i));
    }
}
@ -101,16 +115,19 @@ TEST(InsertRecordTest, growing_int64_t) {
// Round-trips VARCHAR primary keys through InsertRecord<false>: inserts the
// decimal strings "1".."N" with offset i, then looks every key up and checks
// that the first returned offset matches.
//
// NOTE(review): `false` presumably selects the growing (non-sealed) variant,
// going by the test name -- confirm against InsertRecord's declaration.
TEST(InsertRecordTest, growing_string) {
    using namespace milvus::segcore;
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    // The primary key here is the VARCHAR "name" field, so the id is not
    // called i64_fid any more.
    auto str_pk_fid = schema->AddDebugField("name", DataType::VARCHAR);
    schema->set_primary_field_id(str_pk_fid);
    auto record = milvus::segcore::InsertRecord<false>(*schema, int64_t(32));
    const int N = 100000;

    // Each key is inserted exactly once (the loop used to appear twice).
    for (int i = 1; i <= N; i++) {
        record.insert_pk(PkType(std::to_string(i)), int64_t(i));
    }

    for (int i = 1; i <= N; i++) {
        // Second argument is N + 1, i.e. larger than every inserted offset.
        // NOTE(review): its exact meaning is defined by search_pk -- confirm.
        std::vector<SegOffset> offset =
            record.search_pk(std::to_string(i), int64_t(N + 1));
        ASSERT_EQ(offset[0].get(), int64_t(i));
    }
}
@ -118,17 +135,20 @@ TEST(InsertRecordTest, growing_string) {
// Round-trips INT64 primary keys through InsertRecord<true>: inserts keys in
// descending order (N down to 1) with an offset equal to the key, calls
// seal_pks(), then looks every key up in ascending order and checks that the
// first returned offset matches.
//
// NOTE(review): `true` presumably selects the sealed variant, going by the
// test name -- confirm against InsertRecord's declaration.
TEST(InsertRecordTest, sealed_int64_t) {
    using namespace milvus::segcore;
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto i64_fid = schema->AddDebugField("age", DataType::INT64);
    schema->set_primary_field_id(i64_fid);
    auto record = milvus::segcore::InsertRecord<true>(*schema, int64_t(32));
    const int N = 100000;

    // Reverse insertion order; each key is inserted exactly once (the loop
    // used to appear twice).
    for (int i = N; i >= 1; i--) {
        record.insert_pk(PkType(int64_t(i)), int64_t(i));
    }

    // Lookups happen only after the keys have been sealed.
    record.seal_pks();

    for (int i = 1; i <= N; i++) {
        // Second argument is N + 1, i.e. larger than every inserted offset.
        // NOTE(review): its exact meaning is defined by search_pk -- confirm.
        std::vector<SegOffset> offset =
            record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
        ASSERT_EQ(offset[0].get(), int64_t(i));
    }
}
@ -136,18 +156,21 @@ TEST(InsertRecordTest, sealed_int64_t) {
// Round-trips VARCHAR primary keys through InsertRecord<true>: inserts the
// decimal strings "1".."N" with offset i, calls seal_pks(), then looks every
// key up and checks that the first returned offset matches.
//
// NOTE(review): `true` presumably selects the sealed variant, going by the
// test name -- confirm against InsertRecord's declaration.
TEST(InsertRecordTest, sealed_string) {
    using namespace milvus::segcore;
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    // The primary key here is the VARCHAR "name" field, so the id is not
    // called i64_fid any more.
    auto str_pk_fid = schema->AddDebugField("name", DataType::VARCHAR);
    schema->set_primary_field_id(str_pk_fid);
    auto record = milvus::segcore::InsertRecord<true>(*schema, int64_t(32));
    const int N = 100000;

    // Each key is inserted exactly once (the loop used to appear twice).
    for (int i = 1; i <= N; i++) {
        record.insert_pk(PkType(std::to_string(i)), int64_t(i));
    }

    // Lookups happen only after the keys have been sealed.
    record.seal_pks();

    for (int i = 1; i <= N; i++) {
        // Second argument is N + 1, i.e. larger than every inserted offset.
        // NOTE(review): its exact meaning is defined by search_pk -- confirm.
        std::vector<SegOffset> offset =
            record.search_pk(std::to_string(i), int64_t(N + 1));
        ASSERT_EQ(offset[0].get(), int64_t(i));
    }
}

View File

@ -23,9 +23,11 @@ TEST(Span, Naive) {
int64_t N = ROW_COUNT;
constexpr int64_t size_per_chunk = 32 * 1024;
auto schema = std::make_shared<Schema>();
auto bin_vec_fid = schema->AddDebugField("binaryvec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
auto bin_vec_fid = schema->AddDebugField(
"binaryvec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
auto float_vec_fid = schema->AddDebugField("floatvec", DataType::VECTOR_FLOAT, 32, knowhere::metric::L2);
auto float_vec_fid = schema->AddDebugField(
"floatvec", DataType::VECTOR_FLOAT, 32, knowhere::metric::L2);
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
schema->set_primary_field_id(i64_fid);
@ -33,7 +35,11 @@ TEST(Span, Naive) {
auto seg_conf = SegcoreConfig::default_config();
auto segment = CreateGrowingSegment(schema, -1, seg_conf);
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto vec_ptr = dataset.get_col<uint8_t>(bin_vec_fid);
auto age_ptr = dataset.get_col<float>(float_fid);
auto float_ptr = dataset.get_col<float>(float_vec_fid);
@ -43,9 +49,11 @@ TEST(Span, Naive) {
auto row_count = interface.get_row_count();
ASSERT_EQ(N, row_count);
for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
auto vec_span = interface.chunk_data<milvus::BinaryVector>(bin_vec_fid, chunk_id);
auto vec_span =
interface.chunk_data<milvus::BinaryVector>(bin_vec_fid, chunk_id);
auto age_span = interface.chunk_data<float>(float_fid, chunk_id);
auto float_span = interface.chunk_data<milvus::FloatVector>(float_vec_fid, chunk_id);
auto float_span =
interface.chunk_data<milvus::FloatVector>(float_vec_fid, chunk_id);
auto begin = chunk_id * size_per_chunk;
auto end = std::min((chunk_id + 1) * size_per_chunk, N);
auto size_of_chunk = end - begin;

View File

@ -46,7 +46,10 @@ GenGenericValue(T value) {
}
auto
GenColumnInfo(int64_t field_id, proto::schema::DataType field_type, bool auto_id, bool is_pk) {
GenColumnInfo(int64_t field_id,
proto::schema::DataType field_type,
bool auto_id,
bool is_pk) {
auto column_info = new proto::plan::ColumnInfo();
column_info->set_field_id(field_id);
column_info->set_data_type(field_type);
@ -56,7 +59,10 @@ GenColumnInfo(int64_t field_id, proto::schema::DataType field_type, bool auto_id
}
auto
GenQueryInfo(int64_t topk, std::string metric_type, std::string search_params, int64_t round_decimal = -1) {
GenQueryInfo(int64_t topk,
std::string metric_type,
std::string search_params,
int64_t round_decimal = -1) {
auto query_info = new proto::plan::QueryInfo();
query_info->set_topk(topk);
query_info->set_metric_type(metric_type);
@ -66,7 +72,10 @@ GenQueryInfo(int64_t topk, std::string metric_type, std::string search_params, i
}
auto
GenAnns(proto::plan::Expr* predicate, bool is_binary, int64_t field_id, std::string placeholder_tag = "$0") {
GenAnns(proto::plan::Expr* predicate,
bool is_binary,
int64_t field_id,
std::string placeholder_tag = "$0") {
auto query_info = GenQueryInfo(10, "L2", "{\"nprobe\": 10}", -1);
auto anns = new proto::plan::VectorANNS();
anns->set_is_binary(is_binary);
@ -146,23 +155,32 @@ GenPlanNode() {
}
// Adds every id in `output_fields`, in order, to `plan_node` through
// add_output_field_ids().
//
// plan_node      plan whose output field ids are extended; must be non-null
//                (it is dereferenced unconditionally).
// output_fields  field ids to append.
//
// The signature is spelled once; the previous text carried both the one-line
// and the wrapped form back to back, which does not parse.
void
SetTargetEntry(std::unique_ptr<proto::plan::PlanNode>& plan_node,
               const std::vector<int64_t>& output_fields) {
    for (auto id : output_fields) {
        plan_node->add_output_field_ids(id);
    }
}
auto
GenTermPlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta, const std::vector<std::string>& strs)
GenTermPlan(const FieldMeta& fvec_meta,
const FieldMeta& str_meta,
const std::vector<std::string>& strs)
-> std::unique_ptr<proto::plan::PlanNode> {
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
auto column_info = GenColumnInfo(str_meta.get_id().get(),
proto::schema::DataType::VarChar,
false,
false);
auto term_expr = GenTermExpr<std::string>(strs);
term_expr->set_allocated_column_info(column_info);
auto expr = GenExpr().release();
expr->set_allocated_term_expr(term_expr);
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
auto anns = GenAnns(expr,
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
fvec_meta.get_id().get(),
"$0");
auto plan_node = GenPlanNode();
plan_node->set_allocated_vector_anns(anns);
@ -171,8 +189,12 @@ GenTermPlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta, const std::ve
auto
GenAlwaysFalseExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
auto term_expr = GenTermExpr<std::string>({}); // in empty set, always false.
auto column_info = GenColumnInfo(str_meta.get_id().get(),
proto::schema::DataType::VarChar,
false,
false);
auto term_expr =
GenTermExpr<std::string>({}); // in empty set, always false.
term_expr->set_allocated_column_info(column_info);
auto expr = GenExpr().release();
@ -193,8 +215,10 @@ GenAlwaysTrueExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
auto
GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
auto anns = GenAnns(always_false_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
fvec_meta.get_id().get(), "$0");
auto anns = GenAnns(always_false_expr,
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
fvec_meta.get_id().get(),
"$0");
auto plan_node = GenPlanNode();
plan_node->set_allocated_vector_anns(anns);
@ -204,8 +228,10 @@ GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
auto
GenAlwaysTruePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
auto always_true_expr = GenAlwaysTrueExpr(fvec_meta, str_meta);
auto anns =
GenAnns(always_true_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
auto anns = GenAnns(always_true_expr,
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
fvec_meta.get_id().get(),
"$0");
auto plan_node = GenPlanNode();
plan_node->set_allocated_vector_anns(anns);
@ -217,7 +243,8 @@ GenTestSchema() {
auto schema = std::make_shared<Schema>();
schema->AddDebugField("str", DataType::VARCHAR);
schema->AddDebugField("another_str", DataType::VARCHAR);
schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto pk = schema->AddDebugField("int64", DataType::INT64);
schema->set_primary_field_id(pk);
return schema;
@ -228,7 +255,8 @@ GenStrPKSchema() {
auto schema = std::make_shared<Schema>();
auto pk = schema->AddDebugField("str", DataType::VARCHAR);
schema->AddDebugField("another_str", DataType::VARCHAR);
schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField(
"fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
schema->AddDebugField("int64", DataType::INT64);
schema->set_primary_field_id(pk);
return schema;
@ -252,7 +280,11 @@ TEST(StringExpr, Term) {
}();
std::map<int, std::vector<std::string>> terms = {
{0, {"2000", "3000"}}, {1, {"2000"}}, {2, {"3000"}}, {3, {}}, {4, {vec_2k_3k}},
{0, {"2000", "3000"}},
{1, {"2000"}},
{2, {"3000"}},
{3, {}},
{4, {vec_2k_3k}},
};
auto seg = CreateGrowingSegment(schema);
@ -266,11 +298,16 @@ TEST(StringExpr, Term) {
auto end = new_str_col->scalars().string_data().data().end();
str_col.insert(str_col.end(), begin, end);
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (const auto& [_, term] : terms) {
auto plan_proto = GenTermPlan(fvec_meta, str_meta, term);
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
@ -296,11 +333,18 @@ TEST(StringExpr, Compare) {
const auto& str_meta = schema->operator[](FieldName("str"));
const auto& another_str_meta = schema->operator[](FieldName("another_str"));
auto gen_compare_plan = [&, fvec_meta, str_meta,
another_str_meta](proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
auto str_col_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
auto gen_compare_plan =
[&, fvec_meta, str_meta, another_str_meta](
proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
auto str_col_info = GenColumnInfo(str_meta.get_id().get(),
proto::schema::DataType::VarChar,
false,
false);
auto another_str_col_info =
GenColumnInfo(another_str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
GenColumnInfo(another_str_meta.get_id().get(),
proto::schema::DataType::VarChar,
false,
false);
auto compare_expr = GenCompareExpr(op);
compare_expr->set_allocated_left_column_info(str_col_info);
@ -309,22 +353,37 @@ TEST(StringExpr, Compare) {
auto expr = GenExpr().release();
expr->set_allocated_compare_expr(compare_expr);
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
auto anns =
GenAnns(expr,
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
fvec_meta.get_id().get(),
"$0");
auto plan_node = std::make_unique<proto::plan::PlanNode>();
plan_node->set_allocated_vector_anns(anns);
return std::move(plan_node);
};
std::vector<std::tuple<proto::plan::OpType, std::function<bool(std::string, std::string)>>> testcases{
{proto::plan::OpType::GreaterThan, [](std::string v1, std::string v2) { return v1 > v2; }},
{proto::plan::OpType::GreaterEqual, [](std::string v1, std::string v2) { return v1 >= v2; }},
{proto::plan::OpType::LessThan, [](std::string v1, std::string v2) { return v1 < v2; }},
{proto::plan::OpType::LessEqual, [](std::string v1, std::string v2) { return v1 <= v2; }},
{proto::plan::OpType::Equal, [](std::string v1, std::string v2) { return v1 == v2; }},
{proto::plan::OpType::NotEqual, [](std::string v1, std::string v2) { return v1 != v2; }},
{proto::plan::OpType::PrefixMatch, [](std::string v1, std::string v2) { return PrefixMatch(v1, v2); }},
};
std::vector<std::tuple<proto::plan::OpType,
std::function<bool(std::string, std::string)>>>
testcases{
{proto::plan::OpType::GreaterThan,
[](std::string v1, std::string v2) { return v1 > v2; }},
{proto::plan::OpType::GreaterEqual,
[](std::string v1, std::string v2) { return v1 >= v2; }},
{proto::plan::OpType::LessThan,
[](std::string v1, std::string v2) { return v1 < v2; }},
{proto::plan::OpType::LessEqual,
[](std::string v1, std::string v2) { return v1 <= v2; }},
{proto::plan::OpType::Equal,
[](std::string v1, std::string v2) { return v1 == v2; }},
{proto::plan::OpType::NotEqual,
[](std::string v1, std::string v2) { return v1 != v2; }},
{proto::plan::OpType::PrefixMatch,
[](std::string v1, std::string v2) {
return PrefixMatch(v1, v2);
}},
};
auto seg = CreateGrowingSegment(schema);
int N = 1000;
@ -334,7 +393,8 @@ TEST(StringExpr, Compare) {
for (int iter = 0; iter < num_iters; ++iter) {
auto raw_data = DataGen(schema, N, iter);
auto reserve_col = [&, raw_data](const FieldMeta& field_meta, std::vector<std::string>& str_col) {
auto reserve_col = [&, raw_data](const FieldMeta& field_meta,
std::vector<std::string>& str_col) {
auto new_str_col = raw_data.get_col(field_meta.get_id());
auto begin = new_str_col->scalars().string_data().data().begin();
auto end = new_str_col->scalars().string_data().data().end();
@ -346,12 +406,17 @@ TEST(StringExpr, Compare) {
{
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (const auto& [op, ref_func] : testcases) {
auto plan_proto = gen_compare_plan(op);
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
@ -377,29 +442,51 @@ TEST(StringExpr, UnaryRange) {
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
const auto& str_meta = schema->operator[](FieldName("str"));
auto gen_unary_range_plan = [&, fvec_meta, str_meta](proto::plan::OpType op,
std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
auto gen_unary_range_plan =
[&, fvec_meta, str_meta](
proto::plan::OpType op,
std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
auto column_info = GenColumnInfo(str_meta.get_id().get(),
proto::schema::DataType::VarChar,
false,
false);
auto unary_range_expr = GenUnaryRangeExpr(op, value);
unary_range_expr->set_allocated_column_info(column_info);
auto expr = GenExpr().release();
expr->set_allocated_unary_range_expr(unary_range_expr);
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
auto anns =
GenAnns(expr,
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
fvec_meta.get_id().get(),
"$0");
auto plan_node = std::make_unique<proto::plan::PlanNode>();
plan_node->set_allocated_vector_anns(anns);
return std::move(plan_node);
};
std::vector<std::tuple<proto::plan::OpType, std::string, std::function<bool(std::string)>>> testcases{
{proto::plan::OpType::GreaterThan, "2000", [](std::string val) { return val > "2000"; }},
{proto::plan::OpType::GreaterEqual, "2000", [](std::string val) { return val >= "2000"; }},
{proto::plan::OpType::LessThan, "3000", [](std::string val) { return val < "3000"; }},
{proto::plan::OpType::LessEqual, "3000", [](std::string val) { return val <= "3000"; }},
{proto::plan::OpType::PrefixMatch, "a", [](std::string val) { return PrefixMatch(val, "a"); }},
};
std::vector<std::tuple<proto::plan::OpType,
std::string,
std::function<bool(std::string)>>>
testcases{
{proto::plan::OpType::GreaterThan,
"2000",
[](std::string val) { return val > "2000"; }},
{proto::plan::OpType::GreaterEqual,
"2000",
[](std::string val) { return val >= "2000"; }},
{proto::plan::OpType::LessThan,
"3000",
[](std::string val) { return val < "3000"; }},
{proto::plan::OpType::LessEqual,
"3000",
[](std::string val) { return val <= "3000"; }},
{proto::plan::OpType::PrefixMatch,
"a",
[](std::string val) { return PrefixMatch(val, "a"); }},
};
auto seg = CreateGrowingSegment(schema);
int N = 1000;
@ -412,11 +499,16 @@ TEST(StringExpr, UnaryRange) {
auto end = new_str_col->scalars().string_data().data().end();
str_col.insert(str_col.end(), begin, end);
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (const auto& [op, value, ref_func] : testcases) {
auto plan_proto = gen_unary_range_plan(op, value);
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
@ -428,7 +520,8 @@ TEST(StringExpr, UnaryRange) {
auto val = str_col[i];
auto ref = ref_func(val);
ASSERT_EQ(ans, ref) << "@" << op << "@" << value << "@" << i << "!!" << val;
ASSERT_EQ(ans, ref)
<< "@" << op << "@" << value << "@" << i << "!!" << val;
}
}
}
@ -441,16 +534,28 @@ TEST(StringExpr, BinaryRange) {
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
const auto& str_meta = schema->operator[](FieldName("str"));
auto gen_binary_range_plan = [&, fvec_meta, str_meta](bool lb_inclusive, bool ub_inclusive, std::string lb,
std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
auto binary_range_expr = GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
auto gen_binary_range_plan =
[&, fvec_meta, str_meta](
bool lb_inclusive,
bool ub_inclusive,
std::string lb,
std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
auto column_info = GenColumnInfo(str_meta.get_id().get(),
proto::schema::DataType::VarChar,
false,
false);
auto binary_range_expr =
GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
binary_range_expr->set_allocated_column_info(column_info);
auto expr = GenExpr().release();
expr->set_allocated_binary_range_expr(binary_range_expr);
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
auto anns =
GenAnns(expr,
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
fvec_meta.get_id().get(),
"$0");
auto plan_node = std::make_unique<proto::plan::PlanNode>();
plan_node->set_allocated_vector_anns(anns);
@ -458,13 +563,34 @@ TEST(StringExpr, BinaryRange) {
};
// bool lb_inclusive, bool ub_inclusive, std::string lb, std::string ub
std::vector<std::tuple<bool, bool, std::string, std::string, std::function<bool(std::string)>>> testcases{
{false, false, "2000", "3000", [](std::string val) { return val > "2000" && val < "3000"; }},
{false, true, "2000", "3000", [](std::string val) { return val > "2000" && val <= "3000"; }},
{true, false, "2000", "3000", [](std::string val) { return val >= "2000" && val < "3000"; }},
{true, true, "2000", "3000", [](std::string val) { return val >= "2000" && val <= "3000"; }},
{true, true, "2000", "1000", [](std::string val) { return false; }},
};
std::vector<std::tuple<bool,
bool,
std::string,
std::string,
std::function<bool(std::string)>>>
testcases{
{false,
false,
"2000",
"3000",
[](std::string val) { return val > "2000" && val < "3000"; }},
{false,
true,
"2000",
"3000",
[](std::string val) { return val > "2000" && val <= "3000"; }},
{true,
false,
"2000",
"3000",
[](std::string val) { return val >= "2000" && val < "3000"; }},
{true,
true,
"2000",
"3000",
[](std::string val) { return val >= "2000" && val <= "3000"; }},
{true, true, "2000", "1000", [](std::string val) { return false; }},
};
auto seg = CreateGrowingSegment(schema);
int N = 1000;
@ -477,13 +603,20 @@ TEST(StringExpr, BinaryRange) {
auto end = new_str_col->scalars().string_data().data().end();
str_col.insert(str_col.end(), begin, end);
seg->PreInsert(N);
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
seg->Insert(iter * N,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
}
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] : testcases) {
auto plan_proto = gen_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
ExecExprVisitor visitor(
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] :
testcases) {
auto plan_proto =
gen_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
EXPECT_EQ(final.size(), N * num_iters);
@ -493,8 +626,9 @@ TEST(StringExpr, BinaryRange) {
auto val = str_col[i];
auto ref = ref_func(val);
ASSERT_EQ(ans, ref) << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb << "@" << ub << "@" << i
<< "!!" << val;
ASSERT_EQ(ans, ref)
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
<< "@" << ub << "@" << i << "!!" << val;
}
}
}
@ -512,20 +646,27 @@ TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
auto round_decimal = -1;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
auto str_col = dataset.get_col(str_meta.get_id())->scalars().string_data().data();
auto str_col =
dataset.get_col(str_meta.get_id())->scalars().string_data().data();
auto query_ptr = vec_col.data();
auto segment = CreateGrowingSegment(schema);
segment->disable_small_index(); // brute-force search.
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto plan_proto = GenAlwaysTruePlan(fvec_meta, str_meta);
SetTargetEntry(plan_proto, {str_meta.get_id().get()});
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
auto num_queries = 5;
auto topk = 10;
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
auto ph_group_raw =
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
auto ph_group =
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
Timestamp time = MAX_TIMESTAMP;
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
@ -538,19 +679,25 @@ TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
dim, //
query_ptr //
};
auto sub_result = BruteForceSearch(search_dataset, vec_col.data(), N, knowhere::Json(), nullptr);
auto sub_result = BruteForceSearch(
search_dataset, vec_col.data(), N, knowhere::Json(), nullptr);
auto sr = segment->Search(plan.get(), ph_group.get(), time);
segment->FillPrimaryKeys(plan.get(), *sr);
segment->FillTargetEntry(plan.get(), *sr);
ASSERT_EQ(sr->pk_type_, DataType::VARCHAR);
ASSERT_TRUE(sr->output_fields_data_.find(str_meta.get_id()) != sr->output_fields_data_.end());
auto retrieved_str_col = sr->output_fields_data_[str_meta.get_id()]->scalars().string_data().data();
ASSERT_TRUE(sr->output_fields_data_.find(str_meta.get_id()) !=
sr->output_fields_data_.end());
auto retrieved_str_col = sr->output_fields_data_[str_meta.get_id()]
->scalars()
.string_data()
.data();
for (auto q = 0; q < num_queries; q++) {
for (auto k = 0; k < topk; k++) {
auto offset = q * topk + k;
auto seg_offset = sub_result.get_seg_offsets()[offset];
ASSERT_EQ(std::get<std::string>(sr->primary_keys_[offset]), str_col[seg_offset]);
ASSERT_EQ(std::get<std::string>(sr->primary_keys_[offset]),
str_col[seg_offset]);
ASSERT_EQ(retrieved_str_col[offset], str_col[seg_offset]);
}
}
@ -567,11 +714,16 @@ TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
auto N = 100000;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
auto str_col = dataset.get_col(str_meta.get_id())->scalars().string_data().data();
auto str_col =
dataset.get_col(str_meta.get_id())->scalars().string_data().data();
auto segment = CreateGrowingSegment(schema);
segment->disable_small_index(); // brute-force search.
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
segment->Insert(0,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
dataset.raw_);
auto expr_proto = GenAlwaysTrueExpr(fvec_meta, str_meta);
auto plan_proto = GenPlanNode();
@ -585,5 +737,6 @@ TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
ASSERT_EQ(retrieved->ids().str_id().data().size(), N);
ASSERT_EQ(retrieved->offset().size(), N);
ASSERT_EQ(retrieved->fields_data().size(), 1);
ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(), N);
ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(),
N);
}

View File

@ -111,7 +111,8 @@ TEST_F(StringIndexMarisaTest, Range) {
TEST_F(StringIndexMarisaTest, Reverse) {
auto index_types = GetIndexTypes<std::string>();
for (const auto& index_type : index_types) {
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex<std::string>(index_type);
auto index = milvus::index::IndexFactory::GetInstance()
.CreateScalarIndex<std::string>(index_type);
index->Build(nb, strs.data());
assert_reverse<std::string>(index.get(), strs);
}
@ -135,21 +136,24 @@ TEST_F(StringIndexMarisaTest, Query) {
{
auto ds = knowhere::GenDataSet(strs.size(), 8, strs.data());
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::In);
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
milvus::OpType::In);
auto bitset = index->Query(ds);
ASSERT_TRUE(bitset->any());
}
{
auto ds = knowhere::GenDataSet(strs.size(), 8, strs.data());
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::NotIn);
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
milvus::OpType::NotIn);
auto bitset = index->Query(ds);
ASSERT_TRUE(bitset->none());
}
{
auto ds = std::make_shared<knowhere::DataSet>();
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::GreaterEqual);
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
milvus::OpType::GreaterEqual);
ds->Set<std::string>(milvus::index::RANGE_VALUE, "0");
auto bitset = index->Query(ds);
ASSERT_EQ(bitset->size(), strs.size());
@ -158,7 +162,8 @@ TEST_F(StringIndexMarisaTest, Query) {
{
auto ds = std::make_shared<knowhere::DataSet>();
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::Range);
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
milvus::OpType::Range);
ds->Set<std::string>(milvus::index::LOWER_BOUND_VALUE, "0");
ds->Set<std::string>(milvus::index::UPPER_BOUND_VALUE, "range");
ds->Set<bool>(milvus::index::LOWER_BOUND_INCLUSIVE, true);
@ -170,8 +175,10 @@ TEST_F(StringIndexMarisaTest, Query) {
{
for (size_t i = 0; i < strs.size(); i++) {
auto ds = std::make_shared<knowhere::DataSet>();
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::PrefixMatch);
ds->Set<std::string>(milvus::index::PREFIX_VALUE, std::move(strs[i]));
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
milvus::OpType::PrefixMatch);
ds->Set<std::string>(milvus::index::PREFIX_VALUE,
std::move(strs[i]));
auto bitset = index->Query(ds);
ASSERT_EQ(bitset->size(), strs.size());
ASSERT_TRUE(bitset->test(i));
@ -255,7 +262,8 @@ TEST_F(StringIndexMarisaTest, Codec) {
}
TEST_F(StringIndexMarisaTest, BaseIndexCodec) {
milvus::index::IndexBasePtr index = milvus::index::CreateStringIndexMarisa();
milvus::index::IndexBasePtr index =
milvus::index::CreateStringIndexMarisa();
std::vector<std::string> strings(nb);
for (int i = 0; i < nb; ++i) {
strings[i] = std::to_string(std::rand() % 10);

View File

@ -20,14 +20,22 @@ using namespace milvus::segcore;
TEST(TimestampIndex, Naive) {
SUCCEED();
std::vector<Timestamp> timestamps{
1, 2, 14, 11, 13, 22, 21, 20,
1,
2,
14,
11,
13,
22,
21,
20,
};
std::vector<int64_t> lengths = {2, 3, 3};
TimestampIndex index;
index.set_length_meta(lengths);
index.build_with(timestamps.data(), timestamps.size());
auto guessed_slice = GenerateFakeSlices(timestamps.data(), timestamps.size(), 2);
auto guessed_slice =
GenerateFakeSlices(timestamps.data(), timestamps.size(), 2);
ASSERT_EQ(guessed_slice.size(), lengths.size());
for (auto i = 0; i < lengths.size(); ++i) {
ASSERT_EQ(guessed_slice[i], lengths[i]);

View File

@ -21,12 +21,16 @@ TEST(Util, StringMatch) {
using namespace milvus::query;
ASSERT_ANY_THROW(Match(1, 2, OpType::PrefixMatch));
ASSERT_ANY_THROW(Match(std::string("not_match_operation"), std::string("not_match"), OpType::LessEqual));
ASSERT_ANY_THROW(Match(std::string("not_match_operation"),
std::string("not_match"),
OpType::LessEqual));
ASSERT_TRUE(PrefixMatch("prefix1", "prefix"));
ASSERT_TRUE(PostfixMatch("1postfix", "postfix"));
ASSERT_TRUE(Match(std::string("prefix1"), std::string("prefix"), OpType::PrefixMatch));
ASSERT_TRUE(Match(std::string("1postfix"), std::string("postfix"), OpType::PostfixMatch));
ASSERT_TRUE(Match(
std::string("prefix1"), std::string("prefix"), OpType::PrefixMatch));
ASSERT_TRUE(Match(
std::string("1postfix"), std::string("postfix"), OpType::PostfixMatch));
ASSERT_FALSE(PrefixMatch("", "longer"));
ASSERT_FALSE(PostfixMatch("", "longer"));
@ -41,7 +45,8 @@ TEST(Util, GetDeleteBitmap) {
using namespace milvus::segcore;
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
schema->set_primary_field_id(i64_fid);
auto N = 10;
@ -74,7 +79,11 @@ TEST(Util, GetDeleteBitmap) {
auto query_timestamp = tss[N - 1];
auto del_barrier = get_barrier(delete_record, query_timestamp);
auto insert_barrier = get_barrier(insert_record, query_timestamp);
auto res_bitmap = get_deleted_bitmap(del_barrier, insert_barrier, delete_record, insert_record, query_timestamp);
auto res_bitmap = get_deleted_bitmap(del_barrier,
insert_barrier,
delete_record,
insert_record,
query_timestamp);
ASSERT_EQ(res_bitmap->bitmap_ptr->count(), 0);
// test case insert repeated pk1 (ts = {1 ... N}) -> delete pk1 (ts = N) -> query (ts = N)
@ -86,12 +95,17 @@ TEST(Util, GetDeleteBitmap) {
delete_record.ack_responder_.AddSegment(offset, offset + 1);
del_barrier = get_barrier(delete_record, query_timestamp);
res_bitmap = get_deleted_bitmap(del_barrier, insert_barrier, delete_record, insert_record, query_timestamp);
res_bitmap = get_deleted_bitmap(del_barrier,
insert_barrier,
delete_record,
insert_record,
query_timestamp);
ASSERT_EQ(res_bitmap->bitmap_ptr->count(), N - 1);
// test case insert repeated pk1 (ts = {1 ... N}) -> delete pk1 (ts = N) -> query (ts = N/2)
query_timestamp = tss[N - 1] / 2;
del_barrier = get_barrier(delete_record, query_timestamp);
res_bitmap = get_deleted_bitmap(del_barrier, N, delete_record, insert_record, query_timestamp);
res_bitmap = get_deleted_bitmap(
del_barrier, N, delete_record, insert_record, query_timestamp);
ASSERT_EQ(res_bitmap->bitmap_ptr->count(), 0);
}

View File

@ -34,7 +34,8 @@ compare_double(double x, double y, double epsilon = 0.000001f) {
}
inline void
assert_order(const milvus::SearchResult& result, const knowhere::MetricType& metric_type) {
assert_order(const milvus::SearchResult& result,
const knowhere::MetricType& metric_type) {
bool dsc = milvus::PositivelyRelated(metric_type);
auto& ids = result.seg_offsets_;
auto& dist = result.distances_;
@ -143,7 +144,8 @@ assert_reverse(ScalarIndex<double>* index, const std::vector<double>& arr) {
template <>
inline void
assert_reverse(ScalarIndex<std::string>* index, const std::vector<std::string>& arr) {
assert_reverse(ScalarIndex<std::string>* index,
const std::vector<std::string>& arr) {
for (size_t offset = 0; offset < arr.size(); ++offset) {
ASSERT_TRUE(arr[offset].compare(index->Reverse_Lookup(offset)) == 0);
}
@ -151,7 +153,8 @@ assert_reverse(ScalarIndex<std::string>* index, const std::vector<std::string>&
template <>
inline void
assert_in(ScalarIndex<std::string>* index, const std::vector<std::string>& arr) {
assert_in(ScalarIndex<std::string>* index,
const std::vector<std::string>& arr) {
auto bitset1 = index->In(arr.size(), arr.data());
ASSERT_EQ(arr.size(), bitset1->size());
ASSERT_TRUE(bitset1->any());
@ -159,7 +162,8 @@ assert_in(ScalarIndex<std::string>* index, const std::vector<std::string>& arr)
template <>
inline void
assert_not_in(ScalarIndex<std::string>* index, const std::vector<std::string>& arr) {
assert_not_in(ScalarIndex<std::string>* index,
const std::vector<std::string>& arr) {
auto bitset1 = index->NotIn(arr.size(), arr.data());
ASSERT_EQ(arr.size(), bitset1->size());
ASSERT_TRUE(bitset1->none());
@ -167,7 +171,8 @@ assert_not_in(ScalarIndex<std::string>* index, const std::vector<std::string>& a
template <>
inline void
assert_range(ScalarIndex<std::string>* index, const std::vector<std::string>& arr) {
assert_range(ScalarIndex<std::string>* index,
const std::vector<std::string>& arr) {
auto test_min = arr[0];
auto test_max = arr[arr.size() - 1];

View File

@ -92,12 +92,17 @@ struct GeneratedData {
int len = raw_->num_rows() * field_meta.get_dim();
ret.resize(len);
auto src_data =
reinterpret_cast<const T*>(target_field_data.vectors().float_vector().data().data());
reinterpret_cast<const T*>(target_field_data.vectors()
.float_vector()
.data()
.data());
std::copy_n(src_data, len, ret.data());
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
} else if (field_meta.get_data_type() ==
DataType::VECTOR_BINARY) {
int len = raw_->num_rows() * (field_meta.get_dim() / 8);
ret.resize(len);
auto src_data = reinterpret_cast<const T*>(target_field_data.vectors().binary_vector().data());
auto src_data = reinterpret_cast<const T*>(
target_field_data.vectors().binary_vector().data());
std::copy_n(src_data, len, ret.data());
} else {
PanicInfo("unsupported");
@ -107,36 +112,44 @@ struct GeneratedData {
}
switch (field_meta.get_data_type()) {
case DataType::BOOL: {
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().bool_data().data().data());
auto src_data = reinterpret_cast<const T*>(
target_field_data.scalars().bool_data().data().data());
std::copy_n(src_data, raw_->num_rows(), ret.data());
break;
}
case DataType::INT8:
case DataType::INT16:
case DataType::INT32: {
auto src_data =
reinterpret_cast<const int32_t*>(target_field_data.scalars().int_data().data().data());
auto src_data = reinterpret_cast<const int32_t*>(
target_field_data.scalars().int_data().data().data());
std::copy_n(src_data, raw_->num_rows(), ret.data());
break;
}
case DataType::INT64: {
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().long_data().data().data());
auto src_data = reinterpret_cast<const T*>(
target_field_data.scalars().long_data().data().data());
std::copy_n(src_data, raw_->num_rows(), ret.data());
break;
}
case DataType::FLOAT: {
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().float_data().data().data());
auto src_data = reinterpret_cast<const T*>(
target_field_data.scalars().float_data().data().data());
std::copy_n(src_data, raw_->num_rows(), ret.data());
break;
}
case DataType::DOUBLE: {
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().double_data().data().data());
auto src_data =
reinterpret_cast<const T*>(target_field_data.scalars()
.double_data()
.data()
.data());
std::copy_n(src_data, raw_->num_rows(), ret.data());
break;
}
case DataType::VARCHAR: {
auto ret_data = reinterpret_cast<std::string*>(ret.data());
auto src_data = target_field_data.scalars().string_data().data();
auto src_data =
target_field_data.scalars().string_data().data();
std::copy(src_data.begin(), src_data.end(), ret_data);
break;
@ -163,19 +176,29 @@ struct GeneratedData {
private:
GeneratedData() = default;
friend GeneratedData
DataGen(SchemaPtr schema, int64_t N, uint64_t seed, uint64_t ts_offset, int repeat_count);
DataGen(SchemaPtr schema,
int64_t N,
uint64_t seed,
uint64_t ts_offset,
int repeat_count);
};
inline GeneratedData
DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0, int repeat_count = 1) {
DataGen(SchemaPtr schema,
int64_t N,
uint64_t seed = 42,
uint64_t ts_offset = 0,
int repeat_count = 1) {
using std::vector;
std::default_random_engine er(seed);
std::normal_distribution<> distr(0, 1);
int offset = 0;
auto insert_data = std::make_unique<InsertData>();
auto insert_cols = [&insert_data](auto& data, int64_t count, auto& field_meta) {
auto array = milvus::segcore::CreateDataArrayFrom(data.data(), count, field_meta);
auto insert_cols = [&insert_data](
auto& data, int64_t count, auto& field_meta) {
auto array = milvus::segcore::CreateDataArrayFrom(
data.data(), count, field_meta);
insert_data->mutable_fields_data()->AddAllocated(array.release());
};
@ -185,7 +208,8 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0,
case DataType::VECTOR_FLOAT: {
auto dim = field_meta.get_dim();
vector<float> final(dim * N);
bool is_ip = starts_with(field_meta.get_name().get(), "normalized");
bool is_ip =
starts_with(field_meta.get_name().get(), "normalized");
#pragma omp parallel for
for (int n = 0; n < N; ++n) {
vector<float> data(dim);
@ -204,7 +228,8 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0,
}
}
std::copy(data.begin(), data.end(), final.begin() + dim * n);
std::copy(
data.begin(), data.end(), final.begin() + dim * n);
}
insert_cols(final, N, field_meta);
break;
@ -318,7 +343,9 @@ CreatePlaceholderGroup(int64_t num_queries, int dim, int64_t seed = 42) {
}
inline auto
CreatePlaceholderGroup(int64_t num_queries, int dim, const std::vector<float>& vecs) {
CreatePlaceholderGroup(int64_t num_queries,
int dim,
const std::vector<float>& vecs) {
namespace ser = milvus::proto::common;
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
@ -355,7 +382,9 @@ CreatePlaceholderGroupFromBlob(int64_t num_queries, int dim, const float* src) {
}
inline auto
CreateBinaryPlaceholderGroup(int64_t num_queries, int64_t dim, int64_t seed = 42) {
CreateBinaryPlaceholderGroup(int64_t num_queries,
int64_t dim,
int64_t seed = 42) {
assert(dim % 8 == 0);
namespace ser = milvus::proto::common;
ser::PlaceholderGroup raw_group;
@ -375,7 +404,9 @@ CreateBinaryPlaceholderGroup(int64_t num_queries, int64_t dim, int64_t seed = 42
}
inline auto
CreateBinaryPlaceholderGroupFromBlob(int64_t num_queries, int64_t dim, const uint8_t* ptr) {
CreateBinaryPlaceholderGroupFromBlob(int64_t num_queries,
int64_t dim,
const uint8_t* ptr) {
assert(dim % 8 == 0);
namespace ser = milvus::proto::common;
ser::PlaceholderGroup raw_group;
@ -402,7 +433,8 @@ SearchResultToVector(const SearchResult& sr) {
for (int q = 0; q < num_queries; ++q) {
for (int k = 0; k < topk; ++k) {
int index = q * topk + k;
result.emplace_back(std::make_pair(sr.seg_offsets_[index], sr.distances_[index]));
result.emplace_back(
std::make_pair(sr.seg_offsets_[index], sr.distances_[index]));
}
}
return result;
@ -417,7 +449,8 @@ SearchResultToJson(const SearchResult& sr) {
std::vector<std::string> result;
for (int k = 0; k < topk; ++k) {
int index = q * topk + k;
result.emplace_back(std::to_string(sr.seg_offsets_[index]) + "->" + std::to_string(sr.distances_[index]));
result.emplace_back(std::to_string(sr.seg_offsets_[index]) + "->" +
std::to_string(sr.distances_[index]));
}
results.emplace_back(std::move(result));
}
@ -433,7 +466,8 @@ SealedLoadFieldData(const GeneratedData& dataset,
{
LoadFieldDataInfo info;
FieldMeta field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), row_count, field_meta);
auto array = CreateScalarDataArrayFrom(
dataset.row_ids_.data(), row_count, field_meta);
info.field_data = array.get();
info.row_count = dataset.row_ids_.size();
info.field_id = RowFieldID.get(); // field id for RowId
@ -441,8 +475,10 @@ SealedLoadFieldData(const GeneratedData& dataset,
}
{
LoadFieldDataInfo info;
FieldMeta field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), row_count, field_meta);
FieldMeta field_meta(
FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto array = CreateScalarDataArrayFrom(
dataset.timestamps_.data(), row_count, field_meta);
info.field_data = array.get();
info.row_count = dataset.timestamps_.size();
info.field_id = TimestampFieldID.get();
@ -474,13 +510,16 @@ SealedCreator(SchemaPtr schema, const GeneratedData& dataset) {
// Test helper: builds a VectorMemNMIndex of type INDEX_FAISS_IVFFLAT (L2
// metric, CPU mode) from the caller's vectors and returns ownership of it.
//
// N   - row count passed to knowhere::GenDataSet.
// dim - vector dimension; passed to GenDataSet and stored (as a string) in the
//       build config.
// vec - caller's float data handed to knowhere::GenDataSet.
//       NOTE(review): presumably N * dim floats; confirm against GenDataSet.
inline std::unique_ptr<milvus::index::VectorIndex>
GenVecIndexing(int64_t N, int64_t dim, const float* vec) {
// {knowhere::IndexParams::nprobe, 10},
// Build config: L2 metric, the requested dimension, nlist "1024", device id 0.
auto conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "1024"},
{knowhere::meta::DEVICE_ID, 0}};
auto conf =
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, std::to_string(dim)},
{knowhere::indexparam::NLIST, "1024"},
{knowhere::meta::DEVICE_ID, 0}};
// Wrap the caller's buffer in a knowhere dataset built from (N, dim, vec).
auto database = knowhere::GenDataSet(N, dim, vec);
auto indexing = std::make_unique<index::VectorMemNMIndex>(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
knowhere::metric::L2, IndexMode::MODE_CPU);
auto indexing = std::make_unique<index::VectorMemNMIndex>(
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
knowhere::metric::L2,
IndexMode::MODE_CPU);
// Build the index from the dataset with the config assembled above.
indexing->BuildWithDataset(database, conf);
return indexing;
}
@ -502,7 +541,8 @@ GenScalarIndexing(int64_t N, const T* data) {
inline std::vector<char>
translate_text_plan_to_binary_plan(const char* text_plan) {
proto::plan::PlanNode plan_node;
auto ok = google::protobuf::TextFormat::ParseFromString(text_plan, &plan_node);
auto ok =
google::protobuf::TextFormat::ParseFromString(text_plan, &plan_node);
AssertInfo(ok, "Failed to parse");
std::string binary_plan;

View File

@ -92,7 +92,15 @@ get_default_storage_config() {
auto iamEndPoint = minioConfig["iamEndpoint"].as<std::string>();
auto bucketName = minioConfig["bucketName"].as<std::string>();
return StorageConfig{endpoint, bucketName, accessKey, accessValue, rootPath, "minio", iamEndPoint, useSSL, useIam};
return StorageConfig{endpoint,
bucketName,
accessKey,
accessValue,
rootPath,
"minio",
iamEndPoint,
useSSL,
useIam};
}
void
@ -173,7 +181,8 @@ class TestConfigWrapper {
strcpy(const_cast<char*>(config_.address), address.c_str());
strcpy(const_cast<char*>(config_.bucket_name), bucketName.c_str());
strcpy(const_cast<char*>(config_.access_key_id), accessKey.c_str());
strcpy(const_cast<char*>(config_.access_key_value), accessValue.c_str());
strcpy(const_cast<char*>(config_.access_key_value),
accessValue.c_str());
strcpy(const_cast<char*>(config_.remote_root_path), rootPath.c_str());
strcpy(const_cast<char*>(config_.storage_type), storage_type.c_str());
strcpy(const_cast<char*>(config_.iam_endpoint), iamEndPoint.c_str());
@ -190,7 +199,8 @@ get_default_cstorage_config() {
}
auto
generate_build_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type) {
generate_build_conf(const milvus::IndexType& index_type,
const milvus::MetricType& metric_type) {
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IDMAP) {
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
@ -198,8 +208,10 @@ generate_build_conf(const milvus::IndexType& index_type, const milvus::MetricTyp
};
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type}, {knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::NLIST, "16"}, {knowhere::indexparam::M, "4"},
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{knowhere::indexparam::NLIST, "16"},
{knowhere::indexparam::M, "4"},
{knowhere::indexparam::NBITS, "8"},
};
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) {
@ -252,12 +264,15 @@ generate_build_conf(const milvus::IndexType& index_type, const milvus::MetricTyp
}
auto
generate_load_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type, int64_t nb) {
generate_load_conf(const milvus::IndexType& index_type,
const milvus::MetricType& metric_type,
int64_t nb) {
if (index_type == knowhere::IndexEnum::INDEX_DISKANN) {
return knowhere::Json{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET, std::to_string(0.0002)},
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET,
std::to_string(0.0002)},
};
}
return knowhere::Json();
@ -275,12 +290,14 @@ search_with_nprobe_list() {
}
auto
generate_search_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type) {
generate_search_conf(const milvus::IndexType& index_type,
const milvus::MetricType& metric_type) {
auto conf = milvus::Config{
{knowhere::meta::METRIC_TYPE, metric_type},
};
if (milvus::index::is_in_list<milvus::IndexType>(index_type, search_with_nprobe_list)) {
if (milvus::index::is_in_list<milvus::IndexType>(index_type,
search_with_nprobe_list)) {
conf[knowhere::indexparam::NPROBE] = 4;
} else if (index_type == knowhere::IndexEnum::INDEX_HNSW) {
conf[knowhere::indexparam::EF] = 200;
@ -293,7 +310,8 @@ generate_search_conf(const milvus::IndexType& index_type, const milvus::MetricTy
}
auto
generate_range_search_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type) {
generate_range_search_conf(const milvus::IndexType& index_type,
const milvus::MetricType& metric_type) {
auto conf = milvus::Config{
{knowhere::meta::METRIC_TYPE, metric_type},
};
@ -306,7 +324,8 @@ generate_range_search_conf(const milvus::IndexType& index_type, const milvus::Me
conf[knowhere::meta::RANGE_FILTER] = 0.1;
}
if (milvus::index::is_in_list<milvus::IndexType>(index_type, search_with_nprobe_list)) {
if (milvus::index::is_in_list<milvus::IndexType>(index_type,
search_with_nprobe_list)) {
conf[knowhere::indexparam::NPROBE] = 4;
} else if (index_type == knowhere::IndexEnum::INDEX_HNSW) {
conf[knowhere::indexparam::EF] = 200;
@ -319,7 +338,8 @@ generate_range_search_conf(const milvus::IndexType& index_type, const milvus::Me
}
auto
generate_params(const knowhere::IndexType& index_type, const knowhere::MetricType& metric_type) {
generate_params(const knowhere::IndexType& index_type,
const knowhere::MetricType& metric_type) {
namespace indexcgo = milvus::proto::indexcgo;
indexcgo::TypeParams type_params;
@ -328,7 +348,8 @@ generate_params(const knowhere::IndexType& index_type, const knowhere::MetricTyp
auto configs = generate_build_conf(index_type, metric_type);
for (auto& [key, value] : configs.items()) {
auto param = index_params.add_params();
auto value_str = value.is_string() ? value.get<std::string>() : value.dump();
auto value_str =
value.is_string() ? value.get<std::string>() : value.dump();
param->set_key(key);
param->set_value(value_str);
}
@ -341,13 +362,18 @@ generate_params(const knowhere::IndexType& index_type, const knowhere::MetricTyp
}
// Test helper: creates a fresh milvus::Schema with a single debug vector field
// and returns milvus::segcore::DataGen(schema, N) for it.
//
// N           - second argument forwarded to DataGen.
// metric_type - metric type attached to the vector field.
// is_binary   - false: field "fakevec" of type VECTOR_FLOAT;
//               true:  field "fakebinvec" of type VECTOR_BINARY.
// dim         - field dimension; defaults to DIM.
auto
GenDataset(int64_t N, const knowhere::MetricType& metric_type, bool is_binary, int64_t dim = DIM) {
GenDataset(int64_t N,
const knowhere::MetricType& metric_type,
bool is_binary,
int64_t dim = DIM) {
auto schema = std::make_shared<milvus::Schema>();
if (!is_binary) {
// Float-vector variant.
schema->AddDebugField("fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else {
// Binary-vector variant; differs only in the field name and data type.
schema->AddDebugField("fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
schema->AddDebugField(
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
}
}
@ -408,15 +434,22 @@ Jaccard(const uint8_t* point_a, const uint8_t* point_b, int dim) {
}
float
CountDistance(
const void* point_a, const void* point_b, int dim, const knowhere::MetricType& metric, bool is_binary = false) {
CountDistance(const void* point_a,
const void* point_b,
int dim,
const knowhere::MetricType& metric,
bool is_binary = false) {
if (point_a == nullptr || point_b == nullptr) {
return std::numeric_limits<float>::max();
}
if (milvus::IsMetricType(metric, knowhere::metric::L2)) {
return L2(static_cast<const float*>(point_a), static_cast<const float*>(point_b), dim);
return L2(static_cast<const float*>(point_a),
static_cast<const float*>(point_b),
dim);
} else if (milvus::IsMetricType(metric, knowhere::metric::JACCARD)) {
return Jaccard(static_cast<const uint8_t*>(point_a), static_cast<const uint8_t*>(point_b), dim);
return Jaccard(static_cast<const uint8_t*>(point_a),
static_cast<const uint8_t*>(point_b),
dim);
} else {
return std::numeric_limits<float>::max();
}
@ -437,7 +470,8 @@ CheckDistances(const QueryResultPtr& result,
for (auto j = 0; j < k; ++j) {
auto dis = result->distances_[i * k + j];
auto id = result->seg_offsets_[i * k + j];
auto count_dis = CountDistance(query_vecs + i * dim, base_vecs + id * dim, dim, metric);
auto count_dis = CountDistance(
query_vecs + i * dim, base_vecs + id * dim, dim, metric);
// assert(std::abs(dis - count_dis) < threshold);
}
}
@ -472,7 +506,9 @@ generate_index_params(const MapParams& m) {
}
// TODO: std::is_arithmetic_v admits floating-point types, whose values are hard to compare; consider std::is_integral_v instead.
template <typename T, typename = typename std::enable_if_t<std::is_arithmetic_v<T> || std::is_same_v<T, std::string>>>
template <typename T,
typename = typename std::enable_if_t<std::is_arithmetic_v<T> ||
std::is_same_v<T, std::string>>>
inline std::vector<T>
GenArr(int64_t n) {
auto max_i8 = std::numeric_limits<int8_t>::max() - 1;
@ -507,7 +543,8 @@ GenArr<std::string>(int64_t n) {
// Returns the two ScalarTestParams entries built here for bool data: one with
// index_type "inverted_index" and one with index_type "flat". Each is
// constructed with a default MapParams as its first argument.
std::vector<ScalarTestParams>
GenBoolParams() {
std::vector<ScalarTestParams> ret;
ret.emplace_back(ScalarTestParams(MapParams(), {{"index_type", "inverted_index"}}));
ret.emplace_back(
ScalarTestParams(MapParams(), {{"index_type", "inverted_index"}}));
ret.emplace_back(ScalarTestParams(MapParams(), {{"index_type", "flat"}}));
return ret;
}
@ -519,7 +556,9 @@ GenStringParams() {
return ret;
}
template <typename T, typename = typename std::enable_if_t<std::is_arithmetic_v<T> | std::is_same_v<std::string, T>>>
template <typename T,
typename = typename std::enable_if_t<std::is_arithmetic_v<T> |
std::is_same_v<std::string, T>>>
inline std::vector<ScalarTestParams>
GenParams() {
if (std::is_same_v<std::string, T>) {
@ -531,7 +570,8 @@ GenParams() {
}
std::vector<ScalarTestParams> ret;
ret.emplace_back(ScalarTestParams(MapParams(), {{"index_type", "inverted_index"}}));
ret.emplace_back(
ScalarTestParams(MapParams(), {{"index_type", "inverted_index"}}));
ret.emplace_back(ScalarTestParams(MapParams(), {{"index_type", "flat"}}));
return ret;
}