mirror of https://github.com/milvus-io/milvus.git
parent
bcd316a44e
commit
a36fefb009
|
@ -32,8 +32,8 @@ class BitsetView : public knowhere::BitsetView {
|
|||
BitsetView() = default;
|
||||
~BitsetView() = default;
|
||||
|
||||
BitsetView(const std::nullptr_t value)
|
||||
: knowhere::BitsetView(value) { // NOLINT
|
||||
BitsetView(const std::nullptr_t value) // NOLINT
|
||||
: knowhere::BitsetView(value) { // NOLINT
|
||||
}
|
||||
|
||||
BitsetView(const uint8_t* data, size_t num_bits)
|
||||
|
|
|
@ -11,10 +11,12 @@
|
|||
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
#include "common/Utils.h"
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
|
||||
#include "common/Utils.h"
|
||||
#include "common/RangeSearchHelper.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace {
|
||||
using ResultPair = std::pair<float, int64_t>;
|
||||
|
|
|
@ -17,8 +17,10 @@
|
|||
#include <sys/mman.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "common/Consts.h"
|
||||
#include "common/FieldMeta.h"
|
||||
|
|
|
@ -111,7 +111,7 @@ get_thread_starttime() {
|
|||
snprintf(filename,
|
||||
sizeof(filename),
|
||||
"/proc/%lld/task/%lld/stat",
|
||||
(long long)pid,
|
||||
(long long)pid, // NOLINT, TODO: How to solve this?
|
||||
(long long)tid); // NOLINT
|
||||
|
||||
int64_t val = 0;
|
||||
|
|
|
@ -176,8 +176,9 @@ class ConcurrentVectorImpl : public VectorBase {
|
|||
auto& chunk = get_chunk(chunk_id);
|
||||
if constexpr (is_scalar) {
|
||||
return Span<TraitType>(chunk.data(), chunk.size());
|
||||
} else if constexpr (std::is_same_v<Type, int64_t> ||
|
||||
} else if constexpr (std::is_same_v<Type, int64_t> || // NOLINT
|
||||
std::is_same_v<Type, int>) {
|
||||
// TODO: where should the braces be placed?
|
||||
// only for testing
|
||||
PanicInfo("unimplemented");
|
||||
} else {
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
#include "TimestampIndex.h"
|
||||
#include "common/Schema.h"
|
||||
#include "easylogging++.h"
|
||||
#include "easyloggingpp/easylogging++.h"
|
||||
#include "segcore/AckResponder.h"
|
||||
#include "segcore/ConcurrentVector.h"
|
||||
#include "segcore/Record.h"
|
||||
|
|
|
@ -38,7 +38,7 @@ class SegmentGrowing : public SegmentInternalInterface {
|
|||
const Timestamp* timestamps,
|
||||
const InsertData* insert_data) = 0;
|
||||
|
||||
virtual SegmentType
|
||||
SegmentType
|
||||
type() const override {
|
||||
return SegmentType::Growing;
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ class SegmentSealed : public SegmentInternalInterface {
|
|||
virtual void
|
||||
DropFieldData(const FieldId field_id) = 0;
|
||||
|
||||
virtual SegmentType
|
||||
SegmentType
|
||||
type() const override {
|
||||
return SegmentType::Sealed;
|
||||
}
|
||||
|
|
|
@ -1,9 +1,22 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "common/LoadInfo.h"
|
||||
|
||||
|
|
|
@ -23,9 +23,21 @@
|
|||
#include "velox/exec/tests/utils/TempDirectoryPath.h"
|
||||
#include "velox/vector/BaseVector.h"
|
||||
|
||||
using namespace facebook::velox;
|
||||
using namespace facebook::velox::dwio::common;
|
||||
using namespace facebook::velox::dwrf;
|
||||
namespace filesystems = facebook::velox::filesystems;
|
||||
namespace dwio = facebook::velox::dwio;
|
||||
namespace dwrf = facebook::velox::dwrf;
|
||||
|
||||
using facebook::velox::LocalReadFile;
|
||||
using facebook::velox::RowVector;
|
||||
using facebook::velox::vector_size_t;
|
||||
using facebook::velox::VectorPtr;
|
||||
|
||||
using dwio::common::BufferedInput;
|
||||
using dwio::common::FileFormat;
|
||||
using dwio::common::ReaderOptions;
|
||||
using dwio::common::RowReaderOptions;
|
||||
|
||||
using dwrf::DwrfReader;
|
||||
|
||||
// A temporary program that reads from ORC file and prints its content
|
||||
// Used to compare the ORC data read by DWRFReader against apache-orc repo.
|
||||
|
|
|
@ -47,13 +47,16 @@ IndexBuilder_build(benchmark::State& state) {
|
|||
indexcgo::TypeParams type_params;
|
||||
indexcgo::IndexParams index_params;
|
||||
|
||||
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
|
||||
std::tie(type_params, index_params) =
|
||||
generate_params(index_type, metric_type);
|
||||
|
||||
std::string type_params_str, index_params_str;
|
||||
bool ok;
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params,
|
||||
&type_params_str);
|
||||
assert(ok);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
|
||||
auto is_binary = state.range(2);
|
||||
|
@ -63,7 +66,9 @@ IndexBuilder_build(benchmark::State& state) {
|
|||
|
||||
for (auto _ : state) {
|
||||
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
|
||||
milvus::DataType::VECTOR_FLOAT, type_params_str.c_str(), index_params_str.c_str(),
|
||||
milvus::DataType::VECTOR_FLOAT,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
get_default_storage_config());
|
||||
index->Build(xb_dataset);
|
||||
}
|
||||
|
@ -77,13 +82,16 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
|
|||
indexcgo::TypeParams type_params;
|
||||
indexcgo::IndexParams index_params;
|
||||
|
||||
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
|
||||
std::tie(type_params, index_params) =
|
||||
generate_params(index_type, metric_type);
|
||||
|
||||
std::string type_params_str, index_params_str;
|
||||
bool ok;
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params,
|
||||
&type_params_str);
|
||||
assert(ok);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
|
||||
auto is_binary = state.range(2);
|
||||
|
@ -93,7 +101,9 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
|
|||
|
||||
for (auto _ : state) {
|
||||
auto index = std::make_unique<milvus::indexbuilder::VecIndexCreator>(
|
||||
milvus::DataType::VECTOR_FLOAT, type_params_str.c_str(), index_params_str.c_str(),
|
||||
milvus::DataType::VECTOR_FLOAT,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
get_default_storage_config());
|
||||
|
||||
index->Build(xb_dataset);
|
||||
|
|
|
@ -24,7 +24,8 @@ static int dim = 768;
|
|||
|
||||
const auto schema = []() {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
return schema;
|
||||
|
@ -56,7 +57,8 @@ const auto plan = [] {
|
|||
auto ph_group = [] {
|
||||
auto num_queries = 10;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
return ph_group;
|
||||
}();
|
||||
|
||||
|
@ -79,7 +81,11 @@ Search_SmallIndex(benchmark::State& state) {
|
|||
segment->disable_small_index();
|
||||
}
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset_.row_ids_.data(), dataset_.timestamps_.data(), dataset_.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset_.row_ids_.data(),
|
||||
dataset_.timestamps_.data(),
|
||||
dataset_.raw_);
|
||||
|
||||
Timestamp time = 10000000;
|
||||
|
||||
|
@ -88,7 +94,9 @@ Search_SmallIndex(benchmark::State& state) {
|
|||
}
|
||||
}
|
||||
|
||||
BENCHMARK(Search_SmallIndex)->MinTime(5)->ArgsProduct({{true, false}, {8, 16, 32}});
|
||||
BENCHMARK(Search_SmallIndex)
|
||||
->MinTime(5)
|
||||
->ArgsProduct({{true, false}, {8, 16, 32}});
|
||||
|
||||
static void
|
||||
Search_Sealed(benchmark::State& state) {
|
||||
|
|
|
@ -25,9 +25,13 @@ using namespace milvus::query;
|
|||
namespace {
|
||||
|
||||
auto
|
||||
GenFloatVecs(int dim, int n, const knowhere::MetricType& metric, int seed = 42) {
|
||||
GenFloatVecs(int dim,
|
||||
int n,
|
||||
const knowhere::MetricType& metric,
|
||||
int seed = 42) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto fvec = schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, dim, metric);
|
||||
auto fvec =
|
||||
schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, dim, metric);
|
||||
auto dataset = DataGen(schema, n, seed);
|
||||
return dataset.get_col<float>(fvec);
|
||||
}
|
||||
|
@ -98,7 +102,8 @@ AssertMatch(const std::vector<int>& ref, const int64_t* ans) {
|
|||
|
||||
bool
|
||||
is_supported_float_metric(const std::string& metric) {
|
||||
return milvus::IsMetricType(metric, knowhere::metric::L2) || milvus::IsMetricType(metric, knowhere::metric::IP);
|
||||
return milvus::IsMetricType(metric, knowhere::metric::L2) ||
|
||||
milvus::IsMetricType(metric, knowhere::metric::IP);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -106,7 +111,11 @@ is_supported_float_metric(const std::string& metric) {
|
|||
class TestFloatSearchBruteForce : public ::testing::Test {
|
||||
public:
|
||||
void
|
||||
Run(int nb, int nq, int topk, int dim, const knowhere::MetricType& metric_type) {
|
||||
Run(int nb,
|
||||
int nq,
|
||||
int topk,
|
||||
int dim,
|
||||
const knowhere::MetricType& metric_type) {
|
||||
auto bitset = std::make_shared<BitsetType>();
|
||||
bitset->resize(nb);
|
||||
auto bitset_view = BitsetView(*bitset);
|
||||
|
@ -114,15 +123,22 @@ class TestFloatSearchBruteForce : public ::testing::Test {
|
|||
auto base = GenFloatVecs(dim, nb, metric_type);
|
||||
auto query = GenFloatVecs(dim, nq, metric_type);
|
||||
|
||||
dataset::SearchDataset dataset{metric_type, nq, topk, -1, dim, query.data()};
|
||||
dataset::SearchDataset dataset{
|
||||
metric_type, nq, topk, -1, dim, query.data()};
|
||||
if (!is_supported_float_metric(metric_type)) {
|
||||
// Memory leak in knowhere.
|
||||
// ASSERT_ANY_THROW(BruteForceSearch(dataset, base.data(), nb, bitset_view));
|
||||
return;
|
||||
}
|
||||
auto result = BruteForceSearch(dataset, base.data(), nb, knowhere::Json(), bitset_view);
|
||||
auto result = BruteForceSearch(
|
||||
dataset, base.data(), nb, knowhere::Json(), bitset_view);
|
||||
for (int i = 0; i < nq; i++) {
|
||||
auto ref = Ref(base.data(), query.data() + i * dim, nb, dim, topk, metric_type);
|
||||
auto ref = Ref(base.data(),
|
||||
query.data() + i * dim,
|
||||
nb,
|
||||
dim,
|
||||
topk,
|
||||
metric_type);
|
||||
auto ans = result.get_seg_offsets() + i * topk;
|
||||
AssertMatch(ref, ans);
|
||||
}
|
||||
|
|
|
@ -18,11 +18,16 @@ using namespace milvus::segcore;
|
|||
TEST(Binary, Insert) {
|
||||
int64_t N = 100000;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, knowhere::metric::JACCARD);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"vecbin", DataType::VECTOR_BINARY, 128, knowhere::metric::JACCARD);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto dataset = DataGen(schema, N, 10);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
auto offset = segment->PreInsert(N);
|
||||
segment->Insert(offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(offset,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -26,7 +26,9 @@ using namespace milvus;
|
|||
|
||||
TEST(storage, InsertDataFloat) {
|
||||
std::vector<float> data = {1, 2, 3, 4, 5};
|
||||
storage::Payload payload{storage::DataType::FLOAT, reinterpret_cast<const uint8_t*>(data.data()), int(data.size())};
|
||||
storage::Payload payload{storage::DataType::FLOAT,
|
||||
reinterpret_cast<const uint8_t*>(data.data()),
|
||||
int(data.size())};
|
||||
auto field_data = std::make_shared<storage::FieldData>(payload);
|
||||
|
||||
storage::InsertData insert_data(field_data);
|
||||
|
@ -35,23 +37,29 @@ TEST(storage, InsertDataFloat) {
|
|||
insert_data.SetTimestamps(0, 100);
|
||||
|
||||
auto serialized_bytes = insert_data.Serialize(storage::StorageType::Remote);
|
||||
auto new_insert_data = storage::DeserializeFileData(reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
|
||||
serialized_bytes.size());
|
||||
auto new_insert_data = storage::DeserializeFileData(
|
||||
reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
|
||||
serialized_bytes.size());
|
||||
ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType);
|
||||
ASSERT_EQ(new_insert_data->GetTimeRage(), std::make_pair(Timestamp(0), Timestamp(100)));
|
||||
ASSERT_EQ(new_insert_data->GetTimeRage(),
|
||||
std::make_pair(Timestamp(0), Timestamp(100)));
|
||||
auto new_payload = new_insert_data->GetPayload();
|
||||
ASSERT_EQ(new_payload->data_type, storage::DataType::FLOAT);
|
||||
ASSERT_EQ(new_payload->rows, data.size());
|
||||
std::vector<float> new_data(data.size());
|
||||
memcpy(new_data.data(), new_payload->raw_data, new_payload->rows * sizeof(float));
|
||||
memcpy(new_data.data(),
|
||||
new_payload->raw_data,
|
||||
new_payload->rows * sizeof(float));
|
||||
ASSERT_EQ(data, new_data);
|
||||
}
|
||||
|
||||
TEST(storage, InsertDataVectorFloat) {
|
||||
std::vector<float> data = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
int DIM = 2;
|
||||
storage::Payload payload{storage::DataType::VECTOR_FLOAT, reinterpret_cast<const uint8_t*>(data.data()),
|
||||
int(data.size()) / DIM, DIM};
|
||||
storage::Payload payload{storage::DataType::VECTOR_FLOAT,
|
||||
reinterpret_cast<const uint8_t*>(data.data()),
|
||||
int(data.size()) / DIM,
|
||||
DIM};
|
||||
auto field_data = std::make_shared<storage::FieldData>(payload);
|
||||
|
||||
storage::InsertData insert_data(field_data);
|
||||
|
@ -60,56 +68,72 @@ TEST(storage, InsertDataVectorFloat) {
|
|||
insert_data.SetTimestamps(0, 100);
|
||||
|
||||
auto serialized_bytes = insert_data.Serialize(storage::StorageType::Remote);
|
||||
auto new_insert_data = storage::DeserializeFileData(reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
|
||||
serialized_bytes.size());
|
||||
auto new_insert_data = storage::DeserializeFileData(
|
||||
reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
|
||||
serialized_bytes.size());
|
||||
ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType);
|
||||
ASSERT_EQ(new_insert_data->GetTimeRage(), std::make_pair(Timestamp(0), Timestamp(100)));
|
||||
ASSERT_EQ(new_insert_data->GetTimeRage(),
|
||||
std::make_pair(Timestamp(0), Timestamp(100)));
|
||||
auto new_payload = new_insert_data->GetPayload();
|
||||
ASSERT_EQ(new_payload->data_type, storage::DataType::VECTOR_FLOAT);
|
||||
ASSERT_EQ(new_payload->rows, data.size() / DIM);
|
||||
std::vector<float> new_data(data.size());
|
||||
memcpy(new_data.data(), new_payload->raw_data, new_payload->rows * sizeof(float) * DIM);
|
||||
memcpy(new_data.data(),
|
||||
new_payload->raw_data,
|
||||
new_payload->rows * sizeof(float) * DIM);
|
||||
ASSERT_EQ(data, new_data);
|
||||
}
|
||||
|
||||
TEST(storage, LocalInsertDataVectorFloat) {
|
||||
std::vector<float> data = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
int DIM = 2;
|
||||
storage::Payload payload{storage::DataType::VECTOR_FLOAT, reinterpret_cast<const uint8_t*>(data.data()),
|
||||
int(data.size()) / DIM, DIM};
|
||||
storage::Payload payload{storage::DataType::VECTOR_FLOAT,
|
||||
reinterpret_cast<const uint8_t*>(data.data()),
|
||||
int(data.size()) / DIM,
|
||||
DIM};
|
||||
auto field_data = std::make_shared<storage::FieldData>(payload);
|
||||
|
||||
storage::InsertData insert_data(field_data);
|
||||
storage::FieldDataMeta field_data_meta{100, 101, 102, 103};
|
||||
insert_data.SetFieldDataMeta(field_data_meta);
|
||||
|
||||
auto serialized_bytes = insert_data.Serialize(storage::StorageType::LocalDisk);
|
||||
auto new_insert_data =
|
||||
storage::DeserializeLocalInsertFileData(reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
|
||||
serialized_bytes.size(), storage::DataType::VECTOR_FLOAT);
|
||||
auto serialized_bytes =
|
||||
insert_data.Serialize(storage::StorageType::LocalDisk);
|
||||
auto new_insert_data = storage::DeserializeLocalInsertFileData(
|
||||
reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
|
||||
serialized_bytes.size(),
|
||||
storage::DataType::VECTOR_FLOAT);
|
||||
ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType);
|
||||
auto new_payload = new_insert_data->GetPayload();
|
||||
ASSERT_EQ(new_payload->data_type, storage::DataType::VECTOR_FLOAT);
|
||||
ASSERT_EQ(new_payload->rows, data.size() / DIM);
|
||||
std::vector<float> new_data(data.size());
|
||||
memcpy(new_data.data(), new_payload->raw_data, new_payload->rows * sizeof(float) * DIM);
|
||||
memcpy(new_data.data(),
|
||||
new_payload->raw_data,
|
||||
new_payload->rows * sizeof(float) * DIM);
|
||||
ASSERT_EQ(data, new_data);
|
||||
}
|
||||
|
||||
TEST(storage, LocalIndexData) {
|
||||
std::vector<uint8_t> data = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
storage::Payload payload{storage::DataType::INT8, reinterpret_cast<const uint8_t*>(data.data()), int(data.size())};
|
||||
storage::Payload payload{storage::DataType::INT8,
|
||||
reinterpret_cast<const uint8_t*>(data.data()),
|
||||
int(data.size())};
|
||||
auto field_data = std::make_shared<storage::FieldData>(payload);
|
||||
storage::IndexData indexData_data(field_data);
|
||||
auto serialized_bytes = indexData_data.Serialize(storage::StorageType::LocalDisk);
|
||||
auto serialized_bytes =
|
||||
indexData_data.Serialize(storage::StorageType::LocalDisk);
|
||||
|
||||
auto new_index_data = storage::DeserializeLocalIndexFileData(
|
||||
reinterpret_cast<const uint8_t*>(serialized_bytes.data()), serialized_bytes.size());
|
||||
reinterpret_cast<const uint8_t*>(serialized_bytes.data()),
|
||||
serialized_bytes.size());
|
||||
ASSERT_EQ(new_index_data->GetCodecType(), storage::IndexDataType);
|
||||
auto new_payload = new_index_data->GetPayload();
|
||||
ASSERT_EQ(new_payload->data_type, storage::DataType::INT8);
|
||||
ASSERT_EQ(new_payload->rows, data.size());
|
||||
std::vector<uint8_t> new_data(data.size());
|
||||
memcpy(new_data.data(), new_payload->raw_data, new_payload->rows * sizeof(uint8_t));
|
||||
memcpy(new_data.data(),
|
||||
new_payload->raw_data,
|
||||
new_payload->rows * sizeof(uint8_t));
|
||||
ASSERT_EQ(data, new_data);
|
||||
}
|
||||
|
|
|
@ -71,13 +71,15 @@ TEST_F(DiskAnnFileManagerTest, AddFilePositive) {
|
|||
IndexMeta index_meta = {3, 100, 1000, 1, "index"};
|
||||
|
||||
int64_t slice_size = milvus::index_file_slice_size << 20;
|
||||
auto diskAnnFileManager = std::make_shared<DiskFileManagerImpl>(filed_data_meta, index_meta, storage_config_);
|
||||
auto diskAnnFileManager = std::make_shared<DiskFileManagerImpl>(
|
||||
filed_data_meta, index_meta, storage_config_);
|
||||
auto ok = diskAnnFileManager->AddFile(indexFilePath);
|
||||
EXPECT_EQ(ok, true);
|
||||
|
||||
auto remote_files_to_size = diskAnnFileManager->GetRemotePathsToFileSize();
|
||||
auto num_slice = index_size / slice_size;
|
||||
EXPECT_EQ(remote_files_to_size.size(), index_size % slice_size == 0 ? num_slice : num_slice + 1);
|
||||
EXPECT_EQ(remote_files_to_size.size(),
|
||||
index_size % slice_size == 0 ? num_slice : num_slice + 1);
|
||||
|
||||
std::vector<std::string> remote_files;
|
||||
for (auto& file2size : remote_files_to_size) {
|
||||
|
@ -125,13 +127,15 @@ TEST_F(DiskAnnFileManagerTest, AddFilePositiveParallel) {
|
|||
IndexMeta index_meta = {3, 100, 1000, 1, "index"};
|
||||
|
||||
int64_t slice_size = milvus::index_file_slice_size << 20;
|
||||
auto diskAnnFileManager = std::make_shared<DiskFileManagerImpl>(filed_data_meta, index_meta, storage_config_);
|
||||
auto diskAnnFileManager = std::make_shared<DiskFileManagerImpl>(
|
||||
filed_data_meta, index_meta, storage_config_);
|
||||
auto ok = diskAnnFileManager->AddFile(indexFilePath);
|
||||
EXPECT_EQ(ok, true);
|
||||
|
||||
auto remote_files_to_size = diskAnnFileManager->GetRemotePathsToFileSize();
|
||||
auto num_slice = index_size / slice_size;
|
||||
EXPECT_EQ(remote_files_to_size.size(), index_size % slice_size == 0 ? num_slice : num_slice + 1);
|
||||
EXPECT_EQ(remote_files_to_size.size(),
|
||||
index_size % slice_size == 0 ? num_slice : num_slice + 1);
|
||||
|
||||
std::vector<std::string> remote_files;
|
||||
for (auto& file2size : remote_files_to_size) {
|
||||
|
@ -169,14 +173,16 @@ TEST_F(DiskAnnFileManagerTest, TestThreadPool) {
|
|||
std::vector<std::future<int>> futures;
|
||||
auto start = chrono::system_clock::now();
|
||||
for (int i = 0; i < 100; i++) {
|
||||
futures.push_back(thread_pool->Submit(test_worker, "test_id" + std::to_string(i)));
|
||||
futures.push_back(
|
||||
thread_pool->Submit(test_worker, "test_id" + std::to_string(i)));
|
||||
}
|
||||
for (auto& future : futures) {
|
||||
EXPECT_EQ(future.get(), 1);
|
||||
}
|
||||
auto end = chrono::system_clock::now();
|
||||
auto duration = chrono::duration_cast<chrono::microseconds>(end - start);
|
||||
auto second = double(duration.count()) * chrono::microseconds::period::num / chrono::microseconds::period::den;
|
||||
auto second = double(duration.count()) * chrono::microseconds::period::num /
|
||||
chrono::microseconds::period::den;
|
||||
EXPECT_LT(second, 4 * 100);
|
||||
}
|
||||
|
||||
|
@ -193,7 +199,8 @@ TEST_F(DiskAnnFileManagerTest, TestThreadPoolException) {
|
|||
auto thread_pool = new milvus::ThreadPool(50);
|
||||
std::vector<std::future<int>> futures;
|
||||
for (int i = 0; i < 100; i++) {
|
||||
futures.push_back(thread_pool->Submit(test_exception, "test_id" + std::to_string(i)));
|
||||
futures.push_back(thread_pool->Submit(
|
||||
test_exception, "test_id" + std::to_string(i)));
|
||||
}
|
||||
for (auto& future : futures) {
|
||||
future.get();
|
||||
|
|
|
@ -97,11 +97,13 @@ TEST(Expr, Range) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
ShowPlanNodeVisitor shower;
|
||||
Assert(plan->tag2field_.at("$0") == schema->get_field_id(FieldName("fakevec")));
|
||||
Assert(plan->tag2field_.at("$0") ==
|
||||
schema->get_field_id(FieldName("fakevec")));
|
||||
auto out = shower.call_child(*plan->plan_node_);
|
||||
std::cout << out.dump(4);
|
||||
}
|
||||
|
@ -139,11 +141,13 @@ TEST(Expr, RangeBinary) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
ShowPlanNodeVisitor shower;
|
||||
Assert(plan->tag2field_.at("$0") == schema->get_field_id(FieldName("fakevec")));
|
||||
Assert(plan->tag2field_.at("$0") ==
|
||||
schema->get_field_id(FieldName("fakevec")));
|
||||
auto out = shower.call_child(*plan->plan_node_);
|
||||
std::cout << out.dump(4);
|
||||
}
|
||||
|
@ -181,7 +185,8 @@ TEST(Expr, InvalidRange) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
ASSERT_ANY_THROW(CreatePlan(*schema, dsl_string));
|
||||
}
|
||||
|
@ -219,7 +224,8 @@ TEST(Expr, InvalidDSL) {
|
|||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
ASSERT_ANY_THROW(CreatePlan(*schema, dsl_string));
|
||||
}
|
||||
|
@ -230,7 +236,8 @@ TEST(Expr, ShowExecutor) {
|
|||
auto node = std::make_unique<FloatVectorANNS>();
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto field_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, metric_type);
|
||||
auto field_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, metric_type);
|
||||
int64_t num_queries = 100L;
|
||||
auto raw_data = DataGen(schema, num_queries);
|
||||
auto& info = node->search_info_;
|
||||
|
@ -251,10 +258,14 @@ TEST(Expr, TestRange) {
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(int)>>> testcases = {
|
||||
{R"("GT": 2000, "LT": 3000)", [](int v) { return 2000 < v && v < 3000; }},
|
||||
{R"("GE": 2000, "LT": 3000)", [](int v) { return 2000 <= v && v < 3000; }},
|
||||
{R"("GT": 2000, "LE": 3000)", [](int v) { return 2000 < v && v <= 3000; }},
|
||||
{R"("GE": 2000, "LE": 3000)", [](int v) { return 2000 <= v && v <= 3000; }},
|
||||
{R"("GT": 2000, "LT": 3000)",
|
||||
[](int v) { return 2000 < v && v < 3000; }},
|
||||
{R"("GE": 2000, "LT": 3000)",
|
||||
[](int v) { return 2000 <= v && v < 3000; }},
|
||||
{R"("GT": 2000, "LE": 3000)",
|
||||
[](int v) { return 2000 < v && v <= 3000; }},
|
||||
{R"("GE": 2000, "LE": 3000)",
|
||||
[](int v) { return 2000 <= v && v <= 3000; }},
|
||||
{R"("GE": 2000)", [](int v) { return v >= 2000; }},
|
||||
{R"("GT": 2000)", [](int v) { return v > 2000; }},
|
||||
{R"("LE": 2000)", [](int v) { return v <= 2000; }},
|
||||
|
@ -290,7 +301,8 @@ TEST(Expr, TestRange) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
|
@ -303,11 +315,16 @@ TEST(Expr, TestRange) {
|
|||
auto new_age_col = raw_data.get_col<int>(i64_fid);
|
||||
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func] : testcases) {
|
||||
auto loc = dsl_string_tmp.find("@@@@");
|
||||
auto dsl_string = dsl_string_tmp;
|
||||
|
@ -373,7 +390,8 @@ TEST(Expr, TestTerm) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
|
@ -386,11 +404,16 @@ TEST(Expr, TestTerm) {
|
|||
auto new_age_col = raw_data.get_col<int>(i64_fid);
|
||||
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func] : testcases) {
|
||||
auto loc = dsl_string_tmp.find("@@@@");
|
||||
auto dsl_string = dsl_string_tmp;
|
||||
|
@ -445,36 +468,45 @@ TEST(Expr, TestSimpleDsl) {
|
|||
std::vector<std::tuple<Json, std::function<bool(int)>>> testcases;
|
||||
{
|
||||
Json dsl;
|
||||
dsl["must"] = Json::array({vec_dsl, get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
|
||||
dsl["must"] = Json::array(
|
||||
{vec_dsl, get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
testcases.emplace_back(
|
||||
dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
|
||||
}
|
||||
|
||||
{
|
||||
Json dsl;
|
||||
Json sub_dsl;
|
||||
sub_dsl["must"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
sub_dsl["must"] = Json::array(
|
||||
{get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
dsl["must"] = Json::array({sub_dsl, vec_dsl});
|
||||
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
|
||||
testcases.emplace_back(
|
||||
dsl, [](int64_t x) { return (x & 0b1111) == 0b1011; });
|
||||
}
|
||||
|
||||
{
|
||||
Json dsl;
|
||||
Json sub_dsl;
|
||||
sub_dsl["should"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
sub_dsl["should"] = Json::array(
|
||||
{get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
dsl["must"] = Json::array({sub_dsl, vec_dsl});
|
||||
testcases.emplace_back(dsl, [](int64_t x) { return !!((x & 0b1111) ^ 0b0100); });
|
||||
testcases.emplace_back(
|
||||
dsl, [](int64_t x) { return !!((x & 0b1111) ^ 0b0100); });
|
||||
}
|
||||
|
||||
{
|
||||
Json dsl;
|
||||
Json sub_dsl;
|
||||
sub_dsl["must_not"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
sub_dsl["must_not"] = Json::array(
|
||||
{get_item(0), get_item(1), get_item(2, 0), get_item(3)});
|
||||
dsl["must"] = Json::array({sub_dsl, vec_dsl});
|
||||
testcases.emplace_back(dsl, [](int64_t x) { return (x & 0b1111) != 0b1011; });
|
||||
testcases.emplace_back(
|
||||
dsl, [](int64_t x) { return (x & 0b1111) != 0b1011; });
|
||||
}
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
|
@ -486,11 +518,16 @@ TEST(Expr, TestSimpleDsl) {
|
|||
auto new_age_col = raw_data.get_col<int64_t>(i64_fid);
|
||||
age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func] : testcases) {
|
||||
Json dsl;
|
||||
dsl["bool"] = clause;
|
||||
|
@ -511,11 +548,15 @@ TEST(Expr, TestSimpleDsl) {
|
|||
TEST(Expr, TestCompare) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>> testcases = {
|
||||
{R"("LT")", [](int a, int64_t b) { return a < b; }}, {R"("LE")", [](int a, int64_t b) { return a <= b; }},
|
||||
{R"("GT")", [](int a, int64_t b) { return a > b; }}, {R"("GE")", [](int a, int64_t b) { return a >= b; }},
|
||||
{R"("EQ")", [](int a, int64_t b) { return a == b; }}, {R"("NE")", [](int a, int64_t b) { return a != b; }},
|
||||
};
|
||||
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>>
|
||||
testcases = {
|
||||
{R"("LT")", [](int a, int64_t b) { return a < b; }},
|
||||
{R"("LE")", [](int a, int64_t b) { return a <= b; }},
|
||||
{R"("GT")", [](int a, int64_t b) { return a > b; }},
|
||||
{R"("GE")", [](int a, int64_t b) { return a >= b; }},
|
||||
{R"("EQ")", [](int a, int64_t b) { return a == b; }},
|
||||
{R"("NE")", [](int a, int64_t b) { return a != b; }},
|
||||
};
|
||||
|
||||
std::string dsl_string_tpl = R"({
|
||||
"bool": {
|
||||
|
@ -545,7 +586,8 @@ TEST(Expr, TestCompare) {
|
|||
}
|
||||
})";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i32_fid = schema->AddDebugField("age1", DataType::INT32);
|
||||
auto i64_fid = schema->AddDebugField("age2", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
@ -559,14 +601,21 @@ TEST(Expr, TestCompare) {
|
|||
auto raw_data = DataGen(schema, N, iter);
|
||||
auto new_age1_col = raw_data.get_col<int>(i32_fid);
|
||||
auto new_age2_col = raw_data.get_col<int64_t>(i64_fid);
|
||||
age1_col.insert(age1_col.end(), new_age1_col.begin(), new_age1_col.end());
|
||||
age2_col.insert(age2_col.end(), new_age2_col.begin(), new_age2_col.end());
|
||||
age1_col.insert(
|
||||
age1_col.end(), new_age1_col.begin(), new_age1_col.end());
|
||||
age2_col.insert(
|
||||
age2_col.end(), new_age2_col.begin(), new_age2_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func] : testcases) {
|
||||
auto dsl_string = boost::str(boost::format(dsl_string_tpl) % clause);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
|
@ -580,7 +629,8 @@ TEST(Expr, TestCompare) {
|
|||
auto val1 = age1_col[i];
|
||||
auto val2 = age2_col[i];
|
||||
auto ref = ref_func(val1, val2);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2;
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!"
|
||||
<< boost::format("[%1%, %2%]") % val1 % val2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -588,14 +638,15 @@ TEST(Expr, TestCompare) {
|
|||
TEST(Expr, TestCompareWithScalarIndex) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>> testcases = {
|
||||
{R"(LessThan)", [](int a, int64_t b) { return a < b; }},
|
||||
{R"(LessEqual)", [](int a, int64_t b) { return a <= b; }},
|
||||
{R"(GreaterThan)", [](int a, int64_t b) { return a > b; }},
|
||||
{R"(GreaterEqual)", [](int a, int64_t b) { return a >= b; }},
|
||||
{R"(Equal)", [](int a, int64_t b) { return a == b; }},
|
||||
{R"(NotEqual)", [](int a, int64_t b) { return a != b; }},
|
||||
};
|
||||
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>>
|
||||
testcases = {
|
||||
{R"(LessThan)", [](int a, int64_t b) { return a < b; }},
|
||||
{R"(LessEqual)", [](int a, int64_t b) { return a <= b; }},
|
||||
{R"(GreaterThan)", [](int a, int64_t b) { return a > b; }},
|
||||
{R"(GreaterEqual)", [](int a, int64_t b) { return a >= b; }},
|
||||
{R"(Equal)", [](int a, int64_t b) { return a == b; }},
|
||||
{R"(NotEqual)", [](int a, int64_t b) { return a != b; }},
|
||||
};
|
||||
|
||||
std::string serialized_expr_plan = R"(vector_anns: <
|
||||
field_id: %1%
|
||||
|
@ -622,7 +673,8 @@ TEST(Expr, TestCompareWithScalarIndex) {
|
|||
>)";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
|
||||
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
@ -656,11 +708,14 @@ TEST(Expr, TestCompareWithScalarIndex) {
|
|||
|
||||
ExecExprVisitor visitor(*seg, seg->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func] : testcases) {
|
||||
auto dsl_string = boost::format(serialized_expr_plan) % vec_fid.get() % clause % i32_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::INT32)) % i64_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::INT64));
|
||||
auto binary_plan = translate_text_plan_to_binary_plan(dsl_string.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
auto dsl_string =
|
||||
boost::format(serialized_expr_plan) % vec_fid.get() % clause %
|
||||
i32_fid.get() % proto::schema::DataType_Name(int(DataType::INT32)) %
|
||||
i64_fid.get() % proto::schema::DataType_Name(int(DataType::INT64));
|
||||
auto binary_plan =
|
||||
translate_text_plan_to_binary_plan(dsl_string.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(
|
||||
*schema, binary_plan.data(), binary_plan.size());
|
||||
// std::cout << ShowPlanNodeVisitor().call_child(*plan->plan_node_) << std::endl;
|
||||
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
|
||||
EXPECT_EQ(final.size(), N);
|
||||
|
@ -670,7 +725,8 @@ TEST(Expr, TestCompareWithScalarIndex) {
|
|||
auto val1 = age32_col[i];
|
||||
auto val2 = age64_col[i];
|
||||
auto ref = ref_func(val1, val2);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2;
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!"
|
||||
<< boost::format("[%1%, %2%]") % val1 % val2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -678,14 +734,22 @@ TEST(Expr, TestCompareWithScalarIndex) {
|
|||
TEST(Expr, TestCompareWithScalarIndexMaris) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(std::string, std::string)>>> testcases = {
|
||||
{R"(LessThan)", [](std::string a, std::string b) { return a.compare(b) < 0; }},
|
||||
{R"(LessEqual)", [](std::string a, std::string b) { return a.compare(b) <= 0; }},
|
||||
{R"(GreaterThan)", [](std::string a, std::string b) { return a.compare(b) > 0; }},
|
||||
{R"(GreaterEqual)", [](std::string a, std::string b) { return a.compare(b) >= 0; }},
|
||||
{R"(Equal)", [](std::string a, std::string b) { return a.compare(b) == 0; }},
|
||||
{R"(NotEqual)", [](std::string a, std::string b) { return a.compare(b) != 0; }},
|
||||
};
|
||||
std::vector<
|
||||
std::tuple<std::string, std::function<bool(std::string, std::string)>>>
|
||||
testcases = {
|
||||
{R"(LessThan)",
|
||||
[](std::string a, std::string b) { return a.compare(b) < 0; }},
|
||||
{R"(LessEqual)",
|
||||
[](std::string a, std::string b) { return a.compare(b) <= 0; }},
|
||||
{R"(GreaterThan)",
|
||||
[](std::string a, std::string b) { return a.compare(b) > 0; }},
|
||||
{R"(GreaterEqual)",
|
||||
[](std::string a, std::string b) { return a.compare(b) >= 0; }},
|
||||
{R"(Equal)",
|
||||
[](std::string a, std::string b) { return a.compare(b) == 0; }},
|
||||
{R"(NotEqual)",
|
||||
[](std::string a, std::string b) { return a.compare(b) != 0; }},
|
||||
};
|
||||
|
||||
const char* serialized_expr_plan = R"(vector_anns: <
|
||||
field_id: %1%
|
||||
|
@ -712,7 +776,8 @@ TEST(Expr, TestCompareWithScalarIndexMaris) {
|
|||
>)";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto str1_fid = schema->AddDebugField("string1", DataType::VARCHAR);
|
||||
auto str2_fid = schema->AddDebugField("string2", DataType::VARCHAR);
|
||||
schema->set_primary_field_id(str1_fid);
|
||||
|
@ -744,10 +809,12 @@ TEST(Expr, TestCompareWithScalarIndexMaris) {
|
|||
|
||||
ExecExprVisitor visitor(*seg, seg->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func] : testcases) {
|
||||
auto dsl_string =
|
||||
boost::format(serialized_expr_plan) % vec_fid.get() % clause % str1_fid.get() % str2_fid.get();
|
||||
auto binary_plan = translate_text_plan_to_binary_plan(dsl_string.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
auto dsl_string = boost::format(serialized_expr_plan) % vec_fid.get() %
|
||||
clause % str1_fid.get() % str2_fid.get();
|
||||
auto binary_plan =
|
||||
translate_text_plan_to_binary_plan(dsl_string.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(
|
||||
*schema, binary_plan.data(), binary_plan.size());
|
||||
// std::cout << ShowPlanNodeVisitor().call_child(*plan->plan_node_) << std::endl;
|
||||
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
|
||||
EXPECT_EQ(final.size(), N);
|
||||
|
@ -757,7 +824,8 @@ TEST(Expr, TestCompareWithScalarIndexMaris) {
|
|||
auto val1 = str1_col[i];
|
||||
auto val2 = str2_col[i];
|
||||
auto ref = ref_func(val1, val2);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2;
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!"
|
||||
<< boost::format("[%1%, %2%]") % val1 % val2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -765,101 +833,115 @@ TEST(Expr, TestCompareWithScalarIndexMaris) {
|
|||
TEST(Expr, TestBinaryArithOpEvalRange) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>> testcases = {
|
||||
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
|
||||
{R"("EQ": {
|
||||
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>>
|
||||
testcases = {
|
||||
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 4,
|
||||
"value": 8
|
||||
}
|
||||
})",
|
||||
[](int8_t v) { return (v + 4) == 8; }, DataType::INT8},
|
||||
{R"("EQ": {
|
||||
[](int8_t v) { return (v + 4) == 8; },
|
||||
DataType::INT8},
|
||||
{R"("EQ": {
|
||||
"SUB": {
|
||||
"right_operand": 500,
|
||||
"value": 1500
|
||||
}
|
||||
})",
|
||||
[](int16_t v) { return (v - 500) == 1500; }, DataType::INT16},
|
||||
{R"("EQ": {
|
||||
[](int16_t v) { return (v - 500) == 1500; },
|
||||
DataType::INT16},
|
||||
{R"("EQ": {
|
||||
"MUL": {
|
||||
"right_operand": 2,
|
||||
"value": 4000
|
||||
}
|
||||
})",
|
||||
[](int32_t v) { return (v * 2) == 4000; }, DataType::INT32},
|
||||
{R"("EQ": {
|
||||
[](int32_t v) { return (v * 2) == 4000; },
|
||||
DataType::INT32},
|
||||
{R"("EQ": {
|
||||
"DIV": {
|
||||
"right_operand": 2,
|
||||
"value": 1000
|
||||
}
|
||||
})",
|
||||
[](int64_t v) { return (v / 2) == 1000; }, DataType::INT64},
|
||||
{R"("EQ": {
|
||||
[](int64_t v) { return (v / 2) == 1000; },
|
||||
DataType::INT64},
|
||||
{R"("EQ": {
|
||||
"MOD": {
|
||||
"right_operand": 100,
|
||||
"value": 0
|
||||
}
|
||||
})",
|
||||
[](int32_t v) { return (v % 100) == 0; }, DataType::INT32},
|
||||
{R"("EQ": {
|
||||
[](int32_t v) { return (v % 100) == 0; },
|
||||
DataType::INT32},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})",
|
||||
[](float v) { return (v + 500) == 2500; }, DataType::FLOAT},
|
||||
{R"("EQ": {
|
||||
[](float v) { return (v + 500) == 2500; },
|
||||
DataType::FLOAT},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})",
|
||||
[](double v) { return (v + 500) == 2500; }, DataType::DOUBLE},
|
||||
// Add test cases for BinaryArithOpEvalRangeExpr NE of various data types
|
||||
{R"("NE": {
|
||||
[](double v) { return (v + 500) == 2500; },
|
||||
DataType::DOUBLE},
|
||||
// Add test cases for BinaryArithOpEvalRangeExpr NE of various data types
|
||||
{R"("NE": {
|
||||
"ADD": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})",
|
||||
[](float v) { return (v + 500) != 2500; }, DataType::FLOAT},
|
||||
{R"("NE": {
|
||||
[](float v) { return (v + 500) != 2500; },
|
||||
DataType::FLOAT},
|
||||
{R"("NE": {
|
||||
"SUB": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})",
|
||||
[](double v) { return (v - 500) != 2500; }, DataType::DOUBLE},
|
||||
{R"("NE": {
|
||||
[](double v) { return (v - 500) != 2500; },
|
||||
DataType::DOUBLE},
|
||||
{R"("NE": {
|
||||
"MUL": {
|
||||
"right_operand": 2,
|
||||
"value": 2
|
||||
}
|
||||
})",
|
||||
[](int8_t v) { return (v * 2) != 2; }, DataType::INT8},
|
||||
{R"("NE": {
|
||||
[](int8_t v) { return (v * 2) != 2; },
|
||||
DataType::INT8},
|
||||
{R"("NE": {
|
||||
"DIV": {
|
||||
"right_operand": 2,
|
||||
"value": 1000
|
||||
}
|
||||
})",
|
||||
[](int16_t v) { return (v / 2) != 1000; }, DataType::INT16},
|
||||
{R"("NE": {
|
||||
[](int16_t v) { return (v / 2) != 1000; },
|
||||
DataType::INT16},
|
||||
{R"("NE": {
|
||||
"MOD": {
|
||||
"right_operand": 100,
|
||||
"value": 0
|
||||
}
|
||||
})",
|
||||
[](int32_t v) { return (v % 100) != 0; }, DataType::INT32},
|
||||
{R"("NE": {
|
||||
[](int32_t v) { return (v % 100) != 0; },
|
||||
DataType::INT32},
|
||||
{R"("NE": {
|
||||
"ADD": {
|
||||
"right_operand": 500,
|
||||
"value": 2500
|
||||
}
|
||||
})",
|
||||
[](int64_t v) { return (v + 500) != 2500; }, DataType::INT64},
|
||||
};
|
||||
[](int64_t v) { return (v + 500) != 2500; },
|
||||
DataType::INT64},
|
||||
};
|
||||
|
||||
std::string dsl_string_tmp = R"({
|
||||
"bool": {
|
||||
|
@ -917,7 +999,8 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
|
|||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
|
||||
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
|
||||
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
|
||||
|
@ -945,19 +1028,32 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
|
|||
auto new_age_float_col = raw_data.get_col<float>(float_fid);
|
||||
auto new_age_double_col = raw_data.get_col<double>(double_fid);
|
||||
|
||||
age8_col.insert(age8_col.end(), new_age8_col.begin(), new_age8_col.end());
|
||||
age16_col.insert(age16_col.end(), new_age16_col.begin(), new_age16_col.end());
|
||||
age32_col.insert(age32_col.end(), new_age32_col.begin(), new_age32_col.end());
|
||||
age64_col.insert(age64_col.end(), new_age64_col.begin(), new_age64_col.end());
|
||||
age_float_col.insert(age_float_col.end(), new_age_float_col.begin(), new_age_float_col.end());
|
||||
age_double_col.insert(age_double_col.end(), new_age_double_col.begin(), new_age_double_col.end());
|
||||
age8_col.insert(
|
||||
age8_col.end(), new_age8_col.begin(), new_age8_col.end());
|
||||
age16_col.insert(
|
||||
age16_col.end(), new_age16_col.begin(), new_age16_col.end());
|
||||
age32_col.insert(
|
||||
age32_col.end(), new_age32_col.begin(), new_age32_col.end());
|
||||
age64_col.insert(
|
||||
age64_col.end(), new_age64_col.begin(), new_age64_col.end());
|
||||
age_float_col.insert(age_float_col.end(),
|
||||
new_age_float_col.begin(),
|
||||
new_age_float_col.end());
|
||||
age_double_col.insert(age_double_col.end(),
|
||||
new_age_double_col.begin(),
|
||||
new_age_double_col.end());
|
||||
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func, dtype] : testcases) {
|
||||
auto loc = dsl_string_tmp.find("@@@@@");
|
||||
auto dsl_string = dsl_string_tmp;
|
||||
|
@ -1026,28 +1122,32 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
|||
"value": 2500.00
|
||||
}
|
||||
})",
|
||||
"Assert \"(value.is_number_integer())\"", DataType::INT32},
|
||||
"Assert \"(value.is_number_integer())\"",
|
||||
DataType::INT32},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 500.0,
|
||||
"value": 2500
|
||||
}
|
||||
})",
|
||||
"Assert \"(right_operand.is_number_integer())\"", DataType::INT32},
|
||||
"Assert \"(right_operand.is_number_integer())\"",
|
||||
DataType::INT32},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": 500.0,
|
||||
"value": true
|
||||
}
|
||||
})",
|
||||
"Assert \"(value.is_number())\"", DataType::FLOAT},
|
||||
"Assert \"(value.is_number())\"",
|
||||
DataType::FLOAT},
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
"right_operand": "500",
|
||||
"value": 2500.0
|
||||
}
|
||||
})",
|
||||
"Assert \"(right_operand.is_number())\"", DataType::FLOAT},
|
||||
"Assert \"(right_operand.is_number())\"",
|
||||
DataType::FLOAT},
|
||||
// Check unsupported arithmetic operator type
|
||||
{R"("EQ": {
|
||||
"EXP": {
|
||||
|
@ -1055,7 +1155,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
|||
"value": 2500
|
||||
}
|
||||
})",
|
||||
"arith op(exp) not found", DataType::INT32},
|
||||
"arith op(exp) not found",
|
||||
DataType::INT32},
|
||||
// Check unsupported data type
|
||||
{R"("EQ": {
|
||||
"ADD": {
|
||||
|
@ -1063,7 +1164,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
|||
"value": false
|
||||
}
|
||||
})",
|
||||
"bool type is not supported", DataType::BOOL},
|
||||
"bool type is not supported",
|
||||
DataType::BOOL},
|
||||
};
|
||||
|
||||
std::string dsl_string_tmp = R"({
|
||||
|
@ -1107,7 +1209,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
|||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
schema->AddDebugField("FloatN", DataType::FLOAT);
|
||||
schema->AddDebugField("BoolField", DataType::BOOL);
|
||||
|
@ -1130,12 +1233,14 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
|||
|
||||
try {
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
FAIL() << "Expected AssertionError: " << assert_info << " not thrown";
|
||||
FAIL() << "Expected AssertionError: " << assert_info
|
||||
<< " not thrown";
|
||||
} catch (const std::exception& err) {
|
||||
std::string err_msg = err.what();
|
||||
ASSERT_TRUE(err_msg.find(assert_info) != std::string::npos);
|
||||
} catch (...) {
|
||||
FAIL() << "Expected AssertionError: " << assert_info << " not thrown";
|
||||
FAIL() << "Expected AssertionError: " << assert_info
|
||||
<< " not thrown";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1143,9 +1248,10 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
|||
TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>> testcases = {
|
||||
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
|
||||
{R"(arith_op: Add
|
||||
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>>
|
||||
testcases = {
|
||||
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
int64_val: 4
|
||||
>
|
||||
|
@ -1153,8 +1259,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 8
|
||||
>)",
|
||||
[](int8_t v) { return (v + 4) == 8; }, DataType::INT8},
|
||||
{R"(arith_op: Sub
|
||||
[](int8_t v) { return (v + 4) == 8; },
|
||||
DataType::INT8},
|
||||
{R"(arith_op: Sub
|
||||
right_operand: <
|
||||
int64_val: 500
|
||||
>
|
||||
|
@ -1162,8 +1269,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 1500
|
||||
>)",
|
||||
[](int16_t v) { return (v - 500) == 1500; }, DataType::INT16},
|
||||
{R"(arith_op: Mul
|
||||
[](int16_t v) { return (v - 500) == 1500; },
|
||||
DataType::INT16},
|
||||
{R"(arith_op: Mul
|
||||
right_operand: <
|
||||
int64_val: 2
|
||||
>
|
||||
|
@ -1171,8 +1279,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 4000
|
||||
>)",
|
||||
[](int32_t v) { return (v * 2) == 4000; }, DataType::INT32},
|
||||
{R"(arith_op: Div
|
||||
[](int32_t v) { return (v * 2) == 4000; },
|
||||
DataType::INT32},
|
||||
{R"(arith_op: Div
|
||||
right_operand: <
|
||||
int64_val: 2
|
||||
>
|
||||
|
@ -1180,8 +1289,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 1000
|
||||
>)",
|
||||
[](int64_t v) { return (v / 2) == 1000; }, DataType::INT64},
|
||||
{R"(arith_op: Mod
|
||||
[](int64_t v) { return (v / 2) == 1000; },
|
||||
DataType::INT64},
|
||||
{R"(arith_op: Mod
|
||||
right_operand: <
|
||||
int64_val: 100
|
||||
>
|
||||
|
@ -1189,8 +1299,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 0
|
||||
>)",
|
||||
[](int32_t v) { return (v % 100) == 0; }, DataType::INT32},
|
||||
{R"(arith_op: Add
|
||||
[](int32_t v) { return (v % 100) == 0; },
|
||||
DataType::INT32},
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
float_val: 500
|
||||
>
|
||||
|
@ -1198,8 +1309,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
float_val: 2500
|
||||
>)",
|
||||
[](float v) { return (v + 500) == 2500; }, DataType::FLOAT},
|
||||
{R"(arith_op: Add
|
||||
[](float v) { return (v + 500) == 2500; },
|
||||
DataType::FLOAT},
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
float_val: 500
|
||||
>
|
||||
|
@ -1207,8 +1319,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
float_val: 2500
|
||||
>)",
|
||||
[](double v) { return (v + 500) == 2500; }, DataType::DOUBLE},
|
||||
{R"(arith_op: Add
|
||||
[](double v) { return (v + 500) == 2500; },
|
||||
DataType::DOUBLE},
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
float_val: 500
|
||||
>
|
||||
|
@ -1216,8 +1329,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
float_val: 2000
|
||||
>)",
|
||||
[](float v) { return (v + 500) != 2000; }, DataType::FLOAT},
|
||||
{R"(arith_op: Sub
|
||||
[](float v) { return (v + 500) != 2000; },
|
||||
DataType::FLOAT},
|
||||
{R"(arith_op: Sub
|
||||
right_operand: <
|
||||
float_val: 500
|
||||
>
|
||||
|
@ -1225,8 +1339,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
float_val: 2500
|
||||
>)",
|
||||
[](double v) { return (v - 500) != 2000; }, DataType::DOUBLE},
|
||||
{R"(arith_op: Mul
|
||||
[](double v) { return (v - 500) != 2000; },
|
||||
DataType::DOUBLE},
|
||||
{R"(arith_op: Mul
|
||||
right_operand: <
|
||||
int64_val: 2
|
||||
>
|
||||
|
@ -1234,8 +1349,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 2
|
||||
>)",
|
||||
[](int8_t v) { return (v * 2) != 2; }, DataType::INT8},
|
||||
{R"(arith_op: Div
|
||||
[](int8_t v) { return (v * 2) != 2; },
|
||||
DataType::INT8},
|
||||
{R"(arith_op: Div
|
||||
right_operand: <
|
||||
int64_val: 2
|
||||
>
|
||||
|
@ -1243,8 +1359,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 2000
|
||||
>)",
|
||||
[](int16_t v) { return (v / 2) != 2000; }, DataType::INT16},
|
||||
{R"(arith_op: Mod
|
||||
[](int16_t v) { return (v / 2) != 2000; },
|
||||
DataType::INT16},
|
||||
{R"(arith_op: Mod
|
||||
right_operand: <
|
||||
int64_val: 100
|
||||
>
|
||||
|
@ -1252,8 +1369,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 1
|
||||
>)",
|
||||
[](int32_t v) { return (v % 100) != 1; }, DataType::INT32},
|
||||
{R"(arith_op: Add
|
||||
[](int32_t v) { return (v % 100) != 1; },
|
||||
DataType::INT32},
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
int64_val: 500
|
||||
>
|
||||
|
@ -1261,8 +1379,9 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
value: <
|
||||
int64_val: 2000
|
||||
>)",
|
||||
[](int64_t v) { return (v + 500) != 2000; }, DataType::INT64},
|
||||
};
|
||||
[](int64_t v) { return (v + 500) != 2000; },
|
||||
DataType::INT64},
|
||||
};
|
||||
|
||||
std::string serialized_expr_plan = R"(vector_anns: <
|
||||
field_id: %1%
|
||||
|
@ -1288,7 +1407,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
@@@@)";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
|
||||
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
|
||||
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
|
||||
|
@ -1369,7 +1489,8 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
seg->LoadIndex(load_index_info);
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
for (auto [clause, ref_func, dtype] : testcases) {
|
||||
auto loc = serialized_expr_plan.find("@@@@@");
|
||||
|
@ -1400,8 +1521,10 @@ TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
|||
ASSERT_TRUE(false) << "No test case defined for this data type";
|
||||
}
|
||||
|
||||
auto binary_plan = translate_text_plan_to_binary_plan(expr.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
auto binary_plan =
|
||||
translate_text_plan_to_binary_plan(expr.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(
|
||||
*schema, binary_plan.data(), binary_plan.size());
|
||||
|
||||
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
|
||||
EXPECT_EQ(final.size(), N);
|
||||
|
|
|
@ -51,7 +51,11 @@ TEST(Growing, RealCount) {
|
|||
ASSERT_EQ(offset, 0);
|
||||
auto dataset = DataGen(schema, c);
|
||||
auto pks = dataset.get_col<int64_t>(pk);
|
||||
segment->Insert(offset, c, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(offset,
|
||||
c,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
// no delete.
|
||||
ASSERT_EQ(c, segment->get_real_count());
|
||||
|
@ -62,7 +66,8 @@ TEST(Growing, RealCount) {
|
|||
ASSERT_EQ(del_offset1, 0);
|
||||
auto del_ids1 = GenPKs(pks.begin(), pks.begin() + half);
|
||||
auto del_tss1 = GenTss(half, c);
|
||||
auto status = segment->Delete(del_offset1, half, del_ids1.get(), del_tss1.data());
|
||||
auto status =
|
||||
segment->Delete(del_offset1, half, del_ids1.get(), del_tss1.data());
|
||||
ASSERT_TRUE(status.ok());
|
||||
ASSERT_EQ(c - half, segment->get_real_count());
|
||||
|
||||
|
@ -70,7 +75,8 @@ TEST(Growing, RealCount) {
|
|||
auto del_offset2 = segment->PreDelete(half);
|
||||
ASSERT_EQ(del_offset2, half);
|
||||
auto del_tss2 = GenTss(half, c + half);
|
||||
status = segment->Delete(del_offset2, half, del_ids1.get(), del_tss2.data());
|
||||
status =
|
||||
segment->Delete(del_offset2, half, del_ids1.get(), del_tss2.data());
|
||||
ASSERT_TRUE(status.ok());
|
||||
ASSERT_EQ(c - half, segment->get_real_count());
|
||||
|
||||
|
|
|
@ -27,12 +27,15 @@ TEST(FloatVecIndex, All) {
|
|||
auto metric_type = knowhere::metric::L2;
|
||||
indexcgo::TypeParams type_params;
|
||||
indexcgo::IndexParams index_params;
|
||||
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
|
||||
std::tie(type_params, index_params) =
|
||||
generate_params(index_type, metric_type);
|
||||
std::string type_params_str, index_params_str;
|
||||
bool ok;
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params,
|
||||
&type_params_str);
|
||||
assert(ok);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
|
@ -44,7 +47,11 @@ TEST(FloatVecIndex, All) {
|
|||
CIndex copy_index;
|
||||
|
||||
{
|
||||
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -56,7 +63,11 @@ TEST(FloatVecIndex, All) {
|
|||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&copy_index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -79,12 +90,15 @@ TEST(BinaryVecIndex, All) {
|
|||
auto metric_type = knowhere::metric::JACCARD;
|
||||
indexcgo::TypeParams type_params;
|
||||
indexcgo::IndexParams index_params;
|
||||
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
|
||||
std::tie(type_params, index_params) =
|
||||
generate_params(index_type, metric_type);
|
||||
std::string type_params_str, index_params_str;
|
||||
bool ok;
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params,
|
||||
&type_params_str);
|
||||
assert(ok);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, true);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
|
@ -96,7 +110,11 @@ TEST(BinaryVecIndex, All) {
|
|||
CIndex copy_index;
|
||||
|
||||
{
|
||||
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -108,7 +126,11 @@ TEST(BinaryVecIndex, All) {
|
|||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&copy_index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -149,11 +171,16 @@ TEST(CBoolIndexTest, All) {
|
|||
CIndex copy_index;
|
||||
|
||||
{
|
||||
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
status = BuildScalarIndex(index, half_ds->GetRows(), half_ds->GetTensor());
|
||||
status = BuildScalarIndex(
|
||||
index, half_ds->GetRows(), half_ds->GetTensor());
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -161,8 +188,11 @@ TEST(CBoolIndexTest, All) {
|
|||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
status =
|
||||
CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&copy_index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -201,7 +231,11 @@ TEST(CInt64IndexTest, All) {
|
|||
CIndex copy_index;
|
||||
|
||||
{
|
||||
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -213,8 +247,11 @@ TEST(CInt64IndexTest, All) {
|
|||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
status =
|
||||
CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&copy_index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -255,11 +292,16 @@ TEST(CStringIndexTest, All) {
|
|||
CIndex copy_index;
|
||||
|
||||
{
|
||||
status = CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
status = BuildScalarIndex(index, (str_ds->GetRows()), (str_ds->GetTensor()));
|
||||
status = BuildScalarIndex(
|
||||
index, (str_ds->GetRows()), (str_ds->GetTensor()));
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
@ -267,8 +309,11 @@ TEST(CStringIndexTest, All) {
|
|||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
status =
|
||||
CreateIndex(dtype, type_params_str.c_str(), index_params_str.c_str(), &copy_index, c_storage_config);
|
||||
status = CreateIndex(dtype,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
&copy_index,
|
||||
c_storage_config);
|
||||
ASSERT_EQ(Success, status.error_code);
|
||||
}
|
||||
{
|
||||
|
|
|
@ -34,11 +34,14 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
|
|||
auto param = GetParam();
|
||||
index_type = param.first;
|
||||
metric_type = param.second;
|
||||
std::tie(type_params, index_params) = generate_params(index_type, metric_type);
|
||||
std::tie(type_params, index_params) =
|
||||
generate_params(index_type, metric_type);
|
||||
bool ok;
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(type_params,
|
||||
&type_params_str);
|
||||
assert(ok);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str);
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
|
||||
search_conf = generate_search_conf(index_type, metric_type);
|
||||
|
@ -65,11 +68,13 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
|
|||
if (!is_binary) {
|
||||
xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
||||
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
|
||||
xq_dataset = knowhere::GenDataSet(
|
||||
NQ, DIM, xb_data.data() + DIM * query_offset);
|
||||
} else {
|
||||
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data());
|
||||
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_bin_data.data() + DIM * query_offset);
|
||||
xq_dataset = knowhere::GenDataSet(
|
||||
NQ, DIM, xb_bin_data.data() + DIM * query_offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,19 +102,28 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
|
|||
INSTANTIATE_TEST_CASE_P(
|
||||
IndexTypeParameters,
|
||||
IndexWrapperTest,
|
||||
::testing::Values(std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::JACCARD),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::TANIMOTO),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, knowhere::metric::JACCARD),
|
||||
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::metric::L2)));
|
||||
::testing::Values(
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
||||
knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
|
||||
knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
knowhere::metric::JACCARD),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
knowhere::metric::TANIMOTO),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
|
||||
knowhere::metric::JACCARD),
|
||||
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::metric::L2)));
|
||||
|
||||
TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||
auto index = milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
|
||||
vec_field_data_type, type_params_str.c_str(), index_params_str.c_str(), storage_config_);
|
||||
vec_field_data_type,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
storage_config_);
|
||||
|
||||
auto dataset = GenDataset(NB, metric_type, is_binary);
|
||||
knowhere::DataSetPtr xb_dataset;
|
||||
|
@ -125,9 +139,14 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
|||
|
||||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||
auto binary_set = index->Serialize();
|
||||
auto copy_index = milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
|
||||
vec_field_data_type, type_params_str.c_str(), index_params_str.c_str(), storage_config_);
|
||||
auto vec_index = static_cast<milvus::indexbuilder::VecIndexCreator*>(copy_index.get());
|
||||
auto copy_index =
|
||||
milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
|
||||
vec_field_data_type,
|
||||
type_params_str.c_str(),
|
||||
index_params_str.c_str(),
|
||||
storage_config_);
|
||||
auto vec_index =
|
||||
static_cast<milvus::indexbuilder::VecIndexCreator*>(copy_index.get());
|
||||
ASSERT_EQ(vec_index->dim(), DIM);
|
||||
ASSERT_NO_THROW(vec_index->Load(binary_set));
|
||||
|
||||
|
|
|
@ -149,7 +149,8 @@ TEST(Indexing, BinaryBruteForce) {
|
|||
auto metric_type = knowhere::metric::JACCARD;
|
||||
auto result_count = topk * num_queries;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, dim, metric_type);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"vecbin", DataType::VECTOR_BINARY, dim, metric_type);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
auto dataset = DataGen(schema, N, 10);
|
||||
auto bin_vec = dataset.get_col<uint8_t>(vec_fid);
|
||||
|
@ -163,7 +164,8 @@ TEST(Indexing, BinaryBruteForce) {
|
|||
query_data //
|
||||
};
|
||||
|
||||
auto sub_result = query::BruteForceSearch(search_dataset, bin_vec.data(), N, knowhere::Json(), nullptr);
|
||||
auto sub_result = query::BruteForceSearch(
|
||||
search_dataset, bin_vec.data(), N, knowhere::Json(), nullptr);
|
||||
|
||||
SearchResult sr;
|
||||
sr.total_nq_ = num_queries;
|
||||
|
@ -222,7 +224,8 @@ TEST(Indexing, Naive) {
|
|||
create_index_info.field_type = DataType::VECTOR_FLOAT;
|
||||
create_index_info.metric_type = knowhere::metric::L2;
|
||||
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, nullptr);
|
||||
|
||||
auto build_conf = knowhere::Json{
|
||||
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
|
@ -276,7 +279,8 @@ TEST(Indexing, Naive) {
|
|||
if (result->seg_offsets_[i] < N / 2) {
|
||||
std::cout << "WRONG: ";
|
||||
}
|
||||
std::cout << result->seg_offsets_[i] << "->" << result->distances_[i] << std::endl;
|
||||
std::cout << result->seg_offsets_[i] << "->" << result->distances_[i]
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -319,11 +323,13 @@ class IndexTest : public ::testing::TestWithParam<Param> {
|
|||
if (!is_binary) {
|
||||
xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
||||
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
|
||||
xq_dataset = knowhere::GenDataSet(
|
||||
NQ, DIM, xb_data.data() + DIM * query_offset);
|
||||
} else {
|
||||
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data());
|
||||
xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_bin_data.data() + DIM * query_offset);
|
||||
xq_dataset = knowhere::GenDataSet(
|
||||
NQ, DIM, xb_bin_data.data() + DIM * query_offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -351,19 +357,25 @@ class IndexTest : public ::testing::TestWithParam<Param> {
|
|||
INSTANTIATE_TEST_CASE_P(
|
||||
IndexTypeParameters,
|
||||
IndexTest,
|
||||
::testing::Values(std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::JACCARD),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::metric::TANIMOTO),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, knowhere::metric::JACCARD),
|
||||
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2),
|
||||
// ci ut not start minio, so not run ut about diskann index for now
|
||||
// #ifdef BUILD_DISK_ANN
|
||||
// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2),
|
||||
// #endif
|
||||
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::metric::L2)));
|
||||
::testing::Values(
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
||||
knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
|
||||
knowhere::metric::L2),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
knowhere::metric::JACCARD),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
knowhere::metric::TANIMOTO),
|
||||
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
|
||||
knowhere::metric::JACCARD),
|
||||
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2),
|
||||
// ci ut not start minio, so not run ut about diskann index for now
|
||||
// #ifdef BUILD_DISK_ANN
|
||||
// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2),
|
||||
// #endif
|
||||
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::metric::L2)));
|
||||
|
||||
TEST_P(IndexTest, BuildAndQuery) {
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
|
@ -376,11 +388,14 @@ TEST_P(IndexTest, BuildAndQuery) {
|
|||
milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
|
||||
milvus::storage::IndexMeta index_meta{3, 100, 1000, 1};
|
||||
auto file_manager =
|
||||
std::make_shared<milvus::storage::DiskFileManagerImpl>(field_data_meta, index_meta, storage_config_);
|
||||
index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, file_manager);
|
||||
std::make_shared<milvus::storage::DiskFileManagerImpl>(
|
||||
field_data_meta, index_meta, storage_config_);
|
||||
index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, file_manager);
|
||||
#endif
|
||||
} else {
|
||||
index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
|
||||
index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, nullptr);
|
||||
}
|
||||
ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));
|
||||
milvus::index::IndexBasePtr new_index;
|
||||
|
@ -394,8 +409,10 @@ TEST_P(IndexTest, BuildAndQuery) {
|
|||
milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
|
||||
milvus::storage::IndexMeta index_meta{3, 100, 1000, 1};
|
||||
auto file_manager =
|
||||
std::make_shared<milvus::storage::DiskFileManagerImpl>(field_data_meta, index_meta, storage_config_);
|
||||
new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, file_manager);
|
||||
std::make_shared<milvus::storage::DiskFileManagerImpl>(
|
||||
field_data_meta, index_meta, storage_config_);
|
||||
new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, file_manager);
|
||||
|
||||
vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
|
||||
|
||||
|
|
|
@ -30,7 +30,8 @@ class MinioChunkManagerTest : public testing::Test {
|
|||
|
||||
virtual void
|
||||
SetUp() {
|
||||
chunk_manager_ = std::make_unique<MinioChunkManager>(get_default_storage_config());
|
||||
chunk_manager_ =
|
||||
std::make_unique<MinioChunkManager>(get_default_storage_config());
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -58,7 +59,8 @@ TEST_F(MinioChunkManagerTest, BucketNegtive) {
|
|||
try {
|
||||
chunk_manager_->CreateBucket(testBucketName);
|
||||
} catch (S3ErrorException& e) {
|
||||
EXPECT_TRUE(std::string(e.what()).find("BucketAlreadyOwnedByYou") != string::npos);
|
||||
EXPECT_TRUE(std::string(e.what()).find("BucketAlreadyOwnedByYou") !=
|
||||
string::npos);
|
||||
}
|
||||
chunk_manager_->DeleteBucket(testBucketName);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,8 @@ namespace wrapper = milvus::storage;
|
|||
|
||||
static void
|
||||
WriteToFile(CBuffer cb) {
|
||||
auto data_file = std::ofstream("/tmp/wrapper_test_data.dat", std::ios::binary);
|
||||
auto data_file =
|
||||
std::ofstream("/tmp/wrapper_test_data.dat", std::ios::binary);
|
||||
data_file.write(cb.data, cb.length);
|
||||
data_file.close();
|
||||
}
|
||||
|
@ -40,7 +41,8 @@ ReadFromFile() {
|
|||
|
||||
std::shared_ptr<arrow::Table> table;
|
||||
std::unique_ptr<parquet::arrow::FileReader> reader;
|
||||
auto st = parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader);
|
||||
auto st =
|
||||
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader);
|
||||
if (!st.ok())
|
||||
return nullptr;
|
||||
st = reader->ReadTable(&table);
|
||||
|
@ -64,12 +66,14 @@ TEST(storage, inoutstream) {
|
|||
ASSERT_NE(table, nullptr);
|
||||
|
||||
auto os = std::make_shared<milvus::storage::PayloadOutputStream>();
|
||||
st = parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), os, 1024);
|
||||
st = parquet::arrow::WriteTable(
|
||||
*table, arrow::default_memory_pool(), os, 1024);
|
||||
ASSERT_TRUE(st.ok());
|
||||
|
||||
const uint8_t* buf = os->Buffer().data();
|
||||
int64_t buf_size = os->Buffer().size();
|
||||
auto is = std::make_shared<milvus::storage::PayloadInputStream>(buf, buf_size);
|
||||
auto is =
|
||||
std::make_shared<milvus::storage::PayloadInputStream>(buf, buf_size);
|
||||
|
||||
std::shared_ptr<arrow::Table> intable;
|
||||
std::unique_ptr<parquet::arrow::FileReader> reader;
|
||||
|
@ -104,7 +108,8 @@ TEST(storage, boolean) {
|
|||
auto nums = GetPayloadLengthFromWriter(payload);
|
||||
ASSERT_EQ(nums, 4);
|
||||
|
||||
auto reader = NewPayloadReader(int(milvus::DataType::BOOL), (uint8_t*)cb.data, cb.length);
|
||||
auto reader = NewPayloadReader(
|
||||
int(milvus::DataType::BOOL), (uint8_t*)cb.data, cb.length);
|
||||
bool* values;
|
||||
int length = GetPayloadLengthFromReader(reader);
|
||||
ASSERT_EQ(length, 4);
|
||||
|
@ -119,46 +124,77 @@ TEST(storage, boolean) {
|
|||
ReleasePayloadReader(reader);
|
||||
}
|
||||
|
||||
#define NUMERIC_TEST(TEST_NAME, COLUMN_TYPE, DATA_TYPE, ADD_FUNC, GET_FUNC, ARRAY_TYPE) \
|
||||
TEST(wrapper, TEST_NAME) { \
|
||||
auto payload = NewPayloadWriter(COLUMN_TYPE); \
|
||||
DATA_TYPE data[] = {-1, 1, -100, 100}; \
|
||||
\
|
||||
auto st = ADD_FUNC(payload, data, 4); \
|
||||
ASSERT_EQ(st.error_code, ErrorCode::Success); \
|
||||
st = FinishPayloadWriter(payload); \
|
||||
ASSERT_EQ(st.error_code, ErrorCode::Success); \
|
||||
auto cb = GetPayloadBufferFromWriter(payload); \
|
||||
ASSERT_GT(cb.length, 0); \
|
||||
ASSERT_NE(cb.data, nullptr); \
|
||||
auto nums = GetPayloadLengthFromWriter(payload); \
|
||||
ASSERT_EQ(nums, 4); \
|
||||
\
|
||||
auto reader = NewPayloadReader(COLUMN_TYPE, (uint8_t*)cb.data, cb.length); \
|
||||
DATA_TYPE* values; \
|
||||
int length; \
|
||||
st = GET_FUNC(reader, &values, &length); \
|
||||
ASSERT_EQ(st.error_code, ErrorCode::Success); \
|
||||
ASSERT_NE(values, nullptr); \
|
||||
ASSERT_EQ(length, 4); \
|
||||
length = GetPayloadLengthFromReader(reader); \
|
||||
ASSERT_EQ(length, 4); \
|
||||
\
|
||||
for (int i = 0; i < length; i++) { \
|
||||
ASSERT_EQ(data[i], values[i]); \
|
||||
} \
|
||||
\
|
||||
ReleasePayloadWriter(payload); \
|
||||
ReleasePayloadReader(reader); \
|
||||
#define NUMERIC_TEST( \
|
||||
TEST_NAME, COLUMN_TYPE, DATA_TYPE, ADD_FUNC, GET_FUNC, ARRAY_TYPE) \
|
||||
TEST(wrapper, TEST_NAME) { \
|
||||
auto payload = NewPayloadWriter(COLUMN_TYPE); \
|
||||
DATA_TYPE data[] = {-1, 1, -100, 100}; \
|
||||
\
|
||||
auto st = ADD_FUNC(payload, data, 4); \
|
||||
ASSERT_EQ(st.error_code, ErrorCode::Success); \
|
||||
st = FinishPayloadWriter(payload); \
|
||||
ASSERT_EQ(st.error_code, ErrorCode::Success); \
|
||||
auto cb = GetPayloadBufferFromWriter(payload); \
|
||||
ASSERT_GT(cb.length, 0); \
|
||||
ASSERT_NE(cb.data, nullptr); \
|
||||
auto nums = GetPayloadLengthFromWriter(payload); \
|
||||
ASSERT_EQ(nums, 4); \
|
||||
\
|
||||
auto reader = \
|
||||
NewPayloadReader(COLUMN_TYPE, (uint8_t*)cb.data, cb.length); \
|
||||
DATA_TYPE* values; \
|
||||
int length; \
|
||||
st = GET_FUNC(reader, &values, &length); \
|
||||
ASSERT_EQ(st.error_code, ErrorCode::Success); \
|
||||
ASSERT_NE(values, nullptr); \
|
||||
ASSERT_EQ(length, 4); \
|
||||
length = GetPayloadLengthFromReader(reader); \
|
||||
ASSERT_EQ(length, 4); \
|
||||
\
|
||||
for (int i = 0; i < length; i++) { \
|
||||
ASSERT_EQ(data[i], values[i]); \
|
||||
} \
|
||||
\
|
||||
ReleasePayloadWriter(payload); \
|
||||
ReleasePayloadReader(reader); \
|
||||
}
|
||||
|
||||
NUMERIC_TEST(int8, int(milvus::DataType::INT8), int8_t, AddInt8ToPayload, GetInt8FromPayload, arrow::Int8Array)
|
||||
NUMERIC_TEST(int16, int(milvus::DataType::INT16), int16_t, AddInt16ToPayload, GetInt16FromPayload, arrow::Int16Array)
|
||||
NUMERIC_TEST(int32, int(milvus::DataType::INT32), int32_t, AddInt32ToPayload, GetInt32FromPayload, arrow::Int32Array)
|
||||
NUMERIC_TEST(int64, int(milvus::DataType::INT64), int64_t, AddInt64ToPayload, GetInt64FromPayload, arrow::Int64Array)
|
||||
NUMERIC_TEST(float32, int(milvus::DataType::FLOAT), float, AddFloatToPayload, GetFloatFromPayload, arrow::FloatArray)
|
||||
NUMERIC_TEST(
|
||||
float64, int(milvus::DataType::DOUBLE), double, AddDoubleToPayload, GetDoubleFromPayload, arrow::DoubleArray)
|
||||
NUMERIC_TEST(int8,
|
||||
int(milvus::DataType::INT8),
|
||||
int8_t,
|
||||
AddInt8ToPayload,
|
||||
GetInt8FromPayload,
|
||||
arrow::Int8Array)
|
||||
NUMERIC_TEST(int16,
|
||||
int(milvus::DataType::INT16),
|
||||
int16_t,
|
||||
AddInt16ToPayload,
|
||||
GetInt16FromPayload,
|
||||
arrow::Int16Array)
|
||||
NUMERIC_TEST(int32,
|
||||
int(milvus::DataType::INT32),
|
||||
int32_t,
|
||||
AddInt32ToPayload,
|
||||
GetInt32FromPayload,
|
||||
arrow::Int32Array)
|
||||
NUMERIC_TEST(int64,
|
||||
int(milvus::DataType::INT64),
|
||||
int64_t,
|
||||
AddInt64ToPayload,
|
||||
GetInt64FromPayload,
|
||||
arrow::Int64Array)
|
||||
NUMERIC_TEST(float32,
|
||||
int(milvus::DataType::FLOAT),
|
||||
float,
|
||||
AddFloatToPayload,
|
||||
GetFloatFromPayload,
|
||||
arrow::FloatArray)
|
||||
NUMERIC_TEST(float64,
|
||||
int(milvus::DataType::DOUBLE),
|
||||
double,
|
||||
AddDoubleToPayload,
|
||||
GetDoubleFromPayload,
|
||||
arrow::DoubleArray)
|
||||
|
||||
TEST(storage, stringarray) {
|
||||
auto payload = NewPayloadWriter(int(milvus::DataType::VARCHAR));
|
||||
|
@ -179,7 +215,8 @@ TEST(storage, stringarray) {
|
|||
auto nums = GetPayloadLengthFromWriter(payload);
|
||||
ASSERT_EQ(nums, 3);
|
||||
|
||||
auto reader = NewPayloadReader(int(milvus::DataType::VARCHAR), (uint8_t*)cb.data, cb.length);
|
||||
auto reader = NewPayloadReader(
|
||||
int(milvus::DataType::VARCHAR), (uint8_t*)cb.data, cb.length);
|
||||
int length = GetPayloadLengthFromReader(reader);
|
||||
ASSERT_EQ(length, 3);
|
||||
char *v0, *v1, *v2;
|
||||
|
@ -214,7 +251,8 @@ TEST(storage, stringarray) {
|
|||
|
||||
TEST(storage, binary_vector) {
|
||||
int DIM = 16;
|
||||
auto payload = NewVectorPayloadWriter(int(milvus::DataType::VECTOR_BINARY), DIM);
|
||||
auto payload =
|
||||
NewVectorPayloadWriter(int(milvus::DataType::VECTOR_BINARY), DIM);
|
||||
uint8_t data[] = {0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8};
|
||||
|
||||
auto st = AddBinaryVectorToPayload(payload, data, 16, 4);
|
||||
|
@ -227,7 +265,8 @@ TEST(storage, binary_vector) {
|
|||
auto nums = GetPayloadLengthFromWriter(payload);
|
||||
ASSERT_EQ(nums, 4);
|
||||
|
||||
auto reader = NewPayloadReader(int(milvus::DataType::VECTOR_BINARY), (uint8_t*)cb.data, cb.length);
|
||||
auto reader = NewPayloadReader(
|
||||
int(milvus::DataType::VECTOR_BINARY), (uint8_t*)cb.data, cb.length);
|
||||
uint8_t* values;
|
||||
int length;
|
||||
int dim;
|
||||
|
@ -249,7 +288,8 @@ TEST(storage, binary_vector) {
|
|||
|
||||
TEST(storage, binary_vector_empty) {
|
||||
int DIM = 16;
|
||||
auto payload = NewVectorPayloadWriter(int(milvus::DataType::VECTOR_BINARY), DIM);
|
||||
auto payload =
|
||||
NewVectorPayloadWriter(int(milvus::DataType::VECTOR_BINARY), DIM);
|
||||
auto st = FinishPayloadWriter(payload);
|
||||
ASSERT_EQ(st.error_code, ErrorCode::Success);
|
||||
auto cb = GetPayloadBufferFromWriter(payload);
|
||||
|
@ -257,7 +297,8 @@ TEST(storage, binary_vector_empty) {
|
|||
// ASSERT_EQ(cb.data, nullptr);
|
||||
auto nums = GetPayloadLengthFromWriter(payload);
|
||||
ASSERT_EQ(nums, 0);
|
||||
auto reader = NewPayloadReader(int(milvus::DataType::VECTOR_BINARY), (uint8_t*)cb.data, cb.length);
|
||||
auto reader = NewPayloadReader(
|
||||
int(milvus::DataType::VECTOR_BINARY), (uint8_t*)cb.data, cb.length);
|
||||
ASSERT_EQ(0, GetPayloadLengthFromReader(reader));
|
||||
// ASSERT_EQ(reader, nullptr);
|
||||
ReleasePayloadWriter(payload);
|
||||
|
@ -266,7 +307,8 @@ TEST(storage, binary_vector_empty) {
|
|||
|
||||
TEST(storage, float_vector) {
|
||||
int DIM = 2;
|
||||
auto payload = NewVectorPayloadWriter(int(milvus::DataType::VECTOR_FLOAT), DIM);
|
||||
auto payload =
|
||||
NewVectorPayloadWriter(int(milvus::DataType::VECTOR_FLOAT), DIM);
|
||||
float data[] = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
|
||||
auto st = AddFloatVectorToPayload(payload, data, DIM, 4);
|
||||
|
@ -279,7 +321,8 @@ TEST(storage, float_vector) {
|
|||
auto nums = GetPayloadLengthFromWriter(payload);
|
||||
ASSERT_EQ(nums, 4);
|
||||
|
||||
auto reader = NewPayloadReader(int(milvus::DataType::VECTOR_FLOAT), (uint8_t*)cb.data, cb.length);
|
||||
auto reader = NewPayloadReader(
|
||||
int(milvus::DataType::VECTOR_FLOAT), (uint8_t*)cb.data, cb.length);
|
||||
float* values;
|
||||
int length;
|
||||
int dim;
|
||||
|
@ -301,7 +344,8 @@ TEST(storage, float_vector) {
|
|||
|
||||
TEST(storage, float_vector_empty) {
|
||||
int DIM = 2;
|
||||
auto payload = NewVectorPayloadWriter(int(milvus::DataType::VECTOR_FLOAT), DIM);
|
||||
auto payload =
|
||||
NewVectorPayloadWriter(int(milvus::DataType::VECTOR_FLOAT), DIM);
|
||||
auto st = FinishPayloadWriter(payload);
|
||||
ASSERT_EQ(st.error_code, ErrorCode::Success);
|
||||
auto cb = GetPayloadBufferFromWriter(payload);
|
||||
|
@ -309,7 +353,8 @@ TEST(storage, float_vector_empty) {
|
|||
// ASSERT_EQ(cb.data, nullptr);
|
||||
auto nums = GetPayloadLengthFromWriter(payload);
|
||||
ASSERT_EQ(nums, 0);
|
||||
auto reader = NewPayloadReader(int(milvus::DataType::VECTOR_FLOAT), (uint8_t*)cb.data, cb.length);
|
||||
auto reader = NewPayloadReader(
|
||||
int(milvus::DataType::VECTOR_FLOAT), (uint8_t*)cb.data, cb.length);
|
||||
ASSERT_EQ(0, GetPayloadLengthFromReader(reader));
|
||||
// ASSERT_EQ(reader, nullptr);
|
||||
ReleasePayloadWriter(payload);
|
||||
|
|
|
@ -29,8 +29,12 @@ namespace spb = proto::schema;
|
|||
static SchemaPtr
|
||||
getStandardSchema() {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("FloatVectorField", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("BinaryVectorField", DataType::VECTOR_BINARY, 16, knowhere::metric::JACCARD);
|
||||
schema->AddDebugField(
|
||||
"FloatVectorField", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("BinaryVectorField",
|
||||
DataType::VECTOR_BINARY,
|
||||
16,
|
||||
knowhere::metric::JACCARD);
|
||||
schema->AddDebugField("Int64Field", DataType::INT64);
|
||||
schema->AddDebugField("Int32Field", DataType::INT32);
|
||||
schema->AddDebugField("Int16Field", DataType::INT16);
|
||||
|
|
|
@ -37,7 +37,8 @@ TEST(Query, ShowExecutor) {
|
|||
auto metric_type = knowhere::metric::L2;
|
||||
auto node = std::make_unique<FloatVectorANNS>();
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto field_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, metric_type);
|
||||
auto field_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, metric_type);
|
||||
int64_t num_queries = 100L;
|
||||
auto raw_data = DataGen(schema, num_queries);
|
||||
auto& info = node->search_info_;
|
||||
|
@ -79,7 +80,8 @@ TEST(Query, DSL) {
|
|||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
auto res = shower.call_child(*plan->plan_node_);
|
||||
|
@ -126,7 +128,8 @@ TEST(Query, ParsePlaceholderGroup) {
|
|||
})";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto plan = CreatePlan(*schema, dsl_string);
|
||||
int64_t num_queries = 100000;
|
||||
int dim = 16;
|
||||
|
@ -139,7 +142,8 @@ TEST(Query, ExecWithPredicateLoader) {
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto counter_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(counter_fid);
|
||||
|
@ -174,12 +178,17 @@ TEST(Query, ExecWithPredicateLoader) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp time = 1000000;
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
@ -217,7 +226,8 @@ TEST(Query, ExecWithPredicateSmallN) {
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 7, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 7, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
@ -252,12 +262,17 @@ TEST(Query, ExecWithPredicateSmallN) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 7, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp time = 1000000;
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
@ -271,7 +286,8 @@ TEST(Query, ExecWithPredicate) {
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
@ -306,12 +322,17 @@ TEST(Query, ExecWithPredicate) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp time = 1000000;
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
@ -349,7 +370,8 @@ TEST(Query, ExecTerm) {
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
@ -383,12 +405,17 @@ TEST(Query, ExecTerm) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 3;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp time = 1000000;
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
@ -405,7 +432,8 @@ TEST(Query, ExecEmpty) {
|
|||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
std::string dsl = R"({
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -430,7 +458,8 @@ TEST(Query, ExecEmpty) {
|
|||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp time = 1000000;
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
@ -477,11 +506,16 @@ TEST(Query, ExecWithoutPredicateFlat) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp time = 1000000;
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
@ -495,7 +529,8 @@ TEST(Query, ExecWithoutPredicate) {
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
@ -523,11 +558,16 @@ TEST(Query, ExecWithoutPredicate) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp time = 1000000;
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
@ -587,18 +627,25 @@ TEST(Query, InnerProduct) {
|
|||
]
|
||||
}
|
||||
})";
|
||||
auto vec_fid = schema->AddDebugField("normalized", DataType::VECTOR_FLOAT, dim, knowhere::metric::IP);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"normalized", DataType::VECTOR_FLOAT, dim, knowhere::metric::IP);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
auto col = dataset.get_col<float>(vec_fid);
|
||||
|
||||
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, col.data());
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, col.data());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp ts = N * 2;
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), ts);
|
||||
assert_order(*sr, "ip");
|
||||
|
@ -651,15 +698,21 @@ TEST(Query, FillSegment) {
|
|||
// dispatch here
|
||||
int N = 100000;
|
||||
auto dataset = DataGen(schema, N);
|
||||
const auto std_vec = dataset.get_col<int64_t>(FieldId(101)); // ids field
|
||||
const auto std_vfloat_vec = dataset.get_col<float>(FieldId(100)); // vector field
|
||||
const auto std_i32_vec = dataset.get_col<int32_t>(FieldId(102)); // scalar field
|
||||
const auto std_vec = dataset.get_col<int64_t>(FieldId(101)); // ids field
|
||||
const auto std_vfloat_vec =
|
||||
dataset.get_col<float>(FieldId(100)); // vector field
|
||||
const auto std_i32_vec =
|
||||
dataset.get_col<int32_t>(FieldId(102)); // scalar field
|
||||
|
||||
std::vector<std::unique_ptr<SegmentInternalInterface>> segments;
|
||||
segments.emplace_back([&] {
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
return segment;
|
||||
}());
|
||||
segments.emplace_back([&] {
|
||||
|
@ -708,8 +761,10 @@ TEST(Query, FillSegment) {
|
|||
|
||||
for (auto& segment : segments) {
|
||||
plan->target_entries_.clear();
|
||||
plan->target_entries_.push_back(schema->get_field_id(FieldName("fakevec")));
|
||||
plan->target_entries_.push_back(schema->get_field_id(FieldName("the_value")));
|
||||
plan->target_entries_.push_back(
|
||||
schema->get_field_id(FieldName("fakevec")));
|
||||
plan->target_entries_.push_back(
|
||||
schema->get_field_id(FieldName("the_value")));
|
||||
auto result = segment->Search(plan.get(), ph.get(), ts);
|
||||
// std::cout << SearchResultToJson(result).dump(2);
|
||||
result->result_offsets_.resize(topk * num_queries);
|
||||
|
@ -723,11 +778,13 @@ TEST(Query, FillSegment) {
|
|||
}
|
||||
|
||||
auto vec_field_id = schema->get_field_id(FieldName("fakevec"));
|
||||
auto output_vec_field_data = fields_data.at(vec_field_id)->vectors().float_vector().data();
|
||||
auto output_vec_field_data =
|
||||
fields_data.at(vec_field_id)->vectors().float_vector().data();
|
||||
ASSERT_EQ(output_vec_field_data.size(), topk * num_queries * dim);
|
||||
|
||||
auto i32_field_id = schema->get_field_id(FieldName("the_value"));
|
||||
auto output_i32_field_data = fields_data.at(i32_field_id)->scalars().int_data().data();
|
||||
auto output_i32_field_data =
|
||||
fields_data.at(i32_field_id)->scalars().int_data().data();
|
||||
ASSERT_EQ(output_i32_field_data.size(), topk * num_queries);
|
||||
|
||||
for (int i = 0; i < topk * num_queries; i++) {
|
||||
|
@ -737,13 +794,17 @@ TEST(Query, FillSegment) {
|
|||
auto std_val = std_vec[internal_offset];
|
||||
auto std_i32 = std_i32_vec[internal_offset];
|
||||
std::vector<float> std_vfloat(dim);
|
||||
std::copy_n(std_vfloat_vec.begin() + dim * internal_offset, dim, std_vfloat.begin());
|
||||
std::copy_n(std_vfloat_vec.begin() + dim * internal_offset,
|
||||
dim,
|
||||
std_vfloat.begin());
|
||||
|
||||
ASSERT_EQ(val, std_val) << "io:" << internal_offset;
|
||||
if (val != -1) {
|
||||
// check vector field
|
||||
std::vector<float> vfloat(dim);
|
||||
memcpy(vfloat.data(), &output_vec_field_data[i * dim], dim * sizeof(float));
|
||||
memcpy(vfloat.data(),
|
||||
&output_vec_field_data[i * dim],
|
||||
dim * sizeof(float));
|
||||
ASSERT_EQ(vfloat, std_vfloat);
|
||||
|
||||
// check int32 field
|
||||
|
@ -759,7 +820,8 @@ TEST(Query, ExecWithPredicateBinary) {
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
|
||||
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
@ -794,13 +856,19 @@ TEST(Query, ExecWithPredicateBinary) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
auto vec_ptr = dataset.get_col<uint8_t>(vec_fid);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreateBinaryPlaceholderGroupFromBlob(num_queries, 512, vec_ptr.data() + 1024 * 512 / 8);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group_raw = CreateBinaryPlaceholderGroupFromBlob(
|
||||
num_queries, 512, vec_ptr.data() + 1024 * 512 / 8);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp time = 1000000;
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
|
|
@ -32,7 +32,10 @@ cmp2(std::pair<float, int64_t> a, std::pair<float, int64_t> b) {
|
|||
}
|
||||
|
||||
auto
|
||||
RangeSearchSortResultBF(milvus::DatasetPtr data_set, int64_t topk, size_t nq, std::string metric_type) {
|
||||
RangeSearchSortResultBF(milvus::DatasetPtr data_set,
|
||||
int64_t topk,
|
||||
size_t nq,
|
||||
std::string metric_type) {
|
||||
auto lims = milvus::GetDatasetLims(data_set);
|
||||
auto id = milvus::GetDatasetIDs(data_set);
|
||||
auto dist = milvus::GetDatasetDistance(data_set);
|
||||
|
@ -69,7 +72,10 @@ RangeSearchSortResultBF(milvus::DatasetPtr data_set, int64_t topk, size_t nq, st
|
|||
}
|
||||
|
||||
milvus::DatasetPtr
|
||||
genResultDataset(const int64_t nq, const int64_t* ids, const float* distance, const size_t* lims) {
|
||||
genResultDataset(const int64_t nq,
|
||||
const int64_t* ids,
|
||||
const float* distance,
|
||||
const size_t* lims) {
|
||||
auto ret_ds = std::make_shared<milvus::Dataset>();
|
||||
ret_ds->SetRows(nq);
|
||||
ret_ds->SetIds(ids);
|
||||
|
@ -80,12 +86,17 @@ genResultDataset(const int64_t nq, const int64_t* ids, const float* distance, co
|
|||
}
|
||||
|
||||
void
|
||||
CheckRangeSearchSortResult(int64_t* p_id, float* p_dist, milvus::DatasetPtr dataset, int64_t n) {
|
||||
CheckRangeSearchSortResult(int64_t* p_id,
|
||||
float* p_dist,
|
||||
milvus::DatasetPtr dataset,
|
||||
int64_t n) {
|
||||
auto id = milvus::GetDatasetIDs(dataset);
|
||||
auto dist = milvus::GetDatasetDistance(dataset);
|
||||
for (int i = 0; i < n; i++) {
|
||||
AssertInfo(id[i] == p_id[i], "id of range search result are not the same");
|
||||
AssertInfo(dist[i] == p_dist[i], "distance of range search result are not the same");
|
||||
AssertInfo(id[i] == p_id[i],
|
||||
"id of range search result are not the same");
|
||||
AssertInfo(dist[i] == p_dist[i],
|
||||
"distance of range search result are not the same");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -102,7 +113,8 @@ GenRangeSearchResult(int64_t* ids,
|
|||
std::mt19937 e(seed);
|
||||
std::uniform_int_distribution<> uniform_num(0, N);
|
||||
std::uniform_int_distribution<> uniform_ids(id_min, id_max);
|
||||
std::uniform_real_distribution<> uniform_distance(distance_min, distance_max);
|
||||
std::uniform_real_distribution<> uniform_distance(distance_min,
|
||||
distance_max);
|
||||
|
||||
lims = new size_t[N + 1];
|
||||
// alloc max memory
|
||||
|
@ -122,12 +134,14 @@ GenRangeSearchResult(int64_t* ids,
|
|||
return genResultDataset(N, ids, distances, lims);
|
||||
}
|
||||
|
||||
class RangeSearchSortTest : public ::testing::TestWithParam<knowhere::MetricType> {
|
||||
class RangeSearchSortTest
|
||||
: public ::testing::TestWithParam<knowhere::MetricType> {
|
||||
protected:
|
||||
void
|
||||
SetUp() override {
|
||||
metric_type = GetParam();
|
||||
dataset = GenRangeSearchResult(ids, distances, lims, N, id_min, id_max, dist_min, dist_max);
|
||||
dataset = GenRangeSearchResult(
|
||||
ids, distances, lims, N, id_min, id_max, dist_min, dist_max);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -160,7 +174,8 @@ INSTANTIATE_TEST_CASE_P(RangeSearchSortParameters,
|
|||
|
||||
TEST_P(RangeSearchSortTest, CheckRangeSearchSort) {
|
||||
auto res = milvus::SortRangeSearchResult(dataset, TOPK, N, metric_type);
|
||||
auto [real_num, p_id, p_dist] = RangeSearchSortResultBF(dataset, TOPK, N, metric_type);
|
||||
auto [real_num, p_id, p_dist] =
|
||||
RangeSearchSortResultBF(dataset, TOPK, N, metric_type);
|
||||
CheckRangeSearchSortResult(p_id, p_dist, res, real_num);
|
||||
delete[] p_id;
|
||||
delete[] p_dist;
|
||||
|
|
|
@ -31,7 +31,8 @@ GenSubSearchResult(const int64_t nq,
|
|||
const int64_t round_decimal) {
|
||||
constexpr int64_t limit = 1000000L;
|
||||
bool is_ip = (metric_type == knowhere::metric::IP);
|
||||
SubSearchResultUniq sub_result = std::make_unique<SubSearchResult>(nq, topk, metric_type, round_decimal);
|
||||
SubSearchResultUniq sub_result =
|
||||
std::make_unique<SubSearchResult>(nq, topk, metric_type, round_decimal);
|
||||
std::vector<int64_t> ids;
|
||||
std::vector<float> distances;
|
||||
for (auto n = 0; n < nq; ++n) {
|
||||
|
@ -41,11 +42,16 @@ GenSubSearchResult(const int64_t nq,
|
|||
distances.push_back(gen_x);
|
||||
}
|
||||
if (is_ip) {
|
||||
std::sort(ids.begin() + n * topk, ids.begin() + (n + 1) * topk, std::greater<int64_t>());
|
||||
std::sort(distances.begin() + n * topk, distances.begin() + (n + 1) * topk, std::greater<float>());
|
||||
std::sort(ids.begin() + n * topk,
|
||||
ids.begin() + (n + 1) * topk,
|
||||
std::greater<int64_t>());
|
||||
std::sort(distances.begin() + n * topk,
|
||||
distances.begin() + (n + 1) * topk,
|
||||
std::greater<float>());
|
||||
} else {
|
||||
std::sort(ids.begin() + n * topk, ids.begin() + (n + 1) * topk);
|
||||
std::sort(distances.begin() + n * topk, distances.begin() + (n + 1) * topk);
|
||||
std::sort(distances.begin() + n * topk,
|
||||
distances.begin() + (n + 1) * topk);
|
||||
}
|
||||
}
|
||||
sub_result->mutable_distances() = std::move(distances);
|
||||
|
@ -86,7 +92,8 @@ TestSubSearchResultMerge(const knowhere::MetricType& metric_type,
|
|||
|
||||
SubSearchResult final_result(nq, topk, metric_type, round_decimal);
|
||||
for (int i = 0; i < iteration; ++i) {
|
||||
SubSearchResultUniq sub_result = GenSubSearchResult(nq, topk, metric_type, round_decimal);
|
||||
SubSearchResultUniq sub_result =
|
||||
GenSubSearchResult(nq, topk, metric_type, round_decimal);
|
||||
auto ids = sub_result->get_ids();
|
||||
for (int n = 0; n < nq; ++n) {
|
||||
for (int k = 0; k < topk; ++k) {
|
||||
|
@ -103,8 +110,10 @@ TestSubSearchResultMerge(const knowhere::MetricType& metric_type,
|
|||
}
|
||||
|
||||
TEST(Reduce, SubSearchResult) {
|
||||
using queue_type_l2 = std::priority_queue<int64_t, std::vector<int64_t>, std::less<int64_t>>;
|
||||
using queue_type_ip = std::priority_queue<int64_t, std::vector<int64_t>, std::greater<int64_t>>;
|
||||
using queue_type_l2 =
|
||||
std::priority_queue<int64_t, std::vector<int64_t>, std::less<int64_t>>;
|
||||
using queue_type_ip = std::
|
||||
priority_queue<int64_t, std::vector<int64_t>, std::greater<int64_t>>;
|
||||
|
||||
TestSubSearchResultMerge<queue_type_l2>(knowhere::metric::L2, 1, 1, 1);
|
||||
TestSubSearchResultMerge<queue_type_l2>(knowhere::metric::L2, 1, 1, 10);
|
||||
|
|
|
@ -23,22 +23,36 @@ TEST(Relational, Basic) {
|
|||
std::string s = "str4";
|
||||
std::string another_s = "str5";
|
||||
|
||||
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(i64, another_i64), i64 == another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, another_i64), i64 != another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, another_i64), i64 >= another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(i64, another_i64), i64 > another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, another_i64), i64 <= another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::less<>{})>()(i64, another_i64), i64 < another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(i64, another_i64),
|
||||
i64 == another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, another_i64),
|
||||
i64 != another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, another_i64),
|
||||
i64 >= another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(i64, another_i64),
|
||||
i64 > another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, another_i64),
|
||||
i64 <= another_i64);
|
||||
ASSERT_EQ(Relational<decltype(std::less<>{})>()(i64, another_i64),
|
||||
i64 < another_i64);
|
||||
|
||||
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(s, another_s), s == another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(s, another_s), s != another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(s, another_s), s >= another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(s, another_s), s > another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(s, another_s), s <= another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::less<>{})>()(s, another_s), s < another_s);
|
||||
ASSERT_EQ(Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(s, another_s),
|
||||
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(s, another_s),
|
||||
s == another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(s, another_s),
|
||||
s != another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(s, another_s),
|
||||
s >= another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(s, another_s),
|
||||
s > another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(s, another_s),
|
||||
s <= another_s);
|
||||
ASSERT_EQ(Relational<decltype(std::less<>{})>()(s, another_s),
|
||||
s < another_s);
|
||||
ASSERT_EQ(Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(
|
||||
s, another_s),
|
||||
milvus::PrefixMatch(s, another_s));
|
||||
ASSERT_EQ(Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(s, another_s),
|
||||
ASSERT_EQ(Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(
|
||||
s, another_s),
|
||||
milvus::PostfixMatch(s, another_s));
|
||||
}
|
||||
|
||||
|
@ -49,10 +63,13 @@ TEST(Relational, DifferentFundamentalType) {
|
|||
int64_t i64 = 4;
|
||||
|
||||
ASSERT_EQ(Relational<decltype(std::equal_to<>{})>()(i64, i32), i64 == i32);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, i32), i64 != i32);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, i32), i64 >= i32);
|
||||
ASSERT_EQ(Relational<decltype(std::not_equal_to<>{})>()(i64, i32),
|
||||
i64 != i32);
|
||||
ASSERT_EQ(Relational<decltype(std::greater_equal<>{})>()(i64, i32),
|
||||
i64 >= i32);
|
||||
ASSERT_EQ(Relational<decltype(std::greater<>{})>()(i64, i32), i64 > i32);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, i32), i64 <= i32);
|
||||
ASSERT_EQ(Relational<decltype(std::less_equal<>{})>()(i64, i32),
|
||||
i64 <= i32);
|
||||
ASSERT_EQ(Relational<decltype(std::less<>{})>()(i64, i32), i64 < i32);
|
||||
}
|
||||
|
||||
|
@ -68,8 +85,11 @@ TEST(Relational, DifferentInCompatibleType) {
|
|||
ASSERT_ANY_THROW(Relational<decltype(std::greater<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::less_equal<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::less<>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(
|
||||
Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(s, i64));
|
||||
ASSERT_ANY_THROW(
|
||||
Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(s,
|
||||
i64));
|
||||
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::equal_to<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::not_equal_to<>{})>()(i64, s));
|
||||
|
@ -77,6 +97,9 @@ TEST(Relational, DifferentInCompatibleType) {
|
|||
ASSERT_ANY_THROW(Relational<decltype(std::greater<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::less_equal<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(std::less<>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(
|
||||
Relational<decltype(MatchOp<milvus::OpType::PrefixMatch>{})>()(i64, s));
|
||||
ASSERT_ANY_THROW(
|
||||
Relational<decltype(MatchOp<milvus::OpType::PostfixMatch>{})>()(i64,
|
||||
s));
|
||||
}
|
||||
|
|
|
@ -48,7 +48,8 @@ TEST(Retrieve, AutoID) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
auto fid_vec = schema->AddDebugField(
|
||||
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
|
@ -65,7 +66,8 @@ TEST(Retrieve, AutoID) {
|
|||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(i64_col[choose(i)]);
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
|
||||
fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldId> target_fields_id{fid_64, fid_vec};
|
||||
|
@ -98,7 +100,8 @@ TEST(Retrieve, AutoID2) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
auto fid_vec = schema->AddDebugField(
|
||||
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
|
@ -115,7 +118,8 @@ TEST(Retrieve, AutoID2) {
|
|||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(i64_col[choose(i)]);
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
|
||||
fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
|
@ -143,7 +147,8 @@ TEST(Retrieve, NotExist) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
auto fid_vec = schema->AddDebugField(
|
||||
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
|
@ -163,7 +168,8 @@ TEST(Retrieve, NotExist) {
|
|||
values.emplace_back(choose2(i));
|
||||
}
|
||||
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
|
||||
fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
|
@ -191,7 +197,8 @@ TEST(Retrieve, Empty) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
auto fid_vec = schema->AddDebugField(
|
||||
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
|
@ -205,7 +212,8 @@ TEST(Retrieve, Empty) {
|
|||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(choose(i));
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
|
||||
fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
|
@ -226,7 +234,8 @@ TEST(Retrieve, LargeTimestamp) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
auto fid_vec = schema->AddDebugField(
|
||||
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
int64_t N = 100;
|
||||
|
@ -244,7 +253,8 @@ TEST(Retrieve, LargeTimestamp) {
|
|||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(i64_col[choose(i)]);
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
|
||||
fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldId> target_offsets{fid_64, fid_vec};
|
||||
|
@ -253,7 +263,8 @@ TEST(Retrieve, LargeTimestamp) {
|
|||
std::vector<int> filter_timestamps{-1, 0, 1, 10, 20};
|
||||
filter_timestamps.push_back(N / 2);
|
||||
for (const auto& f_ts : filter_timestamps) {
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), ts_offset + 1 + f_ts);
|
||||
auto retrieve_results =
|
||||
segment->Retrieve(plan.get(), ts_offset + 1 + f_ts);
|
||||
Assert(retrieve_results->fields_data_size() == 2);
|
||||
|
||||
int target_num = (f_ts + choose_sep) / choose_sep;
|
||||
|
@ -263,10 +274,12 @@ TEST(Retrieve, LargeTimestamp) {
|
|||
|
||||
for (auto field_data : retrieve_results->fields_data()) {
|
||||
if (DataType(field_data.type()) == DataType::INT64) {
|
||||
Assert(field_data.scalars().long_data().data_size() == target_num);
|
||||
Assert(field_data.scalars().long_data().data_size() ==
|
||||
target_num);
|
||||
}
|
||||
if (DataType(field_data.type()) == DataType::VECTOR_FLOAT) {
|
||||
Assert(field_data.vectors().float_vector().data_size() == target_num * DIM);
|
||||
Assert(field_data.vectors().float_vector().data_size() ==
|
||||
target_num * DIM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -276,7 +289,8 @@ TEST(Retrieve, Delete) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
|
||||
auto DIM = 16;
|
||||
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
auto fid_vec = schema->AddDebugField(
|
||||
"vector_64", DataType::VECTOR_FLOAT, DIM, knowhere::metric::L2);
|
||||
schema->set_primary_field_id(fid_64);
|
||||
|
||||
auto fid_ts = schema->AddDebugField("Timestamp", DataType::INT64);
|
||||
|
@ -300,7 +314,8 @@ TEST(Retrieve, Delete) {
|
|||
for (int i = 0; i < req_size; ++i) {
|
||||
values.emplace_back(i64_col[choose(i)]);
|
||||
}
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(fid_64, DataType::INT64, values);
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(
|
||||
fid_64, DataType::INT64, values);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldId> target_offsets{fid_ts, fid_64, fid_vec};
|
||||
|
@ -357,7 +372,10 @@ TEST(Retrieve, Delete) {
|
|||
std::vector<idx_t> new_timestamps{10, 10, 10, 10, 10, 10};
|
||||
auto reserved_offset = segment->PreDelete(new_count);
|
||||
ASSERT_EQ(reserved_offset, row_count);
|
||||
segment->Delete(reserved_offset, new_count, ids.get(), reinterpret_cast<const Timestamp*>(new_timestamps.data()));
|
||||
segment->Delete(reserved_offset,
|
||||
new_count,
|
||||
ids.get(),
|
||||
reinterpret_cast<const Timestamp*>(new_timestamps.data()));
|
||||
|
||||
{
|
||||
auto retrieve_results = segment->Retrieve(plan.get(), 100);
|
||||
|
|
|
@ -49,7 +49,9 @@ TYPED_TEST_P(TypedScalarIndexTest, Constructor) {
|
|||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = milvus::DataType(dtype);
|
||||
create_index_info.index_type = index_type;
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
|
||||
auto index =
|
||||
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
|
||||
create_index_info);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,8 +63,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Count) {
|
|||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = milvus::DataType(dtype);
|
||||
create_index_info.index_type = index_type;
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
|
||||
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto index =
|
||||
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
|
||||
create_index_info);
|
||||
auto scalar_index =
|
||||
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto arr = GenArr<T>(nb);
|
||||
scalar_index->Build(nb, arr.data());
|
||||
ASSERT_EQ(nb, scalar_index->Count());
|
||||
|
@ -77,8 +82,11 @@ TYPED_TEST_P(TypedScalarIndexTest, In) {
|
|||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = milvus::DataType(dtype);
|
||||
create_index_info.index_type = index_type;
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
|
||||
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto index =
|
||||
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
|
||||
create_index_info);
|
||||
auto scalar_index =
|
||||
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto arr = GenArr<T>(nb);
|
||||
scalar_index->Build(nb, arr.data());
|
||||
assert_in<T>(scalar_index, arr);
|
||||
|
@ -93,8 +101,11 @@ TYPED_TEST_P(TypedScalarIndexTest, NotIn) {
|
|||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = milvus::DataType(dtype);
|
||||
create_index_info.index_type = index_type;
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
|
||||
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto index =
|
||||
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
|
||||
create_index_info);
|
||||
auto scalar_index =
|
||||
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto arr = GenArr<T>(nb);
|
||||
scalar_index->Build(nb, arr.data());
|
||||
assert_not_in<T>(scalar_index, arr);
|
||||
|
@ -109,8 +120,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Reverse) {
|
|||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = milvus::DataType(dtype);
|
||||
create_index_info.index_type = index_type;
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
|
||||
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto index =
|
||||
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
|
||||
create_index_info);
|
||||
auto scalar_index =
|
||||
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto arr = GenArr<T>(nb);
|
||||
scalar_index->Build(nb, arr.data());
|
||||
assert_reverse<T>(scalar_index, arr);
|
||||
|
@ -125,8 +139,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Range) {
|
|||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = milvus::DataType(dtype);
|
||||
create_index_info.index_type = index_type;
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
|
||||
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto index =
|
||||
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
|
||||
create_index_info);
|
||||
auto scalar_index =
|
||||
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto arr = GenArr<T>(nb);
|
||||
scalar_index->Build(nb, arr.data());
|
||||
assert_range<T>(scalar_index, arr);
|
||||
|
@ -141,16 +158,22 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) {
|
|||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = milvus::DataType(dtype);
|
||||
create_index_info.index_type = index_type;
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
|
||||
auto scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto index =
|
||||
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
|
||||
create_index_info);
|
||||
auto scalar_index =
|
||||
dynamic_cast<milvus::index::ScalarIndex<T>*>(index.get());
|
||||
auto arr = GenArr<T>(nb);
|
||||
scalar_index->Build(nb, arr.data());
|
||||
|
||||
auto binary_set = index->Serialize(nullptr);
|
||||
auto copy_index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex(create_index_info);
|
||||
auto copy_index =
|
||||
milvus::index::IndexFactory::GetInstance().CreateScalarIndex(
|
||||
create_index_info);
|
||||
copy_index->Load(binary_set);
|
||||
|
||||
auto copy_scalar_index = dynamic_cast<milvus::index::ScalarIndex<T>*>(copy_index.get());
|
||||
auto copy_scalar_index =
|
||||
dynamic_cast<milvus::index::ScalarIndex<T>*>(copy_index.get());
|
||||
ASSERT_EQ(nb, copy_scalar_index->Count());
|
||||
assert_in<T>(copy_scalar_index, arr);
|
||||
assert_not_in<T>(copy_scalar_index, arr);
|
||||
|
@ -159,8 +182,17 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) {
|
|||
}
|
||||
|
||||
// TODO: it's easy to overflow for int8_t. Design more reasonable ut.
|
||||
using ScalarT = ::testing::Types<int8_t, int16_t, int32_t, int64_t, float, double>;
|
||||
using ScalarT =
|
||||
::testing::Types<int8_t, int16_t, int32_t, int64_t, float, double>;
|
||||
|
||||
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest, Dummy, Constructor, Count, In, NotIn, Range, Codec, Reverse);
|
||||
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest,
|
||||
Dummy,
|
||||
Constructor,
|
||||
Count,
|
||||
In,
|
||||
NotIn,
|
||||
Range,
|
||||
Codec,
|
||||
Reverse);
|
||||
|
||||
INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck, TypedScalarIndexTest, ScalarT);
|
||||
|
|
|
@ -31,7 +31,9 @@ using milvus::indexbuilder::ScalarIndexCreatorPtr;
|
|||
using ScalarTestParams = std::pair<MapParams, MapParams>;
|
||||
|
||||
namespace {
|
||||
template <typename T, typename = std::enable_if_t<std::is_arithmetic_v<T> | std::is_same_v<T, std::string>>>
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_arithmetic_v<T> |
|
||||
std::is_same_v<T, std::string>>>
|
||||
inline void
|
||||
build_index(const ScalarIndexCreatorPtr& creator, const std::vector<T>& arr) {
|
||||
const int64_t dim = 8; // not important here
|
||||
|
@ -41,7 +43,8 @@ build_index(const ScalarIndexCreatorPtr& creator, const std::vector<T>& arr) {
|
|||
|
||||
template <>
|
||||
inline void
|
||||
build_index(const ScalarIndexCreatorPtr& creator, const std::vector<bool>& arr) {
|
||||
build_index(const ScalarIndexCreatorPtr& creator,
|
||||
const std::vector<bool>& arr) {
|
||||
schemapb::BoolArray pbarr;
|
||||
for (auto b : arr) {
|
||||
pbarr.add_data(b);
|
||||
|
@ -50,19 +53,20 @@ build_index(const ScalarIndexCreatorPtr& creator, const std::vector<bool>& arr)
|
|||
|
||||
creator->Build(ds);
|
||||
|
||||
delete[](char*) (ds->GetTensor());
|
||||
delete[](char*)(ds->GetTensor());
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
build_index(const ScalarIndexCreatorPtr& creator, const std::vector<std::string>& arr) {
|
||||
build_index(const ScalarIndexCreatorPtr& creator,
|
||||
const std::vector<std::string>& arr) {
|
||||
schemapb::StringArray pbarr;
|
||||
*(pbarr.mutable_data()) = {arr.begin(), arr.end()};
|
||||
auto ds = GenDsFromPB(pbarr);
|
||||
|
||||
creator->Build(ds);
|
||||
|
||||
delete[](char*) (ds->GetTensor());
|
||||
delete[](char*)(ds->GetTensor());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -79,7 +83,8 @@ class TypedScalarIndexCreatorTest : public ::testing::Test {
|
|||
// }
|
||||
};
|
||||
|
||||
using ScalarT = ::testing::Types<bool, int8_t, int16_t, int32_t, int64_t, float, double, std::string>;
|
||||
using ScalarT = ::testing::
|
||||
Types<bool, int8_t, int16_t, int32_t, int64_t, float, double, std::string>;
|
||||
|
||||
TYPED_TEST_CASE_P(TypedScalarIndexCreatorTest);
|
||||
|
||||
|
@ -97,8 +102,10 @@ TYPED_TEST_P(TypedScalarIndexCreatorTest, Constructor) {
|
|||
auto index_params = tp.second;
|
||||
auto serialized_type_params = generate_type_params(type_params);
|
||||
auto serialized_index_params = generate_index_params(index_params);
|
||||
auto creator = milvus::indexbuilder::CreateScalarIndex(milvus::DataType(dtype), serialized_type_params.c_str(),
|
||||
serialized_index_params.c_str());
|
||||
auto creator = milvus::indexbuilder::CreateScalarIndex(
|
||||
milvus::DataType(dtype),
|
||||
serialized_type_params.c_str(),
|
||||
serialized_index_params.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -110,17 +117,26 @@ TYPED_TEST_P(TypedScalarIndexCreatorTest, Codec) {
|
|||
auto index_params = tp.second;
|
||||
auto serialized_type_params = generate_type_params(type_params);
|
||||
auto serialized_index_params = generate_index_params(index_params);
|
||||
auto creator = milvus::indexbuilder::CreateScalarIndex(milvus::DataType(dtype), serialized_type_params.c_str(),
|
||||
serialized_index_params.c_str());
|
||||
auto creator = milvus::indexbuilder::CreateScalarIndex(
|
||||
milvus::DataType(dtype),
|
||||
serialized_type_params.c_str(),
|
||||
serialized_index_params.c_str());
|
||||
auto arr = GenArr<T>(nb);
|
||||
build_index<T>(creator, arr);
|
||||
auto binary_set = creator->Serialize();
|
||||
auto copy_creator = milvus::indexbuilder::CreateScalarIndex(
|
||||
milvus::DataType(dtype), serialized_type_params.c_str(), serialized_index_params.c_str());
|
||||
milvus::DataType(dtype),
|
||||
serialized_type_params.c_str(),
|
||||
serialized_index_params.c_str());
|
||||
copy_creator->Load(binary_set);
|
||||
}
|
||||
}
|
||||
|
||||
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexCreatorTest, Dummy, Constructor, Codec);
|
||||
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexCreatorTest,
|
||||
Dummy,
|
||||
Constructor,
|
||||
Codec);
|
||||
|
||||
INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck, TypedScalarIndexCreatorTest, ScalarT);
|
||||
INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck,
|
||||
TypedScalarIndexCreatorTest,
|
||||
ScalarT);
|
||||
|
|
|
@ -30,7 +30,8 @@ TEST(Sealed, without_predicate) {
|
|||
auto dim = 16;
|
||||
auto topK = 5;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fake_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
@ -64,12 +65,18 @@ TEST(Sealed, without_predicate) {
|
|||
auto query_ptr = vec_col.data() + 4200 * dim;
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
Timestamp time = 1000000;
|
||||
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
|
||||
|
@ -81,11 +88,13 @@ TEST(Sealed, without_predicate) {
|
|||
create_index_info.metric_type = knowhere::metric::L2;
|
||||
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
|
||||
|
||||
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
|
||||
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, nullptr);
|
||||
|
||||
auto build_conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "100"}};
|
||||
auto build_conf =
|
||||
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "100"}};
|
||||
|
||||
auto search_conf = knowhere::Json{{knowhere::indexparam::NPROBE, 10}};
|
||||
|
||||
|
@ -134,7 +143,8 @@ TEST(Sealed, with_predicate) {
|
|||
auto dim = 16;
|
||||
auto topK = 5;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fake_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
|
@ -172,12 +182,18 @@ TEST(Sealed, with_predicate) {
|
|||
auto query_ptr = vec_col.data() + 42000 * dim;
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
Timestamp time = 10000000;
|
||||
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
|
||||
|
@ -187,11 +203,13 @@ TEST(Sealed, with_predicate) {
|
|||
create_index_info.field_type = DataType::VECTOR_FLOAT;
|
||||
create_index_info.metric_type = knowhere::metric::L2;
|
||||
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
|
||||
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
|
||||
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, nullptr);
|
||||
|
||||
auto build_conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "100"}};
|
||||
auto build_conf =
|
||||
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "100"}};
|
||||
|
||||
auto database = knowhere::GenDataSet(N, dim, vec_col.data());
|
||||
indexing->BuildWithDataset(database, build_conf);
|
||||
|
@ -203,7 +221,8 @@ TEST(Sealed, with_predicate) {
|
|||
auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr);
|
||||
|
||||
auto search_conf =
|
||||
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}, {knowhere::indexparam::NPROBE, 10}};
|
||||
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::indexparam::NPROBE, 10}};
|
||||
milvus::SearchInfo searchInfo;
|
||||
searchInfo.topk_ = topK;
|
||||
searchInfo.metric_type_ = knowhere::metric::L2;
|
||||
|
@ -237,7 +256,8 @@ TEST(Sealed, with_predicate_filter_all) {
|
|||
auto topK = 5;
|
||||
// auto metric_type = MetricType::METRIC_L2;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fake_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
std::string dsl = R"({
|
||||
|
@ -275,8 +295,10 @@ TEST(Sealed, with_predicate_filter_all) {
|
|||
auto query_ptr = vec_col.data() + 42000 * dim;
|
||||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
Timestamp time = 10000000;
|
||||
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
|
||||
|
@ -285,11 +307,13 @@ TEST(Sealed, with_predicate_filter_all) {
|
|||
create_index_info.field_type = DataType::VECTOR_FLOAT;
|
||||
create_index_info.metric_type = knowhere::metric::L2;
|
||||
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
|
||||
auto ivf_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
|
||||
auto ivf_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, nullptr);
|
||||
|
||||
auto ivf_build_conf = knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "100"},
|
||||
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
|
||||
auto ivf_build_conf =
|
||||
knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "100"},
|
||||
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
|
||||
|
||||
auto database = knowhere::GenDataSet(N, dim, vec_col.data());
|
||||
ivf_indexing->BuildWithDataset(database, ivf_build_conf);
|
||||
|
@ -311,19 +335,22 @@ TEST(Sealed, with_predicate_filter_all) {
|
|||
auto sr = ivf_sealed_segment->Search(plan.get(), ph_group.get(), time);
|
||||
EXPECT_EQ(sr->get_total_result_count(), 0);
|
||||
|
||||
auto hnsw_conf = knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::HNSW_M, "16"},
|
||||
{knowhere::indexparam::EFCONSTRUCTION, "200"},
|
||||
{knowhere::indexparam::EF, "200"},
|
||||
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
|
||||
auto hnsw_conf =
|
||||
knowhere::Json{{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::HNSW_M, "16"},
|
||||
{knowhere::indexparam::EFCONSTRUCTION, "200"},
|
||||
{knowhere::indexparam::EF, "200"},
|
||||
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2}};
|
||||
|
||||
create_index_info.field_type = DataType::VECTOR_FLOAT;
|
||||
create_index_info.metric_type = knowhere::metric::L2;
|
||||
create_index_info.index_type = knowhere::IndexEnum::INDEX_HNSW;
|
||||
auto hnsw_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, nullptr);
|
||||
auto hnsw_indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, nullptr);
|
||||
hnsw_indexing->BuildWithDataset(database, hnsw_conf);
|
||||
|
||||
auto hnsw_vec_index = dynamic_cast<index::VectorIndex*>(hnsw_indexing.get());
|
||||
auto hnsw_vec_index =
|
||||
dynamic_cast<index::VectorIndex*>(hnsw_indexing.get());
|
||||
EXPECT_EQ(hnsw_vec_index->Count(), N);
|
||||
EXPECT_EQ(hnsw_vec_index->GetDim(), dim);
|
||||
|
||||
|
@ -347,7 +374,8 @@ TEST(Sealed, LoadFieldData) {
|
|||
auto N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fakevec_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
|
||||
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
|
||||
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
|
||||
|
@ -396,7 +424,8 @@ TEST(Sealed, LoadFieldData) {
|
|||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
ASSERT_ANY_THROW(segment->Search(plan.get(), ph_group.get(), time));
|
||||
|
||||
|
@ -441,7 +470,8 @@ TEST(Sealed, LoadFieldDataMmap) {
|
|||
auto N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fakevec_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
|
||||
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
|
||||
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
|
||||
|
@ -490,7 +520,8 @@ TEST(Sealed, LoadFieldDataMmap) {
|
|||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
ASSERT_ANY_THROW(segment->Search(plan.get(), ph_group.get(), time));
|
||||
|
||||
|
@ -534,7 +565,8 @@ TEST(Sealed, LoadScalarIndex) {
|
|||
auto N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fakevec_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
|
||||
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
|
||||
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
|
||||
|
@ -579,19 +611,24 @@ TEST(Sealed, LoadScalarIndex) {
|
|||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
LoadFieldDataInfo row_id_info;
|
||||
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
|
||||
auto array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
|
||||
FieldMeta row_id_field_meta(
|
||||
FieldName("RowID"), RowFieldID, DataType::INT64);
|
||||
auto array = CreateScalarDataArrayFrom(
|
||||
dataset.row_ids_.data(), N, row_id_field_meta);
|
||||
row_id_info.field_data = array.get();
|
||||
row_id_info.row_count = dataset.row_ids_.size();
|
||||
row_id_info.field_id = RowFieldID.get(); // field id for RowId
|
||||
segment->LoadFieldData(row_id_info);
|
||||
|
||||
LoadFieldDataInfo ts_info;
|
||||
FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
|
||||
array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta);
|
||||
FieldMeta ts_field_meta(
|
||||
FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
|
||||
array =
|
||||
CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta);
|
||||
ts_info.field_data = array.get();
|
||||
ts_info.row_count = dataset.timestamps_.size();
|
||||
ts_info.field_id = TimestampFieldID.get();
|
||||
|
@ -609,7 +646,8 @@ TEST(Sealed, LoadScalarIndex) {
|
|||
counter_index.field_type = DataType::INT64;
|
||||
counter_index.index_params["index_type"] = "sort";
|
||||
auto counter_data = dataset.get_col<int64_t>(counter_id);
|
||||
counter_index.index = std::move(GenScalarIndexing<int64_t>(N, counter_data.data()));
|
||||
counter_index.index =
|
||||
std::move(GenScalarIndexing<int64_t>(N, counter_data.data()));
|
||||
segment->LoadIndex(counter_index);
|
||||
|
||||
LoadIndexInfo double_index;
|
||||
|
@ -617,7 +655,8 @@ TEST(Sealed, LoadScalarIndex) {
|
|||
double_index.field_type = DataType::DOUBLE;
|
||||
double_index.index_params["index_type"] = "sort";
|
||||
auto double_data = dataset.get_col<double>(double_id);
|
||||
double_index.index = std::move(GenScalarIndexing<double>(N, double_data.data()));
|
||||
double_index.index =
|
||||
std::move(GenScalarIndexing<double>(N, double_data.data()));
|
||||
segment->LoadIndex(double_index);
|
||||
|
||||
LoadIndexInfo nothing_index;
|
||||
|
@ -625,7 +664,8 @@ TEST(Sealed, LoadScalarIndex) {
|
|||
nothing_index.field_type = DataType::INT32;
|
||||
nothing_index.index_params["index_type"] = "sort";
|
||||
auto nothing_data = dataset.get_col<int32_t>(nothing_id);
|
||||
nothing_index.index = std::move(GenScalarIndexing<int32_t>(N, nothing_data.data()));
|
||||
nothing_index.index =
|
||||
std::move(GenScalarIndexing<int32_t>(N, nothing_data.data()));
|
||||
segment->LoadIndex(nothing_index);
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
|
@ -639,7 +679,8 @@ TEST(Sealed, Delete) {
|
|||
auto N = 10;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto fakevec_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fakevec_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto counter_id = schema->AddDebugField("counter", DataType::INT64);
|
||||
auto double_id = schema->AddDebugField("double", DataType::DOUBLE);
|
||||
auto nothing_id = schema->AddDebugField("nothing", DataType::INT32);
|
||||
|
@ -682,7 +723,8 @@ TEST(Sealed, Delete) {
|
|||
auto plan = CreatePlan(*schema, dsl);
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
ASSERT_ANY_THROW(segment->Search(plan.get(), ph_group.get(), time));
|
||||
|
||||
|
@ -705,11 +747,14 @@ TEST(Sealed, Delete) {
|
|||
int64_t new_count = 3;
|
||||
std::vector<idx_t> new_pks{6, 7, 8};
|
||||
auto new_ids = std::make_unique<IdArray>();
|
||||
new_ids->mutable_int_id()->mutable_data()->Add(new_pks.begin(), new_pks.end());
|
||||
new_ids->mutable_int_id()->mutable_data()->Add(new_pks.begin(),
|
||||
new_pks.end());
|
||||
std::vector<idx_t> new_timestamps{10, 10, 10};
|
||||
auto reserved_offset = segment->PreDelete(new_count);
|
||||
ASSERT_EQ(reserved_offset, row_count);
|
||||
segment->Delete(reserved_offset, new_count, new_ids.get(),
|
||||
segment->Delete(reserved_offset,
|
||||
new_count,
|
||||
new_ids.get(),
|
||||
reinterpret_cast<const Timestamp*>(new_timestamps.data()));
|
||||
}
|
||||
|
||||
|
@ -730,7 +775,8 @@ GenRandomFloatVecs(int N, int dim) {
|
|||
srand(time(NULL));
|
||||
for (int i = 0; i < N; i++) {
|
||||
for (int j = 0; j < dim; j++) {
|
||||
vecs.push_back(static_cast<float>(rand()) / static_cast<float>(RAND_MAX));
|
||||
vecs.push_back(static_cast<float>(rand()) /
|
||||
static_cast<float>(RAND_MAX));
|
||||
}
|
||||
}
|
||||
return vecs;
|
||||
|
@ -750,7 +796,8 @@ GenQueryVecs(int N, int dim) {
|
|||
auto
|
||||
transfer_to_fields_data(const std::vector<float>& vecs) {
|
||||
auto arr = std::make_unique<DataArray>();
|
||||
*(arr->mutable_vectors()->mutable_float_vector()->mutable_data()) = {vecs.begin(), vecs.end()};
|
||||
*(arr->mutable_vectors()->mutable_float_vector()->mutable_data()) = {
|
||||
vecs.begin(), vecs.end()};
|
||||
return arr;
|
||||
}
|
||||
|
||||
|
@ -758,7 +805,8 @@ TEST(Sealed, BF) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto metric_type = "L2";
|
||||
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fake_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
|
@ -788,13 +836,16 @@ TEST(Sealed, BF) {
|
|||
output_field_ids: 101)") %
|
||||
topK;
|
||||
auto serialized_expr_plan = fmt.str();
|
||||
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan.data());
|
||||
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
auto binary_plan =
|
||||
translate_text_plan_to_binary_plan(serialized_expr_plan.data());
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
|
||||
auto num_queries = 10;
|
||||
auto query = GenQueryVecs(num_queries, dim);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, query);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
auto result = segment->Search(plan.get(), ph_group.get(), MAX_TIMESTAMP);
|
||||
auto ves = SearchResultToVector(*result);
|
||||
|
@ -811,7 +862,8 @@ TEST(Sealed, BF_Overflow) {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto metric_type = "L2";
|
||||
auto fake_id = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto fake_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
|
@ -839,13 +891,16 @@ TEST(Sealed, BF_Overflow) {
|
|||
output_field_ids: 101)") %
|
||||
topK;
|
||||
auto serialized_expr_plan = fmt.str();
|
||||
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan.data());
|
||||
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
auto binary_plan =
|
||||
translate_text_plan_to_binary_plan(serialized_expr_plan.data());
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
|
||||
auto num_queries = 10;
|
||||
auto query = GenQueryVecs(num_queries, dim);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, query);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
auto result = segment->Search(plan.get(), ph_group.get(), MAX_TIMESTAMP);
|
||||
auto ves = SearchResultToVector(*result);
|
||||
|
@ -894,7 +949,8 @@ TEST(Sealed, RealCount) {
|
|||
ASSERT_EQ(del_offset1, 0);
|
||||
auto del_ids1 = GenPKs(pks.begin(), pks.begin() + half);
|
||||
auto del_tss1 = GenTss(half, c);
|
||||
auto status = segment->Delete(del_offset1, half, del_ids1.get(), del_tss1.data());
|
||||
auto status =
|
||||
segment->Delete(del_offset1, half, del_ids1.get(), del_tss1.data());
|
||||
ASSERT_TRUE(status.ok());
|
||||
ASSERT_EQ(c - half, segment->get_real_count());
|
||||
|
||||
|
@ -902,7 +958,8 @@ TEST(Sealed, RealCount) {
|
|||
auto del_offset2 = segment->PreDelete(half);
|
||||
ASSERT_EQ(del_offset2, half);
|
||||
auto del_tss2 = GenTss(half, c + half);
|
||||
status = segment->Delete(del_offset2, half, del_ids1.get(), del_tss2.data());
|
||||
status =
|
||||
segment->Delete(del_offset2, half, del_ids1.get(), del_tss2.data());
|
||||
ASSERT_TRUE(status.ok());
|
||||
ASSERT_EQ(c - half, segment->get_real_count());
|
||||
|
||||
|
|
|
@ -37,9 +37,13 @@ generate_data(int N) {
|
|||
for (auto& x : vec) {
|
||||
x = distribution(er);
|
||||
}
|
||||
raw_data.insert(raw_data.end(), (const char*)std::begin(vec), (const char*)std::end(vec));
|
||||
raw_data.insert(raw_data.end(),
|
||||
(const char*)std::begin(vec),
|
||||
(const char*)std::end(vec));
|
||||
int age = ei() % 100;
|
||||
raw_data.insert(raw_data.end(), (const char*)&age, ((const char*)&age) + sizeof(age));
|
||||
raw_data.insert(raw_data.end(),
|
||||
(const char*)&age,
|
||||
((const char*)&age) + sizeof(age));
|
||||
}
|
||||
return std::make_tuple(raw_data, timestamps, uids);
|
||||
}
|
||||
|
@ -48,7 +52,8 @@ generate_data(int N) {
|
|||
TEST(SegmentCoreTest, NormalDistributionTest) {
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
int N = 100 * 1000;
|
||||
auto [raw_data, timestamps, uids] = generate_data(N);
|
||||
|
@ -63,7 +68,8 @@ TEST(SegmentCoreTest, MockTest2) {
|
|||
|
||||
// schema
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
|
@ -71,29 +77,37 @@ TEST(SegmentCoreTest, MockTest2) {
|
|||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
auto reserved_begin = segment->PreInsert(N);
|
||||
segment->Insert(reserved_begin, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(reserved_begin,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
}
|
||||
|
||||
TEST(SegmentCoreTest, SmallIndex) {
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
}
|
||||
|
||||
TEST(InsertRecordTest, growing_int64_t) {
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto record = milvus::segcore::InsertRecord<false>(*schema, int64_t(32));
|
||||
const int N = 100000;
|
||||
|
||||
for (int i = 1; i <= N; i++) record.insert_pk(PkType(int64_t(i)), int64_t(i));
|
||||
for (int i = 1; i <= N; i++)
|
||||
record.insert_pk(PkType(int64_t(i)), int64_t(i));
|
||||
|
||||
for (int i = 1; i <= N; i++) {
|
||||
std::vector<SegOffset> offset = record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
|
||||
std::vector<SegOffset> offset =
|
||||
record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
|
||||
ASSERT_EQ(offset[0].get(), int64_t(i));
|
||||
}
|
||||
}
|
||||
|
@ -101,16 +115,19 @@ TEST(InsertRecordTest, growing_int64_t) {
|
|||
TEST(InsertRecordTest, growing_string) {
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("name", DataType::VARCHAR);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto record = milvus::segcore::InsertRecord<false>(*schema, int64_t(32));
|
||||
const int N = 100000;
|
||||
|
||||
for (int i = 1; i <= N; i++) record.insert_pk(PkType(std::to_string(i)), int64_t(i));
|
||||
for (int i = 1; i <= N; i++)
|
||||
record.insert_pk(PkType(std::to_string(i)), int64_t(i));
|
||||
|
||||
for (int i = 1; i <= N; i++) {
|
||||
std::vector<SegOffset> offset = record.search_pk(std::to_string(i), int64_t(N + 1));
|
||||
std::vector<SegOffset> offset =
|
||||
record.search_pk(std::to_string(i), int64_t(N + 1));
|
||||
ASSERT_EQ(offset[0].get(), int64_t(i));
|
||||
}
|
||||
}
|
||||
|
@ -118,17 +135,20 @@ TEST(InsertRecordTest, growing_string) {
|
|||
TEST(InsertRecordTest, sealed_int64_t) {
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto record = milvus::segcore::InsertRecord<true>(*schema, int64_t(32));
|
||||
const int N = 100000;
|
||||
|
||||
for (int i = N; i >= 1; i--) record.insert_pk(PkType(int64_t(i)), int64_t(i));
|
||||
for (int i = N; i >= 1; i--)
|
||||
record.insert_pk(PkType(int64_t(i)), int64_t(i));
|
||||
record.seal_pks();
|
||||
|
||||
for (int i = 1; i <= N; i++) {
|
||||
std::vector<SegOffset> offset = record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
|
||||
std::vector<SegOffset> offset =
|
||||
record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
|
||||
ASSERT_EQ(offset[0].get(), int64_t(i));
|
||||
}
|
||||
}
|
||||
|
@ -136,18 +156,21 @@ TEST(InsertRecordTest, sealed_int64_t) {
|
|||
TEST(InsertRecordTest, sealed_string) {
|
||||
using namespace milvus::segcore;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("name", DataType::VARCHAR);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto record = milvus::segcore::InsertRecord<true>(*schema, int64_t(32));
|
||||
const int N = 100000;
|
||||
|
||||
for (int i = 1; i <= N; i++) record.insert_pk(PkType(std::to_string(i)), int64_t(i));
|
||||
for (int i = 1; i <= N; i++)
|
||||
record.insert_pk(PkType(std::to_string(i)), int64_t(i));
|
||||
|
||||
record.seal_pks();
|
||||
|
||||
for (int i = 1; i <= N; i++) {
|
||||
std::vector<SegOffset> offset = record.search_pk(std::to_string(i), int64_t(N + 1));
|
||||
std::vector<SegOffset> offset =
|
||||
record.search_pk(std::to_string(i), int64_t(N + 1));
|
||||
ASSERT_EQ(offset[0].get(), int64_t(i));
|
||||
}
|
||||
}
|
|
@ -23,9 +23,11 @@ TEST(Span, Naive) {
|
|||
int64_t N = ROW_COUNT;
|
||||
constexpr int64_t size_per_chunk = 32 * 1024;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto bin_vec_fid = schema->AddDebugField("binaryvec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
|
||||
auto bin_vec_fid = schema->AddDebugField(
|
||||
"binaryvec", DataType::VECTOR_BINARY, 512, knowhere::metric::JACCARD);
|
||||
auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
|
||||
auto float_vec_fid = schema->AddDebugField("floatvec", DataType::VECTOR_FLOAT, 32, knowhere::metric::L2);
|
||||
auto float_vec_fid = schema->AddDebugField(
|
||||
"floatvec", DataType::VECTOR_FLOAT, 32, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
|
@ -33,7 +35,11 @@ TEST(Span, Naive) {
|
|||
auto seg_conf = SegcoreConfig::default_config();
|
||||
auto segment = CreateGrowingSegment(schema, -1, seg_conf);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
auto vec_ptr = dataset.get_col<uint8_t>(bin_vec_fid);
|
||||
auto age_ptr = dataset.get_col<float>(float_fid);
|
||||
auto float_ptr = dataset.get_col<float>(float_vec_fid);
|
||||
|
@ -43,9 +49,11 @@ TEST(Span, Naive) {
|
|||
auto row_count = interface.get_row_count();
|
||||
ASSERT_EQ(N, row_count);
|
||||
for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
|
||||
auto vec_span = interface.chunk_data<milvus::BinaryVector>(bin_vec_fid, chunk_id);
|
||||
auto vec_span =
|
||||
interface.chunk_data<milvus::BinaryVector>(bin_vec_fid, chunk_id);
|
||||
auto age_span = interface.chunk_data<float>(float_fid, chunk_id);
|
||||
auto float_span = interface.chunk_data<milvus::FloatVector>(float_vec_fid, chunk_id);
|
||||
auto float_span =
|
||||
interface.chunk_data<milvus::FloatVector>(float_vec_fid, chunk_id);
|
||||
auto begin = chunk_id * size_per_chunk;
|
||||
auto end = std::min((chunk_id + 1) * size_per_chunk, N);
|
||||
auto size_of_chunk = end - begin;
|
||||
|
|
|
@ -46,7 +46,10 @@ GenGenericValue(T value) {
|
|||
}
|
||||
|
||||
auto
|
||||
GenColumnInfo(int64_t field_id, proto::schema::DataType field_type, bool auto_id, bool is_pk) {
|
||||
GenColumnInfo(int64_t field_id,
|
||||
proto::schema::DataType field_type,
|
||||
bool auto_id,
|
||||
bool is_pk) {
|
||||
auto column_info = new proto::plan::ColumnInfo();
|
||||
column_info->set_field_id(field_id);
|
||||
column_info->set_data_type(field_type);
|
||||
|
@ -56,7 +59,10 @@ GenColumnInfo(int64_t field_id, proto::schema::DataType field_type, bool auto_id
|
|||
}
|
||||
|
||||
auto
|
||||
GenQueryInfo(int64_t topk, std::string metric_type, std::string search_params, int64_t round_decimal = -1) {
|
||||
GenQueryInfo(int64_t topk,
|
||||
std::string metric_type,
|
||||
std::string search_params,
|
||||
int64_t round_decimal = -1) {
|
||||
auto query_info = new proto::plan::QueryInfo();
|
||||
query_info->set_topk(topk);
|
||||
query_info->set_metric_type(metric_type);
|
||||
|
@ -66,7 +72,10 @@ GenQueryInfo(int64_t topk, std::string metric_type, std::string search_params, i
|
|||
}
|
||||
|
||||
auto
|
||||
GenAnns(proto::plan::Expr* predicate, bool is_binary, int64_t field_id, std::string placeholder_tag = "$0") {
|
||||
GenAnns(proto::plan::Expr* predicate,
|
||||
bool is_binary,
|
||||
int64_t field_id,
|
||||
std::string placeholder_tag = "$0") {
|
||||
auto query_info = GenQueryInfo(10, "L2", "{\"nprobe\": 10}", -1);
|
||||
auto anns = new proto::plan::VectorANNS();
|
||||
anns->set_is_binary(is_binary);
|
||||
|
@ -146,23 +155,32 @@ GenPlanNode() {
|
|||
}
|
||||
|
||||
void
|
||||
SetTargetEntry(std::unique_ptr<proto::plan::PlanNode>& plan_node, const std::vector<int64_t>& output_fields) {
|
||||
SetTargetEntry(std::unique_ptr<proto::plan::PlanNode>& plan_node,
|
||||
const std::vector<int64_t>& output_fields) {
|
||||
for (auto id : output_fields) {
|
||||
plan_node->add_output_field_ids(id);
|
||||
}
|
||||
}
|
||||
|
||||
auto
|
||||
GenTermPlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta, const std::vector<std::string>& strs)
|
||||
GenTermPlan(const FieldMeta& fvec_meta,
|
||||
const FieldMeta& str_meta,
|
||||
const std::vector<std::string>& strs)
|
||||
-> std::unique_ptr<proto::plan::PlanNode> {
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(),
|
||||
proto::schema::DataType::VarChar,
|
||||
false,
|
||||
false);
|
||||
auto term_expr = GenTermExpr<std::string>(strs);
|
||||
term_expr->set_allocated_column_info(column_info);
|
||||
|
||||
auto expr = GenExpr().release();
|
||||
expr->set_allocated_term_expr(term_expr);
|
||||
|
||||
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
||||
auto anns = GenAnns(expr,
|
||||
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
||||
fvec_meta.get_id().get(),
|
||||
"$0");
|
||||
|
||||
auto plan_node = GenPlanNode();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
|
@ -171,8 +189,12 @@ GenTermPlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta, const std::ve
|
|||
|
||||
auto
|
||||
GenAlwaysFalseExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
||||
auto term_expr = GenTermExpr<std::string>({}); // in empty set, always false.
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(),
|
||||
proto::schema::DataType::VarChar,
|
||||
false,
|
||||
false);
|
||||
auto term_expr =
|
||||
GenTermExpr<std::string>({}); // in empty set, always false.
|
||||
term_expr->set_allocated_column_info(column_info);
|
||||
|
||||
auto expr = GenExpr().release();
|
||||
|
@ -193,8 +215,10 @@ GenAlwaysTrueExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
|||
auto
|
||||
GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
||||
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
|
||||
auto anns = GenAnns(always_false_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
||||
fvec_meta.get_id().get(), "$0");
|
||||
auto anns = GenAnns(always_false_expr,
|
||||
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
||||
fvec_meta.get_id().get(),
|
||||
"$0");
|
||||
|
||||
auto plan_node = GenPlanNode();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
|
@ -204,8 +228,10 @@ GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
|||
auto
|
||||
GenAlwaysTruePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
||||
auto always_true_expr = GenAlwaysTrueExpr(fvec_meta, str_meta);
|
||||
auto anns =
|
||||
GenAnns(always_true_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
||||
auto anns = GenAnns(always_true_expr,
|
||||
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
||||
fvec_meta.get_id().get(),
|
||||
"$0");
|
||||
|
||||
auto plan_node = GenPlanNode();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
|
@ -217,7 +243,8 @@ GenTestSchema() {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("str", DataType::VARCHAR);
|
||||
schema->AddDebugField("another_str", DataType::VARCHAR);
|
||||
schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto pk = schema->AddDebugField("int64", DataType::INT64);
|
||||
schema->set_primary_field_id(pk);
|
||||
return schema;
|
||||
|
@ -228,7 +255,8 @@ GenStrPKSchema() {
|
|||
auto schema = std::make_shared<Schema>();
|
||||
auto pk = schema->AddDebugField("str", DataType::VARCHAR);
|
||||
schema->AddDebugField("another_str", DataType::VARCHAR);
|
||||
schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField(
|
||||
"fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
schema->AddDebugField("int64", DataType::INT64);
|
||||
schema->set_primary_field_id(pk);
|
||||
return schema;
|
||||
|
@ -252,7 +280,11 @@ TEST(StringExpr, Term) {
|
|||
}();
|
||||
|
||||
std::map<int, std::vector<std::string>> terms = {
|
||||
{0, {"2000", "3000"}}, {1, {"2000"}}, {2, {"3000"}}, {3, {}}, {4, {vec_2k_3k}},
|
||||
{0, {"2000", "3000"}},
|
||||
{1, {"2000"}},
|
||||
{2, {"3000"}},
|
||||
{3, {}},
|
||||
{4, {vec_2k_3k}},
|
||||
};
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
|
@ -266,11 +298,16 @@ TEST(StringExpr, Term) {
|
|||
auto end = new_str_col->scalars().string_data().data().end();
|
||||
str_col.insert(str_col.end(), begin, end);
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (const auto& [_, term] : terms) {
|
||||
auto plan_proto = GenTermPlan(fvec_meta, str_meta, term);
|
||||
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
|
||||
|
@ -296,11 +333,18 @@ TEST(StringExpr, Compare) {
|
|||
const auto& str_meta = schema->operator[](FieldName("str"));
|
||||
const auto& another_str_meta = schema->operator[](FieldName("another_str"));
|
||||
|
||||
auto gen_compare_plan = [&, fvec_meta, str_meta,
|
||||
another_str_meta](proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
|
||||
auto str_col_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
||||
auto gen_compare_plan =
|
||||
[&, fvec_meta, str_meta, another_str_meta](
|
||||
proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
|
||||
auto str_col_info = GenColumnInfo(str_meta.get_id().get(),
|
||||
proto::schema::DataType::VarChar,
|
||||
false,
|
||||
false);
|
||||
auto another_str_col_info =
|
||||
GenColumnInfo(another_str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
||||
GenColumnInfo(another_str_meta.get_id().get(),
|
||||
proto::schema::DataType::VarChar,
|
||||
false,
|
||||
false);
|
||||
|
||||
auto compare_expr = GenCompareExpr(op);
|
||||
compare_expr->set_allocated_left_column_info(str_col_info);
|
||||
|
@ -309,22 +353,37 @@ TEST(StringExpr, Compare) {
|
|||
auto expr = GenExpr().release();
|
||||
expr->set_allocated_compare_expr(compare_expr);
|
||||
|
||||
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
||||
auto anns =
|
||||
GenAnns(expr,
|
||||
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
||||
fvec_meta.get_id().get(),
|
||||
"$0");
|
||||
|
||||
auto plan_node = std::make_unique<proto::plan::PlanNode>();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
return std::move(plan_node);
|
||||
};
|
||||
|
||||
std::vector<std::tuple<proto::plan::OpType, std::function<bool(std::string, std::string)>>> testcases{
|
||||
{proto::plan::OpType::GreaterThan, [](std::string v1, std::string v2) { return v1 > v2; }},
|
||||
{proto::plan::OpType::GreaterEqual, [](std::string v1, std::string v2) { return v1 >= v2; }},
|
||||
{proto::plan::OpType::LessThan, [](std::string v1, std::string v2) { return v1 < v2; }},
|
||||
{proto::plan::OpType::LessEqual, [](std::string v1, std::string v2) { return v1 <= v2; }},
|
||||
{proto::plan::OpType::Equal, [](std::string v1, std::string v2) { return v1 == v2; }},
|
||||
{proto::plan::OpType::NotEqual, [](std::string v1, std::string v2) { return v1 != v2; }},
|
||||
{proto::plan::OpType::PrefixMatch, [](std::string v1, std::string v2) { return PrefixMatch(v1, v2); }},
|
||||
};
|
||||
std::vector<std::tuple<proto::plan::OpType,
|
||||
std::function<bool(std::string, std::string)>>>
|
||||
testcases{
|
||||
{proto::plan::OpType::GreaterThan,
|
||||
[](std::string v1, std::string v2) { return v1 > v2; }},
|
||||
{proto::plan::OpType::GreaterEqual,
|
||||
[](std::string v1, std::string v2) { return v1 >= v2; }},
|
||||
{proto::plan::OpType::LessThan,
|
||||
[](std::string v1, std::string v2) { return v1 < v2; }},
|
||||
{proto::plan::OpType::LessEqual,
|
||||
[](std::string v1, std::string v2) { return v1 <= v2; }},
|
||||
{proto::plan::OpType::Equal,
|
||||
[](std::string v1, std::string v2) { return v1 == v2; }},
|
||||
{proto::plan::OpType::NotEqual,
|
||||
[](std::string v1, std::string v2) { return v1 != v2; }},
|
||||
{proto::plan::OpType::PrefixMatch,
|
||||
[](std::string v1, std::string v2) {
|
||||
return PrefixMatch(v1, v2);
|
||||
}},
|
||||
};
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
int N = 1000;
|
||||
|
@ -334,7 +393,8 @@ TEST(StringExpr, Compare) {
|
|||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
auto raw_data = DataGen(schema, N, iter);
|
||||
|
||||
auto reserve_col = [&, raw_data](const FieldMeta& field_meta, std::vector<std::string>& str_col) {
|
||||
auto reserve_col = [&, raw_data](const FieldMeta& field_meta,
|
||||
std::vector<std::string>& str_col) {
|
||||
auto new_str_col = raw_data.get_col(field_meta.get_id());
|
||||
auto begin = new_str_col->scalars().string_data().data().begin();
|
||||
auto end = new_str_col->scalars().string_data().data().end();
|
||||
|
@ -346,12 +406,17 @@ TEST(StringExpr, Compare) {
|
|||
|
||||
{
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (const auto& [op, ref_func] : testcases) {
|
||||
auto plan_proto = gen_compare_plan(op);
|
||||
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
|
||||
|
@ -377,29 +442,51 @@ TEST(StringExpr, UnaryRange) {
|
|||
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
||||
const auto& str_meta = schema->operator[](FieldName("str"));
|
||||
|
||||
auto gen_unary_range_plan = [&, fvec_meta, str_meta](proto::plan::OpType op,
|
||||
std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
||||
auto gen_unary_range_plan =
|
||||
[&, fvec_meta, str_meta](
|
||||
proto::plan::OpType op,
|
||||
std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(),
|
||||
proto::schema::DataType::VarChar,
|
||||
false,
|
||||
false);
|
||||
auto unary_range_expr = GenUnaryRangeExpr(op, value);
|
||||
unary_range_expr->set_allocated_column_info(column_info);
|
||||
|
||||
auto expr = GenExpr().release();
|
||||
expr->set_allocated_unary_range_expr(unary_range_expr);
|
||||
|
||||
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
||||
auto anns =
|
||||
GenAnns(expr,
|
||||
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
||||
fvec_meta.get_id().get(),
|
||||
"$0");
|
||||
|
||||
auto plan_node = std::make_unique<proto::plan::PlanNode>();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
return std::move(plan_node);
|
||||
};
|
||||
|
||||
std::vector<std::tuple<proto::plan::OpType, std::string, std::function<bool(std::string)>>> testcases{
|
||||
{proto::plan::OpType::GreaterThan, "2000", [](std::string val) { return val > "2000"; }},
|
||||
{proto::plan::OpType::GreaterEqual, "2000", [](std::string val) { return val >= "2000"; }},
|
||||
{proto::plan::OpType::LessThan, "3000", [](std::string val) { return val < "3000"; }},
|
||||
{proto::plan::OpType::LessEqual, "3000", [](std::string val) { return val <= "3000"; }},
|
||||
{proto::plan::OpType::PrefixMatch, "a", [](std::string val) { return PrefixMatch(val, "a"); }},
|
||||
};
|
||||
std::vector<std::tuple<proto::plan::OpType,
|
||||
std::string,
|
||||
std::function<bool(std::string)>>>
|
||||
testcases{
|
||||
{proto::plan::OpType::GreaterThan,
|
||||
"2000",
|
||||
[](std::string val) { return val > "2000"; }},
|
||||
{proto::plan::OpType::GreaterEqual,
|
||||
"2000",
|
||||
[](std::string val) { return val >= "2000"; }},
|
||||
{proto::plan::OpType::LessThan,
|
||||
"3000",
|
||||
[](std::string val) { return val < "3000"; }},
|
||||
{proto::plan::OpType::LessEqual,
|
||||
"3000",
|
||||
[](std::string val) { return val <= "3000"; }},
|
||||
{proto::plan::OpType::PrefixMatch,
|
||||
"a",
|
||||
[](std::string val) { return PrefixMatch(val, "a"); }},
|
||||
};
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
int N = 1000;
|
||||
|
@ -412,11 +499,16 @@ TEST(StringExpr, UnaryRange) {
|
|||
auto end = new_str_col->scalars().string_data().data().end();
|
||||
str_col.insert(str_col.end(), begin, end);
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (const auto& [op, value, ref_func] : testcases) {
|
||||
auto plan_proto = gen_unary_range_plan(op, value);
|
||||
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
|
||||
|
@ -428,7 +520,8 @@ TEST(StringExpr, UnaryRange) {
|
|||
|
||||
auto val = str_col[i];
|
||||
auto ref = ref_func(val);
|
||||
ASSERT_EQ(ans, ref) << "@" << op << "@" << value << "@" << i << "!!" << val;
|
||||
ASSERT_EQ(ans, ref)
|
||||
<< "@" << op << "@" << value << "@" << i << "!!" << val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -441,16 +534,28 @@ TEST(StringExpr, BinaryRange) {
|
|||
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
||||
const auto& str_meta = schema->operator[](FieldName("str"));
|
||||
|
||||
auto gen_binary_range_plan = [&, fvec_meta, str_meta](bool lb_inclusive, bool ub_inclusive, std::string lb,
|
||||
std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
||||
auto binary_range_expr = GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
|
||||
auto gen_binary_range_plan =
|
||||
[&, fvec_meta, str_meta](
|
||||
bool lb_inclusive,
|
||||
bool ub_inclusive,
|
||||
std::string lb,
|
||||
std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
|
||||
auto column_info = GenColumnInfo(str_meta.get_id().get(),
|
||||
proto::schema::DataType::VarChar,
|
||||
false,
|
||||
false);
|
||||
auto binary_range_expr =
|
||||
GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
|
||||
binary_range_expr->set_allocated_column_info(column_info);
|
||||
|
||||
auto expr = GenExpr().release();
|
||||
expr->set_allocated_binary_range_expr(binary_range_expr);
|
||||
|
||||
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
||||
auto anns =
|
||||
GenAnns(expr,
|
||||
fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
||||
fvec_meta.get_id().get(),
|
||||
"$0");
|
||||
|
||||
auto plan_node = std::make_unique<proto::plan::PlanNode>();
|
||||
plan_node->set_allocated_vector_anns(anns);
|
||||
|
@ -458,13 +563,34 @@ TEST(StringExpr, BinaryRange) {
|
|||
};
|
||||
|
||||
// bool lb_inclusive, bool ub_inclusive, std::string lb, std::string ub
|
||||
std::vector<std::tuple<bool, bool, std::string, std::string, std::function<bool(std::string)>>> testcases{
|
||||
{false, false, "2000", "3000", [](std::string val) { return val > "2000" && val < "3000"; }},
|
||||
{false, true, "2000", "3000", [](std::string val) { return val > "2000" && val <= "3000"; }},
|
||||
{true, false, "2000", "3000", [](std::string val) { return val >= "2000" && val < "3000"; }},
|
||||
{true, true, "2000", "3000", [](std::string val) { return val >= "2000" && val <= "3000"; }},
|
||||
{true, true, "2000", "1000", [](std::string val) { return false; }},
|
||||
};
|
||||
std::vector<std::tuple<bool,
|
||||
bool,
|
||||
std::string,
|
||||
std::string,
|
||||
std::function<bool(std::string)>>>
|
||||
testcases{
|
||||
{false,
|
||||
false,
|
||||
"2000",
|
||||
"3000",
|
||||
[](std::string val) { return val > "2000" && val < "3000"; }},
|
||||
{false,
|
||||
true,
|
||||
"2000",
|
||||
"3000",
|
||||
[](std::string val) { return val > "2000" && val <= "3000"; }},
|
||||
{true,
|
||||
false,
|
||||
"2000",
|
||||
"3000",
|
||||
[](std::string val) { return val >= "2000" && val < "3000"; }},
|
||||
{true,
|
||||
true,
|
||||
"2000",
|
||||
"3000",
|
||||
[](std::string val) { return val >= "2000" && val <= "3000"; }},
|
||||
{true, true, "2000", "1000", [](std::string val) { return false; }},
|
||||
};
|
||||
|
||||
auto seg = CreateGrowingSegment(schema);
|
||||
int N = 1000;
|
||||
|
@ -477,13 +603,20 @@ TEST(StringExpr, BinaryRange) {
|
|||
auto end = new_str_col->scalars().string_data().data().end();
|
||||
str_col.insert(str_col.end(), begin, end);
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] : testcases) {
|
||||
auto plan_proto = gen_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
|
||||
ExecExprVisitor visitor(
|
||||
*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] :
|
||||
testcases) {
|
||||
auto plan_proto =
|
||||
gen_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
|
||||
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
|
||||
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
|
||||
EXPECT_EQ(final.size(), N * num_iters);
|
||||
|
@ -493,8 +626,9 @@ TEST(StringExpr, BinaryRange) {
|
|||
|
||||
auto val = str_col[i];
|
||||
auto ref = ref_func(val);
|
||||
ASSERT_EQ(ans, ref) << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb << "@" << ub << "@" << i
|
||||
<< "!!" << val;
|
||||
ASSERT_EQ(ans, ref)
|
||||
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
|
||||
<< "@" << ub << "@" << i << "!!" << val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -512,20 +646,27 @@ TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
|
|||
auto round_decimal = -1;
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
|
||||
auto str_col = dataset.get_col(str_meta.get_id())->scalars().string_data().data();
|
||||
auto str_col =
|
||||
dataset.get_col(str_meta.get_id())->scalars().string_data().data();
|
||||
auto query_ptr = vec_col.data();
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->disable_small_index(); // brute-force search.
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto plan_proto = GenAlwaysTruePlan(fvec_meta, str_meta);
|
||||
SetTargetEntry(plan_proto, {str_meta.get_id().get()});
|
||||
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
|
||||
auto num_queries = 5;
|
||||
auto topk = 10;
|
||||
auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
Timestamp time = MAX_TIMESTAMP;
|
||||
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
|
||||
|
@ -538,19 +679,25 @@ TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
|
|||
dim, //
|
||||
query_ptr //
|
||||
};
|
||||
auto sub_result = BruteForceSearch(search_dataset, vec_col.data(), N, knowhere::Json(), nullptr);
|
||||
auto sub_result = BruteForceSearch(
|
||||
search_dataset, vec_col.data(), N, knowhere::Json(), nullptr);
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), time);
|
||||
segment->FillPrimaryKeys(plan.get(), *sr);
|
||||
segment->FillTargetEntry(plan.get(), *sr);
|
||||
ASSERT_EQ(sr->pk_type_, DataType::VARCHAR);
|
||||
ASSERT_TRUE(sr->output_fields_data_.find(str_meta.get_id()) != sr->output_fields_data_.end());
|
||||
auto retrieved_str_col = sr->output_fields_data_[str_meta.get_id()]->scalars().string_data().data();
|
||||
ASSERT_TRUE(sr->output_fields_data_.find(str_meta.get_id()) !=
|
||||
sr->output_fields_data_.end());
|
||||
auto retrieved_str_col = sr->output_fields_data_[str_meta.get_id()]
|
||||
->scalars()
|
||||
.string_data()
|
||||
.data();
|
||||
for (auto q = 0; q < num_queries; q++) {
|
||||
for (auto k = 0; k < topk; k++) {
|
||||
auto offset = q * topk + k;
|
||||
auto seg_offset = sub_result.get_seg_offsets()[offset];
|
||||
ASSERT_EQ(std::get<std::string>(sr->primary_keys_[offset]), str_col[seg_offset]);
|
||||
ASSERT_EQ(std::get<std::string>(sr->primary_keys_[offset]),
|
||||
str_col[seg_offset]);
|
||||
ASSERT_EQ(retrieved_str_col[offset], str_col[seg_offset]);
|
||||
}
|
||||
}
|
||||
|
@ -567,11 +714,16 @@ TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
|
|||
auto N = 100000;
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
|
||||
auto str_col = dataset.get_col(str_meta.get_id())->scalars().string_data().data();
|
||||
auto str_col =
|
||||
dataset.get_col(str_meta.get_id())->scalars().string_data().data();
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->disable_small_index(); // brute-force search.
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
segment->Insert(0,
|
||||
N,
|
||||
dataset.row_ids_.data(),
|
||||
dataset.timestamps_.data(),
|
||||
dataset.raw_);
|
||||
|
||||
auto expr_proto = GenAlwaysTrueExpr(fvec_meta, str_meta);
|
||||
auto plan_proto = GenPlanNode();
|
||||
|
@ -585,5 +737,6 @@ TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
|
|||
ASSERT_EQ(retrieved->ids().str_id().data().size(), N);
|
||||
ASSERT_EQ(retrieved->offset().size(), N);
|
||||
ASSERT_EQ(retrieved->fields_data().size(), 1);
|
||||
ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(), N);
|
||||
ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(),
|
||||
N);
|
||||
}
|
||||
|
|
|
@ -111,7 +111,8 @@ TEST_F(StringIndexMarisaTest, Range) {
|
|||
TEST_F(StringIndexMarisaTest, Reverse) {
|
||||
auto index_types = GetIndexTypes<std::string>();
|
||||
for (const auto& index_type : index_types) {
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex<std::string>(index_type);
|
||||
auto index = milvus::index::IndexFactory::GetInstance()
|
||||
.CreateScalarIndex<std::string>(index_type);
|
||||
index->Build(nb, strs.data());
|
||||
assert_reverse<std::string>(index.get(), strs);
|
||||
}
|
||||
|
@ -135,21 +136,24 @@ TEST_F(StringIndexMarisaTest, Query) {
|
|||
|
||||
{
|
||||
auto ds = knowhere::GenDataSet(strs.size(), 8, strs.data());
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::In);
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
|
||||
milvus::OpType::In);
|
||||
auto bitset = index->Query(ds);
|
||||
ASSERT_TRUE(bitset->any());
|
||||
}
|
||||
|
||||
{
|
||||
auto ds = knowhere::GenDataSet(strs.size(), 8, strs.data());
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::NotIn);
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
|
||||
milvus::OpType::NotIn);
|
||||
auto bitset = index->Query(ds);
|
||||
ASSERT_TRUE(bitset->none());
|
||||
}
|
||||
|
||||
{
|
||||
auto ds = std::make_shared<knowhere::DataSet>();
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::GreaterEqual);
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
|
||||
milvus::OpType::GreaterEqual);
|
||||
ds->Set<std::string>(milvus::index::RANGE_VALUE, "0");
|
||||
auto bitset = index->Query(ds);
|
||||
ASSERT_EQ(bitset->size(), strs.size());
|
||||
|
@ -158,7 +162,8 @@ TEST_F(StringIndexMarisaTest, Query) {
|
|||
|
||||
{
|
||||
auto ds = std::make_shared<knowhere::DataSet>();
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::Range);
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
|
||||
milvus::OpType::Range);
|
||||
ds->Set<std::string>(milvus::index::LOWER_BOUND_VALUE, "0");
|
||||
ds->Set<std::string>(milvus::index::UPPER_BOUND_VALUE, "range");
|
||||
ds->Set<bool>(milvus::index::LOWER_BOUND_INCLUSIVE, true);
|
||||
|
@ -170,8 +175,10 @@ TEST_F(StringIndexMarisaTest, Query) {
|
|||
{
|
||||
for (size_t i = 0; i < strs.size(); i++) {
|
||||
auto ds = std::make_shared<knowhere::DataSet>();
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE, milvus::OpType::PrefixMatch);
|
||||
ds->Set<std::string>(milvus::index::PREFIX_VALUE, std::move(strs[i]));
|
||||
ds->Set<milvus::OpType>(milvus::index::OPERATOR_TYPE,
|
||||
milvus::OpType::PrefixMatch);
|
||||
ds->Set<std::string>(milvus::index::PREFIX_VALUE,
|
||||
std::move(strs[i]));
|
||||
auto bitset = index->Query(ds);
|
||||
ASSERT_EQ(bitset->size(), strs.size());
|
||||
ASSERT_TRUE(bitset->test(i));
|
||||
|
@ -255,7 +262,8 @@ TEST_F(StringIndexMarisaTest, Codec) {
|
|||
}
|
||||
|
||||
TEST_F(StringIndexMarisaTest, BaseIndexCodec) {
|
||||
milvus::index::IndexBasePtr index = milvus::index::CreateStringIndexMarisa();
|
||||
milvus::index::IndexBasePtr index =
|
||||
milvus::index::CreateStringIndexMarisa();
|
||||
std::vector<std::string> strings(nb);
|
||||
for (int i = 0; i < nb; ++i) {
|
||||
strings[i] = std::to_string(std::rand() % 10);
|
||||
|
|
|
@ -20,14 +20,22 @@ using namespace milvus::segcore;
|
|||
TEST(TimestampIndex, Naive) {
|
||||
SUCCEED();
|
||||
std::vector<Timestamp> timestamps{
|
||||
1, 2, 14, 11, 13, 22, 21, 20,
|
||||
1,
|
||||
2,
|
||||
14,
|
||||
11,
|
||||
13,
|
||||
22,
|
||||
21,
|
||||
20,
|
||||
};
|
||||
std::vector<int64_t> lengths = {2, 3, 3};
|
||||
TimestampIndex index;
|
||||
index.set_length_meta(lengths);
|
||||
index.build_with(timestamps.data(), timestamps.size());
|
||||
|
||||
auto guessed_slice = GenerateFakeSlices(timestamps.data(), timestamps.size(), 2);
|
||||
auto guessed_slice =
|
||||
GenerateFakeSlices(timestamps.data(), timestamps.size(), 2);
|
||||
ASSERT_EQ(guessed_slice.size(), lengths.size());
|
||||
for (auto i = 0; i < lengths.size(); ++i) {
|
||||
ASSERT_EQ(guessed_slice[i], lengths[i]);
|
||||
|
|
|
@ -21,12 +21,16 @@ TEST(Util, StringMatch) {
|
|||
using namespace milvus::query;
|
||||
|
||||
ASSERT_ANY_THROW(Match(1, 2, OpType::PrefixMatch));
|
||||
ASSERT_ANY_THROW(Match(std::string("not_match_operation"), std::string("not_match"), OpType::LessEqual));
|
||||
ASSERT_ANY_THROW(Match(std::string("not_match_operation"),
|
||||
std::string("not_match"),
|
||||
OpType::LessEqual));
|
||||
|
||||
ASSERT_TRUE(PrefixMatch("prefix1", "prefix"));
|
||||
ASSERT_TRUE(PostfixMatch("1postfix", "postfix"));
|
||||
ASSERT_TRUE(Match(std::string("prefix1"), std::string("prefix"), OpType::PrefixMatch));
|
||||
ASSERT_TRUE(Match(std::string("1postfix"), std::string("postfix"), OpType::PostfixMatch));
|
||||
ASSERT_TRUE(Match(
|
||||
std::string("prefix1"), std::string("prefix"), OpType::PrefixMatch));
|
||||
ASSERT_TRUE(Match(
|
||||
std::string("1postfix"), std::string("postfix"), OpType::PostfixMatch));
|
||||
|
||||
ASSERT_FALSE(PrefixMatch("", "longer"));
|
||||
ASSERT_FALSE(PostfixMatch("", "longer"));
|
||||
|
@ -41,7 +45,8 @@ TEST(Util, GetDeleteBitmap) {
|
|||
using namespace milvus::segcore;
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
auto N = 10;
|
||||
|
@ -74,7 +79,11 @@ TEST(Util, GetDeleteBitmap) {
|
|||
auto query_timestamp = tss[N - 1];
|
||||
auto del_barrier = get_barrier(delete_record, query_timestamp);
|
||||
auto insert_barrier = get_barrier(insert_record, query_timestamp);
|
||||
auto res_bitmap = get_deleted_bitmap(del_barrier, insert_barrier, delete_record, insert_record, query_timestamp);
|
||||
auto res_bitmap = get_deleted_bitmap(del_barrier,
|
||||
insert_barrier,
|
||||
delete_record,
|
||||
insert_record,
|
||||
query_timestamp);
|
||||
ASSERT_EQ(res_bitmap->bitmap_ptr->count(), 0);
|
||||
|
||||
// test case insert repeated pk1 (ts = {1 ... N}) -> delete pk1 (ts = N) -> query (ts = N)
|
||||
|
@ -86,12 +95,17 @@ TEST(Util, GetDeleteBitmap) {
|
|||
delete_record.ack_responder_.AddSegment(offset, offset + 1);
|
||||
|
||||
del_barrier = get_barrier(delete_record, query_timestamp);
|
||||
res_bitmap = get_deleted_bitmap(del_barrier, insert_barrier, delete_record, insert_record, query_timestamp);
|
||||
res_bitmap = get_deleted_bitmap(del_barrier,
|
||||
insert_barrier,
|
||||
delete_record,
|
||||
insert_record,
|
||||
query_timestamp);
|
||||
ASSERT_EQ(res_bitmap->bitmap_ptr->count(), N - 1);
|
||||
|
||||
// test case insert repeated pk1 (ts = {1 ... N}) -> delete pk1 (ts = N) -> query (ts = N/2)
|
||||
query_timestamp = tss[N - 1] / 2;
|
||||
del_barrier = get_barrier(delete_record, query_timestamp);
|
||||
res_bitmap = get_deleted_bitmap(del_barrier, N, delete_record, insert_record, query_timestamp);
|
||||
res_bitmap = get_deleted_bitmap(
|
||||
del_barrier, N, delete_record, insert_record, query_timestamp);
|
||||
ASSERT_EQ(res_bitmap->bitmap_ptr->count(), 0);
|
||||
}
|
||||
|
|
|
@ -34,7 +34,8 @@ compare_double(double x, double y, double epsilon = 0.000001f) {
|
|||
}
|
||||
|
||||
inline void
|
||||
assert_order(const milvus::SearchResult& result, const knowhere::MetricType& metric_type) {
|
||||
assert_order(const milvus::SearchResult& result,
|
||||
const knowhere::MetricType& metric_type) {
|
||||
bool dsc = milvus::PositivelyRelated(metric_type);
|
||||
auto& ids = result.seg_offsets_;
|
||||
auto& dist = result.distances_;
|
||||
|
@ -143,7 +144,8 @@ assert_reverse(ScalarIndex<double>* index, const std::vector<double>& arr) {
|
|||
|
||||
template <>
|
||||
inline void
|
||||
assert_reverse(ScalarIndex<std::string>* index, const std::vector<std::string>& arr) {
|
||||
assert_reverse(ScalarIndex<std::string>* index,
|
||||
const std::vector<std::string>& arr) {
|
||||
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
||||
ASSERT_TRUE(arr[offset].compare(index->Reverse_Lookup(offset)) == 0);
|
||||
}
|
||||
|
@ -151,7 +153,8 @@ assert_reverse(ScalarIndex<std::string>* index, const std::vector<std::string>&
|
|||
|
||||
template <>
|
||||
inline void
|
||||
assert_in(ScalarIndex<std::string>* index, const std::vector<std::string>& arr) {
|
||||
assert_in(ScalarIndex<std::string>* index,
|
||||
const std::vector<std::string>& arr) {
|
||||
auto bitset1 = index->In(arr.size(), arr.data());
|
||||
ASSERT_EQ(arr.size(), bitset1->size());
|
||||
ASSERT_TRUE(bitset1->any());
|
||||
|
@ -159,7 +162,8 @@ assert_in(ScalarIndex<std::string>* index, const std::vector<std::string>& arr)
|
|||
|
||||
template <>
|
||||
inline void
|
||||
assert_not_in(ScalarIndex<std::string>* index, const std::vector<std::string>& arr) {
|
||||
assert_not_in(ScalarIndex<std::string>* index,
|
||||
const std::vector<std::string>& arr) {
|
||||
auto bitset1 = index->NotIn(arr.size(), arr.data());
|
||||
ASSERT_EQ(arr.size(), bitset1->size());
|
||||
ASSERT_TRUE(bitset1->none());
|
||||
|
@ -167,7 +171,8 @@ assert_not_in(ScalarIndex<std::string>* index, const std::vector<std::string>& a
|
|||
|
||||
template <>
|
||||
inline void
|
||||
assert_range(ScalarIndex<std::string>* index, const std::vector<std::string>& arr) {
|
||||
assert_range(ScalarIndex<std::string>* index,
|
||||
const std::vector<std::string>& arr) {
|
||||
auto test_min = arr[0];
|
||||
auto test_max = arr[arr.size() - 1];
|
||||
|
||||
|
|
|
@ -92,12 +92,17 @@ struct GeneratedData {
|
|||
int len = raw_->num_rows() * field_meta.get_dim();
|
||||
ret.resize(len);
|
||||
auto src_data =
|
||||
reinterpret_cast<const T*>(target_field_data.vectors().float_vector().data().data());
|
||||
reinterpret_cast<const T*>(target_field_data.vectors()
|
||||
.float_vector()
|
||||
.data()
|
||||
.data());
|
||||
std::copy_n(src_data, len, ret.data());
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
} else if (field_meta.get_data_type() ==
|
||||
DataType::VECTOR_BINARY) {
|
||||
int len = raw_->num_rows() * (field_meta.get_dim() / 8);
|
||||
ret.resize(len);
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.vectors().binary_vector().data());
|
||||
auto src_data = reinterpret_cast<const T*>(
|
||||
target_field_data.vectors().binary_vector().data());
|
||||
std::copy_n(src_data, len, ret.data());
|
||||
} else {
|
||||
PanicInfo("unsupported");
|
||||
|
@ -107,36 +112,44 @@ struct GeneratedData {
|
|||
}
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().bool_data().data().data());
|
||||
auto src_data = reinterpret_cast<const T*>(
|
||||
target_field_data.scalars().bool_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::INT8:
|
||||
case DataType::INT16:
|
||||
case DataType::INT32: {
|
||||
auto src_data =
|
||||
reinterpret_cast<const int32_t*>(target_field_data.scalars().int_data().data().data());
|
||||
auto src_data = reinterpret_cast<const int32_t*>(
|
||||
target_field_data.scalars().int_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().long_data().data().data());
|
||||
auto src_data = reinterpret_cast<const T*>(
|
||||
target_field_data.scalars().long_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().float_data().data().data());
|
||||
auto src_data = reinterpret_cast<const T*>(
|
||||
target_field_data.scalars().float_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().double_data().data().data());
|
||||
auto src_data =
|
||||
reinterpret_cast<const T*>(target_field_data.scalars()
|
||||
.double_data()
|
||||
.data()
|
||||
.data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto ret_data = reinterpret_cast<std::string*>(ret.data());
|
||||
auto src_data = target_field_data.scalars().string_data().data();
|
||||
auto src_data =
|
||||
target_field_data.scalars().string_data().data();
|
||||
std::copy(src_data.begin(), src_data.end(), ret_data);
|
||||
|
||||
break;
|
||||
|
@ -163,19 +176,29 @@ struct GeneratedData {
|
|||
private:
|
||||
GeneratedData() = default;
|
||||
friend GeneratedData
|
||||
DataGen(SchemaPtr schema, int64_t N, uint64_t seed, uint64_t ts_offset, int repeat_count);
|
||||
DataGen(SchemaPtr schema,
|
||||
int64_t N,
|
||||
uint64_t seed,
|
||||
uint64_t ts_offset,
|
||||
int repeat_count);
|
||||
};
|
||||
|
||||
inline GeneratedData
|
||||
DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0, int repeat_count = 1) {
|
||||
DataGen(SchemaPtr schema,
|
||||
int64_t N,
|
||||
uint64_t seed = 42,
|
||||
uint64_t ts_offset = 0,
|
||||
int repeat_count = 1) {
|
||||
using std::vector;
|
||||
std::default_random_engine er(seed);
|
||||
std::normal_distribution<> distr(0, 1);
|
||||
int offset = 0;
|
||||
|
||||
auto insert_data = std::make_unique<InsertData>();
|
||||
auto insert_cols = [&insert_data](auto& data, int64_t count, auto& field_meta) {
|
||||
auto array = milvus::segcore::CreateDataArrayFrom(data.data(), count, field_meta);
|
||||
auto insert_cols = [&insert_data](
|
||||
auto& data, int64_t count, auto& field_meta) {
|
||||
auto array = milvus::segcore::CreateDataArrayFrom(
|
||||
data.data(), count, field_meta);
|
||||
insert_data->mutable_fields_data()->AddAllocated(array.release());
|
||||
};
|
||||
|
||||
|
@ -185,7 +208,8 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0,
|
|||
case DataType::VECTOR_FLOAT: {
|
||||
auto dim = field_meta.get_dim();
|
||||
vector<float> final(dim * N);
|
||||
bool is_ip = starts_with(field_meta.get_name().get(), "normalized");
|
||||
bool is_ip =
|
||||
starts_with(field_meta.get_name().get(), "normalized");
|
||||
#pragma omp parallel for
|
||||
for (int n = 0; n < N; ++n) {
|
||||
vector<float> data(dim);
|
||||
|
@ -204,7 +228,8 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42, uint64_t ts_offset = 0,
|
|||
}
|
||||
}
|
||||
|
||||
std::copy(data.begin(), data.end(), final.begin() + dim * n);
|
||||
std::copy(
|
||||
data.begin(), data.end(), final.begin() + dim * n);
|
||||
}
|
||||
insert_cols(final, N, field_meta);
|
||||
break;
|
||||
|
@ -318,7 +343,9 @@ CreatePlaceholderGroup(int64_t num_queries, int dim, int64_t seed = 42) {
|
|||
}
|
||||
|
||||
inline auto
|
||||
CreatePlaceholderGroup(int64_t num_queries, int dim, const std::vector<float>& vecs) {
|
||||
CreatePlaceholderGroup(int64_t num_queries,
|
||||
int dim,
|
||||
const std::vector<float>& vecs) {
|
||||
namespace ser = milvus::proto::common;
|
||||
ser::PlaceholderGroup raw_group;
|
||||
auto value = raw_group.add_placeholders();
|
||||
|
@ -355,7 +382,9 @@ CreatePlaceholderGroupFromBlob(int64_t num_queries, int dim, const float* src) {
|
|||
}
|
||||
|
||||
inline auto
|
||||
CreateBinaryPlaceholderGroup(int64_t num_queries, int64_t dim, int64_t seed = 42) {
|
||||
CreateBinaryPlaceholderGroup(int64_t num_queries,
|
||||
int64_t dim,
|
||||
int64_t seed = 42) {
|
||||
assert(dim % 8 == 0);
|
||||
namespace ser = milvus::proto::common;
|
||||
ser::PlaceholderGroup raw_group;
|
||||
|
@ -375,7 +404,9 @@ CreateBinaryPlaceholderGroup(int64_t num_queries, int64_t dim, int64_t seed = 42
|
|||
}
|
||||
|
||||
inline auto
|
||||
CreateBinaryPlaceholderGroupFromBlob(int64_t num_queries, int64_t dim, const uint8_t* ptr) {
|
||||
CreateBinaryPlaceholderGroupFromBlob(int64_t num_queries,
|
||||
int64_t dim,
|
||||
const uint8_t* ptr) {
|
||||
assert(dim % 8 == 0);
|
||||
namespace ser = milvus::proto::common;
|
||||
ser::PlaceholderGroup raw_group;
|
||||
|
@ -402,7 +433,8 @@ SearchResultToVector(const SearchResult& sr) {
|
|||
for (int q = 0; q < num_queries; ++q) {
|
||||
for (int k = 0; k < topk; ++k) {
|
||||
int index = q * topk + k;
|
||||
result.emplace_back(std::make_pair(sr.seg_offsets_[index], sr.distances_[index]));
|
||||
result.emplace_back(
|
||||
std::make_pair(sr.seg_offsets_[index], sr.distances_[index]));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
@ -417,7 +449,8 @@ SearchResultToJson(const SearchResult& sr) {
|
|||
std::vector<std::string> result;
|
||||
for (int k = 0; k < topk; ++k) {
|
||||
int index = q * topk + k;
|
||||
result.emplace_back(std::to_string(sr.seg_offsets_[index]) + "->" + std::to_string(sr.distances_[index]));
|
||||
result.emplace_back(std::to_string(sr.seg_offsets_[index]) + "->" +
|
||||
std::to_string(sr.distances_[index]));
|
||||
}
|
||||
results.emplace_back(std::move(result));
|
||||
}
|
||||
|
@ -433,7 +466,8 @@ SealedLoadFieldData(const GeneratedData& dataset,
|
|||
{
|
||||
LoadFieldDataInfo info;
|
||||
FieldMeta field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
|
||||
auto array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), row_count, field_meta);
|
||||
auto array = CreateScalarDataArrayFrom(
|
||||
dataset.row_ids_.data(), row_count, field_meta);
|
||||
info.field_data = array.get();
|
||||
info.row_count = dataset.row_ids_.size();
|
||||
info.field_id = RowFieldID.get(); // field id for RowId
|
||||
|
@ -441,8 +475,10 @@ SealedLoadFieldData(const GeneratedData& dataset,
|
|||
}
|
||||
{
|
||||
LoadFieldDataInfo info;
|
||||
FieldMeta field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
|
||||
auto array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), row_count, field_meta);
|
||||
FieldMeta field_meta(
|
||||
FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
|
||||
auto array = CreateScalarDataArrayFrom(
|
||||
dataset.timestamps_.data(), row_count, field_meta);
|
||||
info.field_data = array.get();
|
||||
info.row_count = dataset.timestamps_.size();
|
||||
info.field_id = TimestampFieldID.get();
|
||||
|
@ -474,13 +510,16 @@ SealedCreator(SchemaPtr schema, const GeneratedData& dataset) {
|
|||
inline std::unique_ptr<milvus::index::VectorIndex>
|
||||
GenVecIndexing(int64_t N, int64_t dim, const float* vec) {
|
||||
// {knowhere::IndexParams::nprobe, 10},
|
||||
auto conf = knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "1024"},
|
||||
{knowhere::meta::DEVICE_ID, 0}};
|
||||
auto conf =
|
||||
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "1024"},
|
||||
{knowhere::meta::DEVICE_ID, 0}};
|
||||
auto database = knowhere::GenDataSet(N, dim, vec);
|
||||
auto indexing = std::make_unique<index::VectorMemNMIndex>(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
||||
knowhere::metric::L2, IndexMode::MODE_CPU);
|
||||
auto indexing = std::make_unique<index::VectorMemNMIndex>(
|
||||
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
||||
knowhere::metric::L2,
|
||||
IndexMode::MODE_CPU);
|
||||
indexing->BuildWithDataset(database, conf);
|
||||
return indexing;
|
||||
}
|
||||
|
@ -502,7 +541,8 @@ GenScalarIndexing(int64_t N, const T* data) {
|
|||
inline std::vector<char>
|
||||
translate_text_plan_to_binary_plan(const char* text_plan) {
|
||||
proto::plan::PlanNode plan_node;
|
||||
auto ok = google::protobuf::TextFormat::ParseFromString(text_plan, &plan_node);
|
||||
auto ok =
|
||||
google::protobuf::TextFormat::ParseFromString(text_plan, &plan_node);
|
||||
AssertInfo(ok, "Failed to parse");
|
||||
|
||||
std::string binary_plan;
|
||||
|
|
|
@ -92,7 +92,15 @@ get_default_storage_config() {
|
|||
auto iamEndPoint = minioConfig["iamEndpoint"].as<std::string>();
|
||||
auto bucketName = minioConfig["bucketName"].as<std::string>();
|
||||
|
||||
return StorageConfig{endpoint, bucketName, accessKey, accessValue, rootPath, "minio", iamEndPoint, useSSL, useIam};
|
||||
return StorageConfig{endpoint,
|
||||
bucketName,
|
||||
accessKey,
|
||||
accessValue,
|
||||
rootPath,
|
||||
"minio",
|
||||
iamEndPoint,
|
||||
useSSL,
|
||||
useIam};
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -173,7 +181,8 @@ class TestConfigWrapper {
|
|||
strcpy(const_cast<char*>(config_.address), address.c_str());
|
||||
strcpy(const_cast<char*>(config_.bucket_name), bucketName.c_str());
|
||||
strcpy(const_cast<char*>(config_.access_key_id), accessKey.c_str());
|
||||
strcpy(const_cast<char*>(config_.access_key_value), accessValue.c_str());
|
||||
strcpy(const_cast<char*>(config_.access_key_value),
|
||||
accessValue.c_str());
|
||||
strcpy(const_cast<char*>(config_.remote_root_path), rootPath.c_str());
|
||||
strcpy(const_cast<char*>(config_.storage_type), storage_type.c_str());
|
||||
strcpy(const_cast<char*>(config_.iam_endpoint), iamEndPoint.c_str());
|
||||
|
@ -190,7 +199,8 @@ get_default_cstorage_config() {
|
|||
}
|
||||
|
||||
auto
|
||||
generate_build_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type) {
|
||||
generate_build_conf(const milvus::IndexType& index_type,
|
||||
const milvus::MetricType& metric_type) {
|
||||
if (index_type == knowhere::IndexEnum::INDEX_FAISS_IDMAP) {
|
||||
return knowhere::Json{
|
||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
|
@ -198,8 +208,10 @@ generate_build_conf(const milvus::IndexType& index_type, const milvus::MetricTyp
|
|||
};
|
||||
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
|
||||
return knowhere::Json{
|
||||
{knowhere::meta::METRIC_TYPE, metric_type}, {knowhere::meta::DIM, std::to_string(DIM)},
|
||||
{knowhere::indexparam::NLIST, "16"}, {knowhere::indexparam::M, "4"},
|
||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||
{knowhere::indexparam::NLIST, "16"},
|
||||
{knowhere::indexparam::M, "4"},
|
||||
{knowhere::indexparam::NBITS, "8"},
|
||||
};
|
||||
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) {
|
||||
|
@ -252,12 +264,15 @@ generate_build_conf(const milvus::IndexType& index_type, const milvus::MetricTyp
|
|||
}
|
||||
|
||||
auto
|
||||
generate_load_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type, int64_t nb) {
|
||||
generate_load_conf(const milvus::IndexType& index_type,
|
||||
const milvus::MetricType& metric_type,
|
||||
int64_t nb) {
|
||||
if (index_type == knowhere::IndexEnum::INDEX_DISKANN) {
|
||||
return knowhere::Json{
|
||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET, std::to_string(0.0002)},
|
||||
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET,
|
||||
std::to_string(0.0002)},
|
||||
};
|
||||
}
|
||||
return knowhere::Json();
|
||||
|
@ -275,12 +290,14 @@ search_with_nprobe_list() {
|
|||
}
|
||||
|
||||
auto
|
||||
generate_search_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type) {
|
||||
generate_search_conf(const milvus::IndexType& index_type,
|
||||
const milvus::MetricType& metric_type) {
|
||||
auto conf = milvus::Config{
|
||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
};
|
||||
|
||||
if (milvus::index::is_in_list<milvus::IndexType>(index_type, search_with_nprobe_list)) {
|
||||
if (milvus::index::is_in_list<milvus::IndexType>(index_type,
|
||||
search_with_nprobe_list)) {
|
||||
conf[knowhere::indexparam::NPROBE] = 4;
|
||||
} else if (index_type == knowhere::IndexEnum::INDEX_HNSW) {
|
||||
conf[knowhere::indexparam::EF] = 200;
|
||||
|
@ -293,7 +310,8 @@ generate_search_conf(const milvus::IndexType& index_type, const milvus::MetricTy
|
|||
}
|
||||
|
||||
auto
|
||||
generate_range_search_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type) {
|
||||
generate_range_search_conf(const milvus::IndexType& index_type,
|
||||
const milvus::MetricType& metric_type) {
|
||||
auto conf = milvus::Config{
|
||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
};
|
||||
|
@ -306,7 +324,8 @@ generate_range_search_conf(const milvus::IndexType& index_type, const milvus::Me
|
|||
conf[knowhere::meta::RANGE_FILTER] = 0.1;
|
||||
}
|
||||
|
||||
if (milvus::index::is_in_list<milvus::IndexType>(index_type, search_with_nprobe_list)) {
|
||||
if (milvus::index::is_in_list<milvus::IndexType>(index_type,
|
||||
search_with_nprobe_list)) {
|
||||
conf[knowhere::indexparam::NPROBE] = 4;
|
||||
} else if (index_type == knowhere::IndexEnum::INDEX_HNSW) {
|
||||
conf[knowhere::indexparam::EF] = 200;
|
||||
|
@ -319,7 +338,8 @@ generate_range_search_conf(const milvus::IndexType& index_type, const milvus::Me
|
|||
}
|
||||
|
||||
auto
|
||||
generate_params(const knowhere::IndexType& index_type, const knowhere::MetricType& metric_type) {
|
||||
generate_params(const knowhere::IndexType& index_type,
|
||||
const knowhere::MetricType& metric_type) {
|
||||
namespace indexcgo = milvus::proto::indexcgo;
|
||||
|
||||
indexcgo::TypeParams type_params;
|
||||
|
@ -328,7 +348,8 @@ generate_params(const knowhere::IndexType& index_type, const knowhere::MetricTyp
|
|||
auto configs = generate_build_conf(index_type, metric_type);
|
||||
for (auto& [key, value] : configs.items()) {
|
||||
auto param = index_params.add_params();
|
||||
auto value_str = value.is_string() ? value.get<std::string>() : value.dump();
|
||||
auto value_str =
|
||||
value.is_string() ? value.get<std::string>() : value.dump();
|
||||
param->set_key(key);
|
||||
param->set_value(value_str);
|
||||
}
|
||||
|
@ -341,13 +362,18 @@ generate_params(const knowhere::IndexType& index_type, const knowhere::MetricTyp
|
|||
}
|
||||
|
||||
auto
|
||||
GenDataset(int64_t N, const knowhere::MetricType& metric_type, bool is_binary, int64_t dim = DIM) {
|
||||
GenDataset(int64_t N,
|
||||
const knowhere::MetricType& metric_type,
|
||||
bool is_binary,
|
||||
int64_t dim = DIM) {
|
||||
auto schema = std::make_shared<milvus::Schema>();
|
||||
if (!is_binary) {
|
||||
schema->AddDebugField("fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
schema->AddDebugField(
|
||||
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
} else {
|
||||
schema->AddDebugField("fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
|
||||
schema->AddDebugField(
|
||||
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
}
|
||||
}
|
||||
|
@ -408,15 +434,22 @@ Jaccard(const uint8_t* point_a, const uint8_t* point_b, int dim) {
|
|||
}
|
||||
|
||||
float
|
||||
CountDistance(
|
||||
const void* point_a, const void* point_b, int dim, const knowhere::MetricType& metric, bool is_binary = false) {
|
||||
CountDistance(const void* point_a,
|
||||
const void* point_b,
|
||||
int dim,
|
||||
const knowhere::MetricType& metric,
|
||||
bool is_binary = false) {
|
||||
if (point_a == nullptr || point_b == nullptr) {
|
||||
return std::numeric_limits<float>::max();
|
||||
}
|
||||
if (milvus::IsMetricType(metric, knowhere::metric::L2)) {
|
||||
return L2(static_cast<const float*>(point_a), static_cast<const float*>(point_b), dim);
|
||||
return L2(static_cast<const float*>(point_a),
|
||||
static_cast<const float*>(point_b),
|
||||
dim);
|
||||
} else if (milvus::IsMetricType(metric, knowhere::metric::JACCARD)) {
|
||||
return Jaccard(static_cast<const uint8_t*>(point_a), static_cast<const uint8_t*>(point_b), dim);
|
||||
return Jaccard(static_cast<const uint8_t*>(point_a),
|
||||
static_cast<const uint8_t*>(point_b),
|
||||
dim);
|
||||
} else {
|
||||
return std::numeric_limits<float>::max();
|
||||
}
|
||||
|
@ -437,7 +470,8 @@ CheckDistances(const QueryResultPtr& result,
|
|||
for (auto j = 0; j < k; ++j) {
|
||||
auto dis = result->distances_[i * k + j];
|
||||
auto id = result->seg_offsets_[i * k + j];
|
||||
auto count_dis = CountDistance(query_vecs + i * dim, base_vecs + id * dim, dim, metric);
|
||||
auto count_dis = CountDistance(
|
||||
query_vecs + i * dim, base_vecs + id * dim, dim, metric);
|
||||
// assert(std::abs(dis - count_dis) < threshold);
|
||||
}
|
||||
}
|
||||
|
@ -472,7 +506,9 @@ generate_index_params(const MapParams& m) {
|
|||
}
|
||||
|
||||
// TODO: std::is_arithmetic_v, hard to compare float point value. std::is_integral_v.
|
||||
template <typename T, typename = typename std::enable_if_t<std::is_arithmetic_v<T> || std::is_same_v<T, std::string>>>
|
||||
template <typename T,
|
||||
typename = typename std::enable_if_t<std::is_arithmetic_v<T> ||
|
||||
std::is_same_v<T, std::string>>>
|
||||
inline std::vector<T>
|
||||
GenArr(int64_t n) {
|
||||
auto max_i8 = std::numeric_limits<int8_t>::max() - 1;
|
||||
|
@ -507,7 +543,8 @@ GenArr<std::string>(int64_t n) {
|
|||
std::vector<ScalarTestParams>
|
||||
GenBoolParams() {
|
||||
std::vector<ScalarTestParams> ret;
|
||||
ret.emplace_back(ScalarTestParams(MapParams(), {{"index_type", "inverted_index"}}));
|
||||
ret.emplace_back(
|
||||
ScalarTestParams(MapParams(), {{"index_type", "inverted_index"}}));
|
||||
ret.emplace_back(ScalarTestParams(MapParams(), {{"index_type", "flat"}}));
|
||||
return ret;
|
||||
}
|
||||
|
@ -519,7 +556,9 @@ GenStringParams() {
|
|||
return ret;
|
||||
}
|
||||
|
||||
template <typename T, typename = typename std::enable_if_t<std::is_arithmetic_v<T> | std::is_same_v<std::string, T>>>
|
||||
template <typename T,
|
||||
typename = typename std::enable_if_t<std::is_arithmetic_v<T> |
|
||||
std::is_same_v<std::string, T>>>
|
||||
inline std::vector<ScalarTestParams>
|
||||
GenParams() {
|
||||
if (std::is_same_v<std::string, T>) {
|
||||
|
@ -531,7 +570,8 @@ GenParams() {
|
|||
}
|
||||
|
||||
std::vector<ScalarTestParams> ret;
|
||||
ret.emplace_back(ScalarTestParams(MapParams(), {{"index_type", "inverted_index"}}));
|
||||
ret.emplace_back(
|
||||
ScalarTestParams(MapParams(), {{"index_type", "inverted_index"}}));
|
||||
ret.emplace_back(ScalarTestParams(MapParams(), {{"index_type", "flat"}}));
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue