2020-11-24 13:28:38 +00:00
|
|
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
|
|
|
|
2023-12-05 08:48:54 +00:00
|
|
|
#include <arrow/record_batch.h>
|
|
|
|
#include <arrow/type_fwd.h>
|
2020-09-15 02:00:00 +00:00
|
|
|
#include <gtest/gtest.h>
|
|
|
|
|
2023-12-05 08:48:54 +00:00
|
|
|
#include <boost/filesystem/operations.hpp>
|
2020-09-15 02:00:00 +00:00
|
|
|
#include <iostream>
|
2023-12-05 08:48:54 +00:00
|
|
|
#include <memory>
|
2020-09-15 02:00:00 +00:00
|
|
|
#include <random>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
2023-12-05 08:48:54 +00:00
|
|
|
#include "arrow/type.h"
|
2023-09-11 12:43:17 +00:00
|
|
|
#include "common/EasyAssert.h"
|
2024-01-10 13:58:51 +00:00
|
|
|
#include "common/Tracer.h"
|
2023-11-10 07:44:22 +00:00
|
|
|
#include "common/Types.h"
|
2023-12-05 08:48:54 +00:00
|
|
|
#include "index/Index.h"
|
2023-08-10 05:59:15 +00:00
|
|
|
#include "knowhere/comp/index_param.h"
|
2023-09-11 12:43:17 +00:00
|
|
|
#include "nlohmann/json.hpp"
|
2021-10-13 03:00:39 +00:00
|
|
|
#include "query/SearchBruteForce.h"
|
2020-11-18 02:07:05 +00:00
|
|
|
#include "segcore/Reduce.h"
|
2022-09-21 12:16:51 +00:00
|
|
|
#include "index/IndexFactory.h"
|
|
|
|
#include "common/QueryResult.h"
|
2023-08-10 05:59:15 +00:00
|
|
|
#include "segcore/Types.h"
|
2023-12-05 08:48:54 +00:00
|
|
|
#include "storage/options.h"
|
2022-09-21 12:16:51 +00:00
|
|
|
#include "test_utils/indexbuilder_test_utils.h"
|
2023-08-13 12:41:31 +00:00
|
|
|
#include "test_utils/storage_test_utils.h"
|
2020-11-30 14:14:19 +00:00
|
|
|
#include "test_utils/DataGen.h"
|
2021-10-13 03:00:39 +00:00
|
|
|
#include "test_utils/Timer.h"
|
2023-06-25 06:38:44 +00:00
|
|
|
#include "storage/Util.h"
|
2023-12-05 08:48:54 +00:00
|
|
|
#include <boost/filesystem.hpp>
|
2022-09-21 12:16:51 +00:00
|
|
|
|
2020-09-15 02:00:00 +00:00
|
|
|
using namespace milvus;
|
2021-10-13 03:00:39 +00:00
|
|
|
using namespace milvus::segcore;
|
2020-09-15 02:00:00 +00:00
|
|
|
|
|
|
|
namespace {

/**
 * Builds a deterministic synthetic dataset of N rows.
 *
 * @tparam DIM number of float components per row.
 * @param N    number of rows to generate.
 * @return a tuple (raw_data, timestamps, uids) where
 *         - raw_data holds N * DIM floats stored row by row, each drawn from
 *           a uniform distribution over [0, 1) with a fixed seed (42),
 *         - timestamps holds N zeros,
 *         - uids holds 10 * N + i for row i.
 */
template <int DIM>
auto
generate_data(int N) {
    std::vector<float> raw_data;
    std::vector<uint64_t> timestamps;
    std::vector<int64_t> uids;
    if (N > 0 && DIM > 0) {
        const auto rows = static_cast<size_t>(N);
        raw_data.reserve(rows * static_cast<size_t>(DIM));
        timestamps.reserve(rows);
        uids.reserve(rows);
    }

    // Fixed seed keeps the generated data identical from run to run.
    std::default_random_engine er(42);
    std::uniform_real_distribution<> distribution(0.0, 1.0);
    for (int i = 0; i < N; ++i) {
        // 64-bit arithmetic so that 10 * N cannot overflow int.
        uids.push_back(int64_t{10} * N + i);
        timestamps.push_back(0);
        // append vec
        for (int d = 0; d < DIM; ++d) {
            raw_data.push_back(static_cast<float>(distribution(er)));
        }
    }
    // Move the buffers into the tuple instead of copying them.
    return std::make_tuple(
        std::move(raw_data), std::move(timestamps), std::move(uids));
}

}  // namespace
|
2020-09-17 10:37:54 +00:00
|
|
|
|
2023-09-11 12:43:17 +00:00
|
|
|
/**
 * Merges a new batch of per-query results into the accumulated results,
 * in place.
 *
 * Both the accumulated arrays (distances / uids) and the new arrays
 * (new_distances / new_uids) are laid out as `queries` consecutive rows of
 * `topk` entries. For each query the two rows are merged front to back and
 * the first `topk` merged entries are written back into the accumulated row.
 * On equal distances the entry from the new batch is taken first.
 *
 * NOTE: the result is the k smallest distances only when both input rows are
 * already sorted in ascending order; this function does not sort them.
 *
 * @param queries       number of query rows.
 * @param topk          number of entries per row.
 * @param distances     accumulated distances, updated in place.
 * @param uids          accumulated ids, updated in place.
 * @param new_distances distances of the new batch (read only).
 * @param new_uids      ids of the new batch (read only).
 * @return SegcoreError::success() in every case.
 */
SegcoreError
merge_into(int64_t queries,
           int64_t topk,
           float* distances,
           int64_t* uids,
           const float* new_distances,
           const int64_t* new_uids) {
    // Nothing to merge; also keeps a negative topk away from the vector
    // size constructor below.
    if (queries <= 0 || topk <= 0) {
        return SegcoreError::success();
    }

    // Scratch rows are allocated once and reused for every query.
    std::vector<float> merged_dis(topk);
    std::vector<int64_t> merged_uids(topk);

    for (int64_t qn = 0; qn < queries; ++qn) {
        const int64_t base = qn * topk;
        float* acc_dis = distances + base;
        int64_t* acc_uids = uids + base;
        const float* fresh_dis = new_distances + base;
        const int64_t* fresh_uids = new_uids + base;

        int64_t fresh_pos = 0;
        int64_t acc_pos = 0;
        for (int64_t out = 0; out < topk; ++out) {
            // fresh_pos + acc_pos == out < topk, so neither cursor can run
            // past the end of its row.
            if (fresh_dis[fresh_pos] <= acc_dis[acc_pos]) {
                merged_dis[out] = fresh_dis[fresh_pos];
                merged_uids[out] = fresh_uids[fresh_pos];
                ++fresh_pos;
            } else {
                merged_dis[out] = acc_dis[acc_pos];
                merged_uids[out] = acc_uids[acc_pos];
                ++acc_pos;
            }
        }
        std::copy_n(merged_dis.data(), topk, acc_dis);
        std::copy_n(merged_uids.data(), topk, acc_uids);
    }
    return SegcoreError::success();
}
|
|
|
|
|
2023-02-10 06:24:32 +00:00
|
|
|
/*
|
2020-10-23 10:01:24 +00:00
|
|
|
TEST(Indexing, SmartBruteForce) {
|
2022-09-21 12:16:51 +00:00
|
|
|
int64_t N = 1000;
|
2020-10-23 10:01:24 +00:00
|
|
|
auto [raw_data, timestamps, uids] = generate_data<DIM>(N);
|
2022-12-29 07:29:31 +00:00
|
|
|
|
|
|
|
constexpr int64_t queries = 3;
|
|
|
|
auto total_count = queries * K;
|
|
|
|
|
2020-10-23 10:01:24 +00:00
|
|
|
auto raw = (const float*)raw_data.data();
|
2022-09-21 12:16:51 +00:00
|
|
|
EXPECT_NE(raw, nullptr);
|
2020-09-17 10:37:54 +00:00
|
|
|
|
|
|
|
auto query_data = raw;
|
|
|
|
|
2021-10-13 03:00:39 +00:00
|
|
|
std::vector<int64_t> final_uids(total_count, -1);
|
|
|
|
std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
|
2020-09-17 10:37:54 +00:00
|
|
|
|
2020-12-23 11:02:37 +00:00
|
|
|
for (int beg = 0; beg < N; beg += TestChunkSize) {
|
2021-10-13 03:00:39 +00:00
|
|
|
std::vector<int64_t> buf_uids(total_count, -1);
|
|
|
|
std::vector<float> buf_dis(total_count, std::numeric_limits<float>::max());
|
2022-09-21 12:16:51 +00:00
|
|
|
faiss::float_maxheap_array_t buf = {queries, K, buf_uids.data(), buf_dis.data()};
|
2020-12-23 11:02:37 +00:00
|
|
|
auto end = beg + TestChunkSize;
|
2020-09-17 10:37:54 +00:00
|
|
|
if (end > N) {
|
|
|
|
end = N;
|
|
|
|
}
|
|
|
|
auto nsize = end - beg;
|
|
|
|
auto src_data = raw + beg * DIM;
|
|
|
|
|
|
|
|
faiss::knn_L2sqr(query_data, src_data, DIM, queries, nsize, &buf, nullptr);
|
2020-11-24 11:09:57 +00:00
|
|
|
for (auto& x : buf_uids) {
|
|
|
|
x = uids[x + beg];
|
2020-09-17 10:37:54 +00:00
|
|
|
}
|
2022-09-21 12:16:51 +00:00
|
|
|
merge_into(queries, K, final_dis.data(), final_uids.data(), buf_dis.data(), buf_uids.data());
|
2020-09-17 10:37:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (int qn = 0; qn < queries; ++qn) {
|
2022-09-21 12:16:51 +00:00
|
|
|
for (int kn = 0; kn < K; ++kn) {
|
|
|
|
auto index = qn * K + kn;
|
2021-10-13 03:00:39 +00:00
|
|
|
std::cout << final_uids[index] << "->" << final_dis[index] << std::endl;
|
2020-09-17 10:37:54 +00:00
|
|
|
}
|
2021-10-13 03:00:39 +00:00
|
|
|
std::cout << std::endl;
|
2020-09-17 10:37:54 +00:00
|
|
|
}
|
|
|
|
}
|
2023-02-10 06:24:32 +00:00
|
|
|
*/
|
2020-12-08 10:51:07 +00:00
|
|
|
// Runs a brute-force JACCARD search over a generated binary-vector column and
// compares the JSON rendering of the result with platform-specific golden
// output.
TEST(Indexing, BinaryBruteForce) {
    int64_t N = 100000;
    int64_t num_queries = 10;
    int64_t topk = 5;
    int64_t round_decimal = 3;
    int64_t dim = 8192;
    auto metric_type = knowhere::metric::JACCARD;

    auto schema = std::make_shared<Schema>();
    auto vec_fid = schema->AddDebugField(
        "vecbin", DataType::VECTOR_BINARY, dim, metric_type);
    // Not read below, but the call registers a second field on the schema
    // that is handed to DataGen, so it is kept as is.
    auto i64_fid = schema->AddDebugField("age", DataType::INT64);
    auto dataset = DataGen(schema, N, 10);
    auto bin_vec = dataset.get_col<uint8_t>(vec_fid);

    // Each binary row takes dim / 8 bytes; the queries are the rows starting
    // at index 1024, which is why every expected row below begins with a
    // self-match at distance 0.
    auto query_data = 1024 * dim / 8 + bin_vec.data();
    query::dataset::SearchDataset search_dataset{
        metric_type,  //
        num_queries,  //
        topk,         //
        round_decimal,
        dim,        //
        query_data  //
    };

    SearchInfo search_info;
    search_info.topk_ = topk;
    search_info.round_decimal_ = round_decimal;
    search_info.metric_type_ = metric_type;
    auto sub_result = query::BruteForceSearch(search_dataset,
                                              bin_vec.data(),
                                              N,
                                              search_info,
                                              nullptr,
                                              DataType::VECTOR_BINARY);

    SearchResult sr;
    sr.total_nq_ = num_queries;
    sr.unity_topK_ = topk;
    sr.seg_offsets_ = std::move(sub_result.mutable_seg_offsets());
    sr.distances_ = std::move(sub_result.mutable_distances());

    auto json = SearchResultToJson(sr);
    // Dump once and reuse the text for both the log output and the check.
    auto json_str = json.dump(2);
    std::cout << json_str;
#ifdef __linux__
    auto ref = nlohmann::json::parse(R"(
[
  [
    [ "1024->0.000000", "48942->0.642000", "18494->0.644000", "68225->0.644000", "93557->0.644000" ],
    [ "1025->0.000000", "73557->0.641000", "53086->0.643000", "9737->0.643000", "62855->0.644000" ],
    [ "1026->0.000000", "62904->0.644000", "46758->0.644000", "57969->0.645000", "98113->0.646000" ],
    [ "1027->0.000000", "92446->0.638000", "96034->0.640000", "92129->0.644000", "45887->0.644000" ],
    [ "1028->0.000000", "22992->0.643000", "73903->0.644000", "19969->0.645000", "65178->0.645000" ],
    [ "1029->0.000000", "19776->0.641000", "15166->0.642000", "85470->0.642000", "16730->0.643000" ],
    [ "1030->0.000000", "55939->0.640000", "84253->0.643000", "31958->0.644000", "11667->0.646000" ],
    [ "1031->0.000000", "89536->0.637000", "61622->0.638000", "9275->0.639000", "91403->0.640000" ],
    [ "1032->0.000000", "69504->0.642000", "23414->0.644000", "48770->0.645000", "23231->0.645000" ],
    [ "1033->0.000000", "33540->0.636000", "25310->0.640000", "18576->0.640000", "73729->0.642000" ]
  ]
]
)");
#else  // for mac
    auto ref = nlohmann::json::parse(R"(
[
  [
    [ "1024->0.000000", "59169->0.645000", "98548->0.646000", "3356->0.646000", "90373->0.647000" ],
    [ "1025->0.000000", "61245->0.638000", "95271->0.639000", "31087->0.639000", "31549->0.640000" ],
    [ "1026->0.000000", "65225->0.648000", "35750->0.648000", "14971->0.649000", "75385->0.649000" ],
    [ "1027->0.000000", "70158->0.640000", "27076->0.640000", "3407->0.641000", "59527->0.641000" ],
    [ "1028->0.000000", "45757->0.645000", "3356->0.645000", "77230->0.646000", "28690->0.647000" ],
    [ "1029->0.000000", "13291->0.642000", "24960->0.643000", "83770->0.643000", "88244->0.643000" ],
    [ "1030->0.000000", "96807->0.641000", "39920->0.643000", "62943->0.644000", "12603->0.644000" ],
    [ "1031->0.000000", "65769->0.648000", "60493->0.648000", "48738->0.648000", "4353->0.648000" ],
    [ "1032->0.000000", "57827->0.637000", "8213->0.638000", "22221->0.639000", "23328->0.640000" ],
    [ "1033->0.000000", "676->0.645000", "91430->0.646000", "85353->0.646000", "6014->0.646000" ]
  ]
]
)");
#endif
    // Both sides go through dump(2), so whitespace in the literals above does
    // not affect the comparison.
    auto ref_str = ref.dump(2);
    ASSERT_EQ(json_str, ref_str);
}
|
2022-09-21 12:16:51 +00:00
|
|
|
|
|
|
|
// Builds an IVF_PQ index over generated float vectors, masks out the first
// half of the rows through a bitset, and checks that no returned offset falls
// into the masked half.
TEST(Indexing, Naive) {
    constexpr int N = 10000;
    constexpr int TOPK = 10;

    auto [raw_data, timestamps, uids] = generate_data<DIM>(N);
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.field_type = DataType::VECTOR_FLOAT;
    create_index_info.metric_type = knowhere::metric::L2;
    create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();
    auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, milvus::storage::FileManagerContext());

    auto build_conf = knowhere::Json{
        {knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
        {knowhere::meta::DIM, std::to_string(DIM)},
        {knowhere::indexparam::NLIST, "100"},
        {knowhere::indexparam::M, "4"},
        {knowhere::indexparam::NBITS, "8"},
    };

    auto search_conf = knowhere::Json{
        {knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
        {knowhere::indexparam::NPROBE, 4},
    };

    std::vector<knowhere::DataSetPtr> datasets;
    // Owns the chunk buffers so the pointers handed to GenDataSet stay valid
    // until the index has been built.
    std::vector<std::vector<float>> ftrashs;
    auto raw = raw_data.data();
    for (int beg = 0; beg < N; beg += TestChunkSize) {
        auto end = beg + TestChunkSize;
        if (end > N) {
            end = N;
        }
        std::vector<float> ft(raw + DIM * beg, raw + DIM * end);

        auto ds = knowhere::GenDataSet(end - beg, DIM, ft.data());
        datasets.push_back(ds);
        ftrashs.push_back(std::move(ft));
    }

    for (auto& ds : datasets) {
        index->BuildWithDataset(ds, build_conf);
    }

    auto bitmap = BitsetType(N, false);
    // exclude the first half of the rows
    for (int i = 0; i < N / 2; ++i) {
        bitmap.set(i);
    }

    BitsetView view = bitmap;
    auto query_ds = knowhere::GenDataSet(1, DIM, raw_data.data());

    milvus::SearchInfo searchInfo;
    searchInfo.topk_ = TOPK;
    searchInfo.metric_type_ = knowhere::metric::L2;
    searchInfo.search_params_ = search_conf;
    auto vec_index = dynamic_cast<index::VectorIndex*>(index.get());
    // Fail the test cleanly instead of dereferencing a null pointer if the
    // factory ever hands back something that is not a vector index.
    ASSERT_NE(vec_index, nullptr);
    SearchResult result;
    vec_index->Query(query_ds, searchInfo, view, result);

    // Guard the indexing below against a short result.
    ASSERT_GE(result.seg_offsets_.size(), static_cast<size_t>(TOPK));
    for (int i = 0; i < TOPK; ++i) {
        ASSERT_FALSE(result.seg_offsets_[i] < N / 2);
    }
}
|
|
|
|
|
|
|
|
using Param = std::pair<knowhere::IndexType, knowhere::MetricType>;
|
|
|
|
|
|
|
|
class IndexTest : public ::testing::TestWithParam<Param> {
|
|
|
|
protected:
|
|
|
|
void
|
|
|
|
SetUp() override {
|
2023-08-11 02:37:36 +00:00
|
|
|
storage_config_ = get_default_local_storage_config();
|
2022-09-21 12:16:51 +00:00
|
|
|
|
|
|
|
auto param = GetParam();
|
|
|
|
index_type = param.first;
|
|
|
|
metric_type = param.second;
|
2023-09-13 07:41:18 +00:00
|
|
|
|
|
|
|
// try to reduce the test time,
|
|
|
|
// but the large dataset is needed for the case below.
|
|
|
|
auto test_name = std::string(
|
|
|
|
testing::UnitTest::GetInstance()->current_test_info()->name());
|
|
|
|
if (test_name == "Mmap" &&
|
|
|
|
index_type == knowhere::IndexEnum::INDEX_HNSW) {
|
2023-08-10 05:59:15 +00:00
|
|
|
NB = 270000;
|
|
|
|
}
|
2022-09-21 12:16:51 +00:00
|
|
|
build_conf = generate_build_conf(index_type, metric_type);
|
|
|
|
load_conf = generate_load_conf(index_type, metric_type, NB);
|
|
|
|
search_conf = generate_search_conf(index_type, metric_type);
|
2023-02-21 01:48:32 +00:00
|
|
|
range_search_conf = generate_range_search_conf(index_type, metric_type);
|
2022-09-21 12:16:51 +00:00
|
|
|
|
2024-03-12 16:16:30 +00:00
|
|
|
if (index_type == knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
|
|
|
|
index_type == knowhere::IndexEnum::INDEX_SPARSE_WAND) {
|
|
|
|
is_sparse = true;
|
|
|
|
vec_field_data_type = milvus::DataType::VECTOR_SPARSE_FLOAT;
|
|
|
|
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT ||
|
|
|
|
index_type == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) {
|
|
|
|
is_binary = true;
|
2022-09-21 12:16:51 +00:00
|
|
|
vec_field_data_type = milvus::DataType::VECTOR_BINARY;
|
|
|
|
} else {
|
|
|
|
vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
|
|
|
|
}
|
|
|
|
|
2024-03-12 16:16:30 +00:00
|
|
|
auto dataset =
|
|
|
|
GenDatasetWithDataType(NB, metric_type, vec_field_data_type);
|
|
|
|
if (is_binary) {
|
|
|
|
// binary vector
|
2022-09-21 12:16:51 +00:00
|
|
|
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
2023-02-10 06:24:32 +00:00
|
|
|
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data());
|
2023-03-10 01:47:54 +00:00
|
|
|
xq_dataset = knowhere::GenDataSet(
|
|
|
|
NQ, DIM, xb_bin_data.data() + DIM * query_offset);
|
2024-03-12 16:16:30 +00:00
|
|
|
} else if (is_sparse) {
|
|
|
|
// sparse vector
|
|
|
|
xb_sparse_data =
|
|
|
|
dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
|
|
|
milvus::FieldId(100));
|
|
|
|
xb_dataset =
|
|
|
|
knowhere::GenDataSet(NB, kTestSparseDim, xb_sparse_data.data());
|
|
|
|
xb_dataset->SetIsSparse(true);
|
|
|
|
xq_dataset = knowhere::GenDataSet(
|
|
|
|
NQ, kTestSparseDim, xb_sparse_data.data() + query_offset);
|
|
|
|
xq_dataset->SetIsSparse(true);
|
|
|
|
} else {
|
|
|
|
// float vector
|
|
|
|
xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
|
|
|
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
|
|
|
xq_dataset = knowhere::GenDataSet(
|
|
|
|
NQ, DIM, xb_data.data() + DIM * query_offset);
|
2022-09-21 12:16:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
TearDown() override {
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
std::string index_type, metric_type;
|
2024-03-12 16:16:30 +00:00
|
|
|
bool is_binary = false;
|
|
|
|
bool is_sparse = false;
|
2022-09-21 12:16:51 +00:00
|
|
|
milvus::Config build_conf;
|
|
|
|
milvus::Config load_conf;
|
|
|
|
milvus::Config search_conf;
|
2023-02-21 01:48:32 +00:00
|
|
|
milvus::Config range_search_conf;
|
2022-09-21 12:16:51 +00:00
|
|
|
milvus::DataType vec_field_data_type;
|
2023-02-10 06:24:32 +00:00
|
|
|
knowhere::DataSetPtr xb_dataset;
|
2023-11-10 07:44:22 +00:00
|
|
|
FixedVector<float> xb_data;
|
|
|
|
FixedVector<uint8_t> xb_bin_data;
|
2024-03-12 16:16:30 +00:00
|
|
|
FixedVector<knowhere::sparse::SparseRow<float>> xb_sparse_data;
|
2023-02-10 06:24:32 +00:00
|
|
|
knowhere::DataSetPtr xq_dataset;
|
2022-09-21 12:16:51 +00:00
|
|
|
int64_t query_offset = 100;
|
2024-03-12 16:16:30 +00:00
|
|
|
int64_t NB = 3000; // will be updated to 27000 for mmap+hnsw
|
2022-10-14 06:45:23 +00:00
|
|
|
StorageConfig storage_config_;
|
2022-09-21 12:16:51 +00:00
|
|
|
};
|
|
|
|
|
2024-03-11 06:45:02 +00:00
|
|
|
// Index type / metric combinations every IndexTest case is run against.
INSTANTIATE_TEST_SUITE_P(
    IndexTypeParameters,
    IndexTest,
    ::testing::Values(
        // dense float vector indexes
        std::make_pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP,
                       knowhere::metric::L2),
        std::make_pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ,
                       knowhere::metric::L2),
        std::make_pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
                       knowhere::metric::L2),
        std::make_pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
                       knowhere::metric::L2),
        // binary vector indexes
        std::make_pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
                       knowhere::metric::JACCARD),
        std::make_pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
                       knowhere::metric::JACCARD),
        // sparse vector indexes
        std::make_pair(knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX,
                       knowhere::metric::IP),
        std::make_pair(knowhere::IndexEnum::INDEX_SPARSE_WAND,
                       knowhere::metric::IP),
#ifdef BUILD_DISK_ANN
        std::make_pair(knowhere::IndexEnum::INDEX_DISKANN,
                       knowhere::metric::L2),
#endif
        std::make_pair(knowhere::IndexEnum::INDEX_HNSW,
                       knowhere::metric::L2)));
|
2022-09-21 12:16:51 +00:00
|
|
|
|
2024-05-08 04:17:29 +00:00
|
|
|
/*TEST(Indexing, Iterator) {
|
2024-02-21 06:02:53 +00:00
|
|
|
constexpr int N = 10240;
|
|
|
|
constexpr int TOPK = 100;
|
|
|
|
constexpr int dim = 128;
|
|
|
|
constexpr int chunk_size = 5120;
|
|
|
|
|
|
|
|
auto [raw_data, timestamps, uids] = generate_data<dim>(N);
|
|
|
|
milvus::index::CreateIndexInfo create_index_info;
|
|
|
|
create_index_info.field_type = DataType::VECTOR_FLOAT;
|
|
|
|
create_index_info.metric_type = knowhere::metric::L2;
|
|
|
|
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC;
|
|
|
|
create_index_info.index_engine_version =
|
2024-02-22 11:56:52 +00:00
|
|
|
knowhere::Version::GetCurrentVersion().VersionNumber();
|
2024-02-21 06:02:53 +00:00
|
|
|
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
2024-02-22 11:56:52 +00:00
|
|
|
create_index_info, milvus::storage::FileManagerContext());
|
2024-02-21 06:02:53 +00:00
|
|
|
|
|
|
|
auto build_conf = knowhere::Json{
|
2024-02-22 11:56:52 +00:00
|
|
|
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
|
|
|
{knowhere::meta::DIM, std::to_string(dim)},
|
|
|
|
{knowhere::indexparam::NLIST, "128"},
|
2024-02-21 06:02:53 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
auto search_conf = knowhere::Json{
|
2024-02-22 11:56:52 +00:00
|
|
|
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
|
|
|
{knowhere::indexparam::NPROBE, 4},
|
2024-02-21 06:02:53 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
std::vector<knowhere::DataSetPtr> datasets;
|
|
|
|
auto raw = raw_data.data();
|
|
|
|
for (int beg = 0; beg < N; beg += chunk_size) {
|
|
|
|
auto end = beg + chunk_size;
|
|
|
|
if (end > N) {
|
|
|
|
end = N;
|
|
|
|
}
|
|
|
|
std::vector<float> ft(raw + dim * beg, raw + dim * end);
|
|
|
|
auto ds = knowhere::GenDataSet(end - beg, dim, ft.data());
|
|
|
|
datasets.push_back(ds);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto& ds : datasets) {
|
|
|
|
index->BuildWithDataset(ds, build_conf);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto bitmap = BitsetType(N, false);
|
|
|
|
|
|
|
|
BitsetView view = bitmap;
|
|
|
|
auto query_ds = knowhere::GenDataSet(1, dim, raw_data.data());
|
|
|
|
|
|
|
|
milvus::SearchInfo searchInfo;
|
|
|
|
searchInfo.topk_ = TOPK;
|
|
|
|
searchInfo.metric_type_ = knowhere::metric::L2;
|
|
|
|
searchInfo.search_params_ = search_conf;
|
|
|
|
auto vec_index = dynamic_cast<index::VectorIndex*>(index.get());
|
|
|
|
|
2024-02-22 11:56:52 +00:00
|
|
|
knowhere::expected<
|
|
|
|
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
2024-04-13 14:13:23 +00:00
|
|
|
kw_iterators = vec_index->VectorIterators(
|
|
|
|
query_ds, searchInfo.search_params_, view);
|
2024-02-21 06:02:53 +00:00
|
|
|
ASSERT_TRUE(kw_iterators.has_value());
|
|
|
|
ASSERT_EQ(kw_iterators.value().size(), 1);
|
|
|
|
auto iterator = kw_iterators.value()[0];
|
|
|
|
ASSERT_TRUE(iterator->HasNext());
|
2024-02-22 11:56:52 +00:00
|
|
|
while (iterator->HasNext()) {
|
2024-02-21 06:02:53 +00:00
|
|
|
auto [off, dis] = iterator->Next();
|
|
|
|
ASSERT_TRUE(off >= 0);
|
|
|
|
ASSERT_TRUE(dis >= 0);
|
|
|
|
}
|
2024-05-08 04:17:29 +00:00
|
|
|
}*/
|
2024-02-21 06:02:53 +00:00
|
|
|
|
2022-09-21 12:16:51 +00:00
|
|
|
// Builds the parameterized index, uploads it, loads the uploaded files into a
// fresh index instance and runs a top-k query (plus a range query for
// non-sparse indexes) against the reloaded index.
TEST_P(IndexTest, BuildAndQuery) {
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.index_type = index_type;
    create_index_info.metric_type = metric_type;
    create_index_info.field_type = vec_field_data_type;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();

    milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
    milvus::storage::IndexMeta index_meta{3, 100, 1000, 1};
    auto chunk_manager = milvus::storage::CreateChunkManager(storage_config_);
    milvus::storage::FileManagerContext file_manager_context(
        field_data_meta, index_meta, chunk_manager);

    index::IndexBasePtr index =
        milvus::index::IndexFactory::GetInstance().CreateIndex(
            create_index_info, file_manager_context);
    ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));

    // Upload the built index, then drop it so that everything below runs
    // against a newly created instance.
    auto binary_set = index->Upload();
    index.reset();

    milvus::index::IndexBasePtr new_index =
        milvus::index::IndexFactory::GetInstance().CreateIndex(
            create_index_info, file_manager_context);
    auto* vec_index =
        dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
    // Fail cleanly rather than dereference a null pointer below.
    ASSERT_NE(vec_index, nullptr);

    std::vector<std::string> index_files;
    for (auto& binary : binary_set.binary_map_) {
        index_files.emplace_back(binary.first);
    }
    load_conf = generate_load_conf(index_type, metric_type, 0);
    load_conf["index_files"] = index_files;
    ASSERT_NO_THROW(vec_index->Load(milvus::tracer::TraceContext{}, load_conf));
    EXPECT_EQ(vec_index->Count(), NB);
    if (!is_sparse) {
        EXPECT_EQ(vec_index->GetDim(), DIM);
    }

    milvus::SearchInfo search_info;
    search_info.topk_ = K;
    search_info.metric_type_ = metric_type;
    search_info.search_params_ = search_conf;
    SearchResult result;
    vec_index->Query(xq_dataset, search_info, nullptr, result);
    EXPECT_EQ(result.total_nq_, NQ);
    EXPECT_EQ(result.unity_topK_, K);
    EXPECT_EQ(result.distances_.size(), NQ * K);
    EXPECT_EQ(result.seg_offsets_.size(), NQ * K);
    if (metric_type == knowhere::metric::L2) {
        // for L2 metric each vector is closest to itself
        for (int i = 0; i < NQ; i++) {
            EXPECT_EQ(result.seg_offsets_[i * K], query_offset + i);
        }
        // for other metrics we can't verify the correctness unless we perform
        // brute force search to get the ground truth.
    }
    if (!is_sparse) {
        // sparse doesn't support range search yet
        search_info.search_params_ = range_search_conf;
        vec_index->Query(xq_dataset, search_info, nullptr, result);
    }
}
|
|
|
|
|
|
|
|
// Same build / upload / reload cycle as BuildAndQuery, but the reloaded index
// is given an "mmap_filepath" in its load configuration. Index types that
// report no mmap support are skipped.
TEST_P(IndexTest, Mmap) {
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.index_type = index_type;
    create_index_info.metric_type = metric_type;
    create_index_info.field_type = vec_field_data_type;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();

    milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
    milvus::storage::IndexMeta index_meta{3, 100, 1000, 1};
    auto chunk_manager = milvus::storage::CreateChunkManager(storage_config_);
    milvus::storage::FileManagerContext file_manager_context(
        field_data_meta, index_meta, chunk_manager);

    index::IndexBasePtr index =
        milvus::index::IndexFactory::GetInstance().CreateIndex(
            create_index_info, file_manager_context);
    ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));

    // Upload the built index, then drop it so that everything below runs
    // against a newly created instance.
    auto binary_set = index->Upload();
    index.reset();

    milvus::index::IndexBasePtr new_index =
        milvus::index::IndexFactory::GetInstance().CreateIndex(
            create_index_info, file_manager_context);
    if (!new_index->IsMmapSupported()) {
        return;
    }
    auto* vec_index =
        dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
    // Fail cleanly rather than dereference a null pointer below.
    ASSERT_NE(vec_index, nullptr);

    std::vector<std::string> index_files;
    for (auto& binary : binary_set.binary_map_) {
        index_files.emplace_back(binary.first);
    }
    load_conf = generate_load_conf(index_type, metric_type, 0);
    load_conf["index_files"] = index_files;
    load_conf["mmap_filepath"] = "mmap/test_index_mmap_" + index_type;
    vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
    EXPECT_EQ(vec_index->Count(), NB);
    // The dimension check is skipped for sparse indexes, as in BuildAndQuery
    // and GetVector.
    if (!is_sparse) {
        EXPECT_EQ(vec_index->GetDim(), DIM);
    }

    milvus::SearchInfo search_info;
    search_info.topk_ = K;
    search_info.metric_type_ = metric_type;
    search_info.search_params_ = search_conf;
    SearchResult result;
    vec_index->Query(xq_dataset, search_info, nullptr, result);
    EXPECT_EQ(result.total_nq_, NQ);
    EXPECT_EQ(result.unity_topK_, K);
    EXPECT_EQ(result.distances_.size(), NQ * K);
    EXPECT_EQ(result.seg_offsets_.size(), NQ * K);
    // NOTE(review): this self-match check also runs for sparse (IP) indexes
    // if they ever report mmap support; BuildAndQuery restricts the
    // equivalent check to the L2 metric -- confirm which is intended.
    if (!is_binary) {
        EXPECT_EQ(result.seg_offsets_[0], query_offset);
    }
    if (!is_sparse) {
        // sparse doesn't support range search yet (see BuildAndQuery)
        search_info.search_params_ = range_search_conf;
        vec_index->Query(xq_dataset, search_info, nullptr, result);
    }
}
|
2023-02-16 03:02:34 +00:00
|
|
|
|
2023-04-23 01:00:32 +00:00
|
|
|
// Builds an index for the parameterized index/metric type, uploads it, loads
// the uploaded files into a freshly created index, and - when the reloaded
// index reports that it holds raw data - verifies that the vectors fetched by
// id match the data the index was built from.
TEST_P(IndexTest, GetVector) {
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.index_type = index_type;
    create_index_info.metric_type = metric_type;
    create_index_info.field_type = vec_field_data_type;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();
    index::IndexBasePtr index;

    milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
    milvus::storage::IndexMeta index_meta{3, 100, 1000, 1};
    auto chunk_manager = milvus::storage::CreateChunkManager(storage_config_);
    milvus::storage::FileManagerContext file_manager_context(
        field_data_meta, index_meta, chunk_manager);
    index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);

    ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));
    milvus::index::IndexBasePtr new_index;
    milvus::index::VectorIndex* vec_index = nullptr;

    // Upload the built index and drop it; everything below works on a new
    // index object that is populated from the uploaded files.
    auto binary_set = index->Upload();
    index.reset();
    std::vector<std::string> index_files;
    for (auto& binary : binary_set.binary_map_) {
        index_files.emplace_back(binary.first);
    }
    new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);
    load_conf = generate_load_conf(index_type, metric_type, 0);
    load_conf["index_files"] = index_files;

    vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
    // Stop here instead of dereferencing a null pointer if the factory did
    // not hand back a vector index.
    ASSERT_NE(vec_index, nullptr);
    if (index_type == knowhere::IndexEnum::INDEX_DISKANN) {
        vec_index->Load(binary_set, load_conf);
        EXPECT_EQ(vec_index->Count(), NB);
    } else {
        vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
    }
    if (!is_sparse) {
        EXPECT_EQ(vec_index->GetDim(), DIM);
    }
    EXPECT_EQ(vec_index->Count(), NB);

    // Nothing to compare against when the index does not keep raw vectors.
    if (!vec_index->HasRawData()) {
        return;
    }

    auto ids_ds = GenRandomIds(NB);
    if (is_binary) {
        auto results = vec_index->GetVector(ids_ds);
        // Fatal assertion: the loop below indexes `results` up to
        // NB * data_bytes, so a size mismatch must abort the test rather than
        // continue into an out-of-bounds read.
        ASSERT_EQ(results.size(), xb_bin_data.size());
        const auto data_bytes = DIM / 8;
        for (size_t i = 0; i < NB; ++i) {
            auto id = ids_ds->GetIds()[i];
            for (size_t j = 0; j < data_bytes; ++j) {
                ASSERT_EQ(results[i * data_bytes + j],
                          xb_bin_data[id * data_bytes + j]);
            }
        }
    } else if (is_sparse) {
        auto sparse_rows = vec_index->GetSparseVector(ids_ds);
        for (size_t i = 0; i < NB; ++i) {
            auto id = ids_ds->GetIds()[i];
            auto& row = sparse_rows[i];
            ASSERT_EQ(row.size(), xb_sparse_data[id].size());
            for (size_t j = 0; j < row.size(); ++j) {
                ASSERT_EQ(row[j].id, xb_sparse_data[id][j].id);
                ASSERT_EQ(row[j].val, xb_sparse_data[id][j].val);
            }
        }
    } else {
        auto results = vec_index->GetVector(ids_ds);
        // GetVector's result is copied into a float buffer so it can be
        // compared element-wise with xb_data.
        std::vector<float> result_vectors(results.size() / (sizeof(float)));
        memcpy(result_vectors.data(), results.data(), results.size());
        ASSERT_EQ(result_vectors.size(), xb_data.size());
        for (size_t i = 0; i < NB; ++i) {
            auto id = ids_ds->GetIds()[i];
            for (size_t j = 0; j < DIM; ++j) {
                ASSERT_EQ(result_vectors[i * DIM + j], xb_data[id * DIM + j]);
            }
        }
    }
}
|
|
|
|
|
2023-06-25 06:38:44 +00:00
|
|
|
#ifdef BUILD_DISK_ANN
|
|
|
|
// Builds a DiskANN index over float vectors, reloads it from the uploaded
// files, and checks that a query whose search list size is smaller than topk
// is rejected with std::runtime_error.
TEST(Indexing, SearchDiskAnnWithInvalidParam) {
    int64_t NB = 1000;
    IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
    MetricType metric_type = knowhere::metric::L2;
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.index_type = index_type;
    create_index_info.metric_type = metric_type;
    create_index_info.field_type = milvus::DataType::VECTOR_FLOAT;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();

    int64_t collection_id = 1;
    int64_t partition_id = 2;
    int64_t segment_id = 3;
    int64_t field_id = 100;
    int64_t build_id = 1000;
    int64_t index_version = 1;

    StorageConfig storage_config = get_default_local_storage_config();
    milvus::storage::FieldDataMeta field_data_meta{
        collection_id, partition_id, segment_id, field_id};
    milvus::storage::IndexMeta index_meta{
        segment_id, field_id, build_id, index_version};
    auto chunk_manager = storage::CreateChunkManager(storage_config);
    milvus::storage::FileManagerContext file_manager_context(
        field_data_meta, index_meta, chunk_manager);
    auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);

    auto build_conf = Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {knowhere::meta::DIM, std::to_string(DIM)},
        {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
        {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
        {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
        {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
        {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
    };

    // build disk ann index
    auto dataset = GenDataset(NB, metric_type, false);
    FixedVector<float> xb_data =
        dataset.get_col<float>(milvus::FieldId(field_id));
    knowhere::DataSetPtr xb_dataset =
        knowhere::GenDataSet(NB, DIM, xb_data.data());
    ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));

    // serialize and load the disk index; for now a disk index can only be
    // searched after loading
    auto binary_set = index->Upload();
    index.reset();

    auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);
    auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
    // Abort instead of dereferencing a null pointer if the cast fails.
    ASSERT_NE(vec_index, nullptr);
    std::vector<std::string> index_files;
    for (auto& binary : binary_set.binary_map_) {
        index_files.emplace_back(binary.first);
    }
    auto load_conf = generate_load_conf(index_type, metric_type, NB);
    load_conf["index_files"] = index_files;
    vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
    EXPECT_EQ(vec_index->Count(), NB);

    // search the disk index with a search list size (K - 1) smaller than the
    // requested topk (K); the query is expected to throw
    int query_offset = 100;
    knowhere::DataSetPtr xq_dataset =
        knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);

    milvus::SearchInfo search_info;
    search_info.topk_ = K;
    search_info.metric_type_ = metric_type;
    search_info.search_params_ = milvus::Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {milvus::index::DISK_ANN_QUERY_LIST, K - 1},
    };
    SearchResult result;
    EXPECT_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result),
                 std::runtime_error);
}
|
2024-01-11 07:48:51 +00:00
|
|
|
|
2024-04-07 06:13:16 +00:00
|
|
|
// Builds a DiskANN index over float16 vectors, reloads it from the uploaded
// files, and checks that a query with a search list size of K * 2 completes
// without throwing.
TEST(Indexing, SearchDiskAnnWithFloat16) {
    int64_t NB = 1000;
    int64_t NQ = 2;
    int64_t K = 4;
    IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
    MetricType metric_type = knowhere::metric::L2;
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.index_type = index_type;
    create_index_info.metric_type = metric_type;
    create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();

    int64_t collection_id = 1;
    int64_t partition_id = 2;
    int64_t segment_id = 3;
    int64_t field_id = 100;
    int64_t build_id = 1000;
    int64_t index_version = 1;

    StorageConfig storage_config = get_default_local_storage_config();
    milvus::storage::FieldDataMeta field_data_meta{
        collection_id, partition_id, segment_id, field_id};
    milvus::storage::IndexMeta index_meta{
        segment_id, field_id, build_id, index_version};
    auto chunk_manager = storage::CreateChunkManager(storage_config);
    milvus::storage::FileManagerContext file_manager_context(
        field_data_meta, index_meta, chunk_manager);
    auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);

    auto build_conf = Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {knowhere::meta::DIM, std::to_string(DIM)},
        {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
        {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
        {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
        {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
        {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
    };

    // build disk ann index
    auto dataset = GenDatasetWithDataType(
        NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
    FixedVector<float16> xb_data =
        dataset.get_col<float16>(milvus::FieldId(field_id));
    knowhere::DataSetPtr xb_dataset =
        knowhere::GenDataSet(NB, DIM, xb_data.data());
    ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));

    // serialize and load the disk index; for now a disk index can only be
    // searched after loading
    auto binary_set = index->Upload();
    index.reset();

    auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);
    auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
    // Abort instead of dereferencing a null pointer if the cast fails.
    ASSERT_NE(vec_index, nullptr);
    std::vector<std::string> index_files;
    for (auto& binary : binary_set.binary_map_) {
        index_files.emplace_back(binary.first);
    }
    auto load_conf = generate_load_conf<float16>(index_type, metric_type, NB);
    load_conf["index_files"] = index_files;
    vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
    EXPECT_EQ(vec_index->Count(), NB);

    // search the disk index with a search list size (K * 2) larger than the
    // requested topk (K); the query is expected to succeed
    int query_offset = 100;
    knowhere::DataSetPtr xq_dataset =
        knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);

    milvus::SearchInfo search_info;
    search_info.topk_ = K;
    search_info.metric_type_ = metric_type;
    search_info.search_params_ = milvus::Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {milvus::index::DISK_ANN_QUERY_LIST, K * 2},
    };
    SearchResult result;
    EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
}
|
|
|
|
|
|
|
|
// Builds a DiskANN index over bfloat16 vectors, reloads it from the uploaded
// files, and checks that a query with a search list size of K * 2 completes
// without throwing.
TEST(Indexing, SearchDiskAnnWithBFloat16) {
    int64_t NB = 1000;
    int64_t NQ = 2;
    int64_t K = 4;
    IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
    MetricType metric_type = knowhere::metric::L2;
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.index_type = index_type;
    create_index_info.metric_type = metric_type;
    create_index_info.field_type = milvus::DataType::VECTOR_BFLOAT16;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();

    int64_t collection_id = 1;
    int64_t partition_id = 2;
    int64_t segment_id = 3;
    int64_t field_id = 100;
    int64_t build_id = 1000;
    int64_t index_version = 1;

    StorageConfig storage_config = get_default_local_storage_config();
    milvus::storage::FieldDataMeta field_data_meta{
        collection_id, partition_id, segment_id, field_id};
    milvus::storage::IndexMeta index_meta{
        segment_id, field_id, build_id, index_version};
    auto chunk_manager = storage::CreateChunkManager(storage_config);
    milvus::storage::FileManagerContext file_manager_context(
        field_data_meta, index_meta, chunk_manager);
    auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);

    auto build_conf = Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {knowhere::meta::DIM, std::to_string(DIM)},
        {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
        {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
        {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
        {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
        {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
    };

    // build disk ann index
    auto dataset = GenDatasetWithDataType(
        NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
    FixedVector<bfloat16> xb_data =
        dataset.get_col<bfloat16>(milvus::FieldId(field_id));
    knowhere::DataSetPtr xb_dataset =
        knowhere::GenDataSet(NB, DIM, xb_data.data());
    ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));

    // serialize and load the disk index; for now a disk index can only be
    // searched after loading
    auto binary_set = index->Upload();
    index.reset();

    auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);
    auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
    // Abort instead of dereferencing a null pointer if the cast fails.
    ASSERT_NE(vec_index, nullptr);
    std::vector<std::string> index_files;
    for (auto& binary : binary_set.binary_map_) {
        index_files.emplace_back(binary.first);
    }
    auto load_conf = generate_load_conf<bfloat16>(index_type, metric_type, NB);
    load_conf["index_files"] = index_files;
    vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
    EXPECT_EQ(vec_index->Count(), NB);

    // search the disk index with a search list size (K * 2) larger than the
    // requested topk (K); the query is expected to succeed
    int query_offset = 100;
    knowhere::DataSetPtr xq_dataset =
        knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);

    milvus::SearchInfo search_info;
    search_info.topk_ = K;
    search_info.metric_type_ = metric_type;
    search_info.search_params_ = milvus::Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {milvus::index::DISK_ANN_QUERY_LIST, K * 2},
    };
    SearchResult result;
    EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
}
|
2023-06-25 06:38:44 +00:00
|
|
|
#endif
|
2023-12-05 08:48:54 +00:00
|
|
|
|
2024-02-04 13:25:05 +00:00
|
|
|
//class IndexTestV2
|
|
|
|
// : public ::testing::TestWithParam<std::tuple<Param, int64_t, bool>> {
|
|
|
|
// protected:
|
|
|
|
// std::shared_ptr<arrow::Schema>
|
|
|
|
// TestSchema(int vec_size) {
|
|
|
|
// arrow::FieldVector fields;
|
|
|
|
// fields.push_back(arrow::field("pk", arrow::int64()));
|
|
|
|
// fields.push_back(arrow::field("ts", arrow::int64()));
|
|
|
|
// fields.push_back(
|
|
|
|
// arrow::field("vec", arrow::fixed_size_binary(vec_size)));
|
|
|
|
// return std::make_shared<arrow::Schema>(fields);
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// std::shared_ptr<arrow::RecordBatchReader>
|
|
|
|
// TestRecords(int vec_size, GeneratedData& dataset) {
|
|
|
|
// arrow::Int64Builder pk_builder;
|
|
|
|
// arrow::Int64Builder ts_builder;
|
|
|
|
// arrow::FixedSizeBinaryBuilder vec_builder(
|
|
|
|
// arrow::fixed_size_binary(vec_size));
|
|
|
|
// if (!is_binary) {
|
|
|
|
// xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
|
|
|
// auto data = reinterpret_cast<char*>(xb_data.data());
|
|
|
|
// for (auto i = 0; i < NB; ++i) {
|
|
|
|
// EXPECT_TRUE(pk_builder.Append(i).ok());
|
|
|
|
// EXPECT_TRUE(ts_builder.Append(i).ok());
|
|
|
|
// EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok());
|
|
|
|
// }
|
|
|
|
// } else {
|
|
|
|
// xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
|
|
|
// for (auto i = 0; i < NB; ++i) {
|
|
|
|
// EXPECT_TRUE(pk_builder.Append(i).ok());
|
|
|
|
// EXPECT_TRUE(ts_builder.Append(i).ok());
|
|
|
|
// EXPECT_TRUE(
|
|
|
|
// vec_builder.Append(xb_bin_data.data() + i * vec_size).ok());
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
// std::shared_ptr<arrow::Array> pk_array;
|
|
|
|
// EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
|
|
|
|
// std::shared_ptr<arrow::Array> ts_array;
|
|
|
|
// EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
|
|
|
|
// std::shared_ptr<arrow::Array> vec_array;
|
|
|
|
// EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
|
|
|
|
// auto schema = TestSchema(vec_size);
|
|
|
|
// auto rec_batch = arrow::RecordBatch::Make(
|
|
|
|
// schema, NB, {pk_array, ts_array, vec_array});
|
|
|
|
// auto reader =
|
|
|
|
// arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie();
|
|
|
|
// return reader;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// std::shared_ptr<milvus_storage::Space>
|
|
|
|
// TestSpace(int vec_size, GeneratedData& dataset) {
|
|
|
|
// auto arrow_schema = TestSchema(vec_size);
|
|
|
|
// auto schema_options = std::make_shared<milvus_storage::SchemaOptions>();
|
|
|
|
// schema_options->primary_column = "pk";
|
|
|
|
// schema_options->version_column = "ts";
|
|
|
|
// schema_options->vector_column = "vec";
|
|
|
|
// auto schema = std::make_shared<milvus_storage::Schema>(arrow_schema,
|
|
|
|
// schema_options);
|
|
|
|
// EXPECT_TRUE(schema->Validate().ok());
|
|
|
|
//
|
|
|
|
// auto space_res = milvus_storage::Space::Open(
|
|
|
|
// "file://" + boost::filesystem::canonical(temp_path).string(),
|
|
|
|
// milvus_storage::Options{schema});
|
|
|
|
// EXPECT_TRUE(space_res.has_value());
|
|
|
|
//
|
|
|
|
// auto space = std::move(space_res.value());
|
|
|
|
// auto rec = TestRecords(vec_size, dataset);
|
|
|
|
// auto write_opt = milvus_storage::WriteOption{NB};
|
|
|
|
// space->Write(rec.get(), &write_opt);
|
|
|
|
// return std::move(space);
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// void
|
|
|
|
// SetUp() override {
|
|
|
|
// temp_path = boost::filesystem::temp_directory_path() /
|
|
|
|
// boost::filesystem::unique_path();
|
|
|
|
// boost::filesystem::create_directory(temp_path);
|
|
|
|
// storage_config_ = get_default_local_storage_config();
|
|
|
|
//
|
|
|
|
// auto param = GetParam();
|
|
|
|
// index_type = std::get<0>(param).first;
|
|
|
|
// metric_type = std::get<0>(param).second;
|
|
|
|
// file_slice_size = std::get<1>(param);
|
|
|
|
// enable_mmap = index_type != knowhere::IndexEnum::INDEX_DISKANN &&
|
|
|
|
// std::get<2>(param);
|
|
|
|
// if (enable_mmap) {
|
|
|
|
// mmap_file_path = boost::filesystem::temp_directory_path() /
|
|
|
|
// boost::filesystem::unique_path();
|
|
|
|
// }
|
|
|
|
// NB = 3000;
|
|
|
|
//
|
|
|
|
// // try to reduce the test time,
|
|
|
|
// // but the large dataset is needed for the case below.
|
|
|
|
// auto test_name = std::string(
|
|
|
|
// testing::UnitTest::GetInstance()->current_test_info()->name());
|
|
|
|
// if (test_name == "Mmap" &&
|
|
|
|
// index_type == knowhere::IndexEnum::INDEX_HNSW) {
|
|
|
|
// NB = 270000;
|
|
|
|
// }
|
|
|
|
// build_conf = generate_build_conf(index_type, metric_type);
|
|
|
|
// load_conf = generate_load_conf(index_type, metric_type, NB);
|
|
|
|
// search_conf = generate_search_conf(index_type, metric_type);
|
|
|
|
// range_search_conf = generate_range_search_conf(index_type, metric_type);
|
|
|
|
//
|
|
|
|
// std::map<knowhere::MetricType, bool> is_binary_map = {
|
|
|
|
// {knowhere::IndexEnum::INDEX_FAISS_IDMAP, false},
|
|
|
|
// {knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false},
|
|
|
|
// {knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, false},
|
|
|
|
// {knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, false},
|
|
|
|
// {knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true},
|
|
|
|
// {knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, true},
|
|
|
|
// {knowhere::IndexEnum::INDEX_HNSW, false},
|
|
|
|
// {knowhere::IndexEnum::INDEX_DISKANN, false},
|
|
|
|
// };
|
|
|
|
//
|
|
|
|
// is_binary = is_binary_map[index_type];
|
|
|
|
// int vec_size;
|
|
|
|
// if (is_binary) {
|
|
|
|
// vec_size = DIM / 8;
|
|
|
|
// vec_field_data_type = milvus::DataType::VECTOR_BINARY;
|
|
|
|
// } else {
|
|
|
|
// vec_size = DIM * 4;
|
|
|
|
// vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// auto dataset = GenDataset(NB, metric_type, is_binary);
|
|
|
|
// space = TestSpace(vec_size, dataset);
|
|
|
|
//
|
|
|
|
// if (!is_binary) {
|
|
|
|
// xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
|
|
|
// xq_dataset = knowhere::GenDataSet(
|
|
|
|
// NQ, DIM, xb_data.data() + DIM * query_offset);
|
|
|
|
// } else {
|
|
|
|
// xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
|
|
|
// xq_dataset = knowhere::GenDataSet(
|
|
|
|
// NQ, DIM, xb_bin_data.data() + DIM * query_offset);
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// void
|
|
|
|
// TearDown() override {
|
|
|
|
// boost::filesystem::remove_all(temp_path);
|
|
|
|
// if (enable_mmap) {
|
|
|
|
// boost::filesystem::remove_all(mmap_file_path);
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// protected:
|
|
|
|
// std::string index_type, metric_type;
|
|
|
|
// bool is_binary;
|
|
|
|
// milvus::Config build_conf;
|
|
|
|
// milvus::Config load_conf;
|
|
|
|
// milvus::Config search_conf;
|
|
|
|
// milvus::Config range_search_conf;
|
|
|
|
// milvus::DataType vec_field_data_type;
|
|
|
|
// knowhere::DataSetPtr xb_dataset;
|
|
|
|
// FixedVector<float> xb_data;
|
|
|
|
// FixedVector<uint8_t> xb_bin_data;
|
|
|
|
// knowhere::DataSetPtr xq_dataset;
|
|
|
|
// int64_t query_offset = 100;
|
|
|
|
// int64_t NB = 3000;
|
|
|
|
// StorageConfig storage_config_;
|
|
|
|
//
|
|
|
|
// boost::filesystem::path temp_path;
|
|
|
|
// std::shared_ptr<milvus_storage::Space> space;
|
|
|
|
// int64_t file_slice_size = DEFAULT_INDEX_FILE_SLICE_SIZE;
|
|
|
|
// bool enable_mmap;
|
|
|
|
// boost::filesystem::path mmap_file_path;
|
|
|
|
//};
|
|
|
|
//
|
2024-03-11 06:45:02 +00:00
|
|
|
//INSTANTIATE_TEST_SUITE_P(
|
2024-02-04 13:25:05 +00:00
|
|
|
// IndexTypeParameters,
|
|
|
|
// IndexTestV2,
|
|
|
|
// testing::Combine(
|
|
|
|
// ::testing::Values(
|
|
|
|
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP,
|
|
|
|
// knowhere::metric::L2),
|
|
|
|
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ,
|
|
|
|
// knowhere::metric::L2),
|
|
|
|
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
|
|
|
// knowhere::metric::L2),
|
|
|
|
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
|
|
|
|
// knowhere::metric::L2),
|
|
|
|
// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
|
|
|
// knowhere::metric::JACCARD),
|
|
|
|
// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
|
|
|
|
// knowhere::metric::JACCARD),
|
|
|
|
//#ifdef BUILD_DISK_ANN
|
|
|
|
// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2),
|
|
|
|
//#endif
|
|
|
|
// std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2)),
|
|
|
|
// testing::Values(DEFAULT_INDEX_FILE_SLICE_SIZE, 5000L),
|
|
|
|
// testing::Bool()));
|
|
|
|
//
|
|
|
|
//TEST_P(IndexTestV2, BuildAndQuery) {
|
|
|
|
// FILE_SLICE_SIZE = file_slice_size;
|
|
|
|
// milvus::index::CreateIndexInfo create_index_info;
|
|
|
|
// create_index_info.index_type = index_type;
|
|
|
|
// create_index_info.metric_type = metric_type;
|
|
|
|
// create_index_info.field_type = vec_field_data_type;
|
|
|
|
// create_index_info.field_name = "vec";
|
|
|
|
// create_index_info.dim = DIM;
|
|
|
|
// create_index_info.index_engine_version =
|
|
|
|
// knowhere::Version::GetCurrentVersion().VersionNumber();
|
|
|
|
// index::IndexBasePtr index;
|
|
|
|
//
|
|
|
|
// milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
|
|
|
|
// milvus::storage::IndexMeta index_meta{.segment_id = 3,
|
|
|
|
// .field_id = 100,
|
|
|
|
// .build_id = 1000,
|
|
|
|
// .index_version = 1,
|
|
|
|
// .field_name = "vec",
|
|
|
|
// .field_type = vec_field_data_type,
|
|
|
|
// .dim = DIM};
|
|
|
|
// auto chunk_manager = milvus::storage::CreateChunkManager(storage_config_);
|
|
|
|
// milvus::storage::FileManagerContext file_manager_context(
|
|
|
|
// field_data_meta, index_meta, chunk_manager, space);
|
|
|
|
// index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
|
|
|
// create_index_info, file_manager_context, space);
|
|
|
|
//
|
|
|
|
// auto build_conf = generate_build_conf(index_type, metric_type);
|
|
|
|
// index->BuildV2(build_conf);
|
|
|
|
// milvus::index::IndexBasePtr new_index;
|
|
|
|
// milvus::index::VectorIndex* vec_index = nullptr;
|
|
|
|
//
|
|
|
|
// auto binary_set = index->UploadV2();
|
|
|
|
// index.reset();
|
|
|
|
//
|
|
|
|
// new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
|
|
|
// create_index_info, file_manager_context, space);
|
|
|
|
// vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
|
|
|
|
//
|
|
|
|
// load_conf = generate_load_conf(index_type, metric_type, 0);
|
|
|
|
// if (enable_mmap) {
|
|
|
|
// load_conf[kMmapFilepath] = mmap_file_path.string();
|
|
|
|
// }
|
|
|
|
// ASSERT_NO_THROW(vec_index->LoadV2(load_conf));
|
|
|
|
// EXPECT_EQ(vec_index->Count(), NB);
|
|
|
|
// EXPECT_EQ(vec_index->GetDim(), DIM);
|
|
|
|
//
|
|
|
|
// milvus::SearchInfo search_info;
|
|
|
|
// search_info.topk_ = K;
|
|
|
|
// search_info.metric_type_ = metric_type;
|
|
|
|
// search_info.search_params_ = search_conf;
|
|
|
|
// auto result = vec_index->Query(xq_dataset, search_info, nullptr);
|
|
|
|
// EXPECT_EQ(result->total_nq_, NQ);
|
|
|
|
// EXPECT_EQ(result->unity_topK_, K);
|
|
|
|
// EXPECT_EQ(result->distances_.size(), NQ * K);
|
|
|
|
// EXPECT_EQ(result->seg_offsets_.size(), NQ * K);
|
|
|
|
// if (!is_binary) {
|
|
|
|
// EXPECT_EQ(result->seg_offsets_[0], query_offset);
|
|
|
|
// }
|
|
|
|
// search_info.search_params_ = range_search_conf;
|
|
|
|
// vec_index->Query(xq_dataset, search_info, nullptr);
|
|
|
|
//}
|