mirror of https://github.com/milvus-io/milvus.git
commit
41f3604e44
|
@ -36,6 +36,7 @@ Please mark all change in change log and use the ticket from JIRA.
|
|||
- \#543 - Client raises an exception in shards when the search result is empty
|
||||
- \#545 - Avoid an infinite loop in the build index thread when an error occurs
|
||||
- \#547 - NSG build failed using GPU-edition if set gpu_enable false
|
||||
- \#548 - NSG search accuracy is too low
|
||||
- \#552 - Server down during building index_type: IVF_PQ using GPU-edition
|
||||
- \#561 - Milvus server should report exception/error message or terminate on mysql metadata backend error
|
||||
- \#579 - Build index hang in GPU version when gpu_resources disabled
|
||||
|
|
|
@ -126,4 +126,38 @@ GPUIDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances,
|
|||
index_->search(n, (float*)data, k, distances, labels);
|
||||
}
|
||||
|
||||
void
|
||||
GPUIDMAP::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) {
|
||||
int64_t K = k + 1;
|
||||
auto ntotal = Count();
|
||||
|
||||
size_t dim = config->d;
|
||||
auto batch_size = 1000;
|
||||
auto tail_batch_size = ntotal % batch_size;
|
||||
auto batch_search_count = ntotal / batch_size;
|
||||
auto total_search_count = tail_batch_size == 0 ? batch_search_count : batch_search_count + 1;
|
||||
|
||||
std::vector<float> res_dis(K * batch_size);
|
||||
graph.resize(ntotal);
|
||||
Graph res_vec(total_search_count);
|
||||
for (int i = 0; i < total_search_count; ++i) {
|
||||
auto b_size = (i == (total_search_count - 1)) && tail_batch_size != 0 ? tail_batch_size : batch_size;
|
||||
|
||||
auto& res = res_vec[i];
|
||||
res.resize(K * b_size);
|
||||
|
||||
auto xq = data + batch_size * dim * i;
|
||||
search_impl(b_size, (float*)xq, K, res_dis.data(), res.data(), config);
|
||||
|
||||
for (int j = 0; j < b_size; ++j) {
|
||||
auto& node = graph[batch_size * i + j];
|
||||
node.resize(k);
|
||||
auto start_pos = j * K + 1;
|
||||
for (int m = 0, cursor = start_pos; m < k && cursor < start_pos + k; ++m, ++cursor) {
|
||||
node[m] = res[cursor];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace knowhere
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace knowhere {
|
||||
|
||||
|
@ -47,6 +48,9 @@ class GPUIDMAP : public IDMAP, public GPUIndex {
|
|||
VectorIndexPtr
|
||||
CopyGpuToGpu(const int64_t& device_id, const Config& config) override;
|
||||
|
||||
void
|
||||
GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config);
|
||||
|
||||
protected:
|
||||
void
|
||||
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override;
|
||||
|
|
|
@ -121,6 +121,26 @@ IDMAP::Add(const DatasetPtr& dataset, const Config& config) {
|
|||
index_->add_with_ids(rows, (float*)p_data, p_ids);
|
||||
}
|
||||
|
||||
void
|
||||
IDMAP::AddWithoutId(const DatasetPtr& dataset, const Config& config) {
|
||||
if (!index_) {
|
||||
KNOWHERE_THROW_MSG("index not initialize");
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lk(mutex_);
|
||||
GETTENSOR(dataset)
|
||||
|
||||
// TODO: magic here.
|
||||
auto array = dataset->array()[0];
|
||||
|
||||
std::vector<int64_t> new_ids(rows);
|
||||
for (int i = 0; i < rows; ++i) {
|
||||
new_ids[i] = i;
|
||||
}
|
||||
|
||||
index_->add_with_ids(rows, (float*)p_data, new_ids.data());
|
||||
}
|
||||
|
||||
int64_t
|
||||
IDMAP::Count() {
|
||||
return index_->ntotal;
|
||||
|
|
|
@ -34,20 +34,31 @@ class IDMAP : public VectorIndex, public FaissBaseIndex {
|
|||
|
||||
BinarySet
|
||||
Serialize() override;
|
||||
|
||||
void
|
||||
Load(const BinarySet& index_binary) override;
|
||||
|
||||
void
|
||||
Train(const Config& config);
|
||||
|
||||
DatasetPtr
|
||||
Search(const DatasetPtr& dataset, const Config& config) override;
|
||||
|
||||
int64_t
|
||||
Count() override;
|
||||
|
||||
// VectorIndexPtr
|
||||
// Clone() override;
|
||||
|
||||
int64_t
|
||||
Dimension() override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr& dataset, const Config& config) override;
|
||||
|
||||
void
|
||||
AddWithoutId(const DatasetPtr& dataset, const Config& config);
|
||||
|
||||
VectorIndexPtr
|
||||
CopyCpuToGpu(const int64_t& device_id, const Config& config);
|
||||
void
|
||||
|
@ -55,12 +66,15 @@ class IDMAP : public VectorIndex, public FaissBaseIndex {
|
|||
|
||||
virtual float*
|
||||
GetRawVectors();
|
||||
|
||||
virtual int64_t*
|
||||
GetRawIds();
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg);
|
||||
|
||||
protected:
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
|
|
|
@ -195,35 +195,34 @@ IVF::Dimension() {
|
|||
}
|
||||
|
||||
void
|
||||
IVF::GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config) {
|
||||
GETTENSOR(dataset)
|
||||
|
||||
IVF::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) {
|
||||
int64_t K = k + 1;
|
||||
auto ntotal = Count();
|
||||
|
||||
auto batch_size = 100;
|
||||
size_t dim = config->d;
|
||||
auto batch_size = 1000;
|
||||
auto tail_batch_size = ntotal % batch_size;
|
||||
auto batch_search_count = ntotal / batch_size;
|
||||
auto total_search_count = tail_batch_size == 0 ? batch_search_count : batch_search_count + 1;
|
||||
|
||||
std::vector<float> res_dis(k * batch_size);
|
||||
std::vector<float> res_dis(K * batch_size);
|
||||
graph.resize(ntotal);
|
||||
Graph res_vec(total_search_count);
|
||||
for (int i = 0; i < total_search_count; ++i) {
|
||||
auto b_size = i == total_search_count - 1 && tail_batch_size != 0 ? tail_batch_size : batch_size;
|
||||
auto b_size = (i == (total_search_count - 1)) && tail_batch_size != 0 ? tail_batch_size : batch_size;
|
||||
|
||||
auto& res = res_vec[i];
|
||||
res.resize(k * b_size);
|
||||
res.resize(K * b_size);
|
||||
|
||||
auto xq = p_data + batch_size * dim * i;
|
||||
search_impl(b_size, (float*)xq, k, res_dis.data(), res.data(), config);
|
||||
auto xq = data + batch_size * dim * i;
|
||||
search_impl(b_size, (float*)xq, K, res_dis.data(), res.data(), config);
|
||||
|
||||
int tmp = 0;
|
||||
for (int j = 0; j < b_size; ++j) {
|
||||
auto& node = graph[batch_size * i + j];
|
||||
node.resize(k);
|
||||
for (int m = 0; m < k && tmp < k * b_size; ++m, ++tmp) {
|
||||
// TODO(linxj): avoid memcopy here.
|
||||
node[m] = res[tmp];
|
||||
auto start_pos = j * K + 1;
|
||||
for (int m = 0, cursor = start_pos; m < k && cursor < start_pos + k; ++m, ++cursor) {
|
||||
node[m] = res[cursor];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ class IVF : public VectorIndex, public FaissBaseIndex {
|
|||
Search(const DatasetPtr& dataset, const Config& config) override;
|
||||
|
||||
void
|
||||
GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config);
|
||||
GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config);
|
||||
|
||||
BinarySet
|
||||
Serialize() override;
|
||||
|
|
|
@ -20,9 +20,12 @@
|
|||
#include "knowhere/common/Exception.h"
|
||||
#include "knowhere/common/Timer.h"
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include "knowhere/index/vector_index/IndexGPUIDMAP.h"
|
||||
#include "knowhere/index/vector_index/IndexGPUIVF.h"
|
||||
#include "knowhere/index/vector_index/helpers/Cloner.h"
|
||||
#endif
|
||||
|
||||
#include "knowhere/index/vector_index/IndexIDMAP.h"
|
||||
#include "knowhere/index/vector_index/IndexIVF.h"
|
||||
#include "knowhere/index/vector_index/nsg/NSG.h"
|
||||
#include "knowhere/index/vector_index/nsg/NSGIO.h"
|
||||
|
@ -110,33 +113,36 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) {
|
|||
|
||||
IndexModelPtr
|
||||
NSG::Train(const DatasetPtr& dataset, const Config& config) {
|
||||
config->Dump();
|
||||
auto build_cfg = std::dynamic_pointer_cast<NSGCfg>(config);
|
||||
if (build_cfg != nullptr) {
|
||||
build_cfg->CheckValid(); // throw exception
|
||||
}
|
||||
|
||||
// TODO(linxj): dev IndexFactory, support more IndexType
|
||||
auto idmap = std::make_shared<IDMAP>();
|
||||
idmap->Train(config);
|
||||
idmap->AddWithoutId(dataset, config);
|
||||
Graph knng;
|
||||
float* raw_data = idmap->GetRawVectors();
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
if (build_cfg->gpu_id == knowhere::INVALID_VALUE) {
|
||||
auto preprocess_index = std::make_shared<IVF>();
|
||||
auto model = preprocess_index->Train(dataset, config);
|
||||
preprocess_index->set_index_model(model);
|
||||
preprocess_index->AddWithoutIds(dataset, config);
|
||||
preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config);
|
||||
preprocess_index->Add(dataset, config);
|
||||
preprocess_index->GenGraph(raw_data, build_cfg->knng, knng, config);
|
||||
} else {
|
||||
auto preprocess_index = std::make_shared<GPUIVF>(build_cfg->gpu_id);
|
||||
auto model = preprocess_index->Train(dataset, config);
|
||||
preprocess_index->set_index_model(model);
|
||||
preprocess_index->AddWithoutIds(dataset, config);
|
||||
preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config);
|
||||
// TODO(linxj): use ivf instead?
|
||||
auto gpu_idx = cloner::CopyCpuToGpu(idmap, build_cfg->gpu_id, config);
|
||||
auto gpu_idmap = std::dynamic_pointer_cast<GPUIDMAP>(gpu_idx);
|
||||
gpu_idmap->GenGraph(raw_data, build_cfg->knng, knng, config);
|
||||
}
|
||||
#else
|
||||
auto preprocess_index = std::make_shared<IVF>();
|
||||
auto model = preprocess_index->Train(dataset, config);
|
||||
preprocess_index->set_index_model(model);
|
||||
preprocess_index->AddWithoutIds(dataset, config);
|
||||
preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config);
|
||||
preprocess_index->GenGraph(raw_data, build_cfg->knng, knng, config);
|
||||
#endif
|
||||
|
||||
algo::BuildParams b_params;
|
||||
|
@ -144,10 +150,10 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) {
|
|||
b_params.out_degree = build_cfg->out_degree;
|
||||
b_params.search_length = build_cfg->search_length;
|
||||
|
||||
GETTENSOR(dataset)
|
||||
auto array = dataset->array()[0];
|
||||
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
|
||||
|
||||
GETTENSOR(dataset)
|
||||
index_ = std::make_shared<algo::NsgIndex>(dim, rows);
|
||||
index_->SetKnnGraph(knng);
|
||||
index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params);
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <stack>
|
||||
#include <utility>
|
||||
|
@ -29,12 +28,13 @@
|
|||
#include "knowhere/index/vector_index/nsg/NSG.h"
|
||||
#include "knowhere/index/vector_index/nsg/NSGHelper.h"
|
||||
|
||||
// TODO: enable macro
|
||||
//#include <gperftools/profiler.h>
|
||||
|
||||
namespace knowhere {
|
||||
namespace algo {
|
||||
|
||||
unsigned int seed = 100;
|
||||
|
||||
NsgIndex::NsgIndex(const size_t& dimension, const size_t& n, METRICTYPE metric)
|
||||
: dimension(dimension), ntotal(n), metric_type(metric) {
|
||||
switch (metric) {
|
||||
|
@ -55,8 +55,6 @@ NsgIndex::~NsgIndex() {
|
|||
|
||||
void
|
||||
NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) {
|
||||
TimeRecorder rc("NSG");
|
||||
|
||||
ntotal = nb;
|
||||
ori_data_ = new float[ntotal * dimension];
|
||||
ids_ = new int64_t[ntotal];
|
||||
|
@ -67,25 +65,17 @@ NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const
|
|||
out_degree = parameters.out_degree;
|
||||
candidate_pool_size = parameters.candidate_pool_size;
|
||||
|
||||
TimeRecorder rc("NSG", 1);
|
||||
|
||||
InitNavigationPoint();
|
||||
rc.RecordSection("init");
|
||||
|
||||
Link();
|
||||
rc.RecordSection("Link");
|
||||
|
||||
//>> Debug code
|
||||
/////
|
||||
// int count = 0;
|
||||
// for (int i = 0; i < ntotal; ++i) {
|
||||
// count += nsg[i].size();
|
||||
//}
|
||||
/////
|
||||
|
||||
CheckConnectivity();
|
||||
rc.RecordSection("Connect");
|
||||
|
||||
//>> Debug code
|
||||
///
|
||||
int total_degree = 0;
|
||||
for (size_t i = 0; i < ntotal; ++i) {
|
||||
total_degree += nsg[i].size();
|
||||
|
@ -93,9 +83,17 @@ NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const
|
|||
|
||||
KNOWHERE_LOG_DEBUG << "Graph physical size: " << total_degree * sizeof(node_t) / 1024 / 1024 << "m";
|
||||
KNOWHERE_LOG_DEBUG << "Average degree: " << total_degree / ntotal;
|
||||
/////
|
||||
|
||||
is_trained = true;
|
||||
|
||||
// Debug code
|
||||
// for (size_t i = 0; i < ntotal; i++) {
|
||||
// auto& x = nsg[i];
|
||||
// for (size_t j = 0; j < x.size(); j++) {
|
||||
// std::cout << "id: " << x[j] << std::endl;
|
||||
// }
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -114,28 +112,22 @@ NsgIndex::InitNavigationPoint() {
|
|||
}
|
||||
|
||||
// select navigation point
|
||||
std::vector<Neighbor> resset, fullset;
|
||||
unsigned int seed = 100;
|
||||
std::vector<Neighbor> resset;
|
||||
navigation_point = rand_r(&seed) % ntotal; // random initialize navigating point
|
||||
|
||||
//>> Debug code
|
||||
/////
|
||||
// navigation_point = drand48();
|
||||
/////
|
||||
|
||||
GetNeighbors(center, resset, knng);
|
||||
navigation_point = resset[0].id;
|
||||
|
||||
//>> Debug code
|
||||
/////
|
||||
// Debug code
|
||||
// std::cout << "ep: " << navigation_point << std::endl;
|
||||
/////
|
||||
|
||||
//>> Debug code
|
||||
/////
|
||||
// for (int k = 0; k < resset.size(); ++k) {
|
||||
// std::cout << "id: " << resset[k].id << ", dis: " << resset[k].distance << std::endl;
|
||||
// }
|
||||
// std::cout << std::endl;
|
||||
//
|
||||
// std::cout << "ep: " << navigation_point << std::endl;
|
||||
//
|
||||
// float r1 = distance_->Compare(center, ori_data_ + navigation_point * dimension, dimension);
|
||||
// assert(r1 == resset[0].distance);
|
||||
/////
|
||||
}
|
||||
|
||||
// Specify Link
|
||||
|
@ -149,7 +141,9 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
|||
// TODO: throw exception here.
|
||||
}
|
||||
|
||||
std::vector<node_t> init_ids;
|
||||
resset.resize(search_length);
|
||||
std::vector<node_t> init_ids(buffer_size);
|
||||
// std::vector<node_t> init_ids;
|
||||
|
||||
{
|
||||
/*
|
||||
|
@ -158,25 +152,26 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
|||
size_t count = 0;
|
||||
|
||||
// Get all neighbors
|
||||
for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||
init_ids.push_back(graph[navigation_point][i]);
|
||||
for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) {
|
||||
// for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||
// init_ids.push_back(graph[navigation_point][i]);
|
||||
init_ids[i] = graph[navigation_point][i];
|
||||
has_calculated_dist[init_ids[i]] = true;
|
||||
++count;
|
||||
}
|
||||
|
||||
unsigned int seed = 100;
|
||||
while (count < buffer_size) {
|
||||
node_t id = rand_r(&seed) % ntotal;
|
||||
if (has_calculated_dist[id])
|
||||
continue; // duplicate id
|
||||
init_ids.push_back(id);
|
||||
// init_ids.push_back(id);
|
||||
init_ids[count] = id;
|
||||
++count;
|
||||
has_calculated_dist[id] = true;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
resset.resize(init_ids.size());
|
||||
// resset.resize(init_ids.size());
|
||||
|
||||
// init resset and sort by distance
|
||||
for (size_t i = 0; i < init_ids.size(); ++i) {
|
||||
|
@ -190,7 +185,7 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
|||
float dist = distance_->Compare(ori_data_ + dimension * id, query, dimension);
|
||||
resset[i] = Neighbor(id, dist, false);
|
||||
|
||||
///////////// difference from other GetNeighbors ///////////////
|
||||
//// difference from other GetNeighbors
|
||||
fullset.push_back(resset[i]);
|
||||
///////////////////////////////////////
|
||||
}
|
||||
|
@ -247,8 +242,10 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
|||
// TODO: throw exception here.
|
||||
}
|
||||
|
||||
std::vector<node_t> init_ids;
|
||||
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; // TODO: ?
|
||||
// std::vector<node_t> init_ids;
|
||||
std::vector<node_t> init_ids(buffer_size);
|
||||
resset.resize(buffer_size);
|
||||
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0};
|
||||
|
||||
{
|
||||
/*
|
||||
|
@ -257,24 +254,26 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
|||
size_t count = 0;
|
||||
|
||||
// Get all neighbors
|
||||
for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||
init_ids.push_back(graph[navigation_point][i]);
|
||||
for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) {
|
||||
// for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||
// init_ids.push_back(graph[navigation_point][i]);
|
||||
init_ids[i] = graph[navigation_point][i];
|
||||
has_calculated_dist[init_ids[i]] = true;
|
||||
++count;
|
||||
}
|
||||
unsigned int seed = 100;
|
||||
while (count < buffer_size) {
|
||||
node_t id = rand_r(&seed) % ntotal;
|
||||
if (has_calculated_dist[id])
|
||||
continue; // duplicate id
|
||||
init_ids.push_back(id);
|
||||
// init_ids.push_back(id);
|
||||
init_ids[count] = id;
|
||||
++count;
|
||||
has_calculated_dist[id] = true;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
resset.resize(init_ids.size());
|
||||
// resset.resize(init_ids.size());
|
||||
|
||||
// init resset and sort by distance
|
||||
for (size_t i = 0; i < init_ids.size(); ++i) {
|
||||
|
@ -333,13 +332,15 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
|||
|
||||
void
|
||||
NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph& graph, SearchParams* params) {
|
||||
size_t& buffer_size = params ? params->search_length : search_length;
|
||||
size_t buffer_size = params ? params->search_length : search_length;
|
||||
|
||||
if (buffer_size > ntotal) {
|
||||
// TODO: throw exception here.
|
||||
}
|
||||
|
||||
std::vector<node_t> init_ids;
|
||||
// std::vector<node_t> init_ids;
|
||||
std::vector<node_t> init_ids(buffer_size);
|
||||
resset.resize(buffer_size);
|
||||
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0};
|
||||
|
||||
{
|
||||
|
@ -349,33 +350,33 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
|||
size_t count = 0;
|
||||
|
||||
// Get all neighbors
|
||||
for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||
init_ids.push_back(graph[navigation_point][i]);
|
||||
for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) {
|
||||
// for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||
// init_ids.push_back(graph[navigation_point][i]);
|
||||
init_ids[i] = graph[navigation_point][i];
|
||||
has_calculated_dist[init_ids[i]] = true;
|
||||
++count;
|
||||
}
|
||||
unsigned int seed = 100;
|
||||
while (count < buffer_size) {
|
||||
node_t id = rand_r(&seed) % ntotal;
|
||||
if (has_calculated_dist[id])
|
||||
continue; // duplicate id
|
||||
init_ids.push_back(id);
|
||||
// init_ids.push_back(id);
|
||||
init_ids[count] = id;
|
||||
++count;
|
||||
has_calculated_dist[id] = true;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
resset.resize(init_ids.size());
|
||||
// resset.resize(init_ids.size());
|
||||
|
||||
// init resset and sort by distance
|
||||
for (size_t i = 0; i < init_ids.size(); ++i) {
|
||||
node_t id = init_ids[i];
|
||||
|
||||
// assert(id < ntotal);
|
||||
if (id >= static_cast<node_t>(ntotal)) {
|
||||
KNOWHERE_THROW_MSG("Build Index Error, id > ntotal");
|
||||
continue;
|
||||
}
|
||||
|
||||
float dist = distance_->Compare(ori_data_ + id * dimension, query, dimension);
|
||||
|
@ -383,13 +384,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
|||
}
|
||||
std::sort(resset.begin(), resset.end()); // sort by distance
|
||||
|
||||
//>> Debug code
|
||||
/////
|
||||
// for (int j = 0; j < buffer_size; ++j) {
|
||||
// std::cout << "resset_id: " << resset[j].id << ", resset_dist: " << resset[j].distance << std::endl;
|
||||
//}
|
||||
/////
|
||||
|
||||
// search nearest neighbor
|
||||
size_t cursor = 0;
|
||||
while (cursor < buffer_size) {
|
||||
|
@ -410,7 +404,8 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
|||
|
||||
if (dist >= resset[buffer_size - 1].distance)
|
||||
continue;
|
||||
///////////// difference from other GetNeighbors ///////////////
|
||||
|
||||
//// difference from other GetNeighbors
|
||||
Neighbor nn(id, dist, false);
|
||||
///////////////////////////////////////
|
||||
|
||||
|
@ -440,59 +435,50 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
|||
|
||||
void
|
||||
NsgIndex::Link() {
|
||||
auto cut_graph_dist = new float[ntotal * out_degree];
|
||||
float* cut_graph_dist = new float[ntotal * out_degree];
|
||||
nsg.resize(ntotal);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<Neighbor> fullset;
|
||||
std::vector<Neighbor> temp;
|
||||
boost::dynamic_bitset<> flags{ntotal, 0}; // TODO: ?
|
||||
boost::dynamic_bitset<> flags{ntotal, 0};
|
||||
#pragma omp for schedule(dynamic, 100)
|
||||
for (size_t n = 0; n < ntotal; ++n) {
|
||||
fullset.clear();
|
||||
temp.clear();
|
||||
flags.reset();
|
||||
GetNeighbors(ori_data_ + dimension * n, temp, fullset, flags);
|
||||
|
||||
//>> Debug code
|
||||
/////
|
||||
// float r1 = distance_->Compare(ori_data_ + n * dimension, ori_data_ + temp[0].id * dimension, dimension);
|
||||
// assert(r1 == temp[0].distance);
|
||||
/////
|
||||
SyncPrune(n, fullset, flags, cut_graph_dist);
|
||||
}
|
||||
|
||||
// Debug code
|
||||
// std::cout << "ep: " << 0 << std::endl;
|
||||
// for (int k = 0; k < fullset.size(); ++k) {
|
||||
// std::cout << "id: " << fullset[k].id << ", dis: " << fullset[k].distance << std::endl;
|
||||
// }
|
||||
}
|
||||
|
||||
//>> Debug code
|
||||
/////
|
||||
// auto bak_nsg = nsg;
|
||||
/////
|
||||
// Debug code
|
||||
// for (size_t i = 0; i < ntotal; i++)
|
||||
// {
|
||||
// auto& x = nsg[i];
|
||||
// for (size_t j=0; j < x.size(); j++)
|
||||
// {
|
||||
// std::cout << "id: " << x[j] << std::endl;
|
||||
// }
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
|
||||
knng.clear();
|
||||
knng.shrink_to_fit();
|
||||
|
||||
std::vector<std::mutex> mutex_vec(ntotal);
|
||||
|
||||
#pragma omp for schedule(dynamic, 100)
|
||||
for (unsigned n = 0; n < ntotal; ++n) {
|
||||
InterInsert(n, mutex_vec, cut_graph_dist);
|
||||
}
|
||||
|
||||
delete[] cut_graph_dist;
|
||||
|
||||
//>> Debug code
|
||||
/////
|
||||
// int count = 0;
|
||||
// for (int i = 0; i < ntotal; ++i) {
|
||||
// if (bak_nsg[i].size() != nsg[i].size()) {
|
||||
// //count += nsg[i].size() - bak_nsg[i].size();
|
||||
// count += nsg[i].size();
|
||||
// }
|
||||
//}
|
||||
/////
|
||||
|
||||
for (size_t i = 0; i < ntotal; ++i) {
|
||||
nsg[i].shrink_to_fit();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -654,9 +640,9 @@ NsgIndex::DFS(size_t root, boost::dynamic_bitset<>& has_linked, int64_t& linked_
|
|||
std::stack<size_t> s;
|
||||
s.push(root);
|
||||
if (!has_linked[root]) {
|
||||
linked_count++; // not link
|
||||
has_linked[root] = true; // link start...
|
||||
linked_count++; // not link
|
||||
}
|
||||
has_linked[root] = true; // link start...
|
||||
|
||||
while (!s.empty()) {
|
||||
size_t next = ntotal + 1;
|
||||
|
@ -709,7 +695,6 @@ NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<>& has_linked, int64_t& root
|
|||
}
|
||||
}
|
||||
if (found == 0) {
|
||||
unsigned int seed = 100;
|
||||
while (true) { // random a linked-node and add unlinked-node as its neighbor
|
||||
size_t rid = rand_r(&seed) % ntotal;
|
||||
if (has_linked[rid]) {
|
||||
|
@ -726,7 +711,10 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
|
|||
int64_t* ids, SearchParams& params) {
|
||||
std::vector<std::vector<Neighbor>> resset(nq);
|
||||
|
||||
params.search_length = k;
|
||||
if (k >= 45) {
|
||||
params.search_length = k;
|
||||
}
|
||||
|
||||
TimeRecorder rc("NsgIndex::search", 1);
|
||||
// TODO(linxj): when to use openmp
|
||||
if (nq <= 4) {
|
||||
|
@ -734,7 +722,7 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
|
|||
} else {
|
||||
#pragma omp parallel for
|
||||
for (unsigned int i = 0; i < nq; ++i) {
|
||||
auto single_query = query + i * dim;
|
||||
const float* single_query = query + i * dim;
|
||||
GetNeighbors(single_query, resset[i], nsg, ¶ms);
|
||||
}
|
||||
}
|
||||
|
@ -759,13 +747,6 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
|
|||
}
|
||||
rc.RecordSection("merge");
|
||||
|
||||
//>> Debug: test single insert
|
||||
// int x_0 = resset[0].size();
|
||||
// for (int l = 0; l < resset[0].size(); ++l) {
|
||||
// resset[0].pop_back();
|
||||
//}
|
||||
// resset.clear();
|
||||
|
||||
// ProfilerStart("xx.prof");
|
||||
// std::vector<Neighbor> resset;
|
||||
// GetNeighbors(query, resset, nsg, ¶ms);
|
||||
|
@ -781,30 +762,5 @@ NsgIndex::SetKnnGraph(Graph& g) {
|
|||
knng = std::move(g);
|
||||
}
|
||||
|
||||
// void NsgIndex::GetKnnGraphFromFile() {
|
||||
// //std::string filename = "sift.1M.50NN.graph";
|
||||
// std::string filename = "sift.50NN.graph";
|
||||
//
|
||||
// std::ifstream in(filename, std::ios::binary);
|
||||
// unsigned k;
|
||||
// in.read((char *) &k, sizeof(unsigned));
|
||||
// in.seekg(0, std::ios::end);
|
||||
// std::ios::pos_type ss = in.tellg();
|
||||
// size_t fsize = (size_t) ss;
|
||||
// size_t num = (unsigned) (fsize / (k + 1) / 4);
|
||||
// in.seekg(0, std::ios::beg);
|
||||
//
|
||||
// knng.resize(num);
|
||||
// knng.reserve(num);
|
||||
// unsigned kk = (k + 3) / 4 * 4;
|
||||
// for (size_t i = 0; i < num; i++) {
|
||||
// in.seekg(4, std::ios::cur);
|
||||
// knng[i].resize(k);
|
||||
// knng[i].reserve(kk);
|
||||
// in.read((char *) knng[i].data(), k * sizeof(unsigned));
|
||||
// }
|
||||
// in.close();
|
||||
//}
|
||||
|
||||
} // namespace algo
|
||||
} // namespace knowhere
|
||||
|
|
|
@ -52,9 +52,9 @@ class NsgIndex {
|
|||
Distance* distance_;
|
||||
|
||||
float* ori_data_;
|
||||
int64_t* ids_; // TODO: support different type
|
||||
Graph nsg; // final graph
|
||||
Graph knng; // reset after build
|
||||
int64_t* ids_;
|
||||
Graph nsg; // final graph
|
||||
Graph knng; // reset after build
|
||||
|
||||
node_t navigation_point; // offset of node in origin data
|
||||
|
||||
|
@ -134,9 +134,6 @@ class NsgIndex {
|
|||
|
||||
void
|
||||
FindUnconnectedNode(boost::dynamic_bitset<>& flags, int64_t& root);
|
||||
|
||||
// private:
|
||||
// void GetKnnGraphFromFile();
|
||||
};
|
||||
|
||||
} // namespace algo
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#include "knowhere/index/vector_index/FaissBaseIndex.h"
|
||||
#include "knowhere/index/vector_index/IndexNSG.h"
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include "knowhere/index/vector_index/IndexGPUIDMAP.h"
|
||||
#include "knowhere/index/vector_index/helpers/Cloner.h"
|
||||
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
|
||||
#endif
|
||||
|
||||
|
@ -50,6 +52,7 @@ class NSGInterfaceTest : public DataGen, public ::testing::Test {
|
|||
|
||||
auto tmp_conf = std::make_shared<knowhere::NSGCfg>();
|
||||
tmp_conf->gpu_id = DEVICEID;
|
||||
tmp_conf->d = 256;
|
||||
tmp_conf->knng = 20;
|
||||
tmp_conf->nprobe = 8;
|
||||
tmp_conf->nlist = 163;
|
||||
|
@ -116,3 +119,174 @@ TEST_F(NSGInterfaceTest, comparetest) {
|
|||
}
|
||||
tc.RecordSection("IP");
|
||||
}
|
||||
|
||||
//#include <src/index/knowhere/knowhere/index/vector_index/nsg/OriNSG.h>
|
||||
// TEST(test, ori_nsg) {
|
||||
// // float* p_data = nullptr;
|
||||
// size_t rows, dim;
|
||||
// char* filename = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_base.fvecs";
|
||||
// // loads_data(filename, p_data, rows, dim);
|
||||
// float* p_data = fvecs_read(filename, &dim, &rows);
|
||||
//
|
||||
// std::string knng_filename =
|
||||
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.1M.50NN.graph";
|
||||
// std::vector<std::vector<int64_t>> knng;
|
||||
// Load_nns_graph(knng, knng_filename.c_str());
|
||||
//
|
||||
// // float* search_data = nullptr;
|
||||
// size_t nq, search_dim;
|
||||
// char* searchfile = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_query.fvecs";
|
||||
// // loads_data(searchfile, search_data, nq, search_dim);
|
||||
// float* search_data = fvecs_read(searchfile, &search_dim, &nq);
|
||||
// assert(search_dim == dim);
|
||||
//
|
||||
// size_t k, nq2;
|
||||
// char* gtfile = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_groundtruth.ivecs";
|
||||
// int* gt_int = ivecs_read(gtfile, &k, &nq2);
|
||||
// int64_t* gt = new int64_t[k * nq2];
|
||||
// for (int i = 0; i < k * nq2; i++) {
|
||||
// gt[i] = gt_int[i];
|
||||
// }
|
||||
// delete[] gt_int;
|
||||
//
|
||||
// std::vector<int64_t> store_ids(rows);
|
||||
// for (int i = 0; i < rows; ++i) {
|
||||
// store_ids[i] = i;
|
||||
// }
|
||||
//
|
||||
// int64_t* I = new int64_t[nq * k];
|
||||
// float* D = new float[nq * k];
|
||||
//#if 0
|
||||
// efanna2e::Parameters params;
|
||||
// params.Set<int64_t>("L", 50);
|
||||
// params.Set<int64_t>("R", 55);
|
||||
// params.Set<int64_t>("C", 300);
|
||||
// auto orinsg = std::make_shared<efanna2e::IndexNSG>(dim, rows, efanna2e::Metric::L2, nullptr);
|
||||
// orinsg->Load_nn_graph(knng);
|
||||
// orinsg->Build(rows, (float*)p_data, params);
|
||||
//
|
||||
// efanna2e::Parameters paras;
|
||||
// paras.Set<unsigned>("L_search", 45);
|
||||
// paras.Set<unsigned>("P_search",100);
|
||||
// k = 10;
|
||||
// std::vector<std::vector<int64_t> > res;
|
||||
// for (unsigned i = 0; i < nq; i++) {
|
||||
// std::vector<int64_t> tmp(k);
|
||||
// orinsg->Search(search_data + i * dim, p_data, k, paras, tmp.data());
|
||||
// res.push_back(tmp);
|
||||
// }
|
||||
// }
|
||||
//#else
|
||||
// knowhere::algo::BuildParams params;
|
||||
// params.search_length = 50;
|
||||
// params.out_degree = 55;
|
||||
// params.candidate_pool_size = 300;
|
||||
// auto nsg = std::make_shared<knowhere::algo::NsgIndex>(dim, rows);
|
||||
//#if 1
|
||||
// knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
|
||||
// auto dataset = generate_dataset(int64_t(rows), int64_t(dim), p_data, store_ids.data());
|
||||
// auto config = std::make_shared<knowhere::IVFCfg>();
|
||||
// config->d = dim;
|
||||
// config->gpu_id = 0;
|
||||
// config->metric_type = knowhere::METRICTYPE::L2;
|
||||
// auto preprocess_index = std::make_shared<knowhere::IDMAP>();
|
||||
// preprocess_index->Train(config);
|
||||
// preprocess_index->AddWithoutId(dataset, config);
|
||||
// auto xx = knowhere::cloner::CopyCpuToGpu(preprocess_index, 0, config);
|
||||
// auto ss = std::dynamic_pointer_cast<knowhere::GPUIDMAP>(xx);
|
||||
//
|
||||
// std::vector<std::vector<int64_t>> kng;
|
||||
// ss->GenGraph(p_data, 50, kng, config);
|
||||
// nsg->SetKnnGraph(kng);
|
||||
// knowhere::FaissGpuResourceMgr::GetInstance().Free();
|
||||
//#else
|
||||
// nsg->SetKnnGraph(knng);
|
||||
//#endif
|
||||
// nsg->Build_with_ids(rows, (float*)p_data, store_ids.data(), params);
|
||||
// knowhere::algo::SearchParams s_params;
|
||||
// s_params.search_length = 45;
|
||||
// nsg->Search(search_data, nq, dim, k, D, I, s_params);
|
||||
//#endif
|
||||
//
|
||||
// int n_1 = 0, n_10 = 0, n_100 = 0;
|
||||
// for (int i = 0; i < nq; i++) {
|
||||
// int gt_nn = gt[i * k];
|
||||
// for (int j = 0; j < k; j++) {
|
||||
// if (I[i * k + j] == gt_nn) {
|
||||
// if (j < 1)
|
||||
// n_1++;
|
||||
// if (j < 10)
|
||||
// n_10++;
|
||||
// if (j < 100)
|
||||
// n_100++;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// printf("R@1 = %.4f\n", n_1 / float(nq));
|
||||
// printf("R@10 = %.4f\n", n_10 / float(nq));
|
||||
// printf("R@100 = %.4f\n", n_100 / float(nq));
|
||||
//}
|
||||
//
|
||||
// TEST(testxx, test_idmap){
|
||||
// int k = 50;
|
||||
// std::string knng_filename =
|
||||
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.50NN.graph";
|
||||
// std::vector<std::vector<int64_t>> gt_knng;
|
||||
// Load_nns_graph(gt_knng, knng_filename.c_str());
|
||||
//
|
||||
// size_t rows, dim;
|
||||
// char* filename =
|
||||
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/siftsmall/siftsmall_base.fvecs";
|
||||
// float* p_data = fvecs_read(filename, &dim, &rows);
|
||||
//
|
||||
// std::vector<int64_t> store_ids(rows);
|
||||
// for (int i = 0; i < rows; ++i) {
|
||||
// store_ids[i] = i;
|
||||
// }
|
||||
//
|
||||
// knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
|
||||
// auto dataset = generate_dataset(int64_t(rows), int64_t(dim), p_data, store_ids.data());
|
||||
// auto config = std::make_shared<knowhere::IVFCfg>();
|
||||
// config->d = dim;
|
||||
// config->gpu_id = 0;
|
||||
// config->metric_type = knowhere::METRICTYPE::L2;
|
||||
// auto preprocess_index = std::make_shared<knowhere::IDMAP>();
|
||||
// preprocess_index->Train(config);
|
||||
// preprocess_index->AddWithoutId(dataset, config);
|
||||
// auto xx = knowhere::cloner::CopyCpuToGpu(preprocess_index, 0, config);
|
||||
// auto ss = std::dynamic_pointer_cast<knowhere::GPUIDMAP>(xx);
|
||||
// std::vector<std::vector<int64_t>> idmap_knng;
|
||||
// ss->GenGraph(p_data, k, idmap_knng,config);
|
||||
// knowhere::FaissGpuResourceMgr::GetInstance().Free();
|
||||
//
|
||||
// int n_1 = 0, n_10 = 0, n_100 = 0;
|
||||
// for (int i = 0; i < rows; i++) {
|
||||
// int gt_nn = gt_knng[i][0];
|
||||
// int l_n_1 = 0;
|
||||
// int l_n_10 = 0;
|
||||
// int l_n_100 = 0;
|
||||
// for (int j = 0; j < k; j++) {
|
||||
// if (idmap_knng[i][j] == gt_nn) {
|
||||
// if (j < 1){
|
||||
// n_1++;
|
||||
// l_n_1++;
|
||||
// }
|
||||
// if (j < 10){
|
||||
// n_10++;
|
||||
// l_n_10++;
|
||||
// }
|
||||
// if (j < 100){
|
||||
// n_100++;
|
||||
// l_n_100++;
|
||||
// }
|
||||
//
|
||||
// }
|
||||
// if ((j == k-1) && (l_n_100 == 0)){
|
||||
// std::cout << "error id: " << i << std::endl;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// printf("R@1 = %.4f\n", n_1 / float(rows));
|
||||
// printf("R@10 = %.4f\n", n_10 / float(rows));
|
||||
// printf("R@100 = %.4f\n", n_100 / float(rows));
|
||||
//}
|
||||
|
|
|
@ -178,3 +178,72 @@ PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
|
|||
std::cout << "id\n" << ss_id.str() << std::endl;
|
||||
std::cout << "dist\n" << ss_dist.str() << std::endl;
|
||||
}
|
||||
|
||||
// Load a k-NN graph from a binary file made of fixed-size records.
//
// Each record is one 32-bit unsigned neighbor count followed by that many
// 32-bit unsigned neighbor ids. The count `k` is taken from the first record
// and every record is assumed to have the same size; a trailing partial
// record is ignored.
//
// @param final_graph  output; any previous content is discarded. On success it
//                     holds one row per record with the `k` ids widened to
//                     int64_t. It is left empty when the file cannot be opened
//                     or its header cannot be read.
// @param filename     path of the graph file.
void
Load_nns_graph(std::vector<std::vector<int64_t>>& final_graph, const char* filename) {
    final_graph.clear();

    std::ifstream in(filename, std::ios::binary);
    if (!in.is_open()) {
        fprintf(stderr, "could not open %s\n", filename);
        return;
    }

    // Without this check a failed read would leave k uninitialized.
    unsigned k = 0;
    if (!in.read(reinterpret_cast<char*>(&k), sizeof(k))) {
        fprintf(stderr, "could not read graph header from %s\n", filename);
        return;
    }

    in.seekg(0, std::ios::end);
    const std::streamoff file_bytes = in.tellg();
    if (file_bytes < 0) {
        fprintf(stderr, "could not determine size of %s\n", filename);
        return;
    }
    in.seekg(0, std::ios::beg);

    // 64-bit arithmetic so that k + 1 cannot wrap around to zero.
    const uint64_t record_bytes = (static_cast<uint64_t>(k) + 1) * sizeof(unsigned);
    const size_t num = static_cast<size_t>(static_cast<uint64_t>(file_bytes) / record_bytes);
    if (num == 0) {
        // Also keeps a bogus, oversized k from triggering a huge allocation below.
        return;
    }

    std::vector<unsigned> row(k);
    final_graph.resize(num);
    for (size_t i = 0; i < num; ++i) {
        // Skip the per-record neighbor count; it was already read as k.
        in.seekg(static_cast<std::streamoff>(sizeof(unsigned)), std::ios::cur);
        if (!in.read(reinterpret_cast<char*>(row.data()), static_cast<std::streamsize>(k * sizeof(unsigned)))) {
            // I/O error mid-file: keep only the rows that were read completely.
            final_graph.resize(i);
            break;
        }
        final_graph[i].assign(row.begin(), row.end());
    }
}
|
||||
|
||||
// Read a whole ".fvecs" file into one contiguous, row-major float array.
//
// The file is a sequence of rows; each row is a 4-byte int dimension header
// followed by that many 4-byte floats. The dimension `d` is taken from the
// first row and every row is assumed to have the same size. The headers are
// stripped, so the result holds n * d floats.
//
// Any failure (file cannot be opened, unreadable or unreasonable dimension,
// file size not a multiple of the row size, short read) prints a message to
// stderr and aborts. These checks are deliberately not `assert`s, so they
// still run in builds compiled with NDEBUG.
//
// @param fname  path of the file to read.
// @param d_out  receives the vector dimension.
// @param n_out  receives the number of vectors.
// @return       buffer allocated with new[]; the caller releases it with delete[].
float*
fvecs_read(const char* fname, size_t* d_out, size_t* n_out) {
    // Binary mode: the payload must never go through text-mode translation.
    FILE* f = fopen(fname, "rb");
    if (!f) {
        fprintf(stderr, "could not open %s\n", fname);
        perror("");
        abort();
    }

    // Check the read itself; otherwise an empty file leaves d uninitialized.
    int d = 0;
    if (fread(&d, sizeof(int), 1, f) != 1 || d <= 0 || d >= 1000000) {
        fprintf(stderr, "unreasonable dimension in %s\n", fname);
        abort();
    }
    fseek(f, 0, SEEK_SET);

    struct stat st;
    if (fstat(fileno(f), &st) != 0) {
        fprintf(stderr, "could not stat %s\n", fname);
        perror("");
        abort();
    }
    const size_t sz = static_cast<size_t>(st.st_size);
    const size_t row_len = static_cast<size_t>(d) + 1;  // header + d components
    const size_t row_bytes = row_len * 4;
    if (sz % row_bytes != 0) {
        fprintf(stderr, "weird file size for %s\n", fname);
        abort();
    }
    const size_t n = sz / row_bytes;

    *d_out = d;
    *n_out = n;
    float* x = new float[n * row_len];
    const size_t nr = fread(x, sizeof(float), n * row_len, f);
    fclose(f);
    if (nr != n * row_len) {
        fprintf(stderr, "could not read whole file %s\n", fname);
        abort();
    }

    // Shift rows left in place to squeeze out the per-row dimension headers.
    // Source and destination overlap, hence memmove rather than memcpy.
    for (size_t i = 0; i < n; i++) {
        memmove(x + i * d, x + 1 + i * row_len, d * sizeof(*x));
    }

    return x;
}
|
||||
|
||||
int* // not very clean, but works as long as sizeof(int) == sizeof(float)
|
||||
ivecs_read(const char* fname, size_t* d_out, size_t* n_out) {
|
||||
return (int*)fvecs_read(fname, d_out, n_out);
|
||||
}
|
||||
|
|
|
@ -93,3 +93,12 @@ struct FileIOReader {
|
|||
size_t
|
||||
operator()(void* ptr, size_t size);
|
||||
};
|
||||
|
||||
// Reads the k-NN graph stored in the binary file `filename` into
// `final_graph_`: one row per record in the file, each row holding that
// record's neighbor ids widened to int64_t.
void
|
||||
Load_nns_graph(std::vector<std::vector<int64_t>>& final_graph_, const char* filename);
|
||||
|
||||
// Reads the ".fvecs" file `fname` into one contiguous float array with the
// per-row dimension headers removed. Stores the vector dimension in `*d_out`
// and the number of vectors in `*n_out`. The returned buffer is allocated
// with new[]. Aborts if the file cannot be opened.
float*
|
||||
fvecs_read(const char* fname, size_t* d_out, size_t* n_out);
|
||||
|
||||
// Same as fvecs_read, but returns the buffer viewed as ints; relies on
// sizeof(int) == sizeof(float).
int*
|
||||
ivecs_read(const char* fname, size_t* d_out, size_t* n_out);
|
||||
|
|
|
@ -204,10 +204,11 @@ NSGConfAdapter::Match(const TempMetaConf& metaconf) {
|
|||
auto scale_factor = round(metaconf.dim / 128.0);
|
||||
scale_factor = scale_factor >= 4 ? 4 : scale_factor;
|
||||
conf->nprobe = int64_t(conf->nlist * 0.01);
|
||||
conf->knng = 40 + 10 * scale_factor; // the size of knng
|
||||
conf->search_length = 40 + 5 * scale_factor;
|
||||
// conf->knng = 40 + 10 * scale_factor; // the size of knng
|
||||
conf->knng = 50;
|
||||
conf->search_length = 50 + 5 * scale_factor;
|
||||
conf->out_degree = 50 + 5 * scale_factor;
|
||||
conf->candidate_pool_size = 200 + 100 * scale_factor;
|
||||
conf->candidate_pool_size = 300;
|
||||
MatchBase(conf);
|
||||
return conf;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue