mirror of https://github.com/milvus-io/milvus.git
commit
41f3604e44
|
@ -36,6 +36,7 @@ Please mark all change in change log and use the ticket from JIRA.
|
||||||
- \#543 - client raise exception in shards when search results is empty
|
- \#543 - client raise exception in shards when search results is empty
|
||||||
- \#545 - Avoid dead circle of build index thread when error occurs
|
- \#545 - Avoid dead circle of build index thread when error occurs
|
||||||
- \#547 - NSG build failed using GPU-edition if set gpu_enable false
|
- \#547 - NSG build failed using GPU-edition if set gpu_enable false
|
||||||
|
- \#548 - NSG search accuracy is too low
|
||||||
- \#552 - Server down during building index_type: IVF_PQ using GPU-edition
|
- \#552 - Server down during building index_type: IVF_PQ using GPU-edition
|
||||||
- \#561 - Milvus server should report exception/error message or terminate on mysql metadata backend error
|
- \#561 - Milvus server should report exception/error message or terminate on mysql metadata backend error
|
||||||
- \#579 - Build index hang in GPU version when gpu_resources disabled
|
- \#579 - Build index hang in GPU version when gpu_resources disabled
|
||||||
|
|
|
@ -126,4 +126,38 @@ GPUIDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances,
|
||||||
index_->search(n, (float*)data, k, distances, labels);
|
index_->search(n, (float*)data, k, distances, labels);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GPUIDMAP::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) {
|
||||||
|
int64_t K = k + 1;
|
||||||
|
auto ntotal = Count();
|
||||||
|
|
||||||
|
size_t dim = config->d;
|
||||||
|
auto batch_size = 1000;
|
||||||
|
auto tail_batch_size = ntotal % batch_size;
|
||||||
|
auto batch_search_count = ntotal / batch_size;
|
||||||
|
auto total_search_count = tail_batch_size == 0 ? batch_search_count : batch_search_count + 1;
|
||||||
|
|
||||||
|
std::vector<float> res_dis(K * batch_size);
|
||||||
|
graph.resize(ntotal);
|
||||||
|
Graph res_vec(total_search_count);
|
||||||
|
for (int i = 0; i < total_search_count; ++i) {
|
||||||
|
auto b_size = (i == (total_search_count - 1)) && tail_batch_size != 0 ? tail_batch_size : batch_size;
|
||||||
|
|
||||||
|
auto& res = res_vec[i];
|
||||||
|
res.resize(K * b_size);
|
||||||
|
|
||||||
|
auto xq = data + batch_size * dim * i;
|
||||||
|
search_impl(b_size, (float*)xq, K, res_dis.data(), res.data(), config);
|
||||||
|
|
||||||
|
for (int j = 0; j < b_size; ++j) {
|
||||||
|
auto& node = graph[batch_size * i + j];
|
||||||
|
node.resize(k);
|
||||||
|
auto start_pos = j * K + 1;
|
||||||
|
for (int m = 0, cursor = start_pos; m < k && cursor < start_pos + k; ++m, ++cursor) {
|
||||||
|
node[m] = res[cursor];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace knowhere
|
} // namespace knowhere
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace knowhere {
|
namespace knowhere {
|
||||||
|
|
||||||
|
@ -47,6 +48,9 @@ class GPUIDMAP : public IDMAP, public GPUIndex {
|
||||||
VectorIndexPtr
|
VectorIndexPtr
|
||||||
CopyGpuToGpu(const int64_t& device_id, const Config& config) override;
|
CopyGpuToGpu(const int64_t& device_id, const Config& config) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void
|
void
|
||||||
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override;
|
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override;
|
||||||
|
|
|
@ -121,6 +121,26 @@ IDMAP::Add(const DatasetPtr& dataset, const Config& config) {
|
||||||
index_->add_with_ids(rows, (float*)p_data, p_ids);
|
index_->add_with_ids(rows, (float*)p_data, p_ids);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
IDMAP::AddWithoutId(const DatasetPtr& dataset, const Config& config) {
|
||||||
|
if (!index_) {
|
||||||
|
KNOWHERE_THROW_MSG("index not initialize");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lk(mutex_);
|
||||||
|
GETTENSOR(dataset)
|
||||||
|
|
||||||
|
// TODO: magic here.
|
||||||
|
auto array = dataset->array()[0];
|
||||||
|
|
||||||
|
std::vector<int64_t> new_ids(rows);
|
||||||
|
for (int i = 0; i < rows; ++i) {
|
||||||
|
new_ids[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
index_->add_with_ids(rows, (float*)p_data, new_ids.data());
|
||||||
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
IDMAP::Count() {
|
IDMAP::Count() {
|
||||||
return index_->ntotal;
|
return index_->ntotal;
|
||||||
|
|
|
@ -34,20 +34,31 @@ class IDMAP : public VectorIndex, public FaissBaseIndex {
|
||||||
|
|
||||||
BinarySet
|
BinarySet
|
||||||
Serialize() override;
|
Serialize() override;
|
||||||
|
|
||||||
void
|
void
|
||||||
Load(const BinarySet& index_binary) override;
|
Load(const BinarySet& index_binary) override;
|
||||||
|
|
||||||
void
|
void
|
||||||
Train(const Config& config);
|
Train(const Config& config);
|
||||||
|
|
||||||
DatasetPtr
|
DatasetPtr
|
||||||
Search(const DatasetPtr& dataset, const Config& config) override;
|
Search(const DatasetPtr& dataset, const Config& config) override;
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
Count() override;
|
Count() override;
|
||||||
|
|
||||||
// VectorIndexPtr
|
// VectorIndexPtr
|
||||||
// Clone() override;
|
// Clone() override;
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
Dimension() override;
|
Dimension() override;
|
||||||
|
|
||||||
void
|
void
|
||||||
Add(const DatasetPtr& dataset, const Config& config) override;
|
Add(const DatasetPtr& dataset, const Config& config) override;
|
||||||
|
|
||||||
|
void
|
||||||
|
AddWithoutId(const DatasetPtr& dataset, const Config& config);
|
||||||
|
|
||||||
VectorIndexPtr
|
VectorIndexPtr
|
||||||
CopyCpuToGpu(const int64_t& device_id, const Config& config);
|
CopyCpuToGpu(const int64_t& device_id, const Config& config);
|
||||||
void
|
void
|
||||||
|
@ -55,12 +66,15 @@ class IDMAP : public VectorIndex, public FaissBaseIndex {
|
||||||
|
|
||||||
virtual float*
|
virtual float*
|
||||||
GetRawVectors();
|
GetRawVectors();
|
||||||
|
|
||||||
virtual int64_t*
|
virtual int64_t*
|
||||||
GetRawIds();
|
GetRawIds();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void
|
virtual void
|
||||||
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg);
|
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg);
|
||||||
|
|
||||||
|
protected:
|
||||||
std::mutex mutex_;
|
std::mutex mutex_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -195,35 +195,34 @@ IVF::Dimension() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
IVF::GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config) {
|
IVF::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) {
|
||||||
GETTENSOR(dataset)
|
int64_t K = k + 1;
|
||||||
|
|
||||||
auto ntotal = Count();
|
auto ntotal = Count();
|
||||||
|
|
||||||
auto batch_size = 100;
|
size_t dim = config->d;
|
||||||
|
auto batch_size = 1000;
|
||||||
auto tail_batch_size = ntotal % batch_size;
|
auto tail_batch_size = ntotal % batch_size;
|
||||||
auto batch_search_count = ntotal / batch_size;
|
auto batch_search_count = ntotal / batch_size;
|
||||||
auto total_search_count = tail_batch_size == 0 ? batch_search_count : batch_search_count + 1;
|
auto total_search_count = tail_batch_size == 0 ? batch_search_count : batch_search_count + 1;
|
||||||
|
|
||||||
std::vector<float> res_dis(k * batch_size);
|
std::vector<float> res_dis(K * batch_size);
|
||||||
graph.resize(ntotal);
|
graph.resize(ntotal);
|
||||||
Graph res_vec(total_search_count);
|
Graph res_vec(total_search_count);
|
||||||
for (int i = 0; i < total_search_count; ++i) {
|
for (int i = 0; i < total_search_count; ++i) {
|
||||||
auto b_size = i == total_search_count - 1 && tail_batch_size != 0 ? tail_batch_size : batch_size;
|
auto b_size = (i == (total_search_count - 1)) && tail_batch_size != 0 ? tail_batch_size : batch_size;
|
||||||
|
|
||||||
auto& res = res_vec[i];
|
auto& res = res_vec[i];
|
||||||
res.resize(k * b_size);
|
res.resize(K * b_size);
|
||||||
|
|
||||||
auto xq = p_data + batch_size * dim * i;
|
auto xq = data + batch_size * dim * i;
|
||||||
search_impl(b_size, (float*)xq, k, res_dis.data(), res.data(), config);
|
search_impl(b_size, (float*)xq, K, res_dis.data(), res.data(), config);
|
||||||
|
|
||||||
int tmp = 0;
|
|
||||||
for (int j = 0; j < b_size; ++j) {
|
for (int j = 0; j < b_size; ++j) {
|
||||||
auto& node = graph[batch_size * i + j];
|
auto& node = graph[batch_size * i + j];
|
||||||
node.resize(k);
|
node.resize(k);
|
||||||
for (int m = 0; m < k && tmp < k * b_size; ++m, ++tmp) {
|
auto start_pos = j * K + 1;
|
||||||
// TODO(linxj): avoid memcopy here.
|
for (int m = 0, cursor = start_pos; m < k && cursor < start_pos + k; ++m, ++cursor) {
|
||||||
node[m] = res[tmp];
|
node[m] = res[cursor];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,7 +57,7 @@ class IVF : public VectorIndex, public FaissBaseIndex {
|
||||||
Search(const DatasetPtr& dataset, const Config& config) override;
|
Search(const DatasetPtr& dataset, const Config& config) override;
|
||||||
|
|
||||||
void
|
void
|
||||||
GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config);
|
GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config);
|
||||||
|
|
||||||
BinarySet
|
BinarySet
|
||||||
Serialize() override;
|
Serialize() override;
|
||||||
|
|
|
@ -20,9 +20,12 @@
|
||||||
#include "knowhere/common/Exception.h"
|
#include "knowhere/common/Exception.h"
|
||||||
#include "knowhere/common/Timer.h"
|
#include "knowhere/common/Timer.h"
|
||||||
#ifdef MILVUS_GPU_VERSION
|
#ifdef MILVUS_GPU_VERSION
|
||||||
|
#include "knowhere/index/vector_index/IndexGPUIDMAP.h"
|
||||||
#include "knowhere/index/vector_index/IndexGPUIVF.h"
|
#include "knowhere/index/vector_index/IndexGPUIVF.h"
|
||||||
|
#include "knowhere/index/vector_index/helpers/Cloner.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "knowhere/index/vector_index/IndexIDMAP.h"
|
||||||
#include "knowhere/index/vector_index/IndexIVF.h"
|
#include "knowhere/index/vector_index/IndexIVF.h"
|
||||||
#include "knowhere/index/vector_index/nsg/NSG.h"
|
#include "knowhere/index/vector_index/nsg/NSG.h"
|
||||||
#include "knowhere/index/vector_index/nsg/NSGIO.h"
|
#include "knowhere/index/vector_index/nsg/NSGIO.h"
|
||||||
|
@ -110,33 +113,36 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) {
|
||||||
|
|
||||||
IndexModelPtr
|
IndexModelPtr
|
||||||
NSG::Train(const DatasetPtr& dataset, const Config& config) {
|
NSG::Train(const DatasetPtr& dataset, const Config& config) {
|
||||||
|
config->Dump();
|
||||||
auto build_cfg = std::dynamic_pointer_cast<NSGCfg>(config);
|
auto build_cfg = std::dynamic_pointer_cast<NSGCfg>(config);
|
||||||
if (build_cfg != nullptr) {
|
if (build_cfg != nullptr) {
|
||||||
build_cfg->CheckValid(); // throw exception
|
build_cfg->CheckValid(); // throw exception
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(linxj): dev IndexFactory, support more IndexType
|
auto idmap = std::make_shared<IDMAP>();
|
||||||
|
idmap->Train(config);
|
||||||
|
idmap->AddWithoutId(dataset, config);
|
||||||
Graph knng;
|
Graph knng;
|
||||||
|
float* raw_data = idmap->GetRawVectors();
|
||||||
#ifdef MILVUS_GPU_VERSION
|
#ifdef MILVUS_GPU_VERSION
|
||||||
if (build_cfg->gpu_id == knowhere::INVALID_VALUE) {
|
if (build_cfg->gpu_id == knowhere::INVALID_VALUE) {
|
||||||
auto preprocess_index = std::make_shared<IVF>();
|
auto preprocess_index = std::make_shared<IVF>();
|
||||||
auto model = preprocess_index->Train(dataset, config);
|
auto model = preprocess_index->Train(dataset, config);
|
||||||
preprocess_index->set_index_model(model);
|
preprocess_index->set_index_model(model);
|
||||||
preprocess_index->AddWithoutIds(dataset, config);
|
preprocess_index->Add(dataset, config);
|
||||||
preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config);
|
preprocess_index->GenGraph(raw_data, build_cfg->knng, knng, config);
|
||||||
} else {
|
} else {
|
||||||
auto preprocess_index = std::make_shared<GPUIVF>(build_cfg->gpu_id);
|
// TODO(linxj): use ivf instead?
|
||||||
auto model = preprocess_index->Train(dataset, config);
|
auto gpu_idx = cloner::CopyCpuToGpu(idmap, build_cfg->gpu_id, config);
|
||||||
preprocess_index->set_index_model(model);
|
auto gpu_idmap = std::dynamic_pointer_cast<GPUIDMAP>(gpu_idx);
|
||||||
preprocess_index->AddWithoutIds(dataset, config);
|
gpu_idmap->GenGraph(raw_data, build_cfg->knng, knng, config);
|
||||||
preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config);
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
auto preprocess_index = std::make_shared<IVF>();
|
auto preprocess_index = std::make_shared<IVF>();
|
||||||
auto model = preprocess_index->Train(dataset, config);
|
auto model = preprocess_index->Train(dataset, config);
|
||||||
preprocess_index->set_index_model(model);
|
preprocess_index->set_index_model(model);
|
||||||
preprocess_index->AddWithoutIds(dataset, config);
|
preprocess_index->AddWithoutIds(dataset, config);
|
||||||
preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config);
|
preprocess_index->GenGraph(raw_data, build_cfg->knng, knng, config);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
algo::BuildParams b_params;
|
algo::BuildParams b_params;
|
||||||
|
@ -144,10 +150,10 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) {
|
||||||
b_params.out_degree = build_cfg->out_degree;
|
b_params.out_degree = build_cfg->out_degree;
|
||||||
b_params.search_length = build_cfg->search_length;
|
b_params.search_length = build_cfg->search_length;
|
||||||
|
|
||||||
GETTENSOR(dataset)
|
|
||||||
auto array = dataset->array()[0];
|
auto array = dataset->array()[0];
|
||||||
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
|
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
|
||||||
|
|
||||||
|
GETTENSOR(dataset)
|
||||||
index_ = std::make_shared<algo::NsgIndex>(dim, rows);
|
index_ = std::make_shared<algo::NsgIndex>(dim, rows);
|
||||||
index_->SetKnnGraph(knng);
|
index_->SetKnnGraph(knng);
|
||||||
index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params);
|
index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params);
|
||||||
|
|
|
@ -18,7 +18,6 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stack>
|
#include <stack>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
@ -29,12 +28,13 @@
|
||||||
#include "knowhere/index/vector_index/nsg/NSG.h"
|
#include "knowhere/index/vector_index/nsg/NSG.h"
|
||||||
#include "knowhere/index/vector_index/nsg/NSGHelper.h"
|
#include "knowhere/index/vector_index/nsg/NSGHelper.h"
|
||||||
|
|
||||||
// TODO: enable macro
|
|
||||||
//#include <gperftools/profiler.h>
|
//#include <gperftools/profiler.h>
|
||||||
|
|
||||||
namespace knowhere {
|
namespace knowhere {
|
||||||
namespace algo {
|
namespace algo {
|
||||||
|
|
||||||
|
unsigned int seed = 100;
|
||||||
|
|
||||||
NsgIndex::NsgIndex(const size_t& dimension, const size_t& n, METRICTYPE metric)
|
NsgIndex::NsgIndex(const size_t& dimension, const size_t& n, METRICTYPE metric)
|
||||||
: dimension(dimension), ntotal(n), metric_type(metric) {
|
: dimension(dimension), ntotal(n), metric_type(metric) {
|
||||||
switch (metric) {
|
switch (metric) {
|
||||||
|
@ -55,8 +55,6 @@ NsgIndex::~NsgIndex() {
|
||||||
|
|
||||||
void
|
void
|
||||||
NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) {
|
NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) {
|
||||||
TimeRecorder rc("NSG");
|
|
||||||
|
|
||||||
ntotal = nb;
|
ntotal = nb;
|
||||||
ori_data_ = new float[ntotal * dimension];
|
ori_data_ = new float[ntotal * dimension];
|
||||||
ids_ = new int64_t[ntotal];
|
ids_ = new int64_t[ntotal];
|
||||||
|
@ -67,25 +65,17 @@ NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const
|
||||||
out_degree = parameters.out_degree;
|
out_degree = parameters.out_degree;
|
||||||
candidate_pool_size = parameters.candidate_pool_size;
|
candidate_pool_size = parameters.candidate_pool_size;
|
||||||
|
|
||||||
|
TimeRecorder rc("NSG", 1);
|
||||||
|
|
||||||
InitNavigationPoint();
|
InitNavigationPoint();
|
||||||
rc.RecordSection("init");
|
rc.RecordSection("init");
|
||||||
|
|
||||||
Link();
|
Link();
|
||||||
rc.RecordSection("Link");
|
rc.RecordSection("Link");
|
||||||
|
|
||||||
//>> Debug code
|
|
||||||
/////
|
|
||||||
// int count = 0;
|
|
||||||
// for (int i = 0; i < ntotal; ++i) {
|
|
||||||
// count += nsg[i].size();
|
|
||||||
//}
|
|
||||||
/////
|
|
||||||
|
|
||||||
CheckConnectivity();
|
CheckConnectivity();
|
||||||
rc.RecordSection("Connect");
|
rc.RecordSection("Connect");
|
||||||
|
|
||||||
//>> Debug code
|
|
||||||
///
|
|
||||||
int total_degree = 0;
|
int total_degree = 0;
|
||||||
for (size_t i = 0; i < ntotal; ++i) {
|
for (size_t i = 0; i < ntotal; ++i) {
|
||||||
total_degree += nsg[i].size();
|
total_degree += nsg[i].size();
|
||||||
|
@ -93,9 +83,17 @@ NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const
|
||||||
|
|
||||||
KNOWHERE_LOG_DEBUG << "Graph physical size: " << total_degree * sizeof(node_t) / 1024 / 1024 << "m";
|
KNOWHERE_LOG_DEBUG << "Graph physical size: " << total_degree * sizeof(node_t) / 1024 / 1024 << "m";
|
||||||
KNOWHERE_LOG_DEBUG << "Average degree: " << total_degree / ntotal;
|
KNOWHERE_LOG_DEBUG << "Average degree: " << total_degree / ntotal;
|
||||||
/////
|
|
||||||
|
|
||||||
is_trained = true;
|
is_trained = true;
|
||||||
|
|
||||||
|
// Debug code
|
||||||
|
// for (size_t i = 0; i < ntotal; i++) {
|
||||||
|
// auto& x = nsg[i];
|
||||||
|
// for (size_t j = 0; j < x.size(); j++) {
|
||||||
|
// std::cout << "id: " << x[j] << std::endl;
|
||||||
|
// }
|
||||||
|
// std::cout << std::endl;
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -114,28 +112,22 @@ NsgIndex::InitNavigationPoint() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// select navigation point
|
// select navigation point
|
||||||
std::vector<Neighbor> resset, fullset;
|
std::vector<Neighbor> resset;
|
||||||
unsigned int seed = 100;
|
|
||||||
navigation_point = rand_r(&seed) % ntotal; // random initialize navigating point
|
navigation_point = rand_r(&seed) % ntotal; // random initialize navigating point
|
||||||
|
|
||||||
//>> Debug code
|
|
||||||
/////
|
|
||||||
// navigation_point = drand48();
|
|
||||||
/////
|
|
||||||
|
|
||||||
GetNeighbors(center, resset, knng);
|
GetNeighbors(center, resset, knng);
|
||||||
navigation_point = resset[0].id;
|
navigation_point = resset[0].id;
|
||||||
|
|
||||||
//>> Debug code
|
// Debug code
|
||||||
/////
|
|
||||||
// std::cout << "ep: " << navigation_point << std::endl;
|
// std::cout << "ep: " << navigation_point << std::endl;
|
||||||
/////
|
// for (int k = 0; k < resset.size(); ++k) {
|
||||||
|
// std::cout << "id: " << resset[k].id << ", dis: " << resset[k].distance << std::endl;
|
||||||
//>> Debug code
|
// }
|
||||||
/////
|
// std::cout << std::endl;
|
||||||
|
//
|
||||||
|
// std::cout << "ep: " << navigation_point << std::endl;
|
||||||
|
//
|
||||||
// float r1 = distance_->Compare(center, ori_data_ + navigation_point * dimension, dimension);
|
// float r1 = distance_->Compare(center, ori_data_ + navigation_point * dimension, dimension);
|
||||||
// assert(r1 == resset[0].distance);
|
// assert(r1 == resset[0].distance);
|
||||||
/////
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Specify Link
|
// Specify Link
|
||||||
|
@ -149,7 +141,9 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
||||||
// TODO: throw exception here.
|
// TODO: throw exception here.
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<node_t> init_ids;
|
resset.resize(search_length);
|
||||||
|
std::vector<node_t> init_ids(buffer_size);
|
||||||
|
// std::vector<node_t> init_ids;
|
||||||
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -158,25 +152,26 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
|
|
||||||
// Get all neighbors
|
// Get all neighbors
|
||||||
for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) {
|
||||||
init_ids.push_back(graph[navigation_point][i]);
|
// for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||||
|
// init_ids.push_back(graph[navigation_point][i]);
|
||||||
|
init_ids[i] = graph[navigation_point][i];
|
||||||
has_calculated_dist[init_ids[i]] = true;
|
has_calculated_dist[init_ids[i]] = true;
|
||||||
++count;
|
++count;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int seed = 100;
|
|
||||||
while (count < buffer_size) {
|
while (count < buffer_size) {
|
||||||
node_t id = rand_r(&seed) % ntotal;
|
node_t id = rand_r(&seed) % ntotal;
|
||||||
if (has_calculated_dist[id])
|
if (has_calculated_dist[id])
|
||||||
continue; // duplicate id
|
continue; // duplicate id
|
||||||
init_ids.push_back(id);
|
// init_ids.push_back(id);
|
||||||
|
init_ids[count] = id;
|
||||||
++count;
|
++count;
|
||||||
has_calculated_dist[id] = true;
|
has_calculated_dist[id] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
resset.resize(init_ids.size());
|
// resset.resize(init_ids.size());
|
||||||
|
|
||||||
// init resset and sort by distance
|
// init resset and sort by distance
|
||||||
for (size_t i = 0; i < init_ids.size(); ++i) {
|
for (size_t i = 0; i < init_ids.size(); ++i) {
|
||||||
|
@ -190,7 +185,7 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
||||||
float dist = distance_->Compare(ori_data_ + dimension * id, query, dimension);
|
float dist = distance_->Compare(ori_data_ + dimension * id, query, dimension);
|
||||||
resset[i] = Neighbor(id, dist, false);
|
resset[i] = Neighbor(id, dist, false);
|
||||||
|
|
||||||
///////////// difference from other GetNeighbors ///////////////
|
//// difference from other GetNeighbors
|
||||||
fullset.push_back(resset[i]);
|
fullset.push_back(resset[i]);
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
}
|
}
|
||||||
|
@ -247,8 +242,10 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
||||||
// TODO: throw exception here.
|
// TODO: throw exception here.
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<node_t> init_ids;
|
// std::vector<node_t> init_ids;
|
||||||
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; // TODO: ?
|
std::vector<node_t> init_ids(buffer_size);
|
||||||
|
resset.resize(buffer_size);
|
||||||
|
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0};
|
||||||
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -257,24 +254,26 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
|
|
||||||
// Get all neighbors
|
// Get all neighbors
|
||||||
for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) {
|
||||||
init_ids.push_back(graph[navigation_point][i]);
|
// for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||||
|
// init_ids.push_back(graph[navigation_point][i]);
|
||||||
|
init_ids[i] = graph[navigation_point][i];
|
||||||
has_calculated_dist[init_ids[i]] = true;
|
has_calculated_dist[init_ids[i]] = true;
|
||||||
++count;
|
++count;
|
||||||
}
|
}
|
||||||
unsigned int seed = 100;
|
|
||||||
while (count < buffer_size) {
|
while (count < buffer_size) {
|
||||||
node_t id = rand_r(&seed) % ntotal;
|
node_t id = rand_r(&seed) % ntotal;
|
||||||
if (has_calculated_dist[id])
|
if (has_calculated_dist[id])
|
||||||
continue; // duplicate id
|
continue; // duplicate id
|
||||||
init_ids.push_back(id);
|
// init_ids.push_back(id);
|
||||||
|
init_ids[count] = id;
|
||||||
++count;
|
++count;
|
||||||
has_calculated_dist[id] = true;
|
has_calculated_dist[id] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
resset.resize(init_ids.size());
|
// resset.resize(init_ids.size());
|
||||||
|
|
||||||
// init resset and sort by distance
|
// init resset and sort by distance
|
||||||
for (size_t i = 0; i < init_ids.size(); ++i) {
|
for (size_t i = 0; i < init_ids.size(); ++i) {
|
||||||
|
@ -333,13 +332,15 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::v
|
||||||
|
|
||||||
void
|
void
|
||||||
NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph& graph, SearchParams* params) {
|
NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph& graph, SearchParams* params) {
|
||||||
size_t& buffer_size = params ? params->search_length : search_length;
|
size_t buffer_size = params ? params->search_length : search_length;
|
||||||
|
|
||||||
if (buffer_size > ntotal) {
|
if (buffer_size > ntotal) {
|
||||||
// TODO: throw exception here.
|
// TODO: throw exception here.
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<node_t> init_ids;
|
// std::vector<node_t> init_ids;
|
||||||
|
std::vector<node_t> init_ids(buffer_size);
|
||||||
|
resset.resize(buffer_size);
|
||||||
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0};
|
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0};
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -349,33 +350,33 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
|
|
||||||
// Get all neighbors
|
// Get all neighbors
|
||||||
for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) {
|
||||||
init_ids.push_back(graph[navigation_point][i]);
|
// for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||||
|
// init_ids.push_back(graph[navigation_point][i]);
|
||||||
|
init_ids[i] = graph[navigation_point][i];
|
||||||
has_calculated_dist[init_ids[i]] = true;
|
has_calculated_dist[init_ids[i]] = true;
|
||||||
++count;
|
++count;
|
||||||
}
|
}
|
||||||
unsigned int seed = 100;
|
|
||||||
while (count < buffer_size) {
|
while (count < buffer_size) {
|
||||||
node_t id = rand_r(&seed) % ntotal;
|
node_t id = rand_r(&seed) % ntotal;
|
||||||
if (has_calculated_dist[id])
|
if (has_calculated_dist[id])
|
||||||
continue; // duplicate id
|
continue; // duplicate id
|
||||||
init_ids.push_back(id);
|
// init_ids.push_back(id);
|
||||||
|
init_ids[count] = id;
|
||||||
++count;
|
++count;
|
||||||
has_calculated_dist[id] = true;
|
has_calculated_dist[id] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
resset.resize(init_ids.size());
|
// resset.resize(init_ids.size());
|
||||||
|
|
||||||
// init resset and sort by distance
|
// init resset and sort by distance
|
||||||
for (size_t i = 0; i < init_ids.size(); ++i) {
|
for (size_t i = 0; i < init_ids.size(); ++i) {
|
||||||
node_t id = init_ids[i];
|
node_t id = init_ids[i];
|
||||||
|
|
||||||
// assert(id < ntotal);
|
|
||||||
if (id >= static_cast<node_t>(ntotal)) {
|
if (id >= static_cast<node_t>(ntotal)) {
|
||||||
KNOWHERE_THROW_MSG("Build Index Error, id > ntotal");
|
KNOWHERE_THROW_MSG("Build Index Error, id > ntotal");
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
float dist = distance_->Compare(ori_data_ + id * dimension, query, dimension);
|
float dist = distance_->Compare(ori_data_ + id * dimension, query, dimension);
|
||||||
|
@ -383,13 +384,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
||||||
}
|
}
|
||||||
std::sort(resset.begin(), resset.end()); // sort by distance
|
std::sort(resset.begin(), resset.end()); // sort by distance
|
||||||
|
|
||||||
//>> Debug code
|
|
||||||
/////
|
|
||||||
// for (int j = 0; j < buffer_size; ++j) {
|
|
||||||
// std::cout << "resset_id: " << resset[j].id << ", resset_dist: " << resset[j].distance << std::endl;
|
|
||||||
//}
|
|
||||||
/////
|
|
||||||
|
|
||||||
// search nearest neighbor
|
// search nearest neighbor
|
||||||
size_t cursor = 0;
|
size_t cursor = 0;
|
||||||
while (cursor < buffer_size) {
|
while (cursor < buffer_size) {
|
||||||
|
@ -410,7 +404,8 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
||||||
|
|
||||||
if (dist >= resset[buffer_size - 1].distance)
|
if (dist >= resset[buffer_size - 1].distance)
|
||||||
continue;
|
continue;
|
||||||
///////////// difference from other GetNeighbors ///////////////
|
|
||||||
|
//// difference from other GetNeighbors
|
||||||
Neighbor nn(id, dist, false);
|
Neighbor nn(id, dist, false);
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
|
|
||||||
|
@ -440,59 +435,50 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
||||||
|
|
||||||
void
|
void
|
||||||
NsgIndex::Link() {
|
NsgIndex::Link() {
|
||||||
auto cut_graph_dist = new float[ntotal * out_degree];
|
float* cut_graph_dist = new float[ntotal * out_degree];
|
||||||
nsg.resize(ntotal);
|
nsg.resize(ntotal);
|
||||||
|
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
std::vector<Neighbor> fullset;
|
std::vector<Neighbor> fullset;
|
||||||
std::vector<Neighbor> temp;
|
std::vector<Neighbor> temp;
|
||||||
boost::dynamic_bitset<> flags{ntotal, 0}; // TODO: ?
|
boost::dynamic_bitset<> flags{ntotal, 0};
|
||||||
#pragma omp for schedule(dynamic, 100)
|
#pragma omp for schedule(dynamic, 100)
|
||||||
for (size_t n = 0; n < ntotal; ++n) {
|
for (size_t n = 0; n < ntotal; ++n) {
|
||||||
fullset.clear();
|
fullset.clear();
|
||||||
|
temp.clear();
|
||||||
flags.reset();
|
flags.reset();
|
||||||
GetNeighbors(ori_data_ + dimension * n, temp, fullset, flags);
|
GetNeighbors(ori_data_ + dimension * n, temp, fullset, flags);
|
||||||
|
|
||||||
//>> Debug code
|
|
||||||
/////
|
|
||||||
// float r1 = distance_->Compare(ori_data_ + n * dimension, ori_data_ + temp[0].id * dimension, dimension);
|
|
||||||
// assert(r1 == temp[0].distance);
|
|
||||||
/////
|
|
||||||
SyncPrune(n, fullset, flags, cut_graph_dist);
|
SyncPrune(n, fullset, flags, cut_graph_dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Debug code
|
||||||
|
// std::cout << "ep: " << 0 << std::endl;
|
||||||
|
// for (int k = 0; k < fullset.size(); ++k) {
|
||||||
|
// std::cout << "id: " << fullset[k].id << ", dis: " << fullset[k].distance << std::endl;
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
//>> Debug code
|
// Debug code
|
||||||
/////
|
// for (size_t i = 0; i < ntotal; i++)
|
||||||
// auto bak_nsg = nsg;
|
// {
|
||||||
/////
|
// auto& x = nsg[i];
|
||||||
|
// for (size_t j=0; j < x.size(); j++)
|
||||||
|
// {
|
||||||
|
// std::cout << "id: " << x[j] << std::endl;
|
||||||
|
// }
|
||||||
|
// std::cout << std::endl;
|
||||||
|
// }
|
||||||
|
|
||||||
knng.clear();
|
knng.clear();
|
||||||
knng.shrink_to_fit();
|
|
||||||
|
|
||||||
std::vector<std::mutex> mutex_vec(ntotal);
|
std::vector<std::mutex> mutex_vec(ntotal);
|
||||||
|
|
||||||
#pragma omp for schedule(dynamic, 100)
|
#pragma omp for schedule(dynamic, 100)
|
||||||
for (unsigned n = 0; n < ntotal; ++n) {
|
for (unsigned n = 0; n < ntotal; ++n) {
|
||||||
InterInsert(n, mutex_vec, cut_graph_dist);
|
InterInsert(n, mutex_vec, cut_graph_dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
delete[] cut_graph_dist;
|
delete[] cut_graph_dist;
|
||||||
|
|
||||||
//>> Debug code
|
|
||||||
/////
|
|
||||||
// int count = 0;
|
|
||||||
// for (int i = 0; i < ntotal; ++i) {
|
|
||||||
// if (bak_nsg[i].size() != nsg[i].size()) {
|
|
||||||
// //count += nsg[i].size() - bak_nsg[i].size();
|
|
||||||
// count += nsg[i].size();
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
/////
|
|
||||||
|
|
||||||
for (size_t i = 0; i < ntotal; ++i) {
|
|
||||||
nsg[i].shrink_to_fit();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -655,8 +641,8 @@ NsgIndex::DFS(size_t root, boost::dynamic_bitset<>& has_linked, int64_t& linked_
|
||||||
s.push(root);
|
s.push(root);
|
||||||
if (!has_linked[root]) {
|
if (!has_linked[root]) {
|
||||||
linked_count++; // not link
|
linked_count++; // not link
|
||||||
has_linked[root] = true; // link start...
|
|
||||||
}
|
}
|
||||||
|
has_linked[root] = true; // link start...
|
||||||
|
|
||||||
while (!s.empty()) {
|
while (!s.empty()) {
|
||||||
size_t next = ntotal + 1;
|
size_t next = ntotal + 1;
|
||||||
|
@ -709,7 +695,6 @@ NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<>& has_linked, int64_t& root
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (found == 0) {
|
if (found == 0) {
|
||||||
unsigned int seed = 100;
|
|
||||||
while (true) { // random a linked-node and add unlinked-node as its neighbor
|
while (true) { // random a linked-node and add unlinked-node as its neighbor
|
||||||
size_t rid = rand_r(&seed) % ntotal;
|
size_t rid = rand_r(&seed) % ntotal;
|
||||||
if (has_linked[rid]) {
|
if (has_linked[rid]) {
|
||||||
|
@ -726,7 +711,10 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
|
||||||
int64_t* ids, SearchParams& params) {
|
int64_t* ids, SearchParams& params) {
|
||||||
std::vector<std::vector<Neighbor>> resset(nq);
|
std::vector<std::vector<Neighbor>> resset(nq);
|
||||||
|
|
||||||
|
if (k >= 45) {
|
||||||
params.search_length = k;
|
params.search_length = k;
|
||||||
|
}
|
||||||
|
|
||||||
TimeRecorder rc("NsgIndex::search", 1);
|
TimeRecorder rc("NsgIndex::search", 1);
|
||||||
// TODO(linxj): when to use openmp
|
// TODO(linxj): when to use openmp
|
||||||
if (nq <= 4) {
|
if (nq <= 4) {
|
||||||
|
@ -734,7 +722,7 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
|
||||||
} else {
|
} else {
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (unsigned int i = 0; i < nq; ++i) {
|
for (unsigned int i = 0; i < nq; ++i) {
|
||||||
auto single_query = query + i * dim;
|
const float* single_query = query + i * dim;
|
||||||
GetNeighbors(single_query, resset[i], nsg, ¶ms);
|
GetNeighbors(single_query, resset[i], nsg, ¶ms);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -759,13 +747,6 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
|
||||||
}
|
}
|
||||||
rc.RecordSection("merge");
|
rc.RecordSection("merge");
|
||||||
|
|
||||||
//>> Debug: test single insert
|
|
||||||
// int x_0 = resset[0].size();
|
|
||||||
// for (int l = 0; l < resset[0].size(); ++l) {
|
|
||||||
// resset[0].pop_back();
|
|
||||||
//}
|
|
||||||
// resset.clear();
|
|
||||||
|
|
||||||
// ProfilerStart("xx.prof");
|
// ProfilerStart("xx.prof");
|
||||||
// std::vector<Neighbor> resset;
|
// std::vector<Neighbor> resset;
|
||||||
// GetNeighbors(query, resset, nsg, ¶ms);
|
// GetNeighbors(query, resset, nsg, ¶ms);
|
||||||
|
@ -781,30 +762,5 @@ NsgIndex::SetKnnGraph(Graph& g) {
|
||||||
knng = std::move(g);
|
knng = std::move(g);
|
||||||
}
|
}
|
||||||
|
|
||||||
// void NsgIndex::GetKnnGraphFromFile() {
|
|
||||||
// //std::string filename = "sift.1M.50NN.graph";
|
|
||||||
// std::string filename = "sift.50NN.graph";
|
|
||||||
//
|
|
||||||
// std::ifstream in(filename, std::ios::binary);
|
|
||||||
// unsigned k;
|
|
||||||
// in.read((char *) &k, sizeof(unsigned));
|
|
||||||
// in.seekg(0, std::ios::end);
|
|
||||||
// std::ios::pos_type ss = in.tellg();
|
|
||||||
// size_t fsize = (size_t) ss;
|
|
||||||
// size_t num = (unsigned) (fsize / (k + 1) / 4);
|
|
||||||
// in.seekg(0, std::ios::beg);
|
|
||||||
//
|
|
||||||
// knng.resize(num);
|
|
||||||
// knng.reserve(num);
|
|
||||||
// unsigned kk = (k + 3) / 4 * 4;
|
|
||||||
// for (size_t i = 0; i < num; i++) {
|
|
||||||
// in.seekg(4, std::ios::cur);
|
|
||||||
// knng[i].resize(k);
|
|
||||||
// knng[i].reserve(kk);
|
|
||||||
// in.read((char *) knng[i].data(), k * sizeof(unsigned));
|
|
||||||
// }
|
|
||||||
// in.close();
|
|
||||||
//}
|
|
||||||
|
|
||||||
} // namespace algo
|
} // namespace algo
|
||||||
} // namespace knowhere
|
} // namespace knowhere
|
||||||
|
|
|
@ -52,7 +52,7 @@ class NsgIndex {
|
||||||
Distance* distance_;
|
Distance* distance_;
|
||||||
|
|
||||||
float* ori_data_;
|
float* ori_data_;
|
||||||
int64_t* ids_; // TODO: support different type
|
int64_t* ids_;
|
||||||
Graph nsg; // final graph
|
Graph nsg; // final graph
|
||||||
Graph knng; // reset after build
|
Graph knng; // reset after build
|
||||||
|
|
||||||
|
@ -134,9 +134,6 @@ class NsgIndex {
|
||||||
|
|
||||||
void
|
void
|
||||||
FindUnconnectedNode(boost::dynamic_bitset<>& flags, int64_t& root);
|
FindUnconnectedNode(boost::dynamic_bitset<>& flags, int64_t& root);
|
||||||
|
|
||||||
// private:
|
|
||||||
// void GetKnnGraphFromFile();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace algo
|
} // namespace algo
|
||||||
|
|
|
@ -22,6 +22,8 @@
|
||||||
#include "knowhere/index/vector_index/FaissBaseIndex.h"
|
#include "knowhere/index/vector_index/FaissBaseIndex.h"
|
||||||
#include "knowhere/index/vector_index/IndexNSG.h"
|
#include "knowhere/index/vector_index/IndexNSG.h"
|
||||||
#ifdef MILVUS_GPU_VERSION
|
#ifdef MILVUS_GPU_VERSION
|
||||||
|
#include "knowhere/index/vector_index/IndexGPUIDMAP.h"
|
||||||
|
#include "knowhere/index/vector_index/helpers/Cloner.h"
|
||||||
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
|
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -50,6 +52,7 @@ class NSGInterfaceTest : public DataGen, public ::testing::Test {
|
||||||
|
|
||||||
auto tmp_conf = std::make_shared<knowhere::NSGCfg>();
|
auto tmp_conf = std::make_shared<knowhere::NSGCfg>();
|
||||||
tmp_conf->gpu_id = DEVICEID;
|
tmp_conf->gpu_id = DEVICEID;
|
||||||
|
tmp_conf->d = 256;
|
||||||
tmp_conf->knng = 20;
|
tmp_conf->knng = 20;
|
||||||
tmp_conf->nprobe = 8;
|
tmp_conf->nprobe = 8;
|
||||||
tmp_conf->nlist = 163;
|
tmp_conf->nlist = 163;
|
||||||
|
@ -116,3 +119,174 @@ TEST_F(NSGInterfaceTest, comparetest) {
|
||||||
}
|
}
|
||||||
tc.RecordSection("IP");
|
tc.RecordSection("IP");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//#include <src/index/knowhere/knowhere/index/vector_index/nsg/OriNSG.h>
|
||||||
|
// TEST(test, ori_nsg) {
|
||||||
|
// // float* p_data = nullptr;
|
||||||
|
// size_t rows, dim;
|
||||||
|
// char* filename = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_base.fvecs";
|
||||||
|
// // loads_data(filename, p_data, rows, dim);
|
||||||
|
// float* p_data = fvecs_read(filename, &dim, &rows);
|
||||||
|
//
|
||||||
|
// std::string knng_filename =
|
||||||
|
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.1M.50NN.graph";
|
||||||
|
// std::vector<std::vector<int64_t>> knng;
|
||||||
|
// Load_nns_graph(knng, knng_filename.c_str());
|
||||||
|
//
|
||||||
|
// // float* search_data = nullptr;
|
||||||
|
// size_t nq, search_dim;
|
||||||
|
// char* searchfile = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_query.fvecs";
|
||||||
|
// // loads_data(searchfile, search_data, nq, search_dim);
|
||||||
|
// float* search_data = fvecs_read(searchfile, &search_dim, &nq);
|
||||||
|
// assert(search_dim == dim);
|
||||||
|
//
|
||||||
|
// size_t k, nq2;
|
||||||
|
// char* gtfile = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_groundtruth.ivecs";
|
||||||
|
// int* gt_int = ivecs_read(gtfile, &k, &nq2);
|
||||||
|
// int64_t* gt = new int64_t[k * nq2];
|
||||||
|
// for (int i = 0; i < k * nq2; i++) {
|
||||||
|
// gt[i] = gt_int[i];
|
||||||
|
// }
|
||||||
|
// delete[] gt_int;
|
||||||
|
//
|
||||||
|
// std::vector<int64_t> store_ids(rows);
|
||||||
|
// for (int i = 0; i < rows; ++i) {
|
||||||
|
// store_ids[i] = i;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// int64_t* I = new int64_t[nq * k];
|
||||||
|
// float* D = new float[nq * k];
|
||||||
|
//#if 0
|
||||||
|
// efanna2e::Parameters params;
|
||||||
|
// params.Set<int64_t>("L", 50);
|
||||||
|
// params.Set<int64_t>("R", 55);
|
||||||
|
// params.Set<int64_t>("C", 300);
|
||||||
|
// auto orinsg = std::make_shared<efanna2e::IndexNSG>(dim, rows, efanna2e::Metric::L2, nullptr);
|
||||||
|
// orinsg->Load_nn_graph(knng);
|
||||||
|
// orinsg->Build(rows, (float*)p_data, params);
|
||||||
|
//
|
||||||
|
// efanna2e::Parameters paras;
|
||||||
|
// paras.Set<unsigned>("L_search", 45);
|
||||||
|
// paras.Set<unsigned>("P_search",100);
|
||||||
|
// k = 10;
|
||||||
|
// std::vector<std::vector<int64_t> > res;
|
||||||
|
// for (unsigned i = 0; i < nq; i++) {
|
||||||
|
// std::vector<int64_t> tmp(k);
|
||||||
|
// orinsg->Search(search_data + i * dim, p_data, k, paras, tmp.data());
|
||||||
|
// res.push_back(tmp);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//#else
|
||||||
|
// knowhere::algo::BuildParams params;
|
||||||
|
// params.search_length = 50;
|
||||||
|
// params.out_degree = 55;
|
||||||
|
// params.candidate_pool_size = 300;
|
||||||
|
// auto nsg = std::make_shared<knowhere::algo::NsgIndex>(dim, rows);
|
||||||
|
//#if 1
|
||||||
|
// knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
|
||||||
|
// auto dataset = generate_dataset(int64_t(rows), int64_t(dim), p_data, store_ids.data());
|
||||||
|
// auto config = std::make_shared<knowhere::IVFCfg>();
|
||||||
|
// config->d = dim;
|
||||||
|
// config->gpu_id = 0;
|
||||||
|
// config->metric_type = knowhere::METRICTYPE::L2;
|
||||||
|
// auto preprocess_index = std::make_shared<knowhere::IDMAP>();
|
||||||
|
// preprocess_index->Train(config);
|
||||||
|
// preprocess_index->AddWithoutId(dataset, config);
|
||||||
|
// auto xx = knowhere::cloner::CopyCpuToGpu(preprocess_index, 0, config);
|
||||||
|
// auto ss = std::dynamic_pointer_cast<knowhere::GPUIDMAP>(xx);
|
||||||
|
//
|
||||||
|
// std::vector<std::vector<int64_t>> kng;
|
||||||
|
// ss->GenGraph(p_data, 50, kng, config);
|
||||||
|
// nsg->SetKnnGraph(kng);
|
||||||
|
// knowhere::FaissGpuResourceMgr::GetInstance().Free();
|
||||||
|
//#else
|
||||||
|
// nsg->SetKnnGraph(knng);
|
||||||
|
//#endif
|
||||||
|
// nsg->Build_with_ids(rows, (float*)p_data, store_ids.data(), params);
|
||||||
|
// knowhere::algo::SearchParams s_params;
|
||||||
|
// s_params.search_length = 45;
|
||||||
|
// nsg->Search(search_data, nq, dim, k, D, I, s_params);
|
||||||
|
//#endif
|
||||||
|
//
|
||||||
|
// int n_1 = 0, n_10 = 0, n_100 = 0;
|
||||||
|
// for (int i = 0; i < nq; i++) {
|
||||||
|
// int gt_nn = gt[i * k];
|
||||||
|
// for (int j = 0; j < k; j++) {
|
||||||
|
// if (I[i * k + j] == gt_nn) {
|
||||||
|
// if (j < 1)
|
||||||
|
// n_1++;
|
||||||
|
// if (j < 10)
|
||||||
|
// n_10++;
|
||||||
|
// if (j < 100)
|
||||||
|
// n_100++;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// printf("R@1 = %.4f\n", n_1 / float(nq));
|
||||||
|
// printf("R@10 = %.4f\n", n_10 / float(nq));
|
||||||
|
// printf("R@100 = %.4f\n", n_100 / float(nq));
|
||||||
|
//}
|
||||||
|
//
|
||||||
|
// TEST(testxx, test_idmap){
|
||||||
|
// int k = 50;
|
||||||
|
// std::string knng_filename =
|
||||||
|
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.50NN.graph";
|
||||||
|
// std::vector<std::vector<int64_t>> gt_knng;
|
||||||
|
// Load_nns_graph(gt_knng, knng_filename.c_str());
|
||||||
|
//
|
||||||
|
// size_t rows, dim;
|
||||||
|
// char* filename =
|
||||||
|
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/siftsmall/siftsmall_base.fvecs";
|
||||||
|
// float* p_data = fvecs_read(filename, &dim, &rows);
|
||||||
|
//
|
||||||
|
// std::vector<int64_t> store_ids(rows);
|
||||||
|
// for (int i = 0; i < rows; ++i) {
|
||||||
|
// store_ids[i] = i;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
|
||||||
|
// auto dataset = generate_dataset(int64_t(rows), int64_t(dim), p_data, store_ids.data());
|
||||||
|
// auto config = std::make_shared<knowhere::IVFCfg>();
|
||||||
|
// config->d = dim;
|
||||||
|
// config->gpu_id = 0;
|
||||||
|
// config->metric_type = knowhere::METRICTYPE::L2;
|
||||||
|
// auto preprocess_index = std::make_shared<knowhere::IDMAP>();
|
||||||
|
// preprocess_index->Train(config);
|
||||||
|
// preprocess_index->AddWithoutId(dataset, config);
|
||||||
|
// auto xx = knowhere::cloner::CopyCpuToGpu(preprocess_index, 0, config);
|
||||||
|
// auto ss = std::dynamic_pointer_cast<knowhere::GPUIDMAP>(xx);
|
||||||
|
// std::vector<std::vector<int64_t>> idmap_knng;
|
||||||
|
// ss->GenGraph(p_data, k, idmap_knng,config);
|
||||||
|
// knowhere::FaissGpuResourceMgr::GetInstance().Free();
|
||||||
|
//
|
||||||
|
// int n_1 = 0, n_10 = 0, n_100 = 0;
|
||||||
|
// for (int i = 0; i < rows; i++) {
|
||||||
|
// int gt_nn = gt_knng[i][0];
|
||||||
|
// int l_n_1 = 0;
|
||||||
|
// int l_n_10 = 0;
|
||||||
|
// int l_n_100 = 0;
|
||||||
|
// for (int j = 0; j < k; j++) {
|
||||||
|
// if (idmap_knng[i][j] == gt_nn) {
|
||||||
|
// if (j < 1){
|
||||||
|
// n_1++;
|
||||||
|
// l_n_1++;
|
||||||
|
// }
|
||||||
|
// if (j < 10){
|
||||||
|
// n_10++;
|
||||||
|
// l_n_10++;
|
||||||
|
// }
|
||||||
|
// if (j < 100){
|
||||||
|
// n_100++;
|
||||||
|
// l_n_100++;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
// if ((j == k-1) && (l_n_100 == 0)){
|
||||||
|
// std::cout << "error id: " << i << std::endl;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// printf("R@1 = %.4f\n", n_1 / float(rows));
|
||||||
|
// printf("R@10 = %.4f\n", n_10 / float(rows));
|
||||||
|
// printf("R@100 = %.4f\n", n_100 / float(rows));
|
||||||
|
//}
|
||||||
|
|
|
@ -178,3 +178,72 @@ PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
|
||||||
std::cout << "id\n" << ss_id.str() << std::endl;
|
std::cout << "id\n" << ss_id.str() << std::endl;
|
||||||
std::cout << "dist\n" << ss_dist.str() << std::endl;
|
std::cout << "dist\n" << ss_dist.str() << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Load a k-NN graph stored in the efanna "nn graph" binary layout.
//
// The file is a sequence of fixed-size records. Each record is one unsigned
// header word followed by k unsigned neighbor ids, where k is taken from the
// first header word of the file. The number of records is derived from the
// file size.
//
// @param final_graph  Output. Replaced by one row of neighbor ids (widened to
//                     int64_t) per record. Left empty if the file cannot be
//                     opened or its header cannot be read.
// @param filename     Path of the graph file.
void
Load_nns_graph(std::vector<std::vector<int64_t>>& final_graph, const char* filename) {
    final_graph.clear();

    std::ifstream in(filename, std::ios::binary);
    if (!in) {
        std::cerr << "Load_nns_graph: could not open " << filename << std::endl;
        return;
    }

    unsigned k = 0;
    in.read(reinterpret_cast<char*>(&k), sizeof(k));
    if (!in) {
        std::cerr << "Load_nns_graph: could not read header of " << filename << std::endl;
        return;
    }

    in.seekg(0, std::ios::end);
    const std::streamoff file_bytes = in.tellg();
    if (file_bytes < 0) {
        std::cerr << "Load_nns_graph: could not determine size of " << filename << std::endl;
        return;
    }
    in.seekg(0, std::ios::beg);

    // Compute the record size in size_t so that k + 1 cannot wrap around.
    const size_t record_bytes = (static_cast<size_t>(k) + 1) * sizeof(unsigned);
    const size_t num = static_cast<size_t>(file_bytes) / record_bytes;

    final_graph.resize(num);
    std::vector<unsigned> row(k);  // reused read buffer; ids are widened on copy
    for (size_t i = 0; i < num; ++i) {
        // Skip the per-record header word, then read the k neighbor ids.
        in.seekg(static_cast<std::streamoff>(sizeof(unsigned)), std::ios::cur);
        in.read(reinterpret_cast<char*>(row.data()), static_cast<std::streamsize>(k * sizeof(unsigned)));
        if (!in) {
            // Keep only the rows that were read completely.
            std::cerr << "Load_nns_graph: short read in " << filename << " at record " << i << std::endl;
            final_graph.resize(i);
            break;
        }
        final_graph[i].assign(row.begin(), row.end());
    }
}
|
||||||
|
|
||||||
|
// Read a whole .fvecs file into one contiguous float array.
//
// Each row of the file is a 4-byte int dimension header followed by d floats.
// The dimension of the first row is used for every row.
//
// @param fname  Path of the .fvecs file.
// @param d_out  Output: vector dimension d.
// @param n_out  Output: number of vectors n.
// @return       Buffer allocated with new[] whose first n * d floats are the
//               vectors, row-major. The caller releases it with delete[].
//
// Any I/O or format problem is fatal: a message is printed to stderr and the
// process aborts, in every build mode (the checks do not depend on assert).
float*
fvecs_read(const char* fname, size_t* d_out, size_t* n_out) {
    // Print "<file>: <reason>" and abort; shared by all validation failures.
    const auto die = [fname](const char* what) {
        fprintf(stderr, "%s: %s\n", fname, what);
        abort();
    };

    // Binary mode: the payload is raw floats, so no newline translation.
    FILE* f = fopen(fname, "rb");
    if (!f) {
        fprintf(stderr, "could not open %s\n", fname);
        perror("");
        abort();
    }

    int d = 0;
    if (fread(&d, sizeof(d), 1, f) != 1) {
        die("could not read dimension header");
    }
    if (d <= 0 || d >= 1000000) {
        die("unreasonable dimension");
    }
    fseek(f, 0, SEEK_SET);

    struct stat st;
    if (fstat(fileno(f), &st) != 0) {
        die("could not stat file");
    }
    const size_t sz = static_cast<size_t>(st.st_size);
    const size_t dim = static_cast<size_t>(d);
    const size_t row_bytes = (dim + 1) * sizeof(float);  // header word + d floats
    if (sz % row_bytes != 0) {
        die("weird file size");
    }
    const size_t n = sz / row_bytes;

    *d_out = dim;
    *n_out = n;

    const size_t total = n * (dim + 1);
    float* x = new float[total];
    if (fread(x, sizeof(float), total, f) != total) {
        die("could not read whole file");
    }

    // shift array to remove row headers
    for (size_t i = 0; i < n; i++) {
        memmove(x + i * dim, x + 1 + i * (dim + 1), dim * sizeof(*x));
    }

    fclose(f);
    return x;
}
|
||||||
|
|
||||||
|
int* // not very clean, but works as long as sizeof(int) == sizeof(float)
|
||||||
|
ivecs_read(const char* fname, size_t* d_out, size_t* n_out) {
|
||||||
|
return (int*)fvecs_read(fname, d_out, n_out);
|
||||||
|
}
|
||||||
|
|
|
@ -93,3 +93,12 @@ struct FileIOReader {
|
||||||
size_t
|
size_t
|
||||||
operator()(void* ptr, size_t size);
|
operator()(void* ptr, size_t size);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
Load_nns_graph(std::vector<std::vector<int64_t>>& final_graph_, const char* filename);
|
||||||
|
|
||||||
|
float*
|
||||||
|
fvecs_read(const char* fname, size_t* d_out, size_t* n_out);
|
||||||
|
|
||||||
|
int*
|
||||||
|
ivecs_read(const char* fname, size_t* d_out, size_t* n_out);
|
||||||
|
|
|
@ -204,10 +204,11 @@ NSGConfAdapter::Match(const TempMetaConf& metaconf) {
|
||||||
auto scale_factor = round(metaconf.dim / 128.0);
|
auto scale_factor = round(metaconf.dim / 128.0);
|
||||||
scale_factor = scale_factor >= 4 ? 4 : scale_factor;
|
scale_factor = scale_factor >= 4 ? 4 : scale_factor;
|
||||||
conf->nprobe = int64_t(conf->nlist * 0.01);
|
conf->nprobe = int64_t(conf->nlist * 0.01);
|
||||||
conf->knng = 40 + 10 * scale_factor; // the size of knng
|
// conf->knng = 40 + 10 * scale_factor; // the size of knng
|
||||||
conf->search_length = 40 + 5 * scale_factor;
|
conf->knng = 50;
|
||||||
|
conf->search_length = 50 + 5 * scale_factor;
|
||||||
conf->out_degree = 50 + 5 * scale_factor;
|
conf->out_degree = 50 + 5 * scale_factor;
|
||||||
conf->candidate_pool_size = 200 + 100 * scale_factor;
|
conf->candidate_pool_size = 300;
|
||||||
MatchBase(conf);
|
MatchBase(conf);
|
||||||
return conf;
|
return conf;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue