1. Fix operand serialization bug

2. Support GPU build
3. Add unit tests


Former-commit-id: bb36dcb05220d8f0648f282c7e38fe20f4ab3c16
pull/191/head
xj.lin 2019-05-05 16:00:13 +08:00
parent 675777d072
commit 56bbe40faf
6 changed files with 88 additions and 69 deletions

View File

@ -39,11 +39,11 @@ public:
virtual bool reset();
/**
* @brief Same as add, but stores xids instead of sequential ids.
*
* @param data input matrix, size n * d
* @param if ids is not empty ids for the std::vectors
*/
* @brief Same as add, but stores xids instead of sequential ids.
*
* @param data input matrix, size n * d
* @param if ids is not empty ids for the std::vectors
*/
virtual bool add_with_ids(idx_t n, const float *xdata, const long *xids);
/**
@ -57,23 +57,20 @@ public:
*/
virtual bool search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const;
// virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location);
//virtual bool search(idx_t n, const std::vector<float> &data, idx_t k,
// std::vector<float> &distances, std::vector<float> &labels) const;
// virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove);
//virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location);
//virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove);
//virtual bool index_display();
// virtual bool index_display();
//
virtual std::shared_ptr<faiss::Index> data() { return index_; }
virtual const std::shared_ptr<faiss::Index>& data() const { return index_; }
private:
friend void write_index(const Index_ptr &index, const std::string &file_name);
std::shared_ptr<faiss::Index> index_ = nullptr;
// std::vector<faiss::gpu::GpuResources *> res_;
// std::vector<int> devs_;
// bool usegpu = true;
// int ngpus = 0;
// faiss::gpu::GpuMultipleClonerOptions *options = new faiss::gpu::GpuMultipleClonerOptions();
};

View File

@ -6,41 +6,52 @@
#include "mutex"
#include <faiss/gpu/StandardGpuResources.h>
#include "faiss/gpu/GpuIndexIVFFlat.h"
#include "faiss/gpu/GpuAutoTune.h"
#include "IndexBuilder.h"
namespace zilliz {
namespace vecwise {
namespace engine {
using std::vector;
// todo(linxj): use ResourceMgr instead
static std::mutex cpu_resource;
static std::mutex gpu_resource;
IndexBuilder::IndexBuilder(const Operand_ptr &opd) {
opd_ = opd;
}
// Default: build the index using the GPU
Index_ptr IndexBuilder::build_all(const long &nb,
const float* xb,
const long* ids,
const long &nt,
const float* xt) {
std::shared_ptr<faiss::Index> index = nullptr;
index.reset(faiss::index_factory(opd_->d, opd_->index_type.c_str()));
std::shared_ptr<faiss::Index> host_index = nullptr;
{
// currently only cpu resources are used.
std::lock_guard<std::mutex> lk(cpu_resource);
if (!index->is_trained) {
nt == 0 || xt == nullptr ? index->train(nb, xb)
: index->train(nt, xt);
// TODO: list the supported index types.
faiss::Index *ori_index = faiss::index_factory(opd_->d, opd_->index_type.c_str());
std::lock_guard<std::mutex> lk(gpu_resource);
faiss::gpu::StandardGpuResources res;
auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index);
if (!device_index->is_trained) {
nt == 0 || xt == nullptr ? device_index->train(nb, xb)
: device_index->train(nt, xt);
}
index->add_with_ids(nb, xb, ids); // todo(linxj): support add_with_idmap
device_index->add_with_ids(nb, xb, ids);
host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index));
delete device_index;
delete ori_index;
}
return std::make_shared<Index>(index);
return std::make_shared<Index>(host_index);
}
Index_ptr IndexBuilder::build_all(const long &nb, const vector<float> &xb,

View File

@ -43,7 +43,6 @@ public:
private:
Operand_ptr opd_ = nullptr;
// std::shared_ptr<faiss::Index> index_ = nullptr;
};
using IndexBuilderPtr = std::shared_ptr<IndexBuilder>;

View File

@ -13,9 +13,9 @@ namespace engine {
std::ostream &operator<<(std::ostream &os, const Operand &obj) {
os << obj.d << " "
<< obj.index_type << " "
<< obj.metric_type << " "
<< obj.preproc << " "
<< obj.postproc << " "
<< obj.metric_type << " "
<< obj.ncent;
return os;
}
@ -23,16 +23,16 @@ std::ostream &operator<<(std::ostream &os, const Operand &obj) {
std::istream &operator>>(std::istream &is, Operand &obj) {
is >> obj.d
>> obj.index_type
>> obj.metric_type
>> obj.preproc
>> obj.postproc
>> obj.metric_type
>> obj.ncent;
return is;
}
std::string operand_to_str(const Operand_ptr &opd) {
std::ostringstream ss;
ss << opd;
ss << *opd;
return ss.str();
}

View File

@ -22,9 +22,9 @@ struct Operand {
int d;
std::string index_type = "IVF13864,Flat";
std::string metric_type = "L2"; //> L2 / Inner Product
std::string preproc;
std::string postproc;
std::string metric_type = "L2"; // L2 / Inner Product
int ncent;
};

View File

@ -14,11 +14,21 @@ using namespace zilliz::vecwise::engine;
TEST(operand_test, Wrapper_Test) {
auto opd = std::make_shared<Operand>();
opd->index_type = "IVF16384,Flat";
opd->d = 256;
using std::cout;
using std::endl;
std::cout << opd << std::endl;
auto opd = std::make_shared<Operand>();
opd->index_type = "IDMap,Flat";
opd->preproc = "opq";
opd->postproc = "pq";
opd->metric_type = "L2";
opd->ncent = 256;
opd->d = 64;
auto opd_str = operand_to_str(opd);
auto new_opd = str_to_operand(opd_str);
assert(new_opd->index_type == opd->index_type);
}
TEST(build_test, Wrapper_Test) {
@ -68,59 +78,61 @@ TEST(build_test, Wrapper_Test) {
//search in first quadrant
int nq = 1, k = 10;
std::vector<float> xq = {0.5, 0.5, 0.5};
float* result_dists = new float[k];
long* result_ids = new long[k];
float *result_dists = new float[k];
long *result_ids = new long[k];
index_1->search(nq, xq.data(), k, result_dists, result_ids);
for(int i = 0; i < k; i++) {
if(result_ids[i] < 0) {
for (int i = 0; i < k; i++) {
if (result_ids[i] < 0) {
ASSERT_TRUE(false);
break;
}
long id = result_ids[i];
std::cout << "No." << id << " [" << xb[id*3] << ", " << xb[id*3 + 1] << ", "
<< xb[id*3 + 2] <<"] distance = " << result_dists[i] << std::endl;
std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", "
<< xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl;
// make sure the result vector is in the first quadrant
ASSERT_TRUE(xb[id*3] > 0.0);
ASSERT_TRUE(xb[id*3 + 1] > 0.0);
ASSERT_TRUE(xb[id*3 + 2] > 0.0);
ASSERT_TRUE(xb[id * 3] > 0.0);
ASSERT_TRUE(xb[id * 3 + 1] > 0.0);
ASSERT_TRUE(xb[id * 3 + 2] > 0.0);
}
delete[] result_dists;
delete[] result_ids;
}
TEST(search_test, Wrapper_Test) {
const int dim = 256;
TEST(gpu_build_test, Wrapper_Test) {
using std::vector;
size_t nb = 25000;
size_t nq = 100;
size_t k = 100;
std::vector<float> xb(nb*dim);
std::vector<float> xq(nq*dim);
std::vector<long> ids(nb*dim);
int d = 256;
int nb = 3 * 1000 * 100;
int nq = 100;
vector<float> xb(d * nb);
vector<float> xq(d * nq);
vector<long> ids(nb);
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
for (size_t i = 0; i < nb*dim; i++) {
xb[i] = dis_xt(gen);
ids[i] = i;
}
for (size_t i = 0; i < nq*dim; i++) {
xq[i] = dis_xt(gen);
}
for (auto &e : xb) { e = float(dis_xt(gen)); }
for (auto &e : xq) { e = float(dis_xt(gen)); }
for (int i = 0; i < nb; ++i) { ids[i] = i; }
// result data
std::vector<long> nns_gt(nq*k); // nns = nearst neg search
std::vector<long> nns(nq*k);
std::vector<float> dis_gt(nq*k);
std::vector<float> dis(nq*k);
faiss::Index* index_gt(faiss::index_factory(dim, "IDMap,Flat"));
index_gt->add_with_ids(nb, xb.data(), ids.data());
index_gt->search(nq, xq.data(), 10, dis_gt.data(), nns_gt.data());
std::cout << "data: " << nns_gt[0];
auto opd = std::make_shared<Operand>();
opd->index_type = "IVF256,Flat";
opd->d = d;
opd->ncent = 256;
IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data());
assert(index_1->ntotal == nb);
assert(index_1->dim == d);
// sanity check: search the first 5 vectors of xb
int k = 1;
vector<long> I(5 * k);
vector<float> D(5 * k);
index_1->search(5, xb.data(), k, D.data(), I.data());
for (int i = 0; i < 5; ++i) { assert(i == I[i]); }
}