Merge branch 'branch-0.5.0' into 'branch-0.5.0'

format knowhere code

See merge request megasearch/milvus!641

Former-commit-id: dce348b81de9047f3f452e01a74825ae7f6e57f5
Ref: pull/191/head
jinhai, 2019-09-28 19:02:47 +08:00
Commit: dd048c2593
78 changed files with 2057 additions and 1976 deletions
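The diff below is consistent with running clang-format over the knowhere sources: pointer and reference qualifiers now bind to the type (int64_t*, const Config&), return types of declarations and definitions are broken onto their own line, includes are sorted, short bodies are no longer collapsed onto a single line, and long argument lists are re-wrapped at roughly 120 columns. Local includes are also rewritten to project-rooted paths (for example "ArrowAdapter.h" becomes "knowhere/adapter/ArrowAdapter.h"), which looks like a manual change rather than a clang-format effect. The repository's actual .clang-format file is not part of this diff; the sketch below is only an inferred approximation of settings that would produce the style seen here, not the project's real configuration.

# Hypothetical .clang-format, reconstructed from the formatting visible in this diff.
# The real configuration in the repository may differ.
BasedOnStyle: Google
ColumnLimit: 120
IndentWidth: 4
DerivePointerAlignment: false
PointerAlignment: Left
SortIncludes: true
AllowShortFunctionsOnASingleLine: None
AlwaysBreakAfterReturnType: All

With a file like this at the repository root, a formatting pass of this kind can be reproduced by running clang-format -i over the affected sources.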


@ -5,5 +5,4 @@
*thirdparty*
*easylogging++*
*SqliteMetaImpl.cpp
*src/grpc*
*src/core*
*src/grpc*


@ -15,42 +15,41 @@
// specific language governing permissions and limitations
// under the License.
#include "ArrowAdapter.h"
#include "knowhere/adapter/ArrowAdapter.h"
namespace zilliz {
namespace knowhere {
ArrayPtr
CopyArray(const ArrayPtr &origin) {
CopyArray(const ArrayPtr& origin) {
ArrayPtr copy = nullptr;
auto copy_data = origin->data()->Copy();
switch (origin->type_id()) {
#define DEFINE_TYPE(type, clazz) \
case arrow::Type::type: { \
copy = std::make_shared<arrow::clazz>(copy_data); \
}
#define DEFINE_TYPE(type, clazz) \
case arrow::Type::type: { \
copy = std::make_shared<arrow::clazz>(copy_data); \
}
DEFINE_TYPE(BOOL, BooleanArray)
DEFINE_TYPE(BINARY, BinaryArray)
DEFINE_TYPE(FIXED_SIZE_BINARY, FixedSizeBinaryArray)
DEFINE_TYPE(DECIMAL, Decimal128Array)
DEFINE_TYPE(FLOAT, NumericArray<arrow::FloatType>)
DEFINE_TYPE(INT64, NumericArray<arrow::Int64Type>)
default:break;
default:
break;
}
return copy;
}
SchemaPtr
CopySchema(const SchemaPtr &origin) {
CopySchema(const SchemaPtr& origin) {
std::vector<std::shared_ptr<Field>> fields;
for (auto &field : origin->fields()) {
auto copy = std::make_shared<Field>(field->name(), field->type(),field->nullable(), nullptr);
for (auto& field : origin->fields()) {
auto copy = std::make_shared<Field>(field->name(), field->type(), field->nullable(), nullptr);
fields.emplace_back(copy);
}
return std::make_shared<Schema>(std::move(fields));
}
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,22 +15,22 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "knowhere/common/Array.h"
namespace zilliz {
namespace knowhere {
ArrayPtr
CopyArray(const ArrayPtr &origin);
CopyArray(const ArrayPtr& origin);
SchemaPtr
CopySchema(const SchemaPtr &origin);
CopySchema(const SchemaPtr& origin);
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,36 +15,31 @@
// specific language governing permissions and limitations
// under the License.
#include "knowhere/adapter/SptagAdapter.h"
#include "knowhere/adapter/Structure.h"
#include "knowhere/index/vector_index/helpers/Definitions.h"
#include "SptagAdapter.h"
#include "Structure.h"
namespace zilliz {
namespace knowhere {
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr &dataset) {
ConvertToMetadataSet(const DatasetPtr& dataset) {
auto array = dataset->array()[0];
auto elems = array->length();
auto p_data = array->data()->GetValues<int64_t>(1, 0);
auto p_offset = (int64_t *) malloc(sizeof(int64_t) * elems);
for (auto i = 0; i <= elems; ++i)
p_offset[i] = i * 8;
std::shared_ptr<SPTAG::MetadataSet> metaset(new SPTAG::MemMetadataSet(
SPTAG::ByteArray((std::uint8_t *) p_data, elems * sizeof(int64_t), false),
SPTAG::ByteArray((std::uint8_t *) p_offset, elems * sizeof(int64_t), true),
elems));
auto p_offset = (int64_t*)malloc(sizeof(int64_t) * elems);
for (auto i = 0; i <= elems; ++i) p_offset[i] = i * 8;
std::shared_ptr<SPTAG::MetadataSet> metaset(
new SPTAG::MemMetadataSet(SPTAG::ByteArray((std::uint8_t*)p_data, elems * sizeof(int64_t), false),
SPTAG::ByteArray((std::uint8_t*)p_offset, elems * sizeof(int64_t), true), elems));
return metaset;
}
std::shared_ptr<SPTAG::VectorSet>
ConvertToVectorSet(const DatasetPtr &dataset) {
ConvertToVectorSet(const DatasetPtr& dataset) {
auto tensor = dataset->tensor()[0];
auto p_data = tensor->raw_mutable_data();
@ -54,18 +49,16 @@ ConvertToVectorSet(const DatasetPtr &dataset) {
SPTAG::ByteArray byte_array(p_data, num_bytes, false);
auto vectorset = std::make_shared<SPTAG::BasicVectorSet>(byte_array,
SPTAG::VectorValueType::Float,
dimension,
rows);
auto vectorset =
std::make_shared<SPTAG::BasicVectorSet>(byte_array, SPTAG::VectorValueType::Float, dimension, rows);
return vectorset;
}
std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr &dataset, const Config &config) {
ConvertToQueryResult(const DatasetPtr& dataset, const Config& config) {
auto tensor = dataset->tensor()[0];
auto p_data = (float *) tensor->raw_mutable_data();
auto p_data = (float*)tensor->raw_mutable_data();
auto dimension = tensor->shape()[1];
auto rows = tensor->shape()[0];
@ -82,23 +75,23 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
auto k = query_results[0].GetResultNum();
auto elems = query_results.size() * k;
auto p_id = (int64_t *) malloc(sizeof(int64_t) * elems);
auto p_dist = (float *) malloc(sizeof(float) * elems);
// TODO: throw if malloc failed.
auto p_id = (int64_t*)malloc(sizeof(int64_t) * elems);
auto p_dist = (float*)malloc(sizeof(float) * elems);
// TODO: throw if malloc failed.
#pragma omp parallel for
for (auto i = 0; i < query_results.size(); ++i) {
auto results = query_results[i].GetResults();
auto num_result = query_results[i].GetResultNum();
for (auto j = 0; j < num_result; ++j) {
// p_id[i * k + j] = results[j].VID;
p_id[i * k + j] = *(int64_t *) query_results[i].GetMetadata(j).Data();
// p_id[i * k + j] = results[j].VID;
p_id[i * k + j] = *(int64_t*)query_results[i].GetMetadata(j).Data();
p_dist[i * k + j] = results[j].Dist;
}
}
auto id_buf = MakeMutableBufferSmart((uint8_t *) p_id, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t *) p_dist, sizeof(float) * elems);
auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
// TODO: magic
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
@ -109,11 +102,11 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
// auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
// auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
// auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
// auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
@ -127,5 +120,5 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
return std::make_shared<Dataset>(array, schema);
}
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,12 +15,11 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
#include <memory>
#include <vector>
#include "knowhere/common/Dataset.h"
@ -28,16 +27,16 @@ namespace zilliz {
namespace knowhere {
std::shared_ptr<SPTAG::VectorSet>
ConvertToVectorSet(const DatasetPtr &dataset);
ConvertToVectorSet(const DatasetPtr& dataset);
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr &dataset);
ConvertToMetadataSet(const DatasetPtr& dataset);
std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr &dataset, const Config &config);
ConvertToQueryResult(const DatasetPtr& dataset, const Config& config);
DatasetPtr
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results);
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,15 +15,16 @@
// specific language governing permissions and limitations
// under the License.
#include "knowhere/adapter/Structure.h"
#include "Structure.h"
#include <string>
#include <vector>
namespace zilliz {
namespace knowhere {
ArrayPtr
ConstructInt64ArraySmart(uint8_t *data, int64_t size) {
ConstructInt64ArraySmart(uint8_t* data, int64_t size) {
// TODO: magic
std::vector<BufferPtr> id_buf{nullptr, MakeMutableBufferSmart(data, size)};
auto type = std::make_shared<arrow::Int64Type>();
@ -32,7 +33,7 @@ ConstructInt64ArraySmart(uint8_t *data, int64_t size) {
}
ArrayPtr
ConstructFloatArraySmart(uint8_t *data, int64_t size) {
ConstructFloatArraySmart(uint8_t* data, int64_t size) {
// TODO: magic
std::vector<BufferPtr> id_buf{nullptr, MakeMutableBufferSmart(data, size)};
auto type = std::make_shared<arrow::FloatType>();
@ -41,14 +42,14 @@ ConstructFloatArraySmart(uint8_t *data, int64_t size) {
}
TensorPtr
ConstructFloatTensorSmart(uint8_t *data, int64_t size, std::vector<int64_t> shape) {
ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector<int64_t> shape) {
auto buffer = MakeMutableBufferSmart(data, size);
auto float_type = std::make_shared<arrow::FloatType>();
return std::make_shared<Tensor>(float_type, buffer, shape);
}
ArrayPtr
ConstructInt64Array(uint8_t *data, int64_t size) {
ConstructInt64Array(uint8_t* data, int64_t size) {
// TODO: magic
std::vector<BufferPtr> id_buf{nullptr, MakeMutableBuffer(data, size)};
auto type = std::make_shared<arrow::Int64Type>();
@ -57,7 +58,7 @@ ConstructInt64Array(uint8_t *data, int64_t size) {
}
ArrayPtr
ConstructFloatArray(uint8_t *data, int64_t size) {
ConstructFloatArray(uint8_t* data, int64_t size) {
// TODO: magic
std::vector<BufferPtr> id_buf{nullptr, MakeMutableBuffer(data, size)};
auto type = std::make_shared<arrow::FloatType>();
@ -66,23 +67,23 @@ ConstructFloatArray(uint8_t *data, int64_t size) {
}
TensorPtr
ConstructFloatTensor(uint8_t *data, int64_t size, std::vector<int64_t> shape) {
ConstructFloatTensor(uint8_t* data, int64_t size, std::vector<int64_t> shape) {
auto buffer = MakeMutableBuffer(data, size);
auto float_type = std::make_shared<arrow::FloatType>();
return std::make_shared<Tensor>(float_type, buffer, shape);
}
FieldPtr
ConstructInt64Field(const std::string &name) {
ConstructInt64Field(const std::string& name) {
auto type = std::make_shared<arrow::Int64Type>();
return std::make_shared<Field>(name, type);
}
FieldPtr
ConstructFloatField(const std::string &name) {
ConstructFloatField(const std::string& name) {
auto type = std::make_shared<arrow::FloatType>();
return std::make_shared<Field>(name, type);
}
}
}
} // namespace knowhere
} // namespace zilliz


@ -15,40 +15,40 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "knowhere/common/Dataset.h"
#include <string>
#include <vector>
#include "knowhere/common/Dataset.h"
namespace zilliz {
namespace knowhere {
extern ArrayPtr
ConstructInt64ArraySmart(uint8_t *data, int64_t size);
ConstructInt64ArraySmart(uint8_t* data, int64_t size);
extern ArrayPtr
ConstructFloatArraySmart(uint8_t *data, int64_t size);
ConstructFloatArraySmart(uint8_t* data, int64_t size);
extern TensorPtr
ConstructFloatTensorSmart(uint8_t *data, int64_t size, std::vector<int64_t> shape);
ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector<int64_t> shape);
extern ArrayPtr
ConstructInt64Array(uint8_t *data, int64_t size);
ConstructInt64Array(uint8_t* data, int64_t size);
extern ArrayPtr
ConstructFloatArray(uint8_t *data, int64_t size);
ConstructFloatArray(uint8_t* data, int64_t size);
extern TensorPtr
ConstructFloatTensor(uint8_t *data, int64_t size, std::vector<int64_t> shape);
ConstructFloatTensor(uint8_t* data, int64_t size, std::vector<int64_t> shape);
extern FieldPtr
ConstructInt64Field(const std::string &name);
ConstructInt64Field(const std::string& name);
extern FieldPtr
ConstructFloatField(const std::string &name);
ConstructFloatField(const std::string& name);
}
}
} // namespace knowhere
} // namespace zilliz


@ -15,18 +15,16 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace zilliz {
namespace knowhere {
#define GETTENSOR(dataset) \
auto tensor = dataset->tensor()[0]; \
auto p_data = tensor->raw_data(); \
auto dim = tensor->shape()[1]; \
auto rows = tensor->shape()[0]; \
#define GETTENSOR(dataset) \
auto tensor = dataset->tensor()[0]; \
auto p_data = tensor->raw_data(); \
auto dim = tensor->shape()[1]; \
auto rows = tensor->shape()[0];
}
}
} // namespace knowhere
} // namespace zilliz


@ -15,14 +15,13 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <arrow/array.h>
#include <memory>
#include "Schema.h"
namespace zilliz {
namespace knowhere {
@ -35,9 +34,9 @@ using ArrayPtr = std::shared_ptr<Array>;
using BooleanArray = arrow::BooleanArray;
using BooleanArrayPtr = std::shared_ptr<arrow::BooleanArray>;
template<typename DType>
template <typename DType>
using NumericArray = arrow::NumericArray<DType>;
template<typename DType>
template <typename DType>
using NumericArrayPtr = std::shared_ptr<arrow::NumericArray<DType>>;
using BinaryArray = arrow::BinaryArray;
@ -49,6 +48,5 @@ using FixedSizeBinaryArrayPtr = std::shared_ptr<arrow::FixedSizeBinaryArray>;
using Decimal128Array = arrow::Decimal128Array;
using Decimal128ArrayPtr = std::shared_ptr<arrow::Decimal128Array>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,21 +15,19 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <map>
#include <string>
#include <vector>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "Id.h"
namespace zilliz {
namespace knowhere {
struct Binary {
ID id;
std::shared_ptr<uint8_t> data;
@ -37,29 +35,28 @@ struct Binary {
};
using BinaryPtr = std::shared_ptr<Binary>;
class BinarySet {
public:
BinaryPtr
GetByName(const std::string &name) const {
GetByName(const std::string& name) const {
return binary_map_.at(name);
}
void
Append(const std::string &name, BinaryPtr binary) {
Append(const std::string& name, BinaryPtr binary) {
binary_map_[name] = std::move(binary);
}
void
Append(const std::string &name, std::shared_ptr<uint8_t> data, int64_t size) {
Append(const std::string& name, std::shared_ptr<uint8_t> data, int64_t size) {
auto binary = std::make_shared<Binary>();
binary->data = data;
binary->size = size;
binary_map_[name] = std::move(binary);
}
//void
//Append(const std::string &name, void *data, int64_t size, ID id) {
// void
// Append(const std::string &name, void *data, int64_t size, ID id) {
// Binary binary;
// binary.data = data;
// binary.size = size;
@ -67,7 +64,8 @@ class BinarySet {
// binary_map_[name] = binary;
//}
void clear() {
void
clear() {
binary_map_.clear();
}
@ -75,6 +73,5 @@ class BinarySet {
std::map<std::string, BinaryPtr> binary_map_;
};
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,14 +15,12 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <arrow/buffer.h>
namespace zilliz {
namespace knowhere {
@ -34,31 +32,32 @@ using MutableBufferPtr = std::shared_ptr<MutableBuffer>;
namespace internal {
struct BufferDeleter {
void operator()(Buffer *buffer) {
free((void *) buffer->data());
void
operator()(Buffer* buffer) {
free((void*)buffer->data());
}
};
}
inline BufferPtr
MakeBufferSmart(uint8_t *data, const int64_t size) {
MakeBufferSmart(uint8_t* data, const int64_t size) {
return BufferPtr(new Buffer(data, size), internal::BufferDeleter());
}
inline MutableBufferPtr
MakeMutableBufferSmart(uint8_t *data, const int64_t size) {
MakeMutableBufferSmart(uint8_t* data, const int64_t size) {
return MutableBufferPtr(new MutableBuffer(data, size), internal::BufferDeleter());
}
inline BufferPtr
MakeBuffer(uint8_t *data, const int64_t size) {
MakeBuffer(uint8_t* data, const int64_t size) {
return std::make_shared<Buffer>(data, size);
}
inline MutableBufferPtr
MakeMutableBuffer(uint8_t *data, const int64_t size) {
MakeMutableBuffer(uint8_t* data, const int64_t size) {
return std::make_shared<MutableBuffer>(data, size);
}
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
@ -42,20 +41,18 @@ struct Cfg {
int64_t gpu_id = DEFAULT_GPUID;
int64_t d = DEFAULT_DIM;
Cfg(const int64_t &dim,
const int64_t &k,
const int64_t &gpu_id,
METRICTYPE type)
: metric_type(type), k(k), gpu_id(gpu_id), d(dim) {}
Cfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, METRICTYPE type)
: metric_type(type), k(k), gpu_id(gpu_id), d(dim) {
}
Cfg() = default;
virtual bool
CheckValid(){
CheckValid() {
return true;
};
}
};
using Config = std::shared_ptr<Cfg>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,20 +15,19 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <vector>
#include <memory>
#include <utility>
#include <vector>
#include "Array.h"
#include "Buffer.h"
#include "Tensor.h"
#include "Schema.h"
#include "Config.h"
#include "Schema.h"
#include "Tensor.h"
#include "knowhere/adapter/ArrowAdapter.h"
namespace zilliz {
namespace knowhere {
@ -40,34 +39,38 @@ class Dataset {
public:
Dataset() = default;
Dataset(std::vector<ArrayPtr> &&array, SchemaPtr array_schema,
std::vector<TensorPtr> &&tensor, SchemaPtr tensor_schema)
Dataset(std::vector<ArrayPtr>&& array, SchemaPtr array_schema, std::vector<TensorPtr>&& tensor,
SchemaPtr tensor_schema)
: array_(std::move(array)),
array_schema_(std::move(array_schema)),
tensor_(std::move(tensor)),
tensor_schema_(std::move(tensor_schema)) {}
tensor_schema_(std::move(tensor_schema)) {
}
Dataset(std::vector<ArrayPtr> array, SchemaPtr array_schema)
: array_(std::move(array)), array_schema_(std::move(array_schema)) {}
: array_(std::move(array)), array_schema_(std::move(array_schema)) {
}
Dataset(std::vector<TensorPtr> tensor, SchemaPtr tensor_schema)
: tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {}
: tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {
}
Dataset(const Dataset &) = delete;
Dataset &operator=(const Dataset &) = delete;
Dataset(const Dataset&) = delete;
Dataset&
operator=(const Dataset&) = delete;
DatasetPtr
Clone() {
auto dataset = std::make_shared<Dataset>();
std::vector<ArrayPtr> clone_array;
for (auto &array : array_) {
for (auto& array : array_) {
clone_array.emplace_back(CopyArray(array));
}
dataset->set_array(clone_array);
std::vector<TensorPtr> clone_tensor;
for (auto &tensor : tensor_) {
for (auto& tensor : tensor_) {
auto buffer = tensor->data();
std::shared_ptr<Buffer> copy_buffer;
// TODO: checkout copy success;
@ -86,16 +89,20 @@ class Dataset {
}
public:
const std::vector<ArrayPtr> &
array() const { return array_; }
const std::vector<ArrayPtr>&
array() const {
return array_;
}
void
set_array(std::vector<ArrayPtr> array) {
array_ = std::move(array);
}
const std::vector<TensorPtr> &
tensor() const { return tensor_; }
const std::vector<TensorPtr>&
tensor() const {
return tensor_;
}
void
set_tensor(std::vector<TensorPtr> tensor) {
@ -103,7 +110,9 @@ class Dataset {
}
SchemaConstPtr
array_schema() const { return array_schema_; }
array_schema() const {
return array_schema_;
}
void
set_array_schema(SchemaPtr array_schema) {
@ -111,18 +120,20 @@ class Dataset {
}
SchemaConstPtr
tensor_schema() const { return tensor_schema_; }
tensor_schema() const {
return tensor_schema_;
}
void
set_tensor_schema(SchemaPtr tensor_schema) {
tensor_schema_ = std::move(tensor_schema);
}
//const Config &
//meta() const { return meta_; }
// const Config &
// meta() const { return meta_; }
//void
//set_meta(Config meta) {
// void
// set_meta(Config meta) {
// meta_ = std::move(meta);
//}
@ -131,11 +142,10 @@ class Dataset {
SchemaPtr array_schema_;
std::vector<TensorPtr> tensor_;
SchemaPtr tensor_schema_;
//Config meta_;
// Config meta_;
};
using DatasetPtr = std::shared_ptr<Dataset>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,41 +15,37 @@
// specific language governing permissions and limitations
// under the License.
#include <cstdio>
#include "Exception.h"
#include "Log.h"
#include "knowhere/common/Exception.h"
namespace zilliz {
namespace knowhere {
KnowhereException::KnowhereException(const std::string& msg) : msg(msg) {
}
KnowhereException::KnowhereException(const std::string &msg):msg(msg) {}
KnowhereException::KnowhereException(const std::string &m, const char *funcName, const char *file, int line) {
KnowhereException::KnowhereException(const std::string& m, const char* funcName, const char* file, int line) {
#ifdef DEBUG
int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s",
funcName, file, line, m.c_str());
int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s", funcName, file, line, m.c_str());
msg.resize(size + 1);
snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s",
funcName, file, line, m.c_str());
snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s", funcName, file, line, m.c_str());
#else
std::string file_path(file);
auto const pos = file_path.find_last_of('/');
auto filename = file_path.substr(pos+1).c_str();
auto filename = file_path.substr(pos + 1).c_str();
int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s",
funcName, filename, line, m.c_str());
int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s", funcName, filename, line, m.c_str());
msg.resize(size + 1);
snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s",
funcName, filename, line, m.c_str());
snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s", funcName, filename, line, m.c_str());
#endif
}
const char *KnowhereException::what() const noexcept {
const char*
KnowhereException::what() const noexcept {
return msg.c_str();
}
}
}
} // namespace knowhere
} // namespace zilliz


@ -15,46 +15,41 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <exception>
#include <string>
namespace zilliz {
namespace knowhere {
class KnowhereException : public std::exception {
public:
explicit KnowhereException(const std::string &msg);
explicit KnowhereException(const std::string& msg);
KnowhereException(const std::string &msg, const char *funName,
const char *file, int line);
KnowhereException(const std::string& msg, const char* funName, const char* file, int line);
const char *what() const noexcept override;
const char*
what() const noexcept override;
std::string msg;
};
#define KNOHWERE_ERROR_MSG(MSG) printf("%s", KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__).what())
#define KNOHWERE_ERROR_MSG(MSG)\
printf("%s", KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__).what())
#define KNOWHERE_THROW_MSG(MSG) \
do { \
throw KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__); \
} while (false)
#define KNOWHERE_THROW_MSG(MSG)\
do {\
throw KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__);\
} while (false)
#define KNOHERE_THROW_FORMAT(FMT, ...) \
do { \
std::string __s; \
int __size = snprintf(nullptr, 0, FMT, __VA_ARGS__); \
__s.resize(__size + 1); \
snprintf(&__s[0], __s.size(), FMT, __VA_ARGS__); \
throw faiss::FaissException(__s, __PRETTY_FUNCTION__, __FILE__, __LINE__); \
} while (false)
#define KNOHERE_THROW_FORMAT(FMT, ...)\
do { \
std::string __s;\
int __size = snprintf(nullptr, 0, FMT, __VA_ARGS__);\
__s.resize(__size + 1);\
snprintf(&__s[0], __s.size(), FMT, __VA_ARGS__);\
throw faiss::FaissException(__s, __PRETTY_FUNCTION__, __FILE__, __LINE__);\
} while (false)
}
}
} // namespace knowhere
} // namespace zilliz


@ -15,11 +15,10 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
//#include "zcommon/id/id.h"
//using ID = zilliz::common::ID;
// using ID = zilliz::common::ID;
#include <stdint.h>
#include <string>
@ -27,18 +26,20 @@
namespace zilliz {
namespace knowhere {
class ID {
public:
constexpr static int64_t kIDSize = 20;
public:
const int32_t *
data() const { return content_; }
const int32_t*
data() const {
return content_;
}
int32_t *
mutable_data() { return content_; }
int32_t*
mutable_data() {
return content_;
}
bool
IsValid() const;
@ -47,14 +48,14 @@ class ID {
ToString() const;
bool
operator==(const ID &that) const;
operator==(const ID& that) const;
bool
operator<(const ID &that) const;
operator<(const ID& that) const;
protected:
int32_t content_[5] = {};
};
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "utils/easylogging++.h"
@ -33,5 +32,5 @@ namespace knowhere {
#define KNOWHERE_LOG_ERROR LOG(ERROR) << KNOWHERE_DOMAIN_NAME
#define KNOWHERE_LOG_FATAL LOG(FATAL) << KNOWHERE_DOMAIN_NAME
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,18 +15,15 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <arrow/type.h>
namespace zilliz {
namespace knowhere {
using DataType = arrow::DataType;
using Field = arrow::Field;
using FieldPtr = std::shared_ptr<arrow::Field>;
@ -34,7 +31,5 @@ using Schema = arrow::Schema;
using SchemaPtr = std::shared_ptr<Schema>;
using SchemaConstPtr = std::shared_ptr<const Schema>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,21 +15,17 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <arrow/tensor.h>
namespace zilliz {
namespace knowhere {
using Tensor = arrow::Tensor;
using TensorPtr = std::shared_ptr<Tensor>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,18 +15,14 @@
// specific language governing permissions and limitations
// under the License.
#include <iostream> // TODO(linxj): using Log instead
#include <iostream> // TODO(linxj): using Log instead
#include "Timer.h"
#include "knowhere/common/Timer.h"
namespace zilliz {
namespace knowhere {
TimeRecorder::TimeRecorder(const std::string &header,
int64_t log_level) :
header_(header),
log_level_(log_level) {
TimeRecorder::TimeRecorder(const std::string& header, int64_t log_level) : header_(header), log_level_(log_level) {
start_ = last_ = stdclock::now();
}
@ -42,9 +38,10 @@ TimeRecorder::GetTimeSpanStr(double span) {
}
void
TimeRecorder::PrintTimeRecord(const std::string &msg, double span) {
TimeRecorder::PrintTimeRecord(const std::string& msg, double span) {
std::string str_log;
if (!header_.empty()) str_log += header_ + ": ";
if (!header_.empty())
str_log += header_ + ": ";
str_log += msg;
str_log += " (";
str_log += TimeRecorder::GetTimeSpanStr(span);
@ -55,35 +52,35 @@ TimeRecorder::PrintTimeRecord(const std::string &msg, double span) {
std::cout << str_log << std::endl;
break;
}
//case 1: {
// SERVER_LOG_DEBUG << str_log;
// break;
//}
//case 2: {
// SERVER_LOG_INFO << str_log;
// break;
//}
//case 3: {
// SERVER_LOG_WARNING << str_log;
// break;
//}
//case 4: {
// SERVER_LOG_ERROR << str_log;
// break;
//}
//case 5: {
// SERVER_LOG_FATAL << str_log;
// break;
//}
//default: {
// SERVER_LOG_INFO << str_log;
// break;
//}
// case 1: {
// SERVER_LOG_DEBUG << str_log;
// break;
//}
// case 2: {
// SERVER_LOG_INFO << str_log;
// break;
//}
// case 3: {
// SERVER_LOG_WARNING << str_log;
// break;
//}
// case 4: {
// SERVER_LOG_ERROR << str_log;
// break;
//}
// case 5: {
// SERVER_LOG_FATAL << str_log;
// break;
//}
// default: {
// SERVER_LOG_INFO << str_log;
// break;
//}
}
}
double
TimeRecorder::RecordSection(const std::string &msg) {
TimeRecorder::RecordSection(const std::string& msg) {
stdclock::time_point curr = stdclock::now();
double span = (std::chrono::duration<double, std::micro>(curr - last_)).count();
last_ = curr;
@ -93,7 +90,7 @@ TimeRecorder::RecordSection(const std::string &msg) {
}
double
TimeRecorder::ElapseFromBegin(const std::string &msg) {
TimeRecorder::ElapseFromBegin(const std::string& msg) {
stdclock::time_point curr = stdclock::now();
double span = (std::chrono::duration<double, std::micro>(curr - start_)).count();
@ -101,5 +98,5 @@ TimeRecorder::ElapseFromBegin(const std::string &msg) {
return span;
}
}
}
} // namespace knowhere
} // namespace zilliz


@ -15,11 +15,10 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include <chrono>
#include <string>
namespace zilliz {
namespace knowhere {
@ -28,19 +27,22 @@ class TimeRecorder {
using stdclock = std::chrono::high_resolution_clock;
public:
TimeRecorder(const std::string &header,
int64_t log_level = 0);
explicit TimeRecorder(const std::string& header, int64_t log_level = 0);
~TimeRecorder();//trace = 0, debug = 1, info = 2, warn = 3, error = 4, critical = 5
~TimeRecorder(); // trace = 0, debug = 1, info = 2, warn = 3, error = 4, critical = 5
double RecordSection(const std::string &msg);
double
RecordSection(const std::string& msg);
double ElapseFromBegin(const std::string &msg);
double
ElapseFromBegin(const std::string& msg);
static std::string GetTimeSpanStr(double span);
static std::string
GetTimeSpanStr(double span);
private:
void PrintTimeRecord(const std::string &msg, double span);
void
PrintTimeRecord(const std::string& msg, double span);
private:
std::string header_;
@ -49,5 +51,5 @@ class TimeRecorder {
int64_t log_level_;
};
}
}
} // namespace knowhere
} // namespace zilliz


@ -15,54 +15,55 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "IndexModel.h"
#include "IndexType.h"
#include "knowhere/common/BinarySet.h"
#include "knowhere/common/Dataset.h"
#include "IndexType.h"
#include "IndexModel.h"
#include "knowhere/index/preprocessor/Preprocessor.h"
namespace zilliz {
namespace knowhere {
class Index {
public:
virtual BinarySet
Serialize() = 0;
virtual void
Load(const BinarySet &index_binary) = 0;
Load(const BinarySet& index_binary) = 0;
// @throw
virtual DatasetPtr
Search(const DatasetPtr &dataset, const Config &config) = 0;
Search(const DatasetPtr& dataset, const Config& config) = 0;
public:
IndexType
idx_type() const { return idx_type_; }
idx_type() const {
return idx_type_;
}
void
set_idx_type(IndexType idx_type) { idx_type_ = idx_type; }
set_idx_type(IndexType idx_type) {
idx_type_ = idx_type;
}
virtual void
set_preprocessor(PreprocessorPtr preprocessor) {}
set_preprocessor(PreprocessorPtr preprocessor) {
}
virtual void
set_index_model(IndexModelPtr model) {}
set_index_model(IndexModelPtr model) {
}
private:
IndexType idx_type_;
};
using IndexPtr = std::shared_ptr<Index>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
@ -24,19 +23,16 @@
namespace zilliz {
namespace knowhere {
class IndexModel {
public:
virtual BinarySet
Serialize() = 0;
virtual void
Load(const BinarySet &binary) = 0;
Load(const BinarySet& binary) = 0;
};
using IndexModelPtr = std::shared_ptr<IndexModel>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,14 +15,11 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace zilliz {
namespace knowhere {
enum class IndexType {
kUnknown = 0,
kVecIdxBegin = 100,
@ -30,6 +27,5 @@ enum class IndexType {
kVecIdxEnd,
};
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -1,14 +1,30 @@
//// Licensed to the Apache Software Foundation (ASF) under one
//// or more contributor license agreements. See the NOTICE file
//// distributed with this work for additional information
//// regarding copyright ownership. The ASF licenses this file
//// to you under the Apache License, Version 2.0 (the
//// "License"); you may not use this file except in compliance
//// with the License. You may obtain a copy of the License at
////
//// http://www.apache.org/licenses/LICENSE-2.0
////
//// Unless required by applicable law or agreed to in writing,
//// software distributed under the License is distributed on an
//// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
//// KIND, either express or implied. See the License for the
//// specific language governing permissions and limitations
//// under the License.
//
//#include "knowhere/index/vector_index/definitions.h"
//#include "knowhere/common/config.h"
//#include "knowhere/index/preprocessor/normalize.h"
#include "knowhere/index/preprocessor/Normalize.h"
//
//
//namespace zilliz {
//namespace knowhere {
// namespace zilliz {
// namespace knowhere {
//
//DatasetPtr
//NormalizePreprocessor::Preprocess(const DatasetPtr &dataset) {
// DatasetPtr
// NormalizePreprocessor::Preprocess(const DatasetPtr &dataset) {
// // TODO: wrap dataset->tensor
// auto tensor = dataset->tensor()[0];
// auto p_data = (float *)tensor->raw_mutable_data();
@ -21,8 +37,8 @@
// }
//}
//
//void
//NormalizePreprocessor::Normalize(float *arr, int64_t dimension) {
// void
// NormalizePreprocessor::Normalize(float *arr, int64_t dimension) {
// double vector_length = 0;
// for (auto j = 0; j < dimension; j++) {
// double val = arr[j];
@ -39,4 +55,3 @@
//
//} // namespace knowhere
//} // namespace zilliz


@ -1,13 +1,30 @@
//// Licensed to the Apache Software Foundation (ASF) under one
//// or more contributor license agreements. See the NOTICE file
//// distributed with this work for additional information
//// regarding copyright ownership. The ASF licenses this file
//// to you under the Apache License, Version 2.0 (the
//// "License"); you may not use this file except in compliance
//// with the License. You may obtain a copy of the License at
////
//// http://www.apache.org/licenses/LICENSE-2.0
////
//// Unless required by applicable law or agreed to in writing,
//// software distributed under the License is distributed on an
//// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
//// KIND, either express or implied. See the License for the
//// specific language governing permissions and limitations
//// under the License.
//
//#pragma once
//
//#include <memory>
//#include "preprocessor.h"
//
//
//namespace zilliz {
//namespace knowhere {
// namespace zilliz {
// namespace knowhere {
//
//class NormalizePreprocessor : public Preprocessor {
// class NormalizePreprocessor : public Preprocessor {
// public:
// DatasetPtr
// Preprocess(const DatasetPtr &input) override;
@ -19,7 +36,7 @@
//};
//
//
//using NormalizePreprocessorPtr = std::shared_ptr<NormalizePreprocessor>;
// using NormalizePreprocessorPtr = std::shared_ptr<NormalizePreprocessor>;
//
//
//} // namespace knowhere


@ -15,27 +15,22 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "knowhere/common/Dataset.h"
namespace zilliz {
namespace knowhere {
class Preprocessor {
public:
virtual DatasetPtr
Preprocess(const DatasetPtr &input) = 0;
Preprocess(const DatasetPtr& input) = 0;
};
using PreprocessorPtr = std::shared_ptr<Preprocessor>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,23 +15,24 @@
// specific language governing permissions and limitations
// under the License.
#include <faiss/index_io.h>
#include <faiss/IndexIVF.h>
#include <faiss/index_io.h>
#include <utility>
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/FaissBaseIndex.h"
#include "knowhere/index/vector_index/helpers/FaissIO.h"
#include "FaissBaseIndex.h"
namespace zilliz {
namespace knowhere {
FaissBaseIndex::FaissBaseIndex(std::shared_ptr<faiss::Index> index) : index_(std::move(index)) {}
FaissBaseIndex::FaissBaseIndex(std::shared_ptr<faiss::Index> index) : index_(std::move(index)) {
}
BinarySet FaissBaseIndex::SerializeImpl() {
BinarySet
FaissBaseIndex::SerializeImpl() {
try {
faiss::Index *index = index_.get();
faiss::Index* index = index_.get();
SealImpl();
@ -44,37 +45,38 @@ BinarySet FaissBaseIndex::SerializeImpl() {
// TODO(linxj): use virtual func Name() instead of raw string.
res_set.Append("IVF", data, writer.rp);
return res_set;
} catch (std::exception &e) {
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
}
void FaissBaseIndex::LoadImpl(const BinarySet &index_binary) {
void
FaissBaseIndex::LoadImpl(const BinarySet& index_binary) {
auto binary = index_binary.GetByName("IVF");
MemoryIOReader reader;
reader.total = binary->size;
reader.data_ = binary->data.get();
faiss::Index *index = faiss::read_index(&reader);
faiss::Index* index = faiss::read_index(&reader);
index_.reset(index);
}
void FaissBaseIndex::SealImpl() {
// TODO(linxj): enable
//#ifdef ZILLIZ_FAISS
faiss::Index *index = index_.get();
auto idx = dynamic_cast<faiss::IndexIVF *>(index);
void
FaissBaseIndex::SealImpl() {
// TODO(linxj): enable
//#ifdef ZILLIZ_FAISS
faiss::Index* index = index_.get();
auto idx = dynamic_cast<faiss::IndexIVF*>(index);
if (idx != nullptr) {
idx->to_readonly();
}
//else {
// else {
// KNOHWERE_ERROR_MSG("Seal failed");
//}
//#endif
//#endif
}
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
@ -24,7 +23,6 @@
#include "knowhere/common/BinarySet.h"
namespace zilliz {
namespace knowhere {
@ -36,7 +34,7 @@ class FaissBaseIndex {
SerializeImpl();
virtual void
LoadImpl(const BinarySet &index_binary);
LoadImpl(const BinarySet& index_binary);
virtual void
SealImpl();
@ -45,8 +43,5 @@ class FaissBaseIndex {
std::shared_ptr<faiss::Index> index_ = nullptr;
};
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,30 +15,28 @@
// specific language governing permissions and limitations
// under the License.
#include <faiss/IndexIVFPQ.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVF.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/index_io.h>
#include <memory>
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "IndexGPUIVF.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"
#include "knowhere/index/vector_index/helpers/FaissIO.h"
namespace zilliz {
namespace knowhere {
IndexModelPtr GPUIVF::Train(const DatasetPtr &dataset, const Config &config) {
IndexModelPtr
GPUIVF::Train(const DatasetPtr& dataset, const Config& config) {
auto build_cfg = std::dynamic_pointer_cast<IVFCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
build_cfg->CheckValid(); // throw exception
}
gpu_id_ = build_cfg->gpu_id;
@ -49,10 +47,9 @@ IndexModelPtr GPUIVF::Train(const DatasetPtr &dataset, const Config &config) {
ResScope rs(temp_resource, gpu_id_, true);
faiss::gpu::GpuIndexIVFFlatConfig idx_config;
idx_config.device = gpu_id_;
faiss::gpu::GpuIndexIVFFlat device_index(temp_resource->faiss_res.get(), dim,
build_cfg->nlist, GetMetricType(build_cfg->metric_type),
idx_config);
device_index.train(rows, (float *) p_data);
faiss::gpu::GpuIndexIVFFlat device_index(temp_resource->faiss_res.get(), dim, build_cfg->nlist,
GetMetricType(build_cfg->metric_type), idx_config);
device_index.train(rows, (float*)p_data);
std::shared_ptr<faiss::Index> host_index = nullptr;
host_index.reset(faiss::gpu::index_gpu_to_cpu(&device_index));
@ -63,7 +60,8 @@ IndexModelPtr GPUIVF::Train(const DatasetPtr &dataset, const Config &config) {
}
}
void GPUIVF::set_index_model(IndexModelPtr model) {
void
GPUIVF::set_index_model(IndexModelPtr model) {
std::lock_guard<std::mutex> lk(mutex_);
auto host_index = std::static_pointer_cast<IVFIndexModel>(model);
@ -77,7 +75,8 @@ void GPUIVF::set_index_model(IndexModelPtr model) {
}
}
BinarySet GPUIVF::SerializeImpl() {
BinarySet
GPUIVF::SerializeImpl() {
if (!index_ || !index_->is_trained) {
KNOWHERE_THROW_MSG("index not initialize or trained");
}
@ -85,8 +84,8 @@ BinarySet GPUIVF::SerializeImpl() {
try {
MemoryIOWriter writer;
{
faiss::Index *index = index_.get();
faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(index);
faiss::Index* index = index_.get();
faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(index);
SealImpl();
@ -100,19 +99,20 @@ BinarySet GPUIVF::SerializeImpl() {
res_set.Append("IVF", data, writer.rp);
return res_set;
} catch (std::exception &e) {
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
}
void GPUIVF::LoadImpl(const BinarySet &index_binary) {
void
GPUIVF::LoadImpl(const BinarySet& index_binary) {
auto binary = index_binary.GetByName("IVF");
MemoryIOReader reader;
{
reader.total = binary->size;
reader.data_ = binary->data.get();
faiss::Index *index = faiss::read_index(&reader);
faiss::Index* index = faiss::read_index(&reader);
if (auto temp_res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
ResScope rs(temp_res, gpu_id_, false);
@ -127,23 +127,20 @@ void GPUIVF::LoadImpl(const BinarySet &index_binary) {
}
}
IVFIndexPtr GPUIVF::Copy_index_gpu_to_cpu() {
IVFIndexPtr
GPUIVF::Copy_index_gpu_to_cpu() {
std::lock_guard<std::mutex> lk(mutex_);
faiss::Index *device_index = index_.get();
faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index);
faiss::Index* device_index = index_.get();
faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index);
std::shared_ptr<faiss::Index> new_index;
new_index.reset(host_index);
return std::make_shared<IVF>(new_index);
}
void GPUIVF::search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg) {
void
GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) {
std::lock_guard<std::mutex> lk(mutex_);
// TODO(linxj): gpu index support GenParams
@ -154,49 +151,54 @@ void GPUIVF::search_impl(int64_t n,
{
// TODO(linxj): allocate mem
ResScope rs(res_, gpu_id_);
device_index->search(n, (float *) data, k, distances, labels);
device_index->search(n, (float*)data, k, distances, labels);
}
}
}
VectorIndexPtr GPUIVF::CopyGpuToCpu(const Config &config) {
VectorIndexPtr
GPUIVF::CopyGpuToCpu(const Config& config) {
std::lock_guard<std::mutex> lk(mutex_);
faiss::Index *device_index = index_.get();
faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index);
faiss::Index* device_index = index_.get();
faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index);
std::shared_ptr<faiss::Index> new_index;
new_index.reset(host_index);
return std::make_shared<IVF>(new_index);
}
VectorIndexPtr GPUIVF::Clone() {
VectorIndexPtr
GPUIVF::Clone() {
auto cpu_idx = CopyGpuToCpu(Config());
return ::zilliz::knowhere::cloner::CopyCpuToGpu(cpu_idx, gpu_id_, Config());
}
VectorIndexPtr GPUIVF::CopyGpuToGpu(const int64_t &device_id, const Config &config) {
VectorIndexPtr
GPUIVF::CopyGpuToGpu(const int64_t& device_id, const Config& config) {
auto host_index = CopyGpuToCpu(config);
return std::static_pointer_cast<IVF>(host_index)->CopyCpuToGpu(device_id, config);
}
void GPUIVF::Add(const DatasetPtr &dataset, const Config &config) {
void
GPUIVF::Add(const DatasetPtr& dataset, const Config& config) {
if (auto spt = res_.lock()) {
ResScope rs(res_, gpu_id_);
IVF::Add(dataset, config);
}
else {
} else {
KNOWHERE_THROW_MSG("Add IVF can't get gpu resource");
}
}
void GPUIndex::SetGpuDevice(const int &gpu_id) {
void
GPUIndex::SetGpuDevice(const int& gpu_id) {
gpu_id_ = gpu_id;
}
const int64_t &GPUIndex::GetGpuDevice() {
const int64_t&
GPUIndex::GetGpuDevice() {
return gpu_id_;
}
}
}
} // namespace knowhere
} // namespace zilliz


@ -15,84 +15,84 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <utility>
#include "IndexIVF.h"
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
namespace zilliz {
namespace knowhere {
class GPUIndex {
public:
explicit GPUIndex(const int &device_id) : gpu_id_(device_id) {}
public:
explicit GPUIndex(const int& device_id) : gpu_id_(device_id) {
}
GPUIndex(const int& device_id, const ResPtr& resource): gpu_id_(device_id), res_(resource) {}
GPUIndex(const int& device_id, const ResPtr& resource) : gpu_id_(device_id), res_(resource) {
}
virtual VectorIndexPtr
CopyGpuToCpu(const Config &config) = 0;
virtual VectorIndexPtr
CopyGpuToCpu(const Config& config) = 0;
virtual VectorIndexPtr
CopyGpuToGpu(const int64_t &device_id, const Config &config) = 0;
virtual VectorIndexPtr
CopyGpuToGpu(const int64_t& device_id, const Config& config) = 0;
void
SetGpuDevice(const int &gpu_id);
void
SetGpuDevice(const int& gpu_id);
const int64_t &
GetGpuDevice();
const int64_t&
GetGpuDevice();
protected:
int64_t gpu_id_;
ResWPtr res_;
protected:
int64_t gpu_id_;
ResWPtr res_;
};
class GPUIVF : public IVF, public GPUIndex {
public:
explicit GPUIVF(const int &device_id) : IVF(), GPUIndex(device_id) {}
public:
explicit GPUIVF(const int& device_id) : IVF(), GPUIndex(device_id) {
}
explicit GPUIVF(std::shared_ptr<faiss::Index> index, const int64_t &device_id, ResPtr &resource)
: IVF(std::move(index)), GPUIndex(device_id, resource) {};
explicit GPUIVF(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& resource)
: IVF(std::move(index)), GPUIndex(device_id, resource) {
}
IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) override;
IndexModelPtr
Train(const DatasetPtr& dataset, const Config& config) override;
void
Add(const DatasetPtr &dataset, const Config &config) override;
void
Add(const DatasetPtr& dataset, const Config& config) override;
void
set_index_model(IndexModelPtr model) override;
void
set_index_model(IndexModelPtr model) override;
//DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
VectorIndexPtr
CopyGpuToCpu(const Config &config) override;
// DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
VectorIndexPtr
CopyGpuToCpu(const Config& config) override;
VectorIndexPtr
CopyGpuToGpu(const int64_t &device_id, const Config &config) override;
VectorIndexPtr
CopyGpuToGpu(const int64_t& device_id, const Config& config) override;
VectorIndexPtr
Clone() final;
VectorIndexPtr
Clone() final;
// TODO(linxj): Deprecated
virtual IVFIndexPtr Copy_index_gpu_to_cpu();
// TODO(linxj): Deprecated
virtual IVFIndexPtr
Copy_index_gpu_to_cpu();
protected:
void
search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg) override;
protected:
void
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override;
BinarySet
SerializeImpl() override;
BinarySet
SerializeImpl() override;
void
LoadImpl(const BinarySet &index_binary) override;
void
LoadImpl(const BinarySet& index_binary) override;
};
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,23 +15,23 @@
// specific language governing permissions and limitations
// under the License.
#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <memory>
#include "IndexGPUIVFPQ.h"
#include "knowhere/common/Exception.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexGPUIVFPQ.h"
namespace zilliz {
namespace knowhere {
IndexModelPtr GPUIVFPQ::Train(const DatasetPtr &dataset, const Config &config) {
IndexModelPtr
GPUIVFPQ::Train(const DatasetPtr& dataset, const Config& config) {
auto build_cfg = std::dynamic_pointer_cast<IVFPQCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
build_cfg->CheckValid(); // throw exception
}
gpu_id_ = build_cfg->gpu_id;
@ -40,9 +40,9 @@ IndexModelPtr GPUIVFPQ::Train(const DatasetPtr &dataset, const Config &config) {
// TODO(linxj): set device here.
// TODO(linxj): set gpu resource here.
faiss::gpu::StandardGpuResources res;
faiss::gpu::GpuIndexIVFPQ device_index(&res, dim, build_cfg->nlist, build_cfg->m,
build_cfg->nbits, GetMetricType(build_cfg->metric_type)); // IP not support
device_index.train(rows, (float *) p_data);
faiss::gpu::GpuIndexIVFPQ device_index(&res, dim, build_cfg->nlist, build_cfg->m, build_cfg->nbits,
GetMetricType(build_cfg->metric_type)); // IP not support
device_index.train(rows, (float*)p_data);
std::shared_ptr<faiss::Index> host_index = nullptr;
host_index.reset(faiss::gpu::index_gpu_to_cpu(&device_index));
@ -50,20 +50,22 @@ IndexModelPtr GPUIVFPQ::Train(const DatasetPtr &dataset, const Config &config) {
return std::make_shared<IVFIndexModel>(host_index);
}
std::shared_ptr<faiss::IVFSearchParameters> GPUIVFPQ::GenParams(const Config &config) {
std::shared_ptr<faiss::IVFSearchParameters>
GPUIVFPQ::GenParams(const Config& config) {
auto params = std::make_shared<faiss::IVFPQSearchParameters>();
auto search_cfg = std::dynamic_pointer_cast<IVFPQCfg>(config);
params->nprobe = search_cfg->nprobe;
// params->scan_table_threshold = conf->scan_table_threhold;
// params->polysemous_ht = conf->polysemous_ht;
// params->max_codes = conf->max_codes;
// params->scan_table_threshold = conf->scan_table_threhold;
// params->polysemous_ht = conf->polysemous_ht;
// params->max_codes = conf->max_codes;
return params;
}
VectorIndexPtr GPUIVFPQ::CopyGpuToCpu(const Config &config) {
VectorIndexPtr
GPUIVFPQ::CopyGpuToCpu(const Config& config) {
KNOWHERE_THROW_MSG("not support yet");
}
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,33 +15,32 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "IndexGPUIVF.h"
namespace zilliz {
namespace knowhere {
class GPUIVFPQ : public GPUIVF {
public:
explicit GPUIVFPQ(const int &device_id) : GPUIVF(device_id) {}
public:
explicit GPUIVFPQ(const int& device_id) : GPUIVF(device_id) {
}
IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) override;
Train(const DatasetPtr& dataset, const Config& config) override;
public:
public:
VectorIndexPtr
CopyGpuToCpu(const Config &config) override;
CopyGpuToCpu(const Config& config) override;
protected:
protected:
// TODO(linxj): remove GenParams.
std::shared_ptr<faiss::IVFSearchParameters>
GenParams(const Config &config) override;
GenParams(const Config& config) override;
};
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,60 +15,62 @@
// specific language governing permissions and limitations
// under the License.
#include <faiss/gpu/GpuAutoTune.h>
#include <memory>
#include <utility>
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "IndexGPUIVFSQ.h"
#include "IndexIVFSQ.h"
#include "knowhere/index/vector_index/IndexGPUIVFSQ.h"
#include "knowhere/index/vector_index/IndexIVFSQ.h"
namespace zilliz {
namespace knowhere {
IndexModelPtr GPUIVFSQ::Train(const DatasetPtr &dataset, const Config &config) {
auto build_cfg = std::dynamic_pointer_cast<IVFSQCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
}
gpu_id_ = build_cfg->gpu_id;
GETTENSOR(dataset)
std::stringstream index_type;
index_type << "IVF" << build_cfg->nlist << "," << "SQ" << build_cfg->nbits;
auto build_index = faiss::index_factory(dim, index_type.str().c_str(), GetMetricType(build_cfg->metric_type));
auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_);
if (temp_resource != nullptr) {
ResScope rs(temp_resource, gpu_id_, true);
auto device_index = faiss::gpu::index_cpu_to_gpu(temp_resource->faiss_res.get(), gpu_id_, build_index);
device_index->train(rows, (float *) p_data);
std::shared_ptr<faiss::Index> host_index = nullptr;
host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index));
delete device_index;
delete build_index;
return std::make_shared<IVFIndexModel>(host_index);
} else {
KNOWHERE_THROW_MSG("Build IVFSQ can't get gpu resource");
}
IndexModelPtr
GPUIVFSQ::Train(const DatasetPtr& dataset, const Config& config) {
auto build_cfg = std::dynamic_pointer_cast<IVFSQCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
}
gpu_id_ = build_cfg->gpu_id;
VectorIndexPtr GPUIVFSQ::CopyGpuToCpu(const Config &config) {
std::lock_guard<std::mutex> lk(mutex_);
GETTENSOR(dataset)
faiss::Index *device_index = index_.get();
faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index);
std::stringstream index_type;
index_type << "IVF" << build_cfg->nlist << ","
<< "SQ" << build_cfg->nbits;
auto build_index = faiss::index_factory(dim, index_type.str().c_str(), GetMetricType(build_cfg->metric_type));
std::shared_ptr<faiss::Index> new_index;
new_index.reset(host_index);
return std::make_shared<IVFSQ>(new_index);
auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_);
if (temp_resource != nullptr) {
ResScope rs(temp_resource, gpu_id_, true);
auto device_index = faiss::gpu::index_cpu_to_gpu(temp_resource->faiss_res.get(), gpu_id_, build_index);
device_index->train(rows, (float*)p_data);
std::shared_ptr<faiss::Index> host_index = nullptr;
host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index));
delete device_index;
delete build_index;
return std::make_shared<IVFIndexModel>(host_index);
} else {
KNOWHERE_THROW_MSG("Build IVFSQ can't get gpu resource");
}
}
} // knowhere
} // zilliz
VectorIndexPtr
GPUIVFSQ::CopyGpuToCpu(const Config& config) {
std::lock_guard<std::mutex> lk(mutex_);
faiss::Index* device_index = index_.get();
faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index);
std::shared_ptr<faiss::Index> new_index;
new_index.reset(host_index);
return std::make_shared<IVFSQ>(new_index);
}
} // namespace knowhere
} // namespace zilliz


@ -15,29 +15,31 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "IndexGPUIVF.h"
#include <memory>
#include <utility>
#include "IndexGPUIVF.h"
namespace zilliz {
namespace knowhere {
class GPUIVFSQ : public GPUIVF {
public:
explicit GPUIVFSQ(const int &device_id) : GPUIVF(device_id) {}
public:
explicit GPUIVFSQ(const int& device_id) : GPUIVF(device_id) {
}
explicit GPUIVFSQ(std::shared_ptr<faiss::Index> index, const int64_t &device_id, ResPtr &resource)
: GPUIVF(std::move(index), device_id, resource) {};
explicit GPUIVFSQ(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& resource)
: GPUIVF(std::move(index), device_id, resource) {
}
IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) override;
Train(const DatasetPtr& dataset, const Config& config) override;
VectorIndexPtr
CopyGpuToCpu(const Config &config) override;
CopyGpuToCpu(const Config& config) override;
};
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz


@ -15,24 +15,23 @@
// specific language governing permissions and limitations
// under the License.
#include <faiss/IndexFlat.h>
#include <faiss/AutoTune.h>
#include <faiss/IndexFlat.h>
#include <faiss/MetaIndexes.h>
#include <faiss/index_io.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/index_io.h>
#include <vector>
#include "knowhere/common/Exception.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexIDMAP.h"
#include "knowhere/index/vector_index/helpers/FaissIO.h"
#include "IndexIDMAP.h"
namespace zilliz {
namespace knowhere {
BinarySet IDMAP::Serialize() {
BinarySet
IDMAP::Serialize() {
if (!index_) {
KNOWHERE_THROW_MSG("index not initialize");
}
@ -41,31 +40,33 @@ BinarySet IDMAP::Serialize() {
return SerializeImpl();
}
void IDMAP::Load(const BinarySet &index_binary) {
void
IDMAP::Load(const BinarySet& index_binary) {
std::lock_guard<std::mutex> lk(mutex_);
LoadImpl(index_binary);
}
DatasetPtr IDMAP::Search(const DatasetPtr &dataset, const Config &config) {
DatasetPtr
IDMAP::Search(const DatasetPtr& dataset, const Config& config) {
if (!index_) {
KNOWHERE_THROW_MSG("index not initialize");
}
config->CheckValid();
//auto metric_type = config["metric_type"].as_string() == "L2" ?
// auto metric_type = config["metric_type"].as_string() == "L2" ?
// faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT;
//index_->metric_type = metric_type;
// index_->metric_type = metric_type;
GETTENSOR(dataset)
auto elems = rows * config->k;
auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems);
auto res_dis = (float *) malloc(sizeof(float) * elems);
auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems);
auto res_dis = (float*)malloc(sizeof(float) * elems);
search_impl(rows, (float *) p_data, config->k, res_dis, res_ids, Config());
search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config());
auto id_buf = MakeMutableBufferSmart((uint8_t *) res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t *) res_dis, sizeof(float) * elems);
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
@ -83,12 +84,13 @@ DatasetPtr IDMAP::Search(const DatasetPtr &dataset, const Config &config) {
return std::make_shared<Dataset>(array, nullptr);
}
void IDMAP::search_impl(int64_t n, const float *data, int64_t k, float *distances, int64_t *labels, const Config &cfg) {
index_->search(n, (float *) data, k, distances, labels);
void
IDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) {
index_->search(n, (float*)data, k, distances, labels);
}
void IDMAP::Add(const DatasetPtr &dataset, const Config &config) {
void
IDMAP::Add(const DatasetPtr& dataset, const Config& config) {
if (!index_) {
KNOWHERE_THROW_MSG("index not initialize");
}
@ -98,49 +100,56 @@ void IDMAP::Add(const DatasetPtr &dataset, const Config &config) {
// TODO: magic here.
auto array = dataset->array()[0];
auto p_ids = array->data()->GetValues<long>(1, 0);
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
index_->add_with_ids(rows, (float *) p_data, p_ids);
index_->add_with_ids(rows, (float*)p_data, p_ids);
}
int64_t IDMAP::Count() {
int64_t
IDMAP::Count() {
return index_->ntotal;
}
int64_t IDMAP::Dimension() {
int64_t
IDMAP::Dimension() {
return index_->d;
}
// TODO(linxj): return const pointer
float *IDMAP::GetRawVectors() {
float*
IDMAP::GetRawVectors() {
try {
auto file_index = dynamic_cast<faiss::IndexIDMap *>(index_.get());
auto file_index = dynamic_cast<faiss::IndexIDMap*>(index_.get());
auto flat_index = dynamic_cast<faiss::IndexFlat*>(file_index->index);
return flat_index->xb.data();
} catch (std::exception &e) {
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
}
// TODO(linxj): return const pointer
int64_t *IDMAP::GetRawIds() {
int64_t*
IDMAP::GetRawIds() {
try {
auto file_index = dynamic_cast<faiss::IndexIDMap *>(index_.get());
auto file_index = dynamic_cast<faiss::IndexIDMap*>(index_.get());
return file_index->id_map.data();
} catch (std::exception &e) {
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
}
const char* type = "IDMap,Flat";
void IDMAP::Train(const Config &config) {
void
IDMAP::Train(const Config& config) {
config->CheckValid();
auto index = faiss::index_factory(config->d, type, GetMetricType(config->metric_type));
index_.reset(index);
}
VectorIndexPtr IDMAP::Clone() {
VectorIndexPtr
IDMAP::Clone() {
std::lock_guard<std::mutex> lk(mutex_);
auto clone_index = faiss::clone_index(index_.get());
@ -149,8 +158,9 @@ VectorIndexPtr IDMAP::Clone() {
return std::make_shared<IDMAP>(new_index);
}
VectorIndexPtr IDMAP::CopyCpuToGpu(const int64_t &device_id, const Config &config) {
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)){
VectorIndexPtr
IDMAP::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
ResScope rs(res, device_id, false);
auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get());
@ -162,38 +172,41 @@ VectorIndexPtr IDMAP::CopyCpuToGpu(const int64_t &device_id, const Config &confi
}
}
void IDMAP::Seal() {
void
IDMAP::Seal() {
// do nothing
}
VectorIndexPtr GPUIDMAP::CopyGpuToCpu(const Config &config) {
VectorIndexPtr
GPUIDMAP::CopyGpuToCpu(const Config& config) {
std::lock_guard<std::mutex> lk(mutex_);
faiss::Index *device_index = index_.get();
faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index);
faiss::Index* device_index = index_.get();
faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index);
std::shared_ptr<faiss::Index> new_index;
new_index.reset(host_index);
return std::make_shared<IDMAP>(new_index);
}
VectorIndexPtr GPUIDMAP::Clone() {
VectorIndexPtr
GPUIDMAP::Clone() {
auto cpu_idx = CopyGpuToCpu(Config());
if (auto idmap = std::dynamic_pointer_cast<IDMAP>(cpu_idx)){
if (auto idmap = std::dynamic_pointer_cast<IDMAP>(cpu_idx)) {
return idmap->CopyCpuToGpu(gpu_id_, Config());
}
else {
} else {
KNOWHERE_THROW_MSG("IndexType not Support GpuClone");
}
}
BinarySet GPUIDMAP::SerializeImpl() {
BinarySet
GPUIDMAP::SerializeImpl() {
try {
MemoryIOWriter writer;
{
faiss::Index *index = index_.get();
faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(index);
faiss::Index* index = index_.get();
faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(index);
faiss::write_index(host_index, &writer);
delete host_index;
@ -205,21 +218,22 @@ BinarySet GPUIDMAP::SerializeImpl() {
res_set.Append("IVF", data, writer.rp);
return res_set;
} catch (std::exception &e) {
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
}
void GPUIDMAP::LoadImpl(const BinarySet &index_binary) {
void
GPUIDMAP::LoadImpl(const BinarySet& index_binary) {
auto binary = index_binary.GetByName("IVF");
MemoryIOReader reader;
{
reader.total = binary->size;
reader.data_ = binary->data.get();
faiss::Index *index = faiss::read_index(&reader);
faiss::Index* index = faiss::read_index(&reader);
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_) ){
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
ResScope rs(res, gpu_id_, false);
auto device_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index);
index_.reset(device_index);
@ -232,28 +246,27 @@ void GPUIDMAP::LoadImpl(const BinarySet &index_binary) {
}
}
VectorIndexPtr GPUIDMAP::CopyGpuToGpu(const int64_t &device_id, const Config &config) {
VectorIndexPtr
GPUIDMAP::CopyGpuToGpu(const int64_t& device_id, const Config& config) {
auto cpu_index = CopyGpuToCpu(config);
return std::static_pointer_cast<IDMAP>(cpu_index)->CopyCpuToGpu(device_id, config);
}
float *GPUIDMAP::GetRawVectors() {
float*
GPUIDMAP::GetRawVectors() {
KNOWHERE_THROW_MSG("Not support");
}
int64_t *GPUIDMAP::GetRawIds() {
int64_t*
GPUIDMAP::GetRawIds() {
KNOWHERE_THROW_MSG("Not support");
}
void GPUIDMAP::search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg) {
void
GPUIDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) {
ResScope rs(res_, gpu_id_);
index_->search(n, (float *) data, k, distances, labels);
index_->search(n, (float*)data, k, distances, labels);
}
}
}
} // namespace knowhere
} // namespace zilliz
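
IDMAP wraps the plain faiss "IDMap,Flat" combination: raw float vectors stored next to caller-supplied int64 ids, with no training step. A hedged stand-alone sketch of that underlying faiss usage; the function name, the k = 10, and querying with the same vectors that were added are illustrative assumptions:

#include <faiss/AutoTune.h>
#include <faiss/Index.h>
#include <vector>

void
IdMapFlatSketch(int dim, faiss::Index::idx_t n, const float* vectors, const faiss::Index::idx_t* ids) {
    faiss::Index* index = faiss::index_factory(dim, "IDMap,Flat", faiss::METRIC_L2);
    index->add_with_ids(n, vectors, ids);  // Flat needs no train()
    faiss::Index::idx_t k = 10;
    std::vector<float> distances(n * k);
    std::vector<faiss::Index::idx_t> labels(n * k);
    index->search(n, vectors, k, distances.data(), labels.data());  // self-query for illustration
    delete index;
}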

View File

@ -15,41 +15,54 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "IndexIVF.h"
#include "IndexGPUIVF.h"
#include "IndexIVF.h"
#include <memory>
#include <utility>
namespace zilliz {
namespace knowhere {
class IDMAP : public VectorIndex, public FaissBaseIndex {
public:
IDMAP() : FaissBaseIndex(nullptr) {};
explicit IDMAP(std::shared_ptr<faiss::Index> index) : FaissBaseIndex(std::move(index)) {};
BinarySet Serialize() override;
void Load(const BinarySet &index_binary) override;
void Train(const Config &config);
DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
int64_t Count() override;
VectorIndexPtr Clone() override;
int64_t Dimension() override;
void Add(const DatasetPtr &dataset, const Config &config) override;
VectorIndexPtr CopyCpuToGpu(const int64_t &device_id, const Config &config);
void Seal() override;
IDMAP() : FaissBaseIndex(nullptr) {
}
virtual float *GetRawVectors();
virtual int64_t *GetRawIds();
explicit IDMAP(std::shared_ptr<faiss::Index> index) : FaissBaseIndex(std::move(index)) {
}
BinarySet
Serialize() override;
void
Load(const BinarySet& index_binary) override;
void
Train(const Config& config);
DatasetPtr
Search(const DatasetPtr& dataset, const Config& config) override;
int64_t
Count() override;
VectorIndexPtr
Clone() override;
int64_t
Dimension() override;
void
Add(const DatasetPtr& dataset, const Config& config) override;
VectorIndexPtr
CopyCpuToGpu(const int64_t& device_id, const Config& config);
void
Seal() override;
virtual float*
GetRawVectors();
virtual int64_t*
GetRawIds();
protected:
virtual void search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg);
virtual void
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg);
std::mutex mutex_;
};
@ -57,27 +70,31 @@ using IDMAPPtr = std::shared_ptr<IDMAP>;
class GPUIDMAP : public IDMAP, public GPUIndex {
public:
explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t &device_id, ResPtr& res)
: IDMAP(std::move(index)), GPUIndex(device_id, res) {}
explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& res)
: IDMAP(std::move(index)), GPUIndex(device_id, res) {
}
VectorIndexPtr CopyGpuToCpu(const Config &config) override;
float *GetRawVectors() override;
int64_t *GetRawIds() override;
VectorIndexPtr Clone() override;
VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) override;
VectorIndexPtr
CopyGpuToCpu(const Config& config) override;
float*
GetRawVectors() override;
int64_t*
GetRawIds() override;
VectorIndexPtr
Clone() override;
VectorIndexPtr
CopyGpuToGpu(const int64_t& device_id, const Config& config) override;
protected:
void search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg) override;
BinarySet SerializeImpl() override;
void LoadImpl(const BinarySet &index_binary) override;
void
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override;
BinarySet
SerializeImpl() override;
void
LoadImpl(const BinarySet& index_binary) override;
};
using GPUIDMAPPtr = std::shared_ptr<GPUIDMAP>;
}
}
} // namespace knowhere
} // namespace zilliz

View File

@ -15,47 +15,47 @@
// specific language governing permissions and limitations
// under the License.
#include <faiss/AutoTune.h>
#include <faiss/AuxIndexStructures.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVF.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/AutoTune.h>
#include <faiss/IVFlib.h>
#include <faiss/AuxIndexStructures.h>
#include <faiss/index_io.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/index_io.h>
#include <memory>
#include <utility>
#include <vector>
#include "knowhere/common/Exception.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "IndexIVF.h"
#include "IndexGPUIVF.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/IndexIVF.h"
namespace zilliz {
namespace knowhere {
IndexModelPtr IVF::Train(const DatasetPtr &dataset, const Config &config) {
IndexModelPtr
IVF::Train(const DatasetPtr& dataset, const Config& config) {
auto build_cfg = std::dynamic_pointer_cast<IVFCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
build_cfg->CheckValid(); // throw exception
}
GETTENSOR(dataset)
faiss::Index *coarse_quantizer = new faiss::IndexFlatL2(dim);
auto index = std::make_shared<faiss::IndexIVFFlat>(coarse_quantizer, dim,
build_cfg->nlist,
faiss::Index* coarse_quantizer = new faiss::IndexFlatL2(dim);
auto index = std::make_shared<faiss::IndexIVFFlat>(coarse_quantizer, dim, build_cfg->nlist,
GetMetricType(build_cfg->metric_type));
index->train(rows, (float *) p_data);
index->train(rows, (float*)p_data);
// TODO(linxj): override here. train return model or not.
return std::make_shared<IVFIndexModel>(index);
}
void IVF::Add(const DatasetPtr &dataset, const Config &config) {
void
IVF::Add(const DatasetPtr& dataset, const Config& config) {
if (!index_ || !index_->is_trained) {
KNOWHERE_THROW_MSG("index not initialize or trained");
}
@ -64,11 +64,12 @@ void IVF::Add(const DatasetPtr &dataset, const Config &config) {
GETTENSOR(dataset)
auto array = dataset->array()[0];
auto p_ids = array->data()->GetValues<long>(1, 0);
index_->add_with_ids(rows, (float *) p_data, p_ids);
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
index_->add_with_ids(rows, (float*)p_data, p_ids);
}
void IVF::AddWithoutIds(const DatasetPtr &dataset, const Config &config) {
void
IVF::AddWithoutIds(const DatasetPtr& dataset, const Config& config) {
if (!index_ || !index_->is_trained) {
KNOWHERE_THROW_MSG("index not initialize or trained");
}
@ -76,10 +77,11 @@ void IVF::AddWithoutIds(const DatasetPtr &dataset, const Config &config) {
std::lock_guard<std::mutex> lk(mutex_);
GETTENSOR(dataset)
index_->add(rows, (float *) p_data);
index_->add(rows, (float*)p_data);
}
BinarySet IVF::Serialize() {
BinarySet
IVF::Serialize() {
if (!index_ || !index_->is_trained) {
KNOWHERE_THROW_MSG("index not initialize or trained");
}
@ -89,31 +91,33 @@ BinarySet IVF::Serialize() {
return SerializeImpl();
}
void IVF::Load(const BinarySet &index_binary) {
void
IVF::Load(const BinarySet& index_binary) {
std::lock_guard<std::mutex> lk(mutex_);
LoadImpl(index_binary);
}
DatasetPtr IVF::Search(const DatasetPtr &dataset, const Config &config) {
DatasetPtr
IVF::Search(const DatasetPtr& dataset, const Config& config) {
if (!index_ || !index_->is_trained) {
KNOWHERE_THROW_MSG("index not initialize or trained");
}
auto search_cfg = std::dynamic_pointer_cast<IVFCfg>(config);
if (search_cfg != nullptr) {
search_cfg->CheckValid(); // throw exception
search_cfg->CheckValid(); // throw exception
}
GETTENSOR(dataset)
auto elems = rows * search_cfg->k;
auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems);
auto res_dis = (float *) malloc(sizeof(float) * elems);
auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems);
auto res_dis = (float*)malloc(sizeof(float) * elems);
search_impl(rows, (float*) p_data, search_cfg->k, res_dis, res_ids, config);
search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config);
auto id_buf = MakeMutableBufferSmart((uint8_t *) res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t *) res_dis, sizeof(float) * elems);
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
@ -131,7 +135,8 @@ DatasetPtr IVF::Search(const DatasetPtr &dataset, const Config &config) {
return std::make_shared<Dataset>(array, nullptr);
}
void IVF::set_index_model(IndexModelPtr model) {
void
IVF::set_index_model(IndexModelPtr model) {
std::lock_guard<std::mutex> lk(mutex_);
auto rel_model = std::static_pointer_cast<IVFIndexModel>(model);
@ -140,25 +145,29 @@ void IVF::set_index_model(IndexModelPtr model) {
index_.reset(faiss::clone_index(rel_model->index_.get()));
}
std::shared_ptr<faiss::IVFSearchParameters> IVF::GenParams(const Config &config) {
std::shared_ptr<faiss::IVFSearchParameters>
IVF::GenParams(const Config& config) {
auto params = std::make_shared<faiss::IVFPQSearchParameters>();
auto search_cfg = std::dynamic_pointer_cast<IVFCfg>(config);
params->nprobe = search_cfg->nprobe;
//params->max_codes = config.get_with_default("max_codes", size_t(0));
// params->max_codes = config.get_with_default("max_codes", size_t(0));
return params;
}
int64_t IVF::Count() {
int64_t
IVF::Count() {
return index_->ntotal;
}
int64_t IVF::Dimension() {
int64_t
IVF::Dimension() {
return index_->d;
}
void IVF::GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, const Config &config) {
void
IVF::GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config) {
GETTENSOR(dataset)
auto ntotal = Count();
@ -174,7 +183,7 @@ void IVF::GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, co
for (int i = 0; i < total_search_count; ++i) {
auto b_size = i == total_search_count - 1 && tail_batch_size != 0 ? tail_batch_size : batch_size;
auto &res = res_vec[i];
auto& res = res_vec[i];
res.resize(k * b_size);
auto xq = p_data + batch_size * dim * i;
@ -182,7 +191,7 @@ void IVF::GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, co
int tmp = 0;
for (int j = 0; j < b_size; ++j) {
auto &node = graph[batch_size * i + j];
auto& node = graph[batch_size * i + j];
node.resize(k);
for (int m = 0; m < k && tmp < k * b_size; ++m, ++tmp) {
// TODO(linxj): avoid memcopy here.
@ -192,18 +201,15 @@ void IVF::GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, co
}
}
void IVF::search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg) {
void
IVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) {
auto params = GenParams(cfg);
faiss::ivflib::search_with_parameters(index_.get(), n, (float *) data, k, distances, labels, params.get());
faiss::ivflib::search_with_parameters(index_.get(), n, (float*)data, k, distances, labels, params.get());
}
VectorIndexPtr IVF::CopyCpuToGpu(const int64_t& device_id, const Config &config) {
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)){
VectorIndexPtr
IVF::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
ResScope rs(res, device_id, false);
auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get());
@ -215,7 +221,8 @@ VectorIndexPtr IVF::CopyCpuToGpu(const int64_t& device_id, const Config &config)
}
}
VectorIndexPtr IVF::Clone() {
VectorIndexPtr
IVF::Clone() {
std::lock_guard<std::mutex> lk(mutex_);
auto clone_index = faiss::clone_index(index_.get());
@ -224,21 +231,24 @@ VectorIndexPtr IVF::Clone() {
return Clone_impl(new_index);
}
VectorIndexPtr IVF::Clone_impl(const std::shared_ptr<faiss::Index> &index) {
VectorIndexPtr
IVF::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
return std::make_shared<IVF>(index);
}
void IVF::Seal() {
void
IVF::Seal() {
if (!index_ || !index_->is_trained) {
KNOWHERE_THROW_MSG("index not initialize or trained");
}
SealImpl();
}
IVFIndexModel::IVFIndexModel(std::shared_ptr<faiss::Index> index) : FaissBaseIndex(std::move(index)) {
}
IVFIndexModel::IVFIndexModel(std::shared_ptr<faiss::Index> index) : FaissBaseIndex(std::move(index)) {}
BinarySet IVFIndexModel::Serialize() {
BinarySet
IVFIndexModel::Serialize() {
if (!index_ || !index_->is_trained) {
KNOWHERE_THROW_MSG("indexmodel not initialize or trained");
}
@ -246,18 +256,16 @@ BinarySet IVFIndexModel::Serialize() {
return SerializeImpl();
}
void IVFIndexModel::Load(const BinarySet &binary_set) {
void
IVFIndexModel::Load(const BinarySet& binary_set) {
std::lock_guard<std::mutex> lk(mutex_);
LoadImpl(binary_set);
}
void IVFIndexModel::SealImpl() {
void
IVFIndexModel::SealImpl() {
// do nothing
}
}
}
} // namespace knowhere
} // namespace zilliz
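
IVF::GenGraph walks the stored vectors in fixed-size batches plus a possibly shorter tail batch, which is what the total_search_count / tail_batch_size test in the loop above encodes. A small worked sketch of that arithmetic; ntotal = 1050 and a batch size of 100 are assumed values for illustration only:

#include <cstdint>
#include <iostream>

int main() {
    int64_t ntotal = 1050;     // assumed number of stored vectors
    int64_t batch_size = 100;  // assumed per-search batch size
    int64_t tail_batch_size = ntotal % batch_size;                                      // 50
    int64_t total_search_count = ntotal / batch_size + (tail_batch_size == 0 ? 0 : 1);  // 11
    for (int64_t i = 0; i < total_search_count; ++i) {
        bool last = (i == total_search_count - 1) && tail_batch_size != 0;
        int64_t b_size = last ? tail_batch_size : batch_size;  // 100 for the first 10 batches, 50 for the last
        std::cout << "batch " << i << " handles " << b_size << " vectors\n";
    }
    return 0;
}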

View File

@ -15,17 +15,17 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <mutex>
#include <utility>
#include <vector>
#include "VectorIndex.h"
#include "FaissBaseIndex.h"
#include "VectorIndex.h"
#include "faiss/IndexIVF.h"
namespace zilliz {
namespace knowhere {
@ -33,36 +33,38 @@ using Graph = std::vector<std::vector<int64_t>>;
class IVF : public VectorIndex, protected FaissBaseIndex {
public:
IVF() : FaissBaseIndex(nullptr) {};
IVF() : FaissBaseIndex(nullptr) {
}
explicit IVF(std::shared_ptr<faiss::Index> index) : FaissBaseIndex(std::move(index)) {}
explicit IVF(std::shared_ptr<faiss::Index> index) : FaissBaseIndex(std::move(index)) {
}
VectorIndexPtr
Clone() override;;
Clone() override;
IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) override;
Train(const DatasetPtr& dataset, const Config& config) override;
void
set_index_model(IndexModelPtr model) override;
void
Add(const DatasetPtr &dataset, const Config &config) override;
Add(const DatasetPtr& dataset, const Config& config) override;
void
AddWithoutIds(const DatasetPtr &dataset, const Config &config);
AddWithoutIds(const DatasetPtr& dataset, const Config& config);
DatasetPtr
Search(const DatasetPtr &dataset, const Config &config) override;
Search(const DatasetPtr& dataset, const Config& config) override;
void
GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, const Config &config);
GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config);
BinarySet
Serialize() override;
void
Load(const BinarySet &index_binary) override;
Load(const BinarySet& index_binary) override;
int64_t
Count() override;
@ -74,23 +76,17 @@ class IVF : public VectorIndex, protected FaissBaseIndex {
Seal() override;
virtual VectorIndexPtr
CopyCpuToGpu(const int64_t &device_id, const Config &config);
CopyCpuToGpu(const int64_t& device_id, const Config& config);
protected:
virtual std::shared_ptr<faiss::IVFSearchParameters>
GenParams(const Config &config);
GenParams(const Config& config);
virtual VectorIndexPtr
Clone_impl(const std::shared_ptr<faiss::Index> &index);
Clone_impl(const std::shared_ptr<faiss::Index>& index);
virtual void
search_impl(int64_t n,
const float *data,
int64_t k,
float *distances,
int64_t *labels,
const Config &cfg);
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg);
protected:
std::mutex mutex_;
@ -106,13 +102,14 @@ class IVFIndexModel : public IndexModel, public FaissBaseIndex {
public:
explicit IVFIndexModel(std::shared_ptr<faiss::Index> index);
IVFIndexModel() : FaissBaseIndex(nullptr) {};
IVFIndexModel() : FaissBaseIndex(nullptr) {
}
BinarySet
Serialize() override;
void
Load(const BinarySet &binary) override;
Load(const BinarySet& binary) override;
protected:
void
@ -121,7 +118,8 @@ class IVFIndexModel : public IndexModel, public FaissBaseIndex {
protected:
std::mutex mutex_;
};
using IVFIndexModelPtr = std::shared_ptr<IVFIndexModel>;
}
}
} // namespace knowhere
} // namespace zilliz

View File

@ -15,47 +15,51 @@
// specific language governing permissions and limitations
// under the License.
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <memory>
#include <utility>
#include "IndexIVFPQ.h"
#include "knowhere/common/Exception.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"
namespace zilliz {
namespace knowhere {
IndexModelPtr IVFPQ::Train(const DatasetPtr &dataset, const Config &config) {
IndexModelPtr
IVFPQ::Train(const DatasetPtr& dataset, const Config& config) {
auto build_cfg = std::dynamic_pointer_cast<IVFPQCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
build_cfg->CheckValid(); // throw exception
}
GETTENSOR(dataset)
faiss::Index *coarse_quantizer = new faiss::IndexFlat(dim, GetMetricType(build_cfg->metric_type));
auto index = std::make_shared<faiss::IndexIVFPQ>(coarse_quantizer, dim,
build_cfg->nlist, build_cfg->m, build_cfg->nbits);
index->train(rows, (float *) p_data);
faiss::Index* coarse_quantizer = new faiss::IndexFlat(dim, GetMetricType(build_cfg->metric_type));
auto index =
std::make_shared<faiss::IndexIVFPQ>(coarse_quantizer, dim, build_cfg->nlist, build_cfg->m, build_cfg->nbits);
index->train(rows, (float*)p_data);
return std::make_shared<IVFIndexModel>(index);
}
std::shared_ptr<faiss::IVFSearchParameters> IVFPQ::GenParams(const Config &config) {
std::shared_ptr<faiss::IVFSearchParameters>
IVFPQ::GenParams(const Config& config) {
auto params = std::make_shared<faiss::IVFPQSearchParameters>();
auto search_cfg = std::dynamic_pointer_cast<IVFPQCfg>(config);
params->nprobe = search_cfg->nprobe;
// params->scan_table_threshold = conf->scan_table_threhold;
// params->polysemous_ht = conf->polysemous_ht;
// params->max_codes = conf->max_codes;
// params->scan_table_threshold = conf->scan_table_threhold;
// params->polysemous_ht = conf->polysemous_ht;
// params->max_codes = conf->max_codes;
return params;
}
VectorIndexPtr IVFPQ::Clone_impl(const std::shared_ptr<faiss::Index> &index) {
VectorIndexPtr
IVFPQ::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
return std::make_shared<IVFPQ>(index);
}
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz
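
The faiss::IndexIVFPQ constructed above takes nlist coarse cells, m sub-quantizers and nbits bits per sub-vector code, and faiss requires dim to be divisible by m. A hedged construction sketch; the helper name and the values it would typically receive (dim = 128, nlist = 1024, m = 16, nbits = 8) are assumptions:

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>

faiss::IndexIVFPQ*
MakeIvfPq(int dim, size_t nlist, size_t m, size_t nbits) {
    auto* coarse_quantizer = new faiss::IndexFlat(dim, faiss::METRIC_L2);
    auto* index = new faiss::IndexIVFPQ(coarse_quantizer, dim, nlist, m, nbits);
    index->own_fields = true;  // let the IVFPQ index delete the coarse quantizer
    return index;
}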

View File

@ -15,33 +15,33 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <utility>
#include "IndexIVF.h"
namespace zilliz {
namespace knowhere {
class IVFPQ : public IVF {
public:
explicit IVFPQ(std::shared_ptr<faiss::Index> index) : IVF(std::move(index)) {}
public:
explicit IVFPQ(std::shared_ptr<faiss::Index> index) : IVF(std::move(index)) {
}
IVFPQ() = default;
IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) override;
Train(const DatasetPtr& dataset, const Config& config) override;
protected:
protected:
std::shared_ptr<faiss::IVFSearchParameters>
GenParams(const Config &config) override;
GenParams(const Config& config) override;
VectorIndexPtr
Clone_impl(const std::shared_ptr<faiss::Index> &index) override;
Clone_impl(const std::shared_ptr<faiss::Index>& index) override;
};
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz

View File

@ -15,44 +15,46 @@
// specific language governing permissions and limitations
// under the License.
#include <faiss/gpu/GpuAutoTune.h>
#include <memory>
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "IndexIVFSQ.h"
#include "IndexGPUIVFSQ.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexGPUIVFSQ.h"
#include "knowhere/index/vector_index/IndexIVFSQ.h"
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
namespace zilliz {
namespace knowhere {
IndexModelPtr IVFSQ::Train(const DatasetPtr &dataset, const Config &config) {
IndexModelPtr
IVFSQ::Train(const DatasetPtr& dataset, const Config& config) {
auto build_cfg = std::dynamic_pointer_cast<IVFSQCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
build_cfg->CheckValid(); // throw exception
}
GETTENSOR(dataset)
std::stringstream index_type;
index_type << "IVF" << build_cfg->nlist << "," << "SQ" << build_cfg->nbits;
auto build_index = faiss::index_factory(dim, index_type.str().c_str(),
GetMetricType(build_cfg->metric_type));
build_index->train(rows, (float *) p_data);
index_type << "IVF" << build_cfg->nlist << ","
<< "SQ" << build_cfg->nbits;
auto build_index = faiss::index_factory(dim, index_type.str().c_str(), GetMetricType(build_cfg->metric_type));
build_index->train(rows, (float*)p_data);
std::shared_ptr<faiss::Index> ret_index;
ret_index.reset(build_index);
return std::make_shared<IVFIndexModel>(ret_index);
}
VectorIndexPtr IVFSQ::Clone_impl(const std::shared_ptr<faiss::Index> &index) {
VectorIndexPtr
IVFSQ::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
return std::make_shared<IVFSQ>(index);
}
VectorIndexPtr IVFSQ::CopyCpuToGpu(const int64_t &device_id, const Config &config) {
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)){
VectorIndexPtr
IVFSQ::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
ResScope rs(res, device_id, false);
faiss::gpu::GpuClonerOptions option;
option.allInGpu = true;
@ -67,5 +69,5 @@ VectorIndexPtr IVFSQ::CopyCpuToGpu(const int64_t &device_id, const Config &confi
}
}
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz
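
IVFSQ::CopyCpuToGpu relies on faiss::gpu::index_cpu_to_gpu with GpuClonerOptions::allInGpu set so the quantized codes stay resident on the device. A minimal sketch of that call, assuming a StandardGpuResources object has already been created for the target device:

#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuClonerOptions.h>
#include <faiss/gpu/StandardGpuResources.h>

faiss::Index*
CloneToGpu(faiss::gpu::StandardGpuResources* res, int device_id, const faiss::Index* cpu_index) {
    faiss::gpu::GpuClonerOptions option;
    option.allInGpu = true;  // keep the SQ codes on the GPU
    return faiss::gpu::index_cpu_to_gpu(res, device_id, cpu_index, &option);
}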

View File

@ -15,31 +15,33 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <utility>
#include "IndexIVF.h"
namespace zilliz {
namespace knowhere {
class IVFSQ : public IVF {
public:
explicit IVFSQ(std::shared_ptr<faiss::Index> index) : IVF(std::move(index)) {}
public:
explicit IVFSQ(std::shared_ptr<faiss::Index> index) : IVF(std::move(index)) {
}
IVFSQ() = default;
IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) override;
Train(const DatasetPtr& dataset, const Config& config) override;
VectorIndexPtr
CopyCpuToGpu(const int64_t &device_id, const Config &config) override;
CopyCpuToGpu(const int64_t& device_id, const Config& config) override;
protected:
protected:
VectorIndexPtr
Clone_impl(const std::shared_ptr<faiss::Index> &index) override;
Clone_impl(const std::shared_ptr<faiss::Index>& index) override;
};
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz

View File

@ -15,41 +15,39 @@
// specific language governing permissions and limitations
// under the License.
#include <sstream>
#include <SPTAG/AnnService/inc/Server/QueryParser.h>
#include <SPTAG/AnnService/inc/Core/VectorSet.h>
#include <SPTAG/AnnService/inc/Core/Common.h>
#include <SPTAG/AnnService/inc/Core/VectorSet.h>
#include <SPTAG/AnnService/inc/Server/QueryParser.h>
#include <sstream>
#include <vector>
#undef mkdir
#include "IndexKDT.h"
#include "knowhere/index/vector_index/IndexKDT.h"
#include "knowhere/index/vector_index/helpers/Definitions.h"
//#include "knowhere/index/preprocessor/normalize.h"
#include "knowhere/index/vector_index/helpers/KDTParameterMgr.h"
#include "knowhere/adapter/SptagAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/helpers/KDTParameterMgr.h"
namespace zilliz {
namespace knowhere {
BinarySet
CPUKDTRNG::Serialize() {
std::vector<void *> index_blobs;
std::vector<void*> index_blobs;
std::vector<int64_t> index_len;
index_ptr_->SaveIndexToMemory(index_blobs, index_len);
BinarySet binary_set;
auto sample = std::make_shared<uint8_t>();
sample.reset(static_cast<uint8_t *>(index_blobs[0]));
sample.reset(static_cast<uint8_t*>(index_blobs[0]));
auto tree = std::make_shared<uint8_t>();
tree.reset(static_cast<uint8_t *>(index_blobs[1]));
tree.reset(static_cast<uint8_t*>(index_blobs[1]));
auto graph = std::make_shared<uint8_t>();
graph.reset(static_cast<uint8_t *>(index_blobs[2]));
graph.reset(static_cast<uint8_t*>(index_blobs[2]));
auto metadata = std::make_shared<uint8_t>();
metadata.reset(static_cast<uint8_t *>(index_blobs[3]));
metadata.reset(static_cast<uint8_t*>(index_blobs[3]));
binary_set.Append("samples", sample, index_len[0]);
binary_set.Append("tree", tree, index_len[1]);
@ -59,8 +57,8 @@ CPUKDTRNG::Serialize() {
}
void
CPUKDTRNG::Load(const BinarySet &binary_set) {
std::vector<void *> index_blobs;
CPUKDTRNG::Load(const BinarySet& binary_set) {
std::vector<void*> index_blobs;
auto samples = binary_set.GetByName("samples");
index_blobs.push_back(samples->data.get());
@ -77,17 +75,17 @@ CPUKDTRNG::Load(const BinarySet &binary_set) {
index_ptr_->LoadIndexFromMemory(index_blobs);
}
//PreprocessorPtr
//CPUKDTRNG::BuildPreprocessor(const DatasetPtr &dataset, const Config &config) {
// PreprocessorPtr
// CPUKDTRNG::BuildPreprocessor(const DatasetPtr &dataset, const Config &config) {
// return std::make_shared<NormalizePreprocessor>();
//}
IndexModelPtr
CPUKDTRNG::Train(const DatasetPtr &origin, const Config &train_config) {
CPUKDTRNG::Train(const DatasetPtr& origin, const Config& train_config) {
SetParameters(train_config);
DatasetPtr dataset = origin->Clone();
//if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
// if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
// && preprocessor_) {
// preprocessor_->Preprocess(dataset);
//}
@ -101,11 +99,11 @@ CPUKDTRNG::Train(const DatasetPtr &origin, const Config &train_config) {
}
void
CPUKDTRNG::Add(const DatasetPtr &origin, const Config &add_config) {
CPUKDTRNG::Add(const DatasetPtr& origin, const Config& add_config) {
SetParameters(add_config);
DatasetPtr dataset = origin->Clone();
//if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
// if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
// && preprocessor_) {
// preprocessor_->Preprocess(dataset);
//}
@ -116,18 +114,18 @@ CPUKDTRNG::Add(const DatasetPtr &origin, const Config &add_config) {
}
void
CPUKDTRNG::SetParameters(const Config &config) {
for (auto &para : KDTParameterMgr::GetInstance().GetKDTParameters()) {
// auto value = config.get_with_default(para.first, para.second);
CPUKDTRNG::SetParameters(const Config& config) {
for (auto& para : KDTParameterMgr::GetInstance().GetKDTParameters()) {
// auto value = config.get_with_default(para.first, para.second);
index_ptr_->SetParameter(para.first, para.second);
}
}
DatasetPtr
CPUKDTRNG::Search(const DatasetPtr &dataset, const Config &config) {
CPUKDTRNG::Search(const DatasetPtr& dataset, const Config& config) {
SetParameters(config);
auto tensor = dataset->tensor()[0];
auto p = (float *) tensor->raw_mutable_data();
auto p = (float*)tensor->raw_mutable_data();
for (auto i = 0; i < 10; ++i) {
for (auto j = 0; j < 10; ++j) {
std::cout << p[i * 10 + j] << " ";
@ -138,7 +136,7 @@ CPUKDTRNG::Search(const DatasetPtr &dataset, const Config &config) {
#pragma omp parallel for
for (auto i = 0; i < query_results.size(); ++i) {
auto target = (float *) query_results[i].GetTarget();
auto target = (float*)query_results[i].GetTarget();
std::cout << target[0] << ", " << target[1] << ", " << target[2] << std::endl;
index_ptr_->SearchIndex(query_results[i]);
}
@ -146,27 +144,34 @@ CPUKDTRNG::Search(const DatasetPtr &dataset, const Config &config) {
return ConvertToDataset(query_results);
}
int64_t CPUKDTRNG::Count() {
int64_t
CPUKDTRNG::Count() {
return index_ptr_->GetNumSamples();
}
int64_t CPUKDTRNG::Dimension() {
int64_t
CPUKDTRNG::Dimension() {
return index_ptr_->GetFeatureDim();
}
VectorIndexPtr CPUKDTRNG::Clone() {
VectorIndexPtr
CPUKDTRNG::Clone() {
KNOWHERE_THROW_MSG("not support");
}
void CPUKDTRNG::Seal() {
void
CPUKDTRNG::Seal() {
// do nothing
}
// TODO(linxj):
BinarySet
CPUKDTRNGIndexModel::Serialize() {}
CPUKDTRNGIndexModel::Serialize() {
}
void
CPUKDTRNGIndexModel::Load(const BinarySet &binary) {}
CPUKDTRNGIndexModel::Load(const BinarySet& binary) {
}
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz

View File

@ -15,53 +15,54 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
#include <cstdint>
#include <memory>
#include "VectorIndex.h"
#include "knowhere/index/IndexModel.h"
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
namespace zilliz {
namespace knowhere {
class CPUKDTRNG : public VectorIndex {
public:
CPUKDTRNG() {
index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::KDT,
SPTAG::VectorValueType::Float);
index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::KDT, SPTAG::VectorValueType::Float);
index_ptr_->SetParameter("DistCalcMethod", "L2");
}
public:
BinarySet
Serialize() override;
VectorIndexPtr Clone() override;
VectorIndexPtr
Clone() override;
void
Load(const BinarySet &index_array) override;
Load(const BinarySet& index_array) override;
public:
//PreprocessorPtr
//BuildPreprocessor(const DatasetPtr &dataset, const Config &config) override;
int64_t Count() override;
int64_t Dimension() override;
// PreprocessorPtr
// BuildPreprocessor(const DatasetPtr &dataset, const Config &config) override;
int64_t
Count() override;
int64_t
Dimension() override;
IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) override;
Train(const DatasetPtr& dataset, const Config& config) override;
void
Add(const DatasetPtr &dataset, const Config &config) override;
Add(const DatasetPtr& dataset, const Config& config) override;
DatasetPtr
Search(const DatasetPtr &dataset, const Config &config) override;
void Seal() override;
Search(const DatasetPtr& dataset, const Config& config) override;
void
Seal() override;
private:
void
SetParameters(const Config &config);
SetParameters(const Config& config);
private:
PreprocessorPtr preprocessor_;
@ -76,7 +77,7 @@ class CPUKDTRNGIndexModel : public IndexModel {
Serialize() override;
void
Load(const BinarySet &binary) override;
Load(const BinarySet& binary) override;
private:
std::shared_ptr<SPTAG::VectorIndex> index_;
@ -84,5 +85,5 @@ class CPUKDTRNGIndexModel : public IndexModel {
using CPUKDTRNGIndexModelPtr = std::shared_ptr<CPUKDTRNGIndexModel>;
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz

View File

@ -15,28 +15,27 @@
// specific language governing permissions and limitations
// under the License.
#include "IndexNSG.h"
#include "knowhere/index/vector_index/nsg/NSG.h"
#include "knowhere/index/vector_index/nsg/NSGIO.h"
#include "IndexIDMAP.h"
#include "IndexIVF.h"
#include "IndexGPUIVF.h"
#include "knowhere/index/vector_index/IndexNSG.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/common/Timer.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/IndexIDMAP.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/nsg/NSG.h"
#include "knowhere/index/vector_index/nsg/NSGIO.h"
namespace zilliz {
namespace knowhere {
BinarySet NSG::Serialize() {
BinarySet
NSG::Serialize() {
if (!index_ || !index_->is_trained) {
KNOWHERE_THROW_MSG("index not initialize or trained");
}
try {
algo::NsgIndex *index = index_.get();
algo::NsgIndex* index = index_.get();
MemoryIOWriter writer;
algo::write_index(index, writer);
@ -46,12 +45,13 @@ BinarySet NSG::Serialize() {
BinarySet res_set;
res_set.Append("NSG", data, writer.total);
return res_set;
} catch (std::exception &e) {
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
}
void NSG::Load(const BinarySet &index_binary) {
void
NSG::Load(const BinarySet& index_binary) {
try {
auto binary = index_binary.GetByName("NSG");
@ -61,15 +61,16 @@ void NSG::Load(const BinarySet &index_binary) {
auto index = algo::read_index(reader);
index_.reset(index);
} catch (std::exception &e) {
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
}
DatasetPtr NSG::Search(const DatasetPtr &dataset, const Config &config) {
DatasetPtr
NSG::Search(const DatasetPtr& dataset, const Config& config) {
auto build_cfg = std::dynamic_pointer_cast<NSGCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
build_cfg->CheckValid(); // throw exception
}
if (!index_ || !index_->is_trained) {
@ -79,16 +80,15 @@ DatasetPtr NSG::Search(const DatasetPtr &dataset, const Config &config) {
GETTENSOR(dataset)
auto elems = rows * build_cfg->k;
auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems);
auto res_dis = (float *) malloc(sizeof(float) * elems);
auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems);
auto res_dis = (float*)malloc(sizeof(float) * elems);
algo::SearchParams s_params;
s_params.search_length = build_cfg->search_length;
index_->Search((float *) p_data, rows, dim,
build_cfg->k, res_dis, res_ids, s_params);
index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params);
auto id_buf = MakeMutableBufferSmart((uint8_t *) res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t *) res_dis, sizeof(float) * elems);
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
@ -106,10 +106,11 @@ DatasetPtr NSG::Search(const DatasetPtr &dataset, const Config &config) {
return std::make_shared<Dataset>(array, nullptr);
}
IndexModelPtr NSG::Train(const DatasetPtr &dataset, const Config &config) {
IndexModelPtr
NSG::Train(const DatasetPtr& dataset, const Config& config) {
auto build_cfg = std::dynamic_pointer_cast<NSGCfg>(config);
if (build_cfg != nullptr) {
build_cfg->CheckValid(); // throw exception
build_cfg->CheckValid(); // throw exception
}
if (build_cfg->metric_type != METRICTYPE::L2) {
@ -132,34 +133,38 @@ IndexModelPtr NSG::Train(const DatasetPtr &dataset, const Config &config) {
GETTENSOR(dataset)
auto array = dataset->array()[0];
auto p_ids = array->data()->GetValues<long>(1, 0);
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
index_ = std::make_shared<algo::NsgIndex>(dim, rows);
index_->SetKnnGraph(knng);
index_->Build_with_ids(rows, (float *) p_data, (long *) p_ids, b_params);
return nullptr; // TODO(linxj): support serialize
index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params);
return nullptr; // TODO(linxj): support serialize
}
void NSG::Add(const DatasetPtr &dataset, const Config &config) {
void
NSG::Add(const DatasetPtr& dataset, const Config& config) {
// do nothing
}
int64_t NSG::Count() {
int64_t
NSG::Count() {
return index_->ntotal;
}
int64_t NSG::Dimension() {
int64_t
NSG::Dimension() {
return index_->dimension;
}
VectorIndexPtr NSG::Clone() {
VectorIndexPtr
NSG::Clone() {
KNOWHERE_THROW_MSG("not support");
}
void NSG::Seal() {
void
NSG::Seal() {
// do nothing
}
}
}
} // namespace knowhere
} // namespace zilliz

View File

@ -15,11 +15,12 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "VectorIndex.h"
#include <memory>
#include <vector>
#include "VectorIndex.h"
namespace zilliz {
namespace knowhere {
@ -30,18 +31,30 @@ class NsgIndex;
class NSG : public VectorIndex {
public:
explicit NSG(const int64_t& gpu_num):gpu_(gpu_num){}
explicit NSG(const int64_t& gpu_num) : gpu_(gpu_num) {
}
NSG() = default;
IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override;
DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override;
void Add(const DatasetPtr &dataset, const Config &config) override;
BinarySet Serialize() override;
void Load(const BinarySet &index_binary) override;
int64_t Count() override;
int64_t Dimension() override;
VectorIndexPtr Clone() override;
void Seal() override;
IndexModelPtr
Train(const DatasetPtr& dataset, const Config& config) override;
DatasetPtr
Search(const DatasetPtr& dataset, const Config& config) override;
void
Add(const DatasetPtr& dataset, const Config& config) override;
BinarySet
Serialize() override;
void
Load(const BinarySet& index_binary) override;
int64_t
Count() override;
int64_t
Dimension() override;
VectorIndexPtr
Clone() override;
void
Seal() override;
private:
std::shared_ptr<algo::NsgIndex> index_;
int64_t gpu_;
@ -49,5 +62,5 @@ class NSG : public VectorIndex {
using NSGIndexPtr = std::shared_ptr<NSG>;
}
}
} // namespace knowhere
} // namespace zilliz

View File

@ -15,18 +15,15 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "knowhere/common/Config.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/common/Dataset.h"
#include "knowhere/index/Index.h"
#include "knowhere/index/preprocessor/Preprocessor.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
namespace zilliz {
namespace knowhere {
@ -34,17 +31,20 @@ namespace knowhere {
class VectorIndex;
using VectorIndexPtr = std::shared_ptr<VectorIndex>;
class VectorIndex : public Index {
public:
virtual PreprocessorPtr
BuildPreprocessor(const DatasetPtr &dataset, const Config &config) { return nullptr; }
BuildPreprocessor(const DatasetPtr& dataset, const Config& config) {
return nullptr;
}
virtual IndexModelPtr
Train(const DatasetPtr &dataset, const Config &config) { return nullptr; }
Train(const DatasetPtr& dataset, const Config& config) {
return nullptr;
}
virtual void
Add(const DatasetPtr &dataset, const Config &config) = 0;
Add(const DatasetPtr& dataset, const Config& config) = 0;
virtual void
Seal() = 0;
@ -59,7 +59,5 @@ class VectorIndex : public Index {
Dimension() = 0;
};
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz

View File

@ -15,21 +15,20 @@
// specific language governing permissions and limitations
// under the License.
#include "knowhere/index/vector_index/helpers/Cloner.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/IndexIVFSQ.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/IndexIDMAP.h"
#include "Cloner.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"
#include "knowhere/index/vector_index/IndexIVFSQ.h"
namespace zilliz {
namespace knowhere {
namespace cloner {
VectorIndexPtr CopyGpuToCpu(const VectorIndexPtr &index, const Config &config) {
VectorIndexPtr
CopyGpuToCpu(const VectorIndexPtr& index, const Config& config) {
if (auto device_index = std::dynamic_pointer_cast<GPUIndex>(index)) {
return device_index->CopyGpuToCpu(config);
} else {
@ -37,7 +36,8 @@ VectorIndexPtr CopyGpuToCpu(const VectorIndexPtr &index, const Config &config) {
}
}
VectorIndexPtr CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_id, const Config &config) {
VectorIndexPtr
CopyCpuToGpu(const VectorIndexPtr& index, const int64_t& device_id, const Config& config) {
if (auto device_index = std::dynamic_pointer_cast<GPUIndex>(index)) {
return device_index->CopyGpuToGpu(device_id, config);
}
@ -55,6 +55,6 @@ VectorIndexPtr CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_i
}
}
} // cloner
}
}
} // namespace cloner
} // namespace knowhere
} // namespace zilliz

View File

@ -15,23 +15,21 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "knowhere/index/vector_index/VectorIndex.h"
namespace zilliz {
namespace knowhere {
namespace cloner {
// TODO(linxj): rename CopyToGpu
extern VectorIndexPtr
CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_id, const Config &config);
CopyCpuToGpu(const VectorIndexPtr& index, const int64_t& device_id, const Config& config);
extern VectorIndexPtr
CopyGpuToCpu(const VectorIndexPtr &index, const Config &config);
CopyGpuToCpu(const VectorIndexPtr& index, const Config& config);
} // cloner
} // knowhere
} // zilliz
} // namespace cloner
} // namespace knowhere
} // namespace zilliz
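
A hedged usage sketch of the two cloner helpers declared above; the function name, device id 0 and the placeholder GPU-side work are assumptions, and the Config is whatever the caller already holds:

#include "knowhere/index/vector_index/helpers/Cloner.h"

namespace zk = zilliz::knowhere;

// Move an index to GPU 0, do device-side work, then bring it back for serialization.
zk::VectorIndexPtr
GpuRoundTrip(const zk::VectorIndexPtr& cpu_index, const zk::Config& config) {
    auto gpu_index = zk::cloner::CopyCpuToGpu(cpu_index, /*device_id=*/0, config);
    // ... build or search on the GPU copy here ...
    return zk::cloner::CopyGpuToCpu(gpu_index, config);
}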

View File

@ -15,10 +15,8 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace zilliz {
namespace knowhere {
namespace definition {
@ -27,6 +25,6 @@ namespace definition {
#define META_DIM ("dimension")
#define META_K ("k")
} // definition
} // knowhere
} // zilliz
} // namespace definition
} // namespace knowhere
} // namespace zilliz

View File

@ -15,25 +15,24 @@
// specific language governing permissions and limitations
// under the License.
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
#include "FaissGpuResourceMgr.h"
#include <utility>
namespace zilliz {
namespace knowhere {
FaissGpuResourceMgr &FaissGpuResourceMgr::GetInstance() {
FaissGpuResourceMgr&
FaissGpuResourceMgr::GetInstance() {
static FaissGpuResourceMgr instance;
return instance;
}
void FaissGpuResourceMgr::AllocateTempMem(ResPtr &resource,
const int64_t &device_id,
const int64_t &size) {
void
FaissGpuResourceMgr::AllocateTempMem(ResPtr& resource, const int64_t& device_id, const int64_t& size) {
if (size) {
resource->faiss_res->setTempMemory(size);
}
else {
} else {
auto search = devices_params_.find(device_id);
if (search != devices_params_.end()) {
resource->faiss_res->setTempMemory(search->second.temp_mem_size);
@ -42,10 +41,8 @@ void FaissGpuResourceMgr::AllocateTempMem(ResPtr &resource,
}
}
void FaissGpuResourceMgr::InitDevice(int64_t device_id,
int64_t pin_mem_size,
int64_t temp_mem_size,
int64_t res_num) {
void
FaissGpuResourceMgr::InitDevice(int64_t device_id, int64_t pin_mem_size, int64_t temp_mem_size, int64_t res_num) {
DeviceParams params;
params.pinned_mem_size = pin_mem_size;
params.temp_mem_size = temp_mem_size;
@ -54,23 +51,25 @@ void FaissGpuResourceMgr::InitDevice(int64_t device_id,
devices_params_.emplace(device_id, params);
}
void FaissGpuResourceMgr::InitResource() {
if(is_init) return ;
void
FaissGpuResourceMgr::InitResource() {
if (is_init)
return;
is_init = true;
//std::cout << "InitResource" << std::endl;
for(auto& device : devices_params_) {
// std::cout << "InitResource" << std::endl;
for (auto& device : devices_params_) {
auto& device_id = device.first;
mutex_cache_.emplace(device_id, std::make_unique<std::mutex>());
//std::cout << "Device Id: " << device_id << std::endl;
// std::cout << "Device Id: " << device_id << std::endl;
auto& device_param = device.second;
auto& bq = idle_map_[device_id];
for (int64_t i = 0; i < device_param.resource_num; ++i) {
//std::cout << "Resource Id: " << i << std::endl;
// std::cout << "Resource Id: " << i << std::endl;
auto raw_resource = std::make_shared<faiss::gpu::StandardGpuResources>();
// TODO(linxj): enable set pinned memory
@ -80,11 +79,11 @@ void FaissGpuResourceMgr::InitResource() {
bq.Put(res_wrapper);
}
}
//std::cout << "End initResource" << std::endl;
// std::cout << "End initResource" << std::endl;
}
ResPtr FaissGpuResourceMgr::GetRes(const int64_t &device_id,
const int64_t &alloc_size) {
ResPtr
FaissGpuResourceMgr::GetRes(const int64_t& device_id, const int64_t& alloc_size) {
InitResource();
auto finder = idle_map_.find(device_id);
@ -97,7 +96,8 @@ ResPtr FaissGpuResourceMgr::GetRes(const int64_t &device_id,
return nullptr;
}
void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res) {
void
FaissGpuResourceMgr::MoveToIdle(const int64_t& device_id, const ResPtr& res) {
auto finder = idle_map_.find(device_id);
if (finder != idle_map_.end()) {
auto& bq = finder->second;
@ -105,8 +105,9 @@ void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res
}
}
void FaissGpuResourceMgr::Free() {
for (auto &item : idle_map_) {
void
FaissGpuResourceMgr::Free() {
for (auto& item : idle_map_) {
auto& bq = item.second;
while (!bq.Empty()) {
bq.Take();
@ -117,12 +118,11 @@ void FaissGpuResourceMgr::Free() {
void
FaissGpuResourceMgr::Dump() {
for (auto &item : idle_map_) {
for (auto& item : idle_map_) {
auto& bq = item.second;
std::cout << "device_id: " << item.first
<< ", resource count:" << bq.Size();
std::cout << "device_id: " << item.first << ", resource count:" << bq.Size();
}
}
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz

View File

@ -15,12 +15,12 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <map>
#include <memory>
#include <mutex>
#include <map>
#include <utility>
#include <faiss/gpu/StandardGpuResources.h>
@ -30,7 +30,7 @@ namespace zilliz {
namespace knowhere {
struct Resource {
explicit Resource(std::shared_ptr<faiss::gpu::StandardGpuResources> &r) : faiss_res(r) {
explicit Resource(std::shared_ptr<faiss::gpu::StandardGpuResources>& r) : faiss_res(r) {
static int64_t global_id = 0;
id = global_id++;
}
@ -43,19 +43,19 @@ using ResPtr = std::shared_ptr<Resource>;
using ResWPtr = std::weak_ptr<Resource>;
class FaissGpuResourceMgr {
public:
public:
friend class ResScope;
using ResBQ = zilliz::milvus::server::BlockingQueue<ResPtr>;
public:
public:
struct DeviceParams {
int64_t temp_mem_size = 0;
int64_t pinned_mem_size = 0;
int64_t resource_num = 2;
};
public:
static FaissGpuResourceMgr &
public:
static FaissGpuResourceMgr&
GetInstance();
// Free gpu resource, avoid cudaGetDevice error when deallocate.
@ -64,67 +64,67 @@ public:
Free();
void
AllocateTempMem(ResPtr &resource, const int64_t& device_id, const int64_t& size);
AllocateTempMem(ResPtr& resource, const int64_t& device_id, const int64_t& size);
void
InitDevice(int64_t device_id,
int64_t pin_mem_size = 0,
int64_t temp_mem_size = 0,
int64_t res_num = 2);
InitDevice(int64_t device_id, int64_t pin_mem_size = 0, int64_t temp_mem_size = 0, int64_t res_num = 2);
void
InitResource();
// allocate gpu memory invoke by build or copy_to_gpu
ResPtr
GetRes(const int64_t &device_id, const int64_t& alloc_size = 0);
GetRes(const int64_t& device_id, const int64_t& alloc_size = 0);
void
MoveToIdle(const int64_t &device_id, const ResPtr& res);
MoveToIdle(const int64_t& device_id, const ResPtr& res);
void
Dump();
protected:
protected:
bool is_init = false;
std::map<int64_t ,std::unique_ptr<std::mutex>> mutex_cache_;
std::map<int64_t, std::unique_ptr<std::mutex>> mutex_cache_;
std::map<int64_t, DeviceParams> devices_params_;
std::map<int64_t, ResBQ> idle_map_;
};
class ResScope {
public:
ResScope(ResPtr &res, const int64_t& device_id, const bool& isown)
: resource(res), device_id(device_id), move(true), own(isown) {
public:
ResScope(ResPtr& res, const int64_t& device_id, const bool& isown)
: resource(res), device_id(device_id), move(true), own(isown) {
Lock();
}
// specif for search
// get the ownership of gpuresource and gpu
ResScope(ResWPtr &res, const int64_t &device_id)
:device_id(device_id),move(false),own(true) {
ResScope(ResWPtr& res, const int64_t& device_id) : device_id(device_id), move(false), own(true) {
resource = res.lock();
Lock();
}
void Lock() {
if (own) FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->lock();
void
Lock() {
if (own)
FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->lock();
resource->mutex.lock();
}
~ResScope() {
if (own) FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->unlock();
if (move) FaissGpuResourceMgr::GetInstance().MoveToIdle(device_id, resource);
if (own)
FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->unlock();
if (move)
FaissGpuResourceMgr::GetInstance().MoveToIdle(device_id, resource);
resource->mutex.unlock();
}
private:
ResPtr resource; // hold resource until deconstruct
private:
ResPtr resource; // hold resource until deconstruct
int64_t device_id;
bool move = true;
bool own = false;
};
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz
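
A hedged lifecycle sketch for FaissGpuResourceMgr and ResScope above; device 0, the 300 MB temporary-memory size and two resources per device are illustrative values only:

#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"

void UseGpuResourceSketch() {
    auto& mgr = zilliz::knowhere::FaissGpuResourceMgr::GetInstance();
    mgr.InitDevice(/*device_id=*/0, /*pin_mem_size=*/0, /*temp_mem_size=*/300 << 20, /*res_num=*/2);
    if (auto res = mgr.GetRes(0)) {                             // lazily runs InitResource() on first use
        zilliz::knowhere::ResScope rs(res, 0, /*isown=*/true);  // locks device 0, returns res to the idle queue on exit
        // ... faiss GPU build or copy using res->faiss_res here ...
    }
    mgr.Free();  // drop resources before CUDA teardown to avoid cudaGetDevice errors
}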

View File

@ -15,51 +15,55 @@
// specific language governing permissions and limitations
// under the License.
#include <cstring>
#include "FaissIO.h"
#include "knowhere/index/vector_index/helpers/FaissIO.h"
namespace zilliz {
namespace knowhere {
// TODO(linxj): Get From Config File
static size_t magic_num = 2;
size_t MemoryIOWriter::operator()(const void *ptr, size_t size, size_t nitems) {
size_t
MemoryIOWriter::operator()(const void* ptr, size_t size, size_t nitems) {
auto total_need = size * nitems + rp;
if (!data_) { // data == nullptr
if (!data_) { // data == nullptr
total = total_need * magic_num;
rp = size * nitems;
data_ = new uint8_t[total];
memcpy((void *) (data_), ptr, rp);
memcpy((void*)(data_), ptr, rp);
}
if (total_need > total) {
total = total_need * magic_num;
auto new_data = new uint8_t[total];
memcpy((void *) new_data, (void *) data_, rp);
memcpy((void*)new_data, (void*)data_, rp);
delete data_;
data_ = new_data;
memcpy((void *) (data_ + rp), ptr, size * nitems);
memcpy((void*)(data_ + rp), ptr, size * nitems);
rp = total_need;
} else {
memcpy((void *) (data_ + rp), ptr, size * nitems);
memcpy((void*)(data_ + rp), ptr, size * nitems);
rp = total_need;
}
return nitems;
}
size_t MemoryIOReader::operator()(void *ptr, size_t size, size_t nitems) {
if (rp >= total) return 0;
size_t
MemoryIOReader::operator()(void* ptr, size_t size, size_t nitems) {
if (rp >= total)
return 0;
size_t nremain = (total - rp) / size;
if (nremain < nitems) nitems = nremain;
memcpy(ptr, (void *) (data_ + rp), size * nitems);
if (nremain < nitems)
nitems = nremain;
memcpy(ptr, (void*)(data_ + rp), size * nitems);
rp += size * nitems;
return nitems;
}
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz
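
MemoryIOWriter doubles its allocation (magic_num = 2) whenever a write would overflow, and MemoryIOReader walks the same buffer back. A short in-process round-trip sketch; sharing the raw buffer and freeing it manually at the end are assumptions of this example, not requirements of the structs:

#include <cstdint>
#include "knowhere/index/vector_index/helpers/FaissIO.h"

void MemoryIoRoundTrip() {
    zilliz::knowhere::MemoryIOWriter writer;
    uint32_t payload[4] = {1, 2, 3, 4};
    writer(payload, sizeof(uint32_t), 4);  // first write allocates total = 2 * 16 bytes and leaves rp = 16

    zilliz::knowhere::MemoryIOReader reader;
    reader.data_ = writer.data_;  // read directly from the writer's buffer
    reader.total = writer.rp;     // only the bytes actually written are readable
    uint32_t out[4];
    reader(out, sizeof(uint32_t), 4);

    delete[] writer.data_;  // the writer allocates with new uint8_t[]; the caller cleans up here
}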

View File

@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <faiss/AuxIndexStructures.h>
@ -24,25 +23,22 @@ namespace zilliz {
namespace knowhere {
struct MemoryIOWriter : public faiss::IOWriter {
uint8_t *data_ = nullptr;
uint8_t* data_ = nullptr;
size_t total = 0;
size_t rp = 0;
size_t
operator()(const void *ptr, size_t size, size_t nitems) override;
operator()(const void* ptr, size_t size, size_t nitems) override;
};
struct MemoryIOReader : public faiss::IOReader {
uint8_t *data_;
uint8_t* data_;
size_t rp = 0;
size_t total = 0;
size_t
operator()(void *ptr, size_t size, size_t nitems) override;
operator()(void* ptr, size_t size, size_t nitems) override;
};
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz
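
A small round-trip sketch for the two buffers above; data_, rp and total are the public struct members declared in this header.

// Write three floats through MemoryIOWriter, then read them back.
MemoryIOWriter writer;
float src[3] = {1.0f, 2.0f, 3.0f};
writer(src, sizeof(float), 3);    // first write allocates the buffer; returns nitems

MemoryIOReader reader;
reader.data_ = writer.data_;      // hand the written buffer to the reader
reader.total = writer.rp;         // rp is the number of bytes written so far
float dst[3];
reader(dst, sizeof(float), 3);    // copies the bytes back and advances reader.rp
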

View File

@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.
#include "IndexParameter.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/common/Exception.h"
#include <faiss/Index.h>
@ -24,7 +23,8 @@
namespace zilliz {
namespace knowhere {
faiss::MetricType GetMetricType(METRICTYPE &type) {
faiss::MetricType
GetMetricType(METRICTYPE& type) {
if (type == METRICTYPE::L2) {
return faiss::METRIC_L2;
}
@ -35,6 +35,5 @@ faiss::MetricType GetMetricType(METRICTYPE &type) {
KNOWHERE_THROW_MSG("Metric type is invalid");
}
}
}
} // namespace knowhere
} // namespace zilliz

View File

@ -15,17 +15,18 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "knowhere/common/Config.h"
#include <faiss/Index.h>
#include <memory>
#include "knowhere/common/Config.h"
namespace zilliz {
namespace knowhere {
extern faiss::MetricType GetMetricType(METRICTYPE &type);
extern faiss::MetricType
GetMetricType(METRICTYPE& type);
// IVF Config
constexpr int64_t DEFAULT_NLIST = INVALID_VALUE;
@ -46,11 +47,7 @@ struct IVFCfg : public Cfg {
int64_t nlist = DEFAULT_NLIST;
int64_t nprobe = DEFAULT_NPROBE;
IVFCfg(const int64_t &dim,
const int64_t &k,
const int64_t &gpu_id,
const int64_t &nlist,
const int64_t &nprobe,
IVFCfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, const int64_t& nlist, const int64_t& nprobe,
METRICTYPE type)
: Cfg(dim, k, gpu_id, type), nlist(nlist), nprobe(nprobe) {
}
@ -68,13 +65,8 @@ struct IVFSQCfg : public IVFCfg {
// TODO(linxj): CPU only supports SQ4, SQ6, SQ8 and SQ16; GPU only supports SQ4, SQ8 and SQ16
int64_t nbits = DEFAULT_NBITS;
IVFSQCfg(const int64_t &dim,
const int64_t &k,
const int64_t &gpu_id,
const int64_t &nlist,
const int64_t &nprobe,
const int64_t &nbits,
METRICTYPE type)
IVFSQCfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, const int64_t& nlist, const int64_t& nprobe,
const int64_t& nbits, METRICTYPE type)
: IVFCfg(dim, k, gpu_id, nlist, nprobe, type), nbits(nbits) {
}
@ -88,22 +80,16 @@ struct IVFSQCfg : public IVFCfg {
using IVFSQConfig = std::shared_ptr<IVFSQCfg>;
struct IVFPQCfg : public IVFCfg {
int64_t m = DEFAULT_NSUBVECTORS; // number of subquantizers (subvectors)
int64_t nbits = DEFAULT_NBITS; // number of bits per subvector index
int64_t m = DEFAULT_NSUBVECTORS; // number of subquantizers (subvectors)
int64_t nbits = DEFAULT_NBITS; // number of bits per subvector index
// TODO(linxj): not use yet
int64_t scan_table_threhold = DEFAULT_SCAN_TABLE_THREHOLD;
int64_t polysemous_ht = DEFAULT_POLYSEMOUS_HT;
int64_t max_codes = DEFAULT_MAX_CODES;
IVFPQCfg(const int64_t &dim,
const int64_t &k,
const int64_t &gpu_id,
const int64_t &nlist,
const int64_t &nprobe,
const int64_t &nbits,
const int64_t &m,
METRICTYPE type)
IVFPQCfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, const int64_t& nlist, const int64_t& nprobe,
const int64_t& nbits, const int64_t& m, METRICTYPE type)
: IVFCfg(dim, k, gpu_id, nlist, nprobe, type), m(m), nbits(nbits) {
}
@ -122,19 +108,14 @@ struct NSGCfg : public IVFCfg {
int64_t out_degree = DEFAULT_OUT_DEGREE;
int64_t candidate_pool_size = DEFAULT_CANDIDATE_SISE;
NSGCfg(const int64_t &dim,
const int64_t &k,
const int64_t &gpu_id,
const int64_t &nlist,
const int64_t &nprobe,
const int64_t &knng,
const int64_t &search_length,
const int64_t &out_degree,
const int64_t &candidate_size,
NSGCfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, const int64_t& nlist, const int64_t& nprobe,
const int64_t& knng, const int64_t& search_length, const int64_t& out_degree, const int64_t& candidate_size,
METRICTYPE type)
: IVFCfg(dim, k, gpu_id, nlist, nprobe, type),
knng(knng), search_length(search_length),
out_degree(out_degree), candidate_pool_size(candidate_size) {
knng(knng),
search_length(search_length),
out_degree(out_degree),
candidate_pool_size(candidate_size) {
}
NSGCfg() = default;
@ -150,6 +131,5 @@ struct KDTCfg : public Cfg {
int64_t tptnubmber = -1;
};
} // knowhere
} // zilliz
} // namespace knowhere
} // namespace zilliz
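
For illustration, constructing one of the configs above; the numeric values are placeholders, not tuning advice.

// An IVF config for 128-d vectors, top-10 search on GPU 0,
// with 1024 inverted lists and nprobe = 16, using the L2 metric.
auto ivf_cfg = std::make_shared<IVFCfg>(/*dim=*/128, /*k=*/10, /*gpu_id=*/0,
                                        /*nlist=*/1024, /*nprobe=*/16, METRICTYPE::L2);
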

View File

@ -15,16 +15,14 @@
// specific language governing permissions and limitations
// under the License.
#include <mutex>
#include "KDTParameterMgr.h"
#include "knowhere/index/vector_index/helpers/KDTParameterMgr.h"
namespace zilliz {
namespace knowhere {
const std::vector<KDTParameter> &
const std::vector<KDTParameter>&
KDTParameterMgr::GetKDTParameters() {
return kdt_parameters_;
}
@ -55,5 +53,5 @@ KDTParameterMgr::KDTParameterMgr() {
};
}
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz

View File

@ -15,13 +15,13 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace zilliz {
namespace knowhere {
@ -29,18 +29,20 @@ using KDTParameter = std::pair<std::string, std::string>;
class KDTParameterMgr {
public:
const std::vector<KDTParameter> &
const std::vector<KDTParameter>&
GetKDTParameters();
public:
static KDTParameterMgr &
static KDTParameterMgr&
GetInstance() {
static KDTParameterMgr instance;
return instance;
}
KDTParameterMgr(const KDTParameterMgr &) = delete;
KDTParameterMgr &operator=(const KDTParameterMgr &) = delete;
KDTParameterMgr(const KDTParameterMgr&) = delete;
KDTParameterMgr&
operator=(const KDTParameterMgr&) = delete;
private:
KDTParameterMgr();
@ -48,5 +50,5 @@ class KDTParameterMgr {
std::vector<KDTParameter> kdt_parameters_;
};
} // namespace knowhere
} // namespace zilliz
} // namespace knowhere
} // namespace zilliz
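
A usage sketch for the singleton above; the printout assumes <iostream>.

// Iterate the default KDT parameters (name/value string pairs).
const std::vector<KDTParameter>& kdt_params = KDTParameterMgr::GetInstance().GetKDTParameters();
for (const auto& kv : kdt_params) {
    std::cout << kv.first << " = " << kv.second << std::endl;
}
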

View File

@ -15,29 +15,28 @@
// specific language governing permissions and limitations
// under the License.
#include <cstring>
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stack>
#include <omp.h>
#include <utility>
#include "NSG.h"
#include "knowhere/common/Exception.h"
#include "knowhere/common/Log.h"
#include "knowhere/common/Timer.h"
#include "NSGHelper.h"
#include "knowhere/index/vector_index/nsg/NSG.h"
#include "knowhere/index/vector_index/nsg/NSGHelper.h"
// TODO: enable macro
//#include <gperftools/profiler.h>
namespace zilliz {
namespace knowhere {
namespace algo {
NsgIndex::NsgIndex(const size_t &dimension, const size_t &n, MetricType metric)
NsgIndex::NsgIndex(const size_t& dimension, const size_t& n, MetricType metric)
: dimension(dimension), ntotal(n), metric_type(metric) {
}
@ -46,16 +45,17 @@ NsgIndex::~NsgIndex() {
delete[] ids_;
}
//void NsgIndex::Build(size_t nb, const float *data, const BuildParam &parameters) {
// void NsgIndex::Build(size_t nb, const float *data, const BuildParam &parameters) {
//}
void NsgIndex::Build_with_ids(size_t nb, const float *data, const long *ids, const BuildParams &parameters) {
void
NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) {
TimeRecorder rc("NSG");
ntotal = nb;
ori_data_ = new float[ntotal * dimension];
ids_ = new long[ntotal];
memcpy((void *) ori_data_, (void *) data, sizeof(float) * ntotal * dimension);
memcpy((void *) ids_, (void *) ids, sizeof(long) * ntotal);
ids_ = new int64_t[ntotal];
memcpy((void*)ori_data_, (void*)data, sizeof(float) * ntotal * dimension);
memcpy((void*)ids_, (void*)ids, sizeof(int64_t) * ntotal);
search_length = parameters.search_length;
out_degree = parameters.out_degree;
@ -69,8 +69,8 @@ void NsgIndex::Build_with_ids(size_t nb, const float *data, const long *ids, con
//>> Debug code
/////
//int count = 0;
//for (int i = 0; i < ntotal; ++i) {
// int count = 0;
// for (int i = 0; i < ntotal; ++i) {
// count += nsg[i].size();
//}
/////
@ -92,7 +92,8 @@ void NsgIndex::Build_with_ids(size_t nb, const float *data, const long *ids, con
is_trained = true;
}
void NsgIndex::InitNavigationPoint() {
void
NsgIndex::InitNavigationPoint() {
// calculate the center of vectors
auto center = new float[dimension];
memset(center, 0, sizeof(float) * dimension);
@ -108,11 +109,12 @@ void NsgIndex::InitNavigationPoint() {
// select navigation point
std::vector<Neighbor> resset, fullset;
navigation_point = rand() % ntotal; // random initialize navigating point
unsigned int seed = 100;
navigation_point = rand_r(&seed) % ntotal; // random initialize navigating point
//>> Debug code
/////
//navigation_point = drand48();
// navigation_point = drand48();
/////
GetNeighbors(center, resset, knng);
@ -120,22 +122,21 @@ void NsgIndex::InitNavigationPoint() {
//>> Debug code
/////
//std::cout << "ep: " << navigation_point << std::endl;
// std::cout << "ep: " << navigation_point << std::endl;
/////
//>> Debug code
/////
//float r1 = calculate(center, ori_data_ + navigation_point * dimension, dimension);
//assert(r1 == resset[0].distance);
// float r1 = calculate(center, ori_data_ + navigation_point * dimension, dimension);
// assert(r1 == resset[0].distance);
/////
}
// Specify Link
void NsgIndex::GetNeighbors(const float *query,
std::vector<Neighbor> &resset,
std::vector<Neighbor> &fullset,
boost::dynamic_bitset<> &has_calculated_dist) {
auto &graph = knng;
void
NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::vector<Neighbor>& fullset,
boost::dynamic_bitset<>& has_calculated_dist) {
auto& graph = knng;
size_t buffer_size = search_length;
if (buffer_size > ntotal) {
@ -156,9 +157,12 @@ void NsgIndex::GetNeighbors(const float *query,
has_calculated_dist[init_ids[i]] = true;
++count;
}
unsigned int seed = 100;
while (count < buffer_size) {
node_t id = rand() % ntotal;
if (has_calculated_dist[id]) continue; // duplicate id
node_t id = rand_r(&seed) % ntotal;
if (has_calculated_dist[id])
continue; // duplicate id
init_ids.push_back(id);
++count;
has_calculated_dist[id] = true;
@ -184,9 +188,9 @@ void NsgIndex::GetNeighbors(const float *query,
fullset.push_back(resset[i]);
///////////////////////////////////////
}
std::sort(resset.begin(), resset.end()); // sort by distance
std::sort(resset.begin(), resset.end()); // sort by distance
//search nearest neighbor
// search nearest neighbor
size_t cursor = 0;
while (cursor < buffer_size) {
size_t nearest_updated_pos = buffer_size;
@ -195,36 +199,42 @@ void NsgIndex::GetNeighbors(const float *query,
resset[cursor].has_explored = true;
node_t start_pos = resset[cursor].id;
auto &wait_for_search_node_vec = graph[start_pos];
auto& wait_for_search_node_vec = graph[start_pos];
for (size_t i = 0; i < wait_for_search_node_vec.size(); ++i) {
node_t id = wait_for_search_node_vec[i];
if (has_calculated_dist[id]) continue;
if (has_calculated_dist[id])
continue;
has_calculated_dist[id] = true;
float
dist = calculate(query, ori_data_ + dimension * id, dimension);
float dist = calculate(query, ori_data_ + dimension * id, dimension);
Neighbor nn(id, dist, false);
fullset.push_back(nn);
if (dist >= resset[buffer_size - 1].distance) continue;
if (dist >= resset[buffer_size - 1].distance)
continue;
size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node
if (pos < nearest_updated_pos) nearest_updated_pos = pos;
size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node
if (pos < nearest_updated_pos)
nearest_updated_pos = pos;
//assert(buffer_size + 1 >= resset.size());
if (buffer_size + 1 < resset.size()) ++buffer_size;
// assert(buffer_size + 1 >= resset.size());
if (buffer_size + 1 < resset.size())
++buffer_size;
}
}
if (cursor >= nearest_updated_pos) {
cursor = nearest_updated_pos; // re-search from new pos
} else ++cursor;
cursor = nearest_updated_pos; // re-search from new pos
} else {
++cursor;
}
}
}
}
// FindUnconnectedNode
void NsgIndex::GetNeighbors(const float *query, std::vector<Neighbor> &resset, std::vector<Neighbor> &fullset) {
auto &graph = nsg;
void
NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::vector<Neighbor>& fullset) {
auto& graph = nsg;
size_t buffer_size = search_length;
if (buffer_size > ntotal) {
@ -232,7 +242,7 @@ void NsgIndex::GetNeighbors(const float *query, std::vector<Neighbor> &resset, s
}
std::vector<node_t> init_ids;
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; // TODO: ?
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; // TODO: ?
{
/*
@ -246,9 +256,11 @@ void NsgIndex::GetNeighbors(const float *query, std::vector<Neighbor> &resset, s
has_calculated_dist[init_ids[i]] = true;
++count;
}
unsigned int seed = 100;
while (count < buffer_size) {
node_t id = rand() % ntotal;
if (has_calculated_dist[id]) continue; // duplicate id
node_t id = rand_r(&seed) % ntotal;
if (has_calculated_dist[id])
continue; // duplicate id
init_ids.push_back(id);
++count;
has_calculated_dist[id] = true;
@ -270,7 +282,7 @@ void NsgIndex::GetNeighbors(const float *query, std::vector<Neighbor> &resset, s
float dist = calculate(ori_data_ + id * dimension, query, dimension);
resset[i] = Neighbor(id, dist, false);
}
std::sort(resset.begin(), resset.end()); // sort by distance
std::sort(resset.begin(), resset.end()); // sort by distance
// search nearest neighbor
size_t cursor = 0;
@ -281,38 +293,41 @@ void NsgIndex::GetNeighbors(const float *query, std::vector<Neighbor> &resset, s
resset[cursor].has_explored = true;
node_t start_pos = resset[cursor].id;
auto &wait_for_search_node_vec = graph[start_pos];
auto& wait_for_search_node_vec = graph[start_pos];
for (size_t i = 0; i < wait_for_search_node_vec.size(); ++i) {
node_t id = wait_for_search_node_vec[i];
if (has_calculated_dist[id]) continue;
if (has_calculated_dist[id])
continue;
has_calculated_dist[id] = true;
float
dist = calculate(ori_data_ + dimension * id, query, dimension);
float dist = calculate(ori_data_ + dimension * id, query, dimension);
Neighbor nn(id, dist, false);
fullset.push_back(nn);
if (dist >= resset[buffer_size - 1].distance) continue;
if (dist >= resset[buffer_size - 1].distance)
continue;
size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node
if (pos < nearest_updated_pos) nearest_updated_pos = pos;
size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node
if (pos < nearest_updated_pos)
nearest_updated_pos = pos;
//assert(buffer_size + 1 >= resset.size());
if (buffer_size + 1 < resset.size()) ++buffer_size; // trick
// assert(buffer_size + 1 >= resset.size());
if (buffer_size + 1 < resset.size())
++buffer_size; // trick
}
}
if (cursor >= nearest_updated_pos) {
cursor = nearest_updated_pos; // re-search from new pos
} else ++cursor;
cursor = nearest_updated_pos; // re-search from new pos
} else {
++cursor;
}
}
}
}
void NsgIndex::GetNeighbors(const float *query,
std::vector<Neighbor> &resset,
Graph &graph,
SearchParams *params) {
size_t &buffer_size = params ? params->search_length : search_length;
void
NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph& graph, SearchParams* params) {
size_t& buffer_size = params ? params->search_length : search_length;
if (buffer_size > ntotal) {
// TODO: throw exception here.
@ -333,9 +348,11 @@ void NsgIndex::GetNeighbors(const float *query,
has_calculated_dist[init_ids[i]] = true;
++count;
}
unsigned int seed = 100;
while (count < buffer_size) {
node_t id = rand() % ntotal;
if (has_calculated_dist[id]) continue; // duplicate id
node_t id = rand_r(&seed) % ntotal;
if (has_calculated_dist[id])
continue; // duplicate id
init_ids.push_back(id);
++count;
has_calculated_dist[id] = true;
@ -349,7 +366,7 @@ void NsgIndex::GetNeighbors(const float *query,
for (size_t i = 0; i < init_ids.size(); ++i) {
node_t id = init_ids[i];
//assert(id < ntotal);
// assert(id < ntotal);
if (id >= static_cast<node_t>(ntotal)) {
KNOWHERE_THROW_MSG("Build Index Error, id > ntotal");
continue;
@ -358,11 +375,11 @@ void NsgIndex::GetNeighbors(const float *query,
float dist = calculate(ori_data_ + id * dimension, query, dimension);
resset[i] = Neighbor(id, dist, false);
}
std::sort(resset.begin(), resset.end()); // sort by distance
std::sort(resset.begin(), resset.end()); // sort by distance
//>> Debug code
/////
//for (int j = 0; j < buffer_size; ++j) {
// for (int j = 0; j < buffer_size; ++j) {
// std::cout << "resset_id: " << resset[j].id << ", resset_dist: " << resset[j].distance << std::endl;
//}
/////
@ -376,41 +393,47 @@ void NsgIndex::GetNeighbors(const float *query,
resset[cursor].has_explored = true;
node_t start_pos = resset[cursor].id;
auto &wait_for_search_node_vec = graph[start_pos];
auto& wait_for_search_node_vec = graph[start_pos];
for (size_t i = 0; i < wait_for_search_node_vec.size(); ++i) {
node_t id = wait_for_search_node_vec[i];
if (has_calculated_dist[id]) continue;
if (has_calculated_dist[id])
continue;
has_calculated_dist[id] = true;
float
dist = calculate(query, ori_data_ + dimension * id, dimension);
float dist = calculate(query, ori_data_ + dimension * id, dimension);
if (dist >= resset[buffer_size - 1].distance) continue;
if (dist >= resset[buffer_size - 1].distance)
continue;
///////////// difference from other GetNeighbors ///////////////
Neighbor nn(id, dist, false);
///////////////////////////////////////
size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node
if (pos < nearest_updated_pos) nearest_updated_pos = pos;
size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node
if (pos < nearest_updated_pos)
nearest_updated_pos = pos;
//>> Debug code
/////
//std::cout << "pos: " << pos << ", nn: " << nn.id << ":" << nn.distance << ", nup: " << nearest_updated_pos << std::endl;
// std::cout << "pos: " << pos << ", nn: " << nn.id << ":" << nn.distance << ", nup: " <<
// nearest_updated_pos << std::endl;
/////
// trick: handle queries where search_length < init_ids.size() ...
if (buffer_size + 1 < resset.size()) ++buffer_size;
if (buffer_size + 1 < resset.size())
++buffer_size;
}
}
if (cursor >= nearest_updated_pos) {
cursor = nearest_updated_pos; // re-search from new pos
} else ++cursor;
cursor = nearest_updated_pos; // re-search from new pos
} else {
++cursor;
}
}
}
}
void NsgIndex::Link() {
void
NsgIndex::Link() {
auto cut_graph_dist = new float[ntotal * out_degree];
nsg.resize(ntotal);
@ -418,7 +441,7 @@ void NsgIndex::Link() {
{
std::vector<Neighbor> fullset;
std::vector<Neighbor> temp;
boost::dynamic_bitset<> flags{ntotal, 0}; // TODO: ?
boost::dynamic_bitset<> flags{ntotal, 0}; // TODO: ?
#pragma omp for schedule(dynamic, 100)
for (size_t n = 0; n < ntotal; ++n) {
fullset.clear();
@ -427,8 +450,8 @@ void NsgIndex::Link() {
//>> Debug code
/////
//float r1 = calculate(ori_data_ + n * dimension, ori_data_ + temp[0].id * dimension, dimension);
//assert(r1 == temp[0].distance);
// float r1 = calculate(ori_data_ + n * dimension, ori_data_ + temp[0].id * dimension, dimension);
// assert(r1 == temp[0].distance);
/////
SyncPrune(n, fullset, flags, cut_graph_dist);
}
@ -436,7 +459,7 @@ void NsgIndex::Link() {
//>> Debug code
/////
//auto bak_nsg = nsg;
// auto bak_nsg = nsg;
/////
knng.clear();
@ -452,8 +475,8 @@ void NsgIndex::Link() {
//>> Debug code
/////
//int count = 0;
//for (int i = 0; i < ntotal; ++i) {
// int count = 0;
// for (int i = 0; i < ntotal; ++i) {
// if (bak_nsg[i].size() != nsg[i].size()) {
// //count += nsg[i].size() - bak_nsg[i].size();
// count += nsg[i].size();
@ -466,16 +489,15 @@ void NsgIndex::Link() {
}
}
void NsgIndex::SyncPrune(size_t n,
std::vector<Neighbor> &pool,
boost::dynamic_bitset<> &has_calculated,
float *cut_graph_dist) {
void
NsgIndex::SyncPrune(size_t n, std::vector<Neighbor>& pool, boost::dynamic_bitset<>& has_calculated,
float* cut_graph_dist) {
// avoid losing the nearest neighbor in knng
for (size_t i = 0; i < knng[n].size(); ++i) {
auto id = knng[n][i];
if (has_calculated[id]) continue;
float dist = calculate(ori_data_ + dimension * n,
ori_data_ + dimension * id, dimension);
if (has_calculated[id])
continue;
float dist = calculate(ori_data_ + dimension * n, ori_data_ + dimension * id, dimension);
pool.emplace_back(Neighbor(id, dist, true));
}
@ -486,13 +508,13 @@ void NsgIndex::SyncPrune(size_t n,
if (pool[cursor].id == static_cast<node_t>(n)) {
cursor++;
}
result.push_back(pool[cursor]); // init result with nearest neighbor
result.push_back(pool[cursor]); // init result with nearest neighbor
SelectEdge(cursor, pool, result, true);
// filling the cut_graph
auto &des_id_pool = nsg[n];
float *des_dist_pool = cut_graph_dist + n * out_degree;
auto& des_id_pool = nsg[n];
float* des_dist_pool = cut_graph_dist + n * out_degree;
for (size_t i = 0; i < result.size(); ++i) {
des_id_pool.push_back(result[i].id);
des_dist_pool[i] = result[i].distance;
@ -504,24 +526,27 @@ void NsgIndex::SyncPrune(size_t n,
}
//>> Optimize: remove read-lock
void NsgIndex::InterInsert(unsigned n, std::vector<std::mutex> &mutex_vec, float *cut_graph_dist) {
auto &current = n;
void
NsgIndex::InterInsert(unsigned n, std::vector<std::mutex>& mutex_vec, float* cut_graph_dist) {
auto& current = n;
auto &neighbor_id_pool = nsg[current];
float *neighbor_dist_pool = cut_graph_dist + current * out_degree;
auto& neighbor_id_pool = nsg[current];
float* neighbor_dist_pool = cut_graph_dist + current * out_degree;
for (size_t i = 0; i < out_degree; ++i) {
if (neighbor_dist_pool[i] == -1) break;
if (neighbor_dist_pool[i] == -1)
break;
size_t current_neighbor = neighbor_id_pool[i]; // center's neighbor id
auto &nsn_id_pool = nsg[current_neighbor]; // nsn => neighbor's neighbor
float *nsn_dist_pool = cut_graph_dist + current_neighbor * out_degree;
size_t current_neighbor = neighbor_id_pool[i]; // center's neighbor id
auto& nsn_id_pool = nsg[current_neighbor]; // nsn => neighbor's neighbor
float* nsn_dist_pool = cut_graph_dist + current_neighbor * out_degree;
std::vector<Neighbor> wait_for_link_pool; // maintains the candidate neighbors of the current neighbor.
std::vector<Neighbor> wait_for_link_pool; // maintains the candidate neighbors of the current neighbor.
int duplicate = false;
{
LockGuard lk(mutex_vec[current_neighbor]);
for (size_t j = 0; j < out_degree; ++j) {
if (nsn_dist_pool[j] == -1) break;
if (nsn_dist_pool[j] == -1)
break;
// make sure at least one edge links back to the current node
if (n == nsn_id_pool[j]) {
@ -533,7 +558,8 @@ void NsgIndex::InterInsert(unsigned n, std::vector<std::mutex> &mutex_vec, float
wait_for_link_pool.push_back(nsn);
}
}
if (duplicate) continue;
if (duplicate)
continue;
// original: (neighbor) <------- (current)
// after: (neighbor) -------> (current)
@ -564,20 +590,18 @@ void NsgIndex::InterInsert(unsigned n, std::vector<std::mutex> &mutex_vec, float
if (nsn_dist_pool[j] == -1) {
nsn_id_pool.push_back(current_as_neighbor.id);
nsn_dist_pool[j] = current_as_neighbor.distance;
if (j + 1 < out_degree) nsn_dist_pool[j + 1] = -1;
if (j + 1 < out_degree)
nsn_dist_pool[j + 1] = -1;
break;
}
}
}
}
}
void NsgIndex::SelectEdge(unsigned &cursor,
std::vector<Neighbor> &sort_pool,
std::vector<Neighbor> &result,
bool limit) {
auto &pool = sort_pool;
void
NsgIndex::SelectEdge(unsigned& cursor, std::vector<Neighbor>& sort_pool, std::vector<Neighbor>& result, bool limit) {
auto& pool = sort_pool;
/*
* edge selection
@ -587,22 +611,23 @@ void NsgIndex::SelectEdge(unsigned &cursor,
*/
size_t search_deepth = limit ? candidate_pool_size : pool.size();
while (result.size() < out_degree && cursor < search_deepth && (++cursor) < pool.size()) {
auto &p = pool[cursor];
auto& p = pool[cursor];
bool should_link = true;
for (size_t t = 0; t < result.size(); ++t) {
float dist = calculate(ori_data_ + dimension * result[t].id,
ori_data_ + dimension * p.id, dimension);
float dist = calculate(ori_data_ + dimension * result[t].id, ori_data_ + dimension * p.id, dimension);
if (dist < p.distance) {
should_link = false;
break;
}
}
if (should_link) result.push_back(p);
if (should_link)
result.push_back(p);
}
}
void NsgIndex::CheckConnectivity() {
void
NsgIndex::CheckConnectivity() {
auto root = navigation_point;
boost::dynamic_bitset<> has_linked{ntotal, 0};
int64_t linked_count = 0;
@ -616,28 +641,29 @@ void NsgIndex::CheckConnectivity() {
}
}
void NsgIndex::DFS(size_t root, boost::dynamic_bitset<> &has_linked, int64_t &linked_count) {
void
NsgIndex::DFS(size_t root, boost::dynamic_bitset<>& has_linked, int64_t& linked_count) {
size_t start = root;
std::stack<size_t> s;
s.push(root);
if (!has_linked[root]) {
linked_count++; // not link
has_linked[root] = true; // link start...
linked_count++; // not link
has_linked[root] = true; // link start...
}
while (!s.empty()) {
size_t next = ntotal + 1;
for (unsigned i = 0; i < nsg[start].size(); i++) {
if (has_linked[nsg[start][i]] == false) // if not link
{
if (has_linked[nsg[start][i]] == false) { // if not link
next = nsg[start][i];
break;
}
}
if (next == (ntotal + 1)) {
s.pop();
if (s.empty()) break;
if (s.empty())
break;
start = s.top();
continue;
}
@ -648,17 +674,19 @@ void NsgIndex::DFS(size_t root, boost::dynamic_bitset<> &has_linked, int64_t &li
}
}
void NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<> &has_linked, int64_t &root) {
void
NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<>& has_linked, int64_t& root) {
// find any unlinked node
size_t id = ntotal;
for (size_t i = 0; i < ntotal; i++) { // find an unlinked node
for (size_t i = 0; i < ntotal; i++) { // find an unlinked node
if (has_linked[i] == false) {
id = i;
break;
}
}
if (id == ntotal) return; // No Unlinked Node
if (id == ntotal)
return; // No Unlinked Node
// search unlinked-node's neighbor
std::vector<Neighbor> tmp, pool;
@ -666,7 +694,7 @@ void NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<> &has_linked, int64_t
std::sort(pool.begin(), pool.end());
size_t found = 0;
for (size_t i = 0; i < pool.size(); i++) { // find nearest neighbor and add unlinked-node as its neighbor
for (size_t i = 0; i < pool.size(); i++) { // find nearest neighbor and add unlinked-node as its neighbor
if (has_linked[pool[i].id]) {
root = pool[i].id;
found = 1;
@ -674,8 +702,9 @@ void NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<> &has_linked, int64_t
}
}
if (found == 0) {
while (true) { // pick a random linked node and add the unlinked node as its neighbor
size_t rid = rand() % ntotal;
unsigned int seed = 100;
while (true) { // pick a random linked node and add the unlinked node as its neighbor
size_t rid = rand_r(&seed) % ntotal;
if (has_linked[rid]) {
root = rid;
break;
@ -685,22 +714,17 @@ void NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<> &has_linked, int64_t
nsg[root].push_back(id);
}
void NsgIndex::Search(const float *query,
const unsigned &nq,
const unsigned &dim,
const unsigned &k,
float *dist,
long *ids,
SearchParams &params) {
void
NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist,
int64_t* ids, SearchParams& params) {
std::vector<std::vector<Neighbor>> resset(nq);
TimeRecorder rc("search");
if (nq == 1) {
GetNeighbors(query, resset[0], nsg, &params);
} else{
//#pragma omp parallel for schedule(dynamic, 50)
#pragma omp parallel for
} else {
//#pragma omp parallel for schedule(dynamic, 50)
#pragma omp parallel for
for (unsigned int i = 0; i < nq; ++i) {
// TODO(linxj): when to use openmp
auto single_query = query + i * dim;
@ -711,7 +735,7 @@ void NsgIndex::Search(const float *query,
for (unsigned int i = 0; i < nq; ++i) {
for (unsigned int j = 0; j < k; ++j) {
//ids[i * k + j] = resset[i][j].id;
// ids[i * k + j] = resset[i][j].id;
// Fix(linxj): bug, reset[i][j] out of range
ids[i * k + j] = ids_[resset[i][j].id];
@ -720,27 +744,28 @@ void NsgIndex::Search(const float *query,
}
//>> Debug: test single insert
//int x_0 = resset[0].size();
//for (int l = 0; l < resset[0].size(); ++l) {
// int x_0 = resset[0].size();
// for (int l = 0; l < resset[0].size(); ++l) {
// resset[0].pop_back();
//}
//resset.clear();
// resset.clear();
//ProfilerStart("xx.prof");
//std::vector<Neighbor> resset;
//GetNeighbors(query, resset, nsg, &params);
//for (int i = 0; i < k; ++i) {
// ProfilerStart("xx.prof");
// std::vector<Neighbor> resset;
// GetNeighbors(query, resset, nsg, &params);
// for (int i = 0; i < k; ++i) {
// ids[i] = resset[i].id;
//dist[i] = resset[i].distance;
// dist[i] = resset[i].distance;
//}
//ProfilerStop();
// ProfilerStop();
}
void NsgIndex::SetKnnGraph(Graph &g) {
void
NsgIndex::SetKnnGraph(Graph& g) {
knng = std::move(g);
}
//void NsgIndex::GetKnnGraphFromFile() {
// void NsgIndex::GetKnnGraphFromFile() {
// //std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/sift.1M.50NN.graph";
// std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/sift.50NN.graph";
//
@ -765,6 +790,6 @@ void NsgIndex::SetKnnGraph(Graph &g) {
// in.close();
//}
}
}
}
} // namespace algo
} // namespace knowhere
} // namespace zilliz

View File

@ -15,22 +15,19 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include <vector>
#include <mutex>
#include <vector>
#include <boost/dynamic_bitset.hpp>
#include "Neighbor.h"
namespace zilliz {
namespace knowhere {
namespace algo {
using node_t = int64_t;
enum class MetricType {
@ -53,15 +50,15 @@ using Graph = std::vector<std::vector<node_t>>;
class NsgIndex {
public:
size_t dimension;
size_t ntotal; // total nb of indexed vectors
MetricType metric_type; // L2 | IP
size_t ntotal; // total nb of indexed vectors
MetricType metric_type; // L2 | IP
float *ori_data_;
long *ids_; // TODO: support different type
Graph nsg; // final graph
Graph knng; // reset after build
float* ori_data_;
int64_t* ids_; // TODO: support different type
Graph nsg; // final graph
Graph knng; // reset after build
node_t navigation_point; // offset of node in origin data
node_t navigation_point; // offset of node in origin data
bool is_trained = false;
@ -69,91 +66,81 @@ class NsgIndex {
* build and search parameter
*/
size_t search_length;
size_t candidate_pool_size; // search depth in fullset
size_t candidate_pool_size; // search depth in fullset
size_t out_degree;
public:
explicit NsgIndex(const size_t &dimension,
const size_t &n,
MetricType metric = MetricType::METRIC_L2);
explicit NsgIndex(const size_t& dimension, const size_t& n, MetricType metric = MetricType::METRIC_L2);
NsgIndex() = default;
virtual ~NsgIndex();
void SetKnnGraph(Graph &knng);
void
SetKnnGraph(Graph& knng);
virtual void Build_with_ids(size_t nb,
const float *data,
const long *ids,
const BuildParams &parameters);
virtual void
Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters);
void Search(const float *query,
const unsigned &nq,
const unsigned &dim,
const unsigned &k,
float *dist,
long *ids,
SearchParams &params);
void
Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, int64_t* ids,
SearchParams& params);
// Not support yet.
//virtual void Add() = 0;
//virtual void Add_with_ids() = 0;
//virtual void Delete() = 0;
//virtual void Delete_with_ids() = 0;
//virtual void Rebuild(size_t nb,
// virtual void Add() = 0;
// virtual void Add_with_ids() = 0;
// virtual void Delete() = 0;
// virtual void Delete_with_ids() = 0;
// virtual void Rebuild(size_t nb,
// const float *data,
// const long *ids,
// const int64_t *ids,
// const Parameters &parameters) = 0;
//virtual void Build(size_t nb,
// virtual void Build(size_t nb,
// const float *data,
// const BuildParam &parameters);
protected:
virtual void InitNavigationPoint();
virtual void
InitNavigationPoint();
// link specify
void GetNeighbors(const float *query,
std::vector<Neighbor> &resset,
std::vector<Neighbor> &fullset,
boost::dynamic_bitset<> &has_calculated_dist);
void
GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::vector<Neighbor>& fullset,
boost::dynamic_bitset<>& has_calculated_dist);
// FindUnconnectedNode
void GetNeighbors(const float *query,
std::vector<Neighbor> &resset,
std::vector<Neighbor> &fullset);
void
GetNeighbors(const float* query, std::vector<Neighbor>& resset, std::vector<Neighbor>& fullset);
// search and navigation-point
void GetNeighbors(const float *query,
std::vector<Neighbor> &resset,
Graph &graph,
SearchParams *param = nullptr);
void
GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph& graph, SearchParams* param = nullptr);
void Link();
void
Link();
void SyncPrune(size_t q,
std::vector<Neighbor> &pool,
boost::dynamic_bitset<> &has_calculated,
float *cut_graph_dist
);
void
SyncPrune(size_t q, std::vector<Neighbor>& pool, boost::dynamic_bitset<>& has_calculated, float* cut_graph_dist);
void SelectEdge(unsigned &cursor,
std::vector<Neighbor> &sort_pool,
std::vector<Neighbor> &result,
bool limit = false);
void
SelectEdge(unsigned& cursor, std::vector<Neighbor>& sort_pool, std::vector<Neighbor>& result, bool limit = false);
void InterInsert(unsigned n, std::vector<std::mutex> &mutex_vec, float *dist);
void
InterInsert(unsigned n, std::vector<std::mutex>& mutex_vec, float* dist);
void CheckConnectivity();
void
CheckConnectivity();
void DFS(size_t root, boost::dynamic_bitset<> &flags, int64_t &count);
void
DFS(size_t root, boost::dynamic_bitset<>& flags, int64_t& count);
void FindUnconnectedNode(boost::dynamic_bitset<> &flags, int64_t &root);
void
FindUnconnectedNode(boost::dynamic_bitset<>& flags, int64_t& root);
//private:
// void GetKnnGraphFromFile();
// private:
// void GetKnnGraphFromFile();
};
}
}
}
} // namespace algo
} // namespace knowhere
} // namespace zilliz
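
A hedged end-to-end sketch of the interface above. The knn-graph is a placeholder (it normally comes from an external graph builder), and the BuildParams field candidate_pool_size is assumed from the member it feeds; search_length and out_degree are the fields read in NSG.cpp above.

using namespace zilliz::knowhere::algo;    // assumes <vector>, <numeric>, <cstdint>

size_t dim = 16, n = 1000;
std::vector<float> data(n * dim);          // fill with real vectors in practice
std::vector<int64_t> ids(n);
std::iota(ids.begin(), ids.end(), int64_t(0));

NsgIndex index(dim, n);                    // METRIC_L2 by default
Graph knng(n);                             // placeholder: a real knn-graph is required here
index.SetKnnGraph(knng);                   // note: the graph is moved from

BuildParams build_params;
build_params.search_length = 40;
build_params.out_degree = 30;
build_params.candidate_pool_size = 200;    // assumed field name
index.Build_with_ids(n, data.data(), ids.data(), build_params);

SearchParams search_params;
search_params.search_length = 30;
const unsigned k = 10;
std::vector<float> dist(k);
std::vector<int64_t> res_ids(k);
index.Search(data.data(), /*nq=*/1, dim, k, dist.data(), res_ids.data(), search_params);
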

View File

@ -15,19 +15,18 @@
// specific language governing permissions and limitations
// under the License.
#include <cstring>
#include <fstream>
#include "NSGHelper.h"
#include "knowhere/index/vector_index/nsg/NSGHelper.h"
namespace zilliz {
namespace knowhere {
namespace algo {
// TODO: impl search && insert && return insert pos. why not just find and swap?
int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn) {
int
InsertIntoPool(Neighbor* addr, unsigned K, Neighbor nn) {
//>> Fix: Add assert
for (unsigned int i = 0; i < K; ++i) {
assert(addr[i].id != nn.id);
@ -37,7 +36,7 @@ int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn) {
int left = 0, right = K - 1;
if (addr[left].distance > nn.distance) {
//>> Fix: memmove overflow, dump when vector<Neighbor> deconstruct
memmove((char *) &addr[left + 1], &addr[left], (K - 1) * sizeof(Neighbor));
memmove((char*)&addr[left + 1], &addr[left], (K - 1) * sizeof(Neighbor));
addr[left] = nn;
return left;
}
@ -52,10 +51,10 @@ int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn) {
else
left = mid;
}
//check equal ID
// check equal ID
while (left > 0) {
if (addr[left].distance < nn.distance) // pos is right
if (addr[left].distance < nn.distance) // pos is right
break;
if (addr[left].id == nn.id)
return K + 1;
@ -65,24 +64,25 @@ int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn) {
return K + 1;
//>> Fix: memmove overflow, dump when vector<Neighbor> deconstruct
memmove((char *) &addr[right + 1], &addr[right], (K - 1 - right) * sizeof(Neighbor));
memmove((char*)&addr[right + 1], &addr[right], (K - 1 - right) * sizeof(Neighbor));
addr[right] = nn;
return right;
}
// TODO: support L2 / IP
float calculate(const float *a, const float *b, unsigned size) {
float
calculate(const float* a, const float* b, unsigned size) {
float result = 0;
#ifdef __GNUC__
#ifdef __AVX__
#define AVX_L2SQR(addr1, addr2, dest, tmp1, tmp2) \
tmp1 = _mm256_loadu_ps(addr1);\
tmp2 = _mm256_loadu_ps(addr2);\
tmp1 = _mm256_sub_ps(tmp1, tmp2); \
tmp1 = _mm256_mul_ps(tmp1, tmp1); \
dest = _mm256_add_ps(dest, tmp1);
tmp1 = _mm256_loadu_ps(addr1); \
tmp2 = _mm256_loadu_ps(addr2); \
tmp1 = _mm256_sub_ps(tmp1, tmp2); \
tmp1 = _mm256_mul_ps(tmp1, tmp1); \
dest = _mm256_add_ps(dest, tmp1);
__m256 sum;
__m256 l0, l1;
@ -90,14 +90,16 @@ float calculate(const float *a, const float *b, unsigned size) {
unsigned D = (size + 7) & ~7U;
unsigned DR = D % 16;
unsigned DD = D - DR;
const float *l = a;
const float *r = b;
const float *e_l = l + DD;
const float *e_r = r + DD;
float unpack[8] __attribute__ ((aligned (32))) = {0, 0, 0, 0, 0, 0, 0, 0};
const float* l = a;
const float* r = b;
const float* e_l = l + DD;
const float* e_r = r + DD;
float unpack[8] __attribute__((aligned(32))) = {0, 0, 0, 0, 0, 0, 0, 0};
sum = _mm256_loadu_ps(unpack);
if (DR) { AVX_L2SQR(e_l, e_r, sum, l0, r0); }
if (DR) {
AVX_L2SQR(e_l, e_r, sum, l0, r0);
}
for (unsigned i = 0; i < DD; i += 16, l += 16, r += 16) {
AVX_L2SQR(l, r, sum, l0, r0);
@ -109,11 +111,11 @@ float calculate(const float *a, const float *b, unsigned size) {
#else
#ifdef __SSE2__
#define SSE_L2SQR(addr1, addr2, dest, tmp1, tmp2) \
tmp1 = _mm_load_ps(addr1);\
tmp2 = _mm_load_ps(addr2);\
tmp1 = _mm_sub_ps(tmp1, tmp2); \
tmp1 = _mm_mul_ps(tmp1, tmp1); \
dest = _mm_add_ps(dest, tmp1);
tmp1 = _mm_load_ps(addr1); \
tmp2 = _mm_load_ps(addr2); \
tmp1 = _mm_sub_ps(tmp1, tmp2); \
tmp1 = _mm_mul_ps(tmp1, tmp1); \
dest = _mm_add_ps(dest, tmp1);
__m128 sum;
__m128 l0, l1, l2, l3;
@ -121,18 +123,22 @@ float calculate(const float *a, const float *b, unsigned size) {
unsigned D = (size + 3) & ~3U;
unsigned DR = D % 16;
unsigned DD = D - DR;
const float *l = a;
const float *r = b;
const float *e_l = l + DD;
const float *e_r = r + DD;
float unpack[4] __attribute__ ((aligned (16))) = {0, 0, 0, 0};
const float* l = a;
const float* r = b;
const float* e_l = l + DD;
const float* e_r = r + DD;
float unpack[4] __attribute__((aligned(16))) = {0, 0, 0, 0};
sum = _mm_load_ps(unpack);
switch (DR) {
case 12:SSE_L2SQR(e_l + 8, e_r + 8, sum, l2, r2);
case 8:SSE_L2SQR(e_l + 4, e_r + 4, sum, l1, r1);
case 4:SSE_L2SQR(e_l, e_r, sum, l0, r0);
default:break;
case 12:
SSE_L2SQR(e_l + 8, e_r + 8, sum, l2, r2);
case 8:
SSE_L2SQR(e_l + 4, e_r + 4, sum, l1, r1);
case 4:
SSE_L2SQR(e_l, e_r, sum, l0, r0);
default:
break;
}
for (unsigned i = 0; i < DD; i += 16, l += 16, r += 16) {
SSE_L2SQR(l, r, sum, l0, r0);
@ -143,28 +149,28 @@ float calculate(const float *a, const float *b, unsigned size) {
_mm_storeu_ps(unpack, sum);
result += unpack[0] + unpack[1] + unpack[2] + unpack[3];
//normal distance
// normal distance
#else
float diff0, diff1, diff2, diff3;
const float* last = a + size;
const float* unroll_group = last - 3;
const float* last = a + size;
const float* unroll_group = last - 3;
/* Process 4 items with each loop for efficiency. */
while (a < unroll_group) {
diff0 = a[0] - b[0];
diff1 = a[1] - b[1];
diff2 = a[2] - b[2];
diff3 = a[3] - b[3];
result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
a += 4;
b += 4;
}
/* Process last 0-3 pixels. Not needed for standard vector lengths. */
while (a < last) {
diff0 = *a++ - *b++;
result += diff0 * diff0;
}
/* Process 4 items with each loop for efficiency. */
while (a < unroll_group) {
diff0 = a[0] - b[0];
diff1 = a[1] - b[1];
diff2 = a[2] - b[2];
diff3 = a[3] - b[3];
result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
a += 4;
b += 4;
}
/* Process last 0-3 pixels. Not needed for standard vector lengths. */
while (a < last) {
diff0 = *a++ - *b++;
result += diff0 * diff0;
}
#endif
#endif
#endif
@ -172,7 +178,6 @@ float calculate(const float *a, const float *b, unsigned size) {
return result;
}
}
}
}
} // namespace algo
} // namespace knowhere
} // namespace zilliz
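
A small sketch of the two helpers above. The pool must already be sorted by distance and must not already contain the candidate's id (the assert at the top of InsertIntoPool enforces that); vec_a and vec_b are toy inputs.

alignas(32) float vec_a[128] = {};                       // zero-filled toy vectors
alignas(32) float vec_b[128] = {};
float d2 = calculate(vec_a, vec_b, 128);                 // squared L2 distance

// Insert a candidate into a fixed-size, distance-sorted pool of K = 3 neighbors.
std::vector<Neighbor> pool = {Neighbor(3, 0.5f, false), Neighbor(7, 1.0f, false), Neighbor(9, 2.0f, false)};
int pos = InsertIntoPool(pool.data(), 3, Neighbor(5, 0.8f, false));
// pos is the slot the candidate landed in (index 1 in this example);
// a return value of K + 1 signals a rejected duplicate id.
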

View File

@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <x86intrin.h>
@ -26,14 +25,15 @@
#include "NSG.h"
#include "knowhere/common/Config.h"
namespace zilliz {
namespace knowhere {
namespace algo {
extern int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn);
extern float calculate(const float *a, const float *b, unsigned size);
extern int
InsertIntoPool(Neighbor* addr, unsigned K, Neighbor nn);
extern float
calculate(const float* a, const float* b, unsigned size);
}
}
}
} // namespace algo
} // namespace knowhere
} // namespace zilliz

View File

@ -15,31 +15,31 @@
// specific language governing permissions and limitations
// under the License.
#include <cstring>
#include "NSGIO.h"
#include "knowhere/index/vector_index/nsg/NSGIO.h"
namespace zilliz {
namespace knowhere {
namespace algo {
void write_index(NsgIndex *index, MemoryIOWriter &writer) {
void
write_index(NsgIndex* index, MemoryIOWriter& writer) {
writer(&index->ntotal, sizeof(index->ntotal), 1);
writer(&index->dimension, sizeof(index->dimension), 1);
writer(&index->navigation_point, sizeof(index->navigation_point), 1);
writer(index->ori_data_, sizeof(float) * index->ntotal * index->dimension, 1);
writer(index->ids_, sizeof(long) * index->ntotal, 1);
writer(index->ids_, sizeof(int64_t) * index->ntotal, 1);
for (unsigned i = 0; i < index->ntotal; ++i) {
auto neighbor_num = (node_t) index->nsg[i].size();
auto neighbor_num = (node_t)index->nsg[i].size();
writer(&neighbor_num, sizeof(node_t), 1);
writer(index->nsg[i].data(), neighbor_num * sizeof(node_t), 1);
}
}
NsgIndex *read_index(MemoryIOReader &reader) {
NsgIndex*
read_index(MemoryIOReader& reader) {
size_t ntotal;
size_t dimension;
reader(&ntotal, sizeof(size_t), 1);
@ -48,9 +48,9 @@ NsgIndex *read_index(MemoryIOReader &reader) {
reader(&index->navigation_point, sizeof(index->navigation_point), 1);
index->ori_data_ = new float[index->ntotal * index->dimension];
index->ids_ = new long[index->ntotal];
index->ids_ = new int64_t[index->ntotal];
reader(index->ori_data_, sizeof(float) * index->ntotal * index->dimension, 1);
reader(index->ids_, sizeof(long) * index->ntotal, 1);
reader(index->ids_, sizeof(int64_t) * index->ntotal, 1);
index->nsg.reserve(index->ntotal);
index->nsg.resize(index->ntotal);
@ -66,6 +66,6 @@ NsgIndex *read_index(MemoryIOReader &reader) {
return index;
}
}
}
}
} // namespace algo
} // namespace knowhere
} // namespace zilliz
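
A serialization round-trip sketch for the two functions above; built_index stands in for an NsgIndex* that has already been built (as in NSG.cpp).

// Serialize a built NsgIndex into an in-memory buffer, then load it back.
MemoryIOWriter writer;
write_index(built_index, writer);         // built_index: assumed, already-built NsgIndex*

MemoryIOReader reader;
reader.data_ = writer.data_;              // reuse the buffer produced by the writer
reader.total = writer.rp;                 // bytes written
NsgIndex* restored = read_index(reader);  // the caller owns the returned index
delete restored;
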

View File

@ -15,21 +15,21 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "knowhere/index/vector_index/helpers/FaissIO.h"
#include "NSG.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/helpers/FaissIO.h"
namespace zilliz {
namespace knowhere {
namespace algo {
extern void write_index(NsgIndex* index, MemoryIOWriter& writer);
extern NsgIndex* read_index(MemoryIOReader& reader);
extern void
write_index(NsgIndex* index, MemoryIOWriter& writer);
extern NsgIndex*
read_index(MemoryIOReader& reader);
}
}
}
} // namespace algo
} // namespace knowhere
} // namespace zilliz

View File

@ -15,12 +15,10 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <mutex>
namespace zilliz {
namespace knowhere {
namespace algo {
@ -29,21 +27,25 @@ using node_t = int64_t;
// TODO: search use simple neighbor
struct Neighbor {
node_t id; // offset of node in origin data
node_t id; // offset of node in origin data
float distance;
bool has_explored;
Neighbor() = default;
explicit Neighbor(node_t id, float distance, bool f) : id{id}, distance{distance}, has_explored(f) {}
explicit Neighbor(node_t id, float distance) : id{id}, distance{distance}, has_explored(false) {}
explicit Neighbor(node_t id, float distance, bool f) : id{id}, distance{distance}, has_explored(f) {
}
inline bool operator<(const Neighbor &other) const {
explicit Neighbor(node_t id, float distance) : id{id}, distance{distance}, has_explored(false) {
}
inline bool
operator<(const Neighbor& other) const {
return distance < other.distance;
}
};
//struct SimpleNeighbor {
// struct SimpleNeighbor {
// node_t id; // offset of node in origin data
// float distance;
//
@ -57,7 +59,6 @@ struct Neighbor {
typedef std::lock_guard<std::mutex> LockGuard;
}
}
}
} // namespace algo
} // namespace knowhere
} // namespace zilliz

View File

@ -1,6 +1,7 @@
include_directories(${CORE_SOURCE_DIR}/thirdparty)
include_directories(${CORE_SOURCE_DIR}/thirdparty/SPTAG/AnnService)
include_directories(${CORE_SOURCE_DIR}/knowhere)
include_directories(${CORE_SOURCE_DIR})
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
link_directories(${CORE_SOURCE_DIR}/thirdparty/tbb)

View File

@ -1,36 +1,50 @@
#include <random>
#include <iostream>
#include <memory>
#include "SPTAG/AnnService/inc/Core/Common.h"
#include "SPTAG/AnnService/inc/Core/VectorIndex.h"
int
main(int argc, char *argv[]) {
using namespace SPTAG;
const int d = 128;
const int n = 100;
auto p_data = new float[n * d];
auto index = VectorIndex::CreateInstance(IndexAlgoType::KDT, VectorValueType::Float);
std::random_device rd;
std::mt19937 mt(rd());
std::uniform_real_distribution<double> dist(1.0, 2.0);
for (auto i = 0; i < n; i++) {
for (auto j = 0; j < d; j++) {
p_data[i * d + j] = dist(mt) - 1;
}
}
std::cout << "generate random n * d finished.";
ByteArray data((uint8_t *) p_data, n * d * sizeof(float), true);
auto vectorset = std::make_shared<BasicVectorSet>(data, VectorValueType::Float, d, n);
index->BuildIndex(vectorset, nullptr);
std::cout << index->GetFeatureDim();
}
//// Licensed to the Apache Software Foundation (ASF) under one
//// or more contributor license agreements. See the NOTICE file
//// distributed with this work for additional information
//// regarding copyright ownership. The ASF licenses this file
//// to you under the Apache License, Version 2.0 (the
//// "License"); you may not use this file except in compliance
//// with the License. You may obtain a copy of the License at
////
//// http://www.apache.org/licenses/LICENSE-2.0
////
//// Unless required by applicable law or agreed to in writing,
//// software distributed under the License is distributed on an
//// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
//// KIND, either express or implied. See the License for the
//// specific language governing permissions and limitations
//// under the License.
//
//#include <iostream>
//#include <memory>
//#include <random>
//#include <SPTAG/AnnService/inc/Core/Common.h>
//#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
//
// int
// main(int argc, char* argv[]) {
// using namespace SPTAG;
// const int d = 128;
// const int n = 100;
//
// auto p_data = new float[n * d];
//
// auto index = VectorIndex::CreateInstance(IndexAlgoType::KDT, VectorValueType::Float);
//
// std::random_device rd;
// std::mt19937 mt(rd());
// std::uniform_real_distribution<double> dist(1.0, 2.0);
//
// for (auto i = 0; i < n; i++) {
// for (auto j = 0; j < d; j++) {
// p_data[i * d + j] = dist(mt) - 1;
// }
// }
// std::cout << "generate random n * d finished.";
// ByteArray data((uint8_t*)p_data, n * d * sizeof(float), true);
//
// auto vectorset = std::make_shared<BasicVectorSet>(data, VectorValueType::Float, d, n);
// index->BuildIndex(vectorset, nullptr);
//
// std::cout << index->GetFeatureDim();
//}

View File

@ -17,46 +17,45 @@
#include <gtest/gtest.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/Index.h>
#include <faiss/AutoTune.h>
#include <faiss/Index.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/index_io.h>
#include <thread>
#include <chrono>
#include <iostream>
using namespace std::chrono_literals;
#include <thread>
class TestGpuRes {
public:
TestGpuRes() {
res_ = new faiss::gpu::StandardGpuResources;
}
~TestGpuRes() {
delete res_;
delete index_;
}
std::shared_ptr<faiss::Index> Do() {
int d = 128; // dimension
int nb = 100000; // database size
int nq = 100; // nb of queries
std::shared_ptr<faiss::Index>
Do() {
int d = 128; // dimension
int nb = 100000; // database size
int nq = 100; // nb of queries
int nlist = 1638;
float *xb = new float[d * nb];
float *xq = new float[d * nq];
float* xb = new float[d * nb];
float* xq = new float[d * nq];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++)
xb[d * i + j] = drand48();
for (int j = 0; j < d; j++) xb[d * i + j] = drand48();
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++)
xq[d * i + j] = drand48();
for (int j = 0; j < d; j++) xq[d * i + j] = drand48();
xq[d * i] += i / 1000.;
}
@ -68,9 +67,10 @@ class TestGpuRes {
host_index.reset(faiss::gpu::index_gpu_to_cpu(index_));
return host_index;
}
private:
faiss::gpu::GpuResources *res_ = nullptr;
faiss::Index *index_ = nullptr;
faiss::gpu::GpuResources* res_ = nullptr;
faiss::Index* index_ = nullptr;
};
TEST(gpuresource, resource) {
@ -79,30 +79,28 @@ TEST(gpuresource, resource) {
}
TEST(test, resource_re) {
int d = 128; // dimension
int nb = 1000000; // database size
int nq = 100; // nb of queries
int d = 128; // dimension
int nb = 1000000; // database size
int nq = 100; // nb of queries
int nlist = 16384;
int k = 100;
float *xb = new float[d * nb];
float *xq = new float[d * nq];
float* xb = new float[d * nb];
float* xq = new float[d * nq];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++)
xb[d * i + j] = drand48();
for (int j = 0; j < d; j++) xb[d * i + j] = drand48();
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++)
xq[d * i + j] = drand48();
for (int j = 0; j < d; j++) xq[d * i + j] = drand48();
xq[d * i] += i / 1000.;
}
auto elems = nq * k;
auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems);
auto res_dis = (float *) malloc(sizeof(float) * elems);
auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems);
auto res_dis = (float*)malloc(sizeof(float) * elems);
faiss::gpu::StandardGpuResources res;
auto cpu_index = faiss::index_factory(d, "IVF16384, Flat");
@ -117,7 +115,7 @@ TEST(test, resource_re) {
auto load = [&] {
std::cout << "start" << std::endl;
faiss::gpu::StandardGpuResources res;
//res.noTempMemory();
// res.noTempMemory();
for (int l = 0; l < 100; ++l) {
auto x = faiss::gpu::index_cpu_to_gpu(&res, 1, new_index);
delete x;
@ -126,42 +124,42 @@ TEST(test, resource_re) {
};
auto search = [&] {
faiss::gpu::StandardGpuResources res;
auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 1, new_index);
std::cout << "search start" << std::endl;
for (int l = 0; l < 10000; ++l) {
device_index->search(nq,xq,10, res_dis, res_ids);
}
std::cout << "search finish" << std::endl;
delete device_index;
delete cpu_index;
faiss::gpu::StandardGpuResources res;
auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 1, new_index);
std::cout << "search start" << std::endl;
for (int l = 0; l < 10000; ++l) {
device_index->search(nq, xq, 10, res_dis, res_ids);
}
std::cout << "search finish" << std::endl;
delete device_index;
delete cpu_index;
};
load();
search();
std::thread t1(search);
std::this_thread::sleep_for(1s);
std::this_thread::sleep_for(std::chrono::seconds(1));
std::thread t2(load);
t1.join();
t2.join();
std::cout << "finish clone" << std::endl;
//std::this_thread::sleep_for(5s);
// std::this_thread::sleep_for(5s);
//
//auto device_index_2 = faiss::gpu::index_cpu_to_gpu(&res, 1, cpu_index);
//device_index->train(nb, xb);
//device_index->add(nb, xb);
// auto device_index_2 = faiss::gpu::index_cpu_to_gpu(&res, 1, cpu_index);
// device_index->train(nb, xb);
// device_index->add(nb, xb);
//std::cout << "finish clone" << std::endl;
//std::this_thread::sleep_for(5s);
// std::cout << "finish clone" << std::endl;
// std::this_thread::sleep_for(5s);
//std::this_thread::sleep_for(2s);
//std::cout << "start clone" << std::endl;
//auto new_index = faiss::clone_index(device_index);
//std::cout << "start search" << std::endl;
//new_index->search(nq, xq, k, res_dis, res_ids);
// std::this_thread::sleep_for(2s);
// std::cout << "start clone" << std::endl;
// auto new_index = faiss::clone_index(device_index);
// std::cout << "start search" << std::endl;
// new_index->search(nq, xq, k, res_dis, res_ids);
//std::cout << "start clone" << std::endl;
// std::cout << "start clone" << std::endl;
//{
// faiss::gpu::StandardGpuResources res;
// auto cpu_index = faiss::index_factory(d, "IVF1638, Flat");
@ -174,5 +172,5 @@ TEST(test, resource_re) {
// std::cout << "finish clone" << std::endl;
//}
//
//std::cout << "finish clone" << std::endl;
// std::cout << "finish clone" << std::endl;
}

View File

@ -1,134 +1,149 @@
#include <iostream>
#include <sstream>
#include "knowhere/index/vector_index/cpu_kdt_rng.h"
#include "knowhere/index/vector_index/definitions.h"
#include "knowhere/adapter/sptag.h"
#include "knowhere/adapter/structure.h"
using namespace zilliz::knowhere;
DatasetPtr
generate_dataset(int64_t n, int64_t d, int64_t base) {
auto elems = n * d;
auto p_data = (float *) malloc(elems * sizeof(float));
auto p_id = (int64_t *) malloc(elems * sizeof(int64_t));
assert(p_data != nullptr && p_id != nullptr);
for (auto i = 0; i < n; ++i) {
for (auto j = 0; j < d; ++j) {
p_data[i * d + j] = float(base + i);
}
p_id[i] = i;
}
std::vector<int64_t> shape{n, d};
auto tensor = ConstructFloatTensorSmart((uint8_t *) p_data, elems * sizeof(float), shape);
std::vector<TensorPtr> tensors{tensor};
std::vector<FieldPtr> tensor_fields{ConstructFloatField("data")};
auto tensor_schema = std::make_shared<Schema>(tensor_fields);
auto id_array = ConstructInt64ArraySmart((uint8_t *) p_id, n * sizeof(int64_t));
std::vector<ArrayPtr> arrays{id_array};
std::vector<FieldPtr> array_fields{ConstructInt64Field("id")};
auto array_schema = std::make_shared<Schema>(tensor_fields);
auto dataset = std::make_shared<Dataset>(std::move(arrays), array_schema,
std::move(tensors), tensor_schema);
return dataset;
}
DatasetPtr
generate_queries(int64_t n, int64_t d, int64_t k, int64_t base) {
size_t size = sizeof(float) * n * d;
auto v = (float *) malloc(size);
// TODO(lxj): check malloc
for (auto i = 0; i < n; ++i) {
for (auto j = 0; j < d; ++j) {
v[i * d + j] = float(base + i);
}
}
std::vector<TensorPtr> data;
auto buffer = MakeMutableBufferSmart((uint8_t *) v, size);
std::vector<int64_t> shape{n, d};
auto float_type = std::make_shared<arrow::FloatType>();
auto tensor = std::make_shared<Tensor>(float_type, buffer, shape);
data.push_back(tensor);
Config meta;
meta[META_ROWS] = int64_t (n);
meta[META_DIM] = int64_t (d);
meta[META_K] = int64_t (k);
auto type = std::make_shared<arrow::FloatType>();
auto field = std::make_shared<Field>("data", type);
std::vector<FieldPtr> fields{field};
auto schema = std::make_shared<Schema>(fields);
return std::make_shared<Dataset>(data, schema);
}
int
main(int argc, char *argv[]) {
auto kdt_index = std::make_shared<CPUKDTRNG>();
const auto d = 10;
const auto k = 3;
const auto nquery = 10;
// ID [0, 99]
auto train = generate_dataset(100, d, 0);
// ID [100]
auto base = generate_dataset(1, d, 0);
auto queries = generate_queries(nquery, d, k, 0);
// Build Preprocessor
auto preprocessor = kdt_index->BuildPreprocessor(train, Config());
// Set Preprocessor
kdt_index->set_preprocessor(preprocessor);
Config train_config;
train_config["TPTNumber"] = "64";
// Train
kdt_index->Train(train, train_config);
// Add
kdt_index->Add(base, Config());
auto binary = kdt_index->Serialize();
auto new_index = std::make_shared<CPUKDTRNG>();
new_index->Load(binary);
// auto new_index = kdt_index;
Config search_config;
search_config[META_K] = int64_t (k);
// Search
auto result = new_index->Search(queries, search_config);
// Print Result
{
auto ids = result->array()[0];
auto dists = result->array()[1];
std::stringstream ss_id;
std::stringstream ss_dist;
for (auto i = 0; i < nquery; i++) {
for (auto j = 0; j < k; ++j) {
ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
}
ss_id << std::endl;
ss_dist << std::endl;
}
std::cout << "id\n" << ss_id.str() << std::endl;
std::cout << "dist\n" << ss_dist.str() << std::endl;
}
}
//// Licensed to the Apache Software Foundation (ASF) under one
//// or more contributor license agreements. See the NOTICE file
//// distributed with this work for additional information
//// regarding copyright ownership. The ASF licenses this file
//// to you under the Apache License, Version 2.0 (the
//// "License"); you may not use this file except in compliance
//// with the License. You may obtain a copy of the License at
////
//// http://www.apache.org/licenses/LICENSE-2.0
////
//// Unless required by applicable law or agreed to in writing,
//// software distributed under the License is distributed on an
//// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
//// KIND, either express or implied. See the License for the
//// specific language governing permissions and limitations
//// under the License.
//
//#include <iostream>
//#include <sstream>
//#include "knowhere/adapter/sptag.h"
//#include "knowhere/adapter/structure.h"
//#include "knowhere/index/vector_index/cpu_kdt_rng.h"
//#include "knowhere/index/vector_index/definitions.h"
//
// namespace {
//
// namespace kn = zilliz::knowhere;
//
//} // namespace
//
// kn::DatasetPtr
// generate_dataset(int64_t n, int64_t d, int64_t base) {
// auto elems = n * d;
// auto p_data = (float*)malloc(elems * sizeof(float));
// auto p_id = (int64_t*)malloc(elems * sizeof(int64_t));
// assert(p_data != nullptr && p_id != nullptr);
//
// for (auto i = 0; i < n; ++i) {
// for (auto j = 0; j < d; ++j) {
// p_data[i * d + j] = float(base + i);
// }
// p_id[i] = i;
// }
//
// std::vector<int64_t> shape{n, d};
// auto tensor = ConstructFloatTensorSmart((uint8_t*)p_data, elems * sizeof(float), shape);
// std::vector<TensorPtr> tensors{tensor};
// std::vector<FieldPtr> tensor_fields{ConstructFloatField("data")};
// auto tensor_schema = std::make_shared<Schema>(tensor_fields);
//
// auto id_array = ConstructInt64ArraySmart((uint8_t*)p_id, n * sizeof(int64_t));
// std::vector<ArrayPtr> arrays{id_array};
// std::vector<FieldPtr> array_fields{ConstructInt64Field("id")};
// auto array_schema = std::make_shared<Schema>(array_fields);
//
// auto dataset = std::make_shared<Dataset>(std::move(arrays), array_schema, std::move(tensors), tensor_schema);
//
// return dataset;
//}
//
// kn::DatasetPtr
// generate_queries(int64_t n, int64_t d, int64_t k, int64_t base) {
// size_t size = sizeof(float) * n * d;
// auto v = (float*)malloc(size);
// // TODO(lxj): check malloc
// for (auto i = 0; i < n; ++i) {
// for (auto j = 0; j < d; ++j) {
// v[i * d + j] = float(base + i);
// }
// }
//
// std::vector<TensorPtr> data;
// auto buffer = MakeMutableBufferSmart((uint8_t*)v, size);
// std::vector<int64_t> shape{n, d};
// auto float_type = std::make_shared<arrow::FloatType>();
// auto tensor = std::make_shared<Tensor>(float_type, buffer, shape);
// data.push_back(tensor);
//
// Config meta;
// meta[META_ROWS] = int64_t(n);
// meta[META_DIM] = int64_t(d);
// meta[META_K] = int64_t(k);
//
// auto type = std::make_shared<arrow::FloatType>();
// auto field = std::make_shared<Field>("data", type);
// std::vector<FieldPtr> fields{field};
// auto schema = std::make_shared<Schema>(fields);
//
// return std::make_shared<kn::Dataset>(data, schema);
//}
//
// int
// main(int argc, char* argv[]) {
// auto kdt_index = std::make_shared<CPUKDTRNG>();
//
// const auto d = 10;
// const auto k = 3;
// const auto nquery = 10;
//
// // ID [0, 99]
// auto train = generate_dataset(100, d, 0);
// // ID [100]
// auto base = generate_dataset(1, d, 0);
// auto queries = generate_queries(nquery, d, k, 0);
//
// // Build Preprocessor
// auto preprocessor = kdt_index->BuildPreprocessor(train, Config());
//
// // Set Preprocessor
// kdt_index->set_preprocessor(preprocessor);
//
// Config train_config;
// train_config["TPTNumber"] = "64";
// // Train
// kdt_index->Train(train, train_config);
//
// // Add
// kdt_index->Add(base, Config());
//
// auto binary = kdt_index->Serialize();
// auto new_index = std::make_shared<CPUKDTRNG>();
// new_index->Load(binary);
// // auto new_index = kdt_index;
//
// Config search_config;
// search_config[META_K] = int64_t(k);
//
// // Search
// auto result = new_index->Search(queries, search_config);
//
// // Print Result
// {
// auto ids = result->array()[0];
// auto dists = result->array()[1];
//
// std::stringstream ss_id;
// std::stringstream ss_dist;
// for (auto i = 0; i < nquery; i++) {
// for (auto j = 0; j < k; ++j) {
// ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
// ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
// }
// ss_id << std::endl;
// ss_dist << std::endl;
// }
// std::cout << "id\n" << ss_id.str() << std::endl;
// std::cout << "dist\n" << ss_dist.str() << std::endl;
// }
//}

View File

@ -15,51 +15,51 @@
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <iostream>
#include "knowhere/index/vector_index/IndexIDMAP.h"
#include "knowhere/adapter/Structure.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexIDMAP.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"
#include "utils.h"
#include "test/utils.h"
namespace {
using namespace zilliz::knowhere;
using namespace zilliz::knowhere::cloner;
namespace kn = zilliz::knowhere;
} // namespace
static int device_id = 0;
class IDMAPTest : public DataGen, public ::testing::Test {
protected:
void SetUp() override {
FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*300, 2);
void
SetUp() override {
kn::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024 * 1024 * 200, 1024 * 1024 * 300, 2);
Init_with_default();
index_ = std::make_shared<IDMAP>();
index_ = std::make_shared<kn::IDMAP>();
}
void TearDown() override {
FaissGpuResourceMgr::GetInstance().Free();
void
TearDown() override {
kn::FaissGpuResourceMgr::GetInstance().Free();
}
protected:
IDMAPPtr index_ = nullptr;
kn::IDMAPPtr index_ = nullptr;
};
void AssertAnns(const DatasetPtr &result,
const int &nq,
const int &k) {
void
AssertAnns(const kn::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
for (auto i = 0; i < nq; i++) {
EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
}
}
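// Note: AssertAnns can expect id i at the head of result row i because the query set is a
// verbatim copy of the first nq base vectors (see GenQuery/GenAll in test/utils.cpp), so the
// exact match should rank first under L2.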
void PrintResult(const DatasetPtr &result,
const int &nq,
const int &k) {
void
PrintResult(const kn::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
auto dists = result->array()[1];
@ -80,10 +80,10 @@ void PrintResult(const DatasetPtr &result,
TEST_F(IDMAPTest, idmap_basic) {
ASSERT_TRUE(!xb.empty());
auto conf = std::make_shared<Cfg>();
auto conf = std::make_shared<kn::Cfg>();
conf->d = dim;
conf->k = k;
conf->metric_type = METRICTYPE::L2;
conf->metric_type = kn::METRICTYPE::L2;
index_->Train(conf);
index_->Add(base_dataset, conf);
@ -97,7 +97,7 @@ TEST_F(IDMAPTest, idmap_basic) {
index_->Seal();
auto binaryset = index_->Serialize();
auto new_index = std::make_shared<IDMAP>();
auto new_index = std::make_shared<kn::IDMAP>();
new_index->Load(binaryset);
auto re_result = index_->Search(query_dataset, conf);
AssertAnns(re_result, nq, k);
@ -105,23 +105,23 @@ TEST_F(IDMAPTest, idmap_basic) {
}
TEST_F(IDMAPTest, idmap_serialize) {
auto serialize = [](const std::string &filename, BinaryPtr &bin, uint8_t *ret) {
auto serialize = [](const std::string& filename, kn::BinaryPtr& bin, uint8_t* ret) {
FileIOWriter writer(filename);
writer(static_cast<void *>(bin->data.get()), bin->size);
writer(static_cast<void*>(bin->data.get()), bin->size);
FileIOReader reader(filename);
reader(ret, bin->size);
};
auto conf = std::make_shared<Cfg>();
auto conf = std::make_shared<kn::Cfg>();
conf->d = dim;
conf->k = k;
conf->metric_type = METRICTYPE::L2;
conf->metric_type = kn::METRICTYPE::L2;
{
// serialize index
index_->Train(conf);
index_->Add(base_dataset, Config());
index_->Add(base_dataset, kn::Config());
auto re_result = index_->Search(query_dataset, conf);
AssertAnns(re_result, nq, k);
PrintResult(re_result, nq, k);
@ -151,10 +151,10 @@ TEST_F(IDMAPTest, idmap_serialize) {
TEST_F(IDMAPTest, copy_test) {
ASSERT_TRUE(!xb.empty());
auto conf = std::make_shared<Cfg>();
auto conf = std::make_shared<kn::Cfg>();
conf->d = dim;
conf->k = k;
conf->metric_type = METRICTYPE::L2;
conf->metric_type = kn::METRICTYPE::L2;
index_->Train(conf);
index_->Add(base_dataset, conf);
@ -164,7 +164,7 @@ TEST_F(IDMAPTest, copy_test) {
ASSERT_TRUE(index_->GetRawIds() != nullptr);
auto result = index_->Search(query_dataset, conf);
AssertAnns(result, nq, k);
//PrintResult(result, nq, k);
// PrintResult(result, nq, k);
{
// clone
@ -175,12 +175,12 @@ TEST_F(IDMAPTest, copy_test) {
{
// cpu to gpu
auto clone_index = CopyCpuToGpu(index_, device_id, conf);
auto clone_index = kn::cloner::CopyCpuToGpu(index_, device_id, conf);
auto clone_result = clone_index->Search(query_dataset, conf);
AssertAnns(clone_result, nq, k);
ASSERT_THROW({ std::static_pointer_cast<GPUIDMAP>(clone_index)->GetRawVectors(); },
ASSERT_THROW({ std::static_pointer_cast<kn::GPUIDMAP>(clone_index)->GetRawVectors(); },
zilliz::knowhere::KnowhereException);
ASSERT_THROW({ std::static_pointer_cast<GPUIDMAP>(clone_index)->GetRawIds(); },
ASSERT_THROW({ std::static_pointer_cast<kn::GPUIDMAP>(clone_index)->GetRawIds(); },
zilliz::knowhere::KnowhereException);
auto binary = clone_index->Serialize();
@ -193,15 +193,15 @@ TEST_F(IDMAPTest, copy_test) {
AssertAnns(clone_gpu_res, nq, k);
// gpu to cpu
auto host_index = CopyGpuToCpu(clone_index, conf);
auto host_index = kn::cloner::CopyGpuToCpu(clone_index, conf);
auto host_result = host_index->Search(query_dataset, conf);
AssertAnns(host_result, nq, k);
ASSERT_TRUE(std::static_pointer_cast<IDMAP>(host_index)->GetRawVectors() != nullptr);
ASSERT_TRUE(std::static_pointer_cast<IDMAP>(host_index)->GetRawIds() != nullptr);
ASSERT_TRUE(std::static_pointer_cast<kn::IDMAP>(host_index)->GetRawVectors() != nullptr);
ASSERT_TRUE(std::static_pointer_cast<kn::IDMAP>(host_index)->GetRawIds() != nullptr);
// gpu to gpu
auto device_index = CopyCpuToGpu(index_, device_id, conf);
auto new_device_index = std::static_pointer_cast<GPUIDMAP>(device_index)->CopyGpuToGpu(device_id, conf);
auto device_index = kn::cloner::CopyCpuToGpu(index_, device_id, conf);
auto new_device_index = std::static_pointer_cast<kn::GPUIDMAP>(device_index)->CopyGpuToGpu(device_id, conf);
auto device_result = new_device_index->Search(query_dataset, conf);
AssertAnns(device_result, nq, k);
}

View File

@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <iostream>
@ -25,21 +24,24 @@
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include "knowhere/adapter/Structure.h"
#include "knowhere/common/Exception.h"
#include "knowhere/common/Timer.h"
#include "knowhere/adapter/Structure.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"
#include "knowhere/index/vector_index/IndexGPUIVFPQ.h"
#include "knowhere/index/vector_index/IndexIVFSQ.h"
#include "knowhere/index/vector_index/IndexGPUIVFSQ.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"
#include "knowhere/index/vector_index/IndexIVFSQ.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"
#include "utils.h"
#include "test/utils.h"
using namespace zilliz::knowhere;
using namespace zilliz::knowhere::cloner;
namespace {
namespace kn = zilliz::knowhere;
} // namespace
using ::testing::TestWithParam;
using ::testing::Values;
@ -47,23 +49,24 @@ using ::testing::Combine;
constexpr int device_id = 0;
constexpr int64_t DIM = 128;
constexpr int64_t NB = 1000000/100;
constexpr int64_t NB = 1000000 / 100;
constexpr int64_t NQ = 10;
constexpr int64_t K = 10;
IVFIndexPtr IndexFactory(const std::string &type) {
kn::IVFIndexPtr
IndexFactory(const std::string& type) {
if (type == "IVF") {
return std::make_shared<IVF>();
return std::make_shared<kn::IVF>();
} else if (type == "IVFPQ") {
return std::make_shared<IVFPQ>();
return std::make_shared<kn::IVFPQ>();
} else if (type == "GPUIVF") {
return std::make_shared<GPUIVF>(device_id);
return std::make_shared<kn::GPUIVF>(device_id);
} else if (type == "GPUIVFPQ") {
return std::make_shared<GPUIVFPQ>(device_id);
return std::make_shared<kn::GPUIVFPQ>(device_id);
} else if (type == "IVFSQ") {
return std::make_shared<IVFSQ>();
return std::make_shared<kn::IVFSQ>();
} else if (type == "GPUIVFSQ") {
return std::make_shared<GPUIVFSQ>(device_id);
return std::make_shared<kn::GPUIVFSQ>(device_id);
}
return nullptr;  // unknown index type; avoids falling off the end of a non-void function
}
@ -76,24 +79,25 @@ enum class ParameterType {
class ParamGenerator {
public:
static ParamGenerator& GetInstance(){
static ParamGenerator&
GetInstance() {
static ParamGenerator instance;
return instance;
}
Config Gen(const ParameterType& type){
kn::Config
Gen(const ParameterType& type) {
if (type == ParameterType::ivf) {
auto tempconf = std::make_shared<IVFCfg>();
auto tempconf = std::make_shared<kn::IVFCfg>();
tempconf->d = DIM;
tempconf->gpu_id = device_id;
tempconf->nlist = 100;
tempconf->nprobe = 16;
tempconf->k = K;
tempconf->metric_type = METRICTYPE::L2;
tempconf->metric_type = kn::METRICTYPE::L2;
return tempconf;
}
else if (type == ParameterType::ivfpq) {
auto tempconf = std::make_shared<IVFPQCfg>();
} else if (type == ParameterType::ivfpq) {
auto tempconf = std::make_shared<kn::IVFPQCfg>();
tempconf->d = DIM;
tempconf->gpu_id = device_id;
tempconf->nlist = 100;
@ -101,70 +105,64 @@ class ParamGenerator {
tempconf->k = K;
tempconf->m = 8;
tempconf->nbits = 8;
tempconf->metric_type = METRICTYPE::L2;
tempconf->metric_type = kn::METRICTYPE::L2;
return tempconf;
}
else if (type == ParameterType::ivfsq) {
auto tempconf = std::make_shared<IVFSQCfg>();
} else if (type == ParameterType::ivfsq) {
auto tempconf = std::make_shared<kn::IVFSQCfg>();
tempconf->d = DIM;
tempconf->gpu_id = device_id;
tempconf->nlist = 100;
tempconf->nprobe = 16;
tempconf->k = K;
tempconf->nbits = 8;
tempconf->metric_type = METRICTYPE::L2;
tempconf->metric_type = kn::METRICTYPE::L2;
return tempconf;
}
}
};
class IVFTest
: public DataGen, public TestWithParam<::std::tuple<std::string, ParameterType>> {
class IVFTest : public DataGen, public TestWithParam<::std::tuple<std::string, ParameterType>> {
protected:
void SetUp() override {
void
SetUp() override {
ParameterType parameter_type;
std::tie(index_type, parameter_type) = GetParam();
//Init_with_default();
// Init_with_default();
Generate(DIM, NB, NQ);
index_ = IndexFactory(index_type);
conf = ParamGenerator::GetInstance().Gen(parameter_type);
FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*600, 2);
kn::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
}
void TearDown() override {
FaissGpuResourceMgr::GetInstance().Free();
void
TearDown() override {
kn::FaissGpuResourceMgr::GetInstance().Free();
}
protected:
std::string index_type;
Config conf;
IVFIndexPtr index_ = nullptr;
kn::Config conf;
kn::IVFIndexPtr index_ = nullptr;
};
INSTANTIATE_TEST_CASE_P(IVFParameters, IVFTest,
Values(
std::make_tuple("IVF", ParameterType::ivf),
std::make_tuple("GPUIVF", ParameterType::ivf),
// std::make_tuple("IVFPQ", ParameterType::ivfpq),
// std::make_tuple("GPUIVFPQ", ParameterType::ivfpq),
std::make_tuple("IVFSQ", ParameterType::ivfsq),
std::make_tuple("GPUIVFSQ", ParameterType::ivfsq)
)
);
Values(std::make_tuple("IVF", ParameterType::ivf),
std::make_tuple("GPUIVF", ParameterType::ivf),
// std::make_tuple("IVFPQ", ParameterType::ivfpq),
// std::make_tuple("GPUIVFPQ", ParameterType::ivfpq),
std::make_tuple("IVFSQ", ParameterType::ivfsq),
std::make_tuple("GPUIVFSQ", ParameterType::ivfsq)));
void AssertAnns(const DatasetPtr &result,
const int &nq,
const int &k) {
void
AssertAnns(const kn::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
for (auto i = 0; i < nq; i++) {
EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
}
}
void PrintResult(const DatasetPtr &result,
const int &nq,
const int &k) {
void
PrintResult(const kn::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
auto dists = result->array()[1];
@ -195,10 +193,10 @@ TEST_P(IVFTest, ivf_basic) {
EXPECT_EQ(index_->Dimension(), dim);
auto result = index_->Search(query_dataset, conf);
AssertAnns(result, nq, conf->k);
//PrintResult(result, nq, k);
// PrintResult(result, nq, k);
}
//TEST_P(IVFTest, gpu_to_cpu) {
// TEST_P(IVFTest, gpu_to_cpu) {
// if (index_type.find("GPU") == std::string::npos) { return; }
//
// // else
@ -223,9 +221,9 @@ TEST_P(IVFTest, ivf_basic) {
//}
TEST_P(IVFTest, ivf_serialize) {
auto serialize = [](const std::string &filename, BinaryPtr &bin, uint8_t *ret) {
auto serialize = [](const std::string& filename, kn::BinaryPtr& bin, uint8_t* ret) {
FileIOWriter writer(filename);
writer(static_cast<void *>(bin->data.get()), bin->size);
writer(static_cast<void*>(bin->data.get()), bin->size);
FileIOReader reader(filename);
reader(ret, bin->size);
@ -292,15 +290,14 @@ TEST_P(IVFTest, clone_test) {
EXPECT_EQ(index_->Dimension(), dim);
auto result = index_->Search(query_dataset, conf);
AssertAnns(result, nq, conf->k);
//PrintResult(result, nq, k);
// PrintResult(result, nq, k);
auto AssertEqual = [&] (DatasetPtr p1, DatasetPtr p2) {
auto AssertEqual = [&](kn::DatasetPtr p1, kn::DatasetPtr p2) {
auto ids_p1 = p1->array()[0];
auto ids_p2 = p2->array()[0];
for (int i = 0; i < nq * k; ++i) {
EXPECT_EQ(*(ids_p2->data()->GetValues<int64_t>(1, i)),
*(ids_p1->data()->GetValues<int64_t>(1, i)));
EXPECT_EQ(*(ids_p2->data()->GetValues<int64_t>(1, i)), *(ids_p1->data()->GetValues<int64_t>(1, i)));
}
};
@ -310,17 +307,19 @@ TEST_P(IVFTest, clone_test) {
auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type);
if (finder != support_idx_vec.cend()) {
EXPECT_NO_THROW({
auto clone_index = index_->Clone();
auto clone_result = clone_index->Search(query_dataset, conf);
//AssertAnns(result, nq, conf->k);
AssertEqual(result, clone_result);
std::cout << "inplace clone [" << index_type << "] success" << std::endl;
});
auto clone_index = index_->Clone();
auto clone_result = clone_index->Search(query_dataset, conf);
// AssertAnns(result, nq, conf->k);
AssertEqual(result, clone_result);
std::cout << "inplace clone [" << index_type << "] success" << std::endl;
});
} else {
EXPECT_THROW({
std::cout << "inplace clone [" << index_type << "] failed" << std::endl;
auto clone_index = index_->Clone();
}, KnowhereException);
EXPECT_THROW(
{
std::cout << "inplace clone [" << index_type << "] failed" << std::endl;
auto clone_index = index_->Clone();
},
kn::KnowhereException);
}
}
@ -330,16 +329,18 @@ TEST_P(IVFTest, clone_test) {
auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type);
if (finder != support_idx_vec.cend()) {
EXPECT_NO_THROW({
auto clone_index = CopyGpuToCpu(index_, Config());
auto clone_result = clone_index->Search(query_dataset, conf);
AssertEqual(result, clone_result);
std::cout << "clone G <=> C [" << index_type << "] success" << std::endl;
});
auto clone_index = kn::cloner::CopyGpuToCpu(index_, kn::Config());
auto clone_result = clone_index->Search(query_dataset, conf);
AssertEqual(result, clone_result);
std::cout << "clone G <=> C [" << index_type << "] success" << std::endl;
});
} else {
EXPECT_THROW({
std::cout << "clone G <=> C [" << index_type << "] failed" << std::endl;
auto clone_index = CopyGpuToCpu(index_, Config());
}, KnowhereException);
EXPECT_THROW(
{
std::cout << "clone G <=> C [" << index_type << "] failed" << std::endl;
auto clone_index = kn::cloner::CopyGpuToCpu(index_, kn::Config());
},
kn::KnowhereException);
}
}
@ -349,22 +350,24 @@ TEST_P(IVFTest, clone_test) {
auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type);
if (finder != support_idx_vec.cend()) {
EXPECT_NO_THROW({
auto clone_index = CopyCpuToGpu(index_, device_id, Config());
auto clone_result = clone_index->Search(query_dataset, conf);
AssertEqual(result, clone_result);
std::cout << "clone C <=> G [" << index_type << "] success" << std::endl;
});
auto clone_index = kn::cloner::CopyCpuToGpu(index_, device_id, kn::Config());
auto clone_result = clone_index->Search(query_dataset, conf);
AssertEqual(result, clone_result);
std::cout << "clone C <=> G [" << index_type << "] success" << std::endl;
});
} else {
EXPECT_THROW({
std::cout << "clone C <=> G [" << index_type << "] failed" << std::endl;
auto clone_index = CopyCpuToGpu(index_, device_id, Config());
}, KnowhereException);
EXPECT_THROW(
{
std::cout << "clone C <=> G [" << index_type << "] failed" << std::endl;
auto clone_index = kn::cloner::CopyCpuToGpu(index_, device_id, kn::Config());
},
kn::KnowhereException);
}
}
}
TEST_P(IVFTest, seal_test) {
//FaissGpuResourceMgr::GetInstance().InitDevice(device_id);
// FaissGpuResourceMgr::GetInstance().InitDevice(device_id);
std::vector<std::string> support_idx_vec{"GPUIVF", "GPUIVFSQ"};
auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type);
@ -385,44 +388,44 @@ TEST_P(IVFTest, seal_test) {
auto result = index_->Search(query_dataset, conf);
AssertAnns(result, nq, conf->k);
auto cpu_idx = CopyGpuToCpu(index_, Config());
auto cpu_idx = kn::cloner::CopyGpuToCpu(index_, kn::Config());
TimeRecorder tc("CopyToGpu");
CopyCpuToGpu(cpu_idx, device_id, Config());
kn::TimeRecorder tc("CopyToGpu");
kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config());
auto without_seal = tc.RecordSection("Without seal");
cpu_idx->Seal();
tc.RecordSection("seal cost");
CopyCpuToGpu(cpu_idx, device_id, Config());
kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config());
auto with_seal = tc.RecordSection("With seal");
ASSERT_GE(without_seal, with_seal);
}
class GPURESTEST
: public DataGen, public ::testing::Test {
class GPURESTEST : public DataGen, public ::testing::Test {
protected:
void SetUp() override {
void
SetUp() override {
Generate(128, 1000000, 1000);
FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*300, 2);
kn::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024 * 1024 * 200, 1024 * 1024 * 300, 2);
k = 100;
elems = nq * k;
ids = (int64_t *) malloc(sizeof(int64_t) * elems);
dis = (float *) malloc(sizeof(float) * elems);
ids = (int64_t*)malloc(sizeof(int64_t) * elems);
dis = (float*)malloc(sizeof(float) * elems);
}
void TearDown() override {
void
TearDown() override {
free(ids);  // allocated with malloc in SetUp, so release with free rather than delete
free(dis);
FaissGpuResourceMgr::GetInstance().Free();
kn::FaissGpuResourceMgr::GetInstance().Free();
}
protected:
std::string index_type;
IVFIndexPtr index_ = nullptr;
kn::IVFIndexPtr index_ = nullptr;
int64_t *ids = nullptr;
float *dis = nullptr;
int64_t* ids = nullptr;
float* dis = nullptr;
int64_t elems = 0;
};
@ -433,16 +436,16 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) {
assert(!xb.empty());
{
index_ = std::make_shared<GPUIVF>(-1);
ASSERT_EQ(std::dynamic_pointer_cast<GPUIVF>(index_)->GetGpuDevice(), -1);
std::dynamic_pointer_cast<GPUIVF>(index_)->SetGpuDevice(device_id);
ASSERT_EQ(std::dynamic_pointer_cast<GPUIVF>(index_)->GetGpuDevice(), device_id);
index_ = std::make_shared<kn::GPUIVF>(-1);
ASSERT_EQ(std::dynamic_pointer_cast<kn::GPUIVF>(index_)->GetGpuDevice(), -1);
std::dynamic_pointer_cast<kn::GPUIVF>(index_)->SetGpuDevice(device_id);
ASSERT_EQ(std::dynamic_pointer_cast<kn::GPUIVF>(index_)->GetGpuDevice(), device_id);
auto conf = std::make_shared<IVFCfg>();
auto conf = std::make_shared<kn::IVFCfg>();
conf->nlist = 1638;
conf->d = dim;
conf->gpu_id = device_id;
conf->metric_type = METRICTYPE::L2;
conf->metric_type = kn::METRICTYPE::L2;
conf->k = k;
conf->nprobe = 1;
@ -454,7 +457,7 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) {
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dimension(), dim);
TimeRecorder tc("knowere GPUIVF");
kn::TimeRecorder tc("knowere GPUIVF");
for (int i = 0; i < search_count; ++i) {
index_->Search(query_dataset, conf);
if (i > search_count - 6 || i < 5)
@ -462,7 +465,7 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) {
}
tc.ElapseFromBegin("search all");
}
FaissGpuResourceMgr::GetInstance().Dump();
kn::FaissGpuResourceMgr::GetInstance().Dump();
{
// IVF-Search
@ -473,7 +476,7 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) {
device_index.train(nb, xb.data());
device_index.add(nb, xb.data());
TimeRecorder tc("ori IVF");
kn::TimeRecorder tc("ori IVF");
for (int i = 0; i < search_count; ++i) {
device_index.search(nq, xq.data(), k, dis, ids);
if (i > search_count - 6 || i < 5)
@ -481,7 +484,6 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) {
}
tc.ElapseFromBegin("search all");
}
}
TEST_F(GPURESTEST, gpuivfsq) {
@ -490,11 +492,11 @@ TEST_F(GPURESTEST, gpuivfsq) {
index_type = "GPUIVFSQ";
index_ = IndexFactory(index_type);
auto conf = std::make_shared<IVFSQCfg>();
auto conf = std::make_shared<kn::IVFSQCfg>();
conf->nlist = 1638;
conf->d = dim;
conf->gpu_id = device_id;
conf->metric_type = METRICTYPE::L2;
conf->metric_type = kn::METRICTYPE::L2;
conf->k = k;
conf->nbits = 8;
conf->nprobe = 1;
@ -507,11 +509,11 @@ TEST_F(GPURESTEST, gpuivfsq) {
auto result = index_->Search(query_dataset, conf);
AssertAnns(result, nq, k);
auto cpu_idx = CopyGpuToCpu(index_, Config());
auto cpu_idx = kn::cloner::CopyGpuToCpu(index_, kn::Config());
cpu_idx->Seal();
TimeRecorder tc("knowhere GPUSQ8");
auto search_idx = CopyCpuToGpu(cpu_idx, device_id, Config());
kn::TimeRecorder tc("knowhere GPUSQ8");
auto search_idx = kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config());
tc.RecordSection("Copy to gpu");
for (int i = 0; i < search_count; ++i) {
search_idx->Search(query_dataset, conf);
@ -523,8 +525,8 @@ TEST_F(GPURESTEST, gpuivfsq) {
{
// Ori gpuivfsq Test
const char *index_description = "IVF1638,SQ8";
faiss::Index *ori_index = faiss::index_factory(dim, index_description, faiss::METRIC_L2);
const char* index_description = "IVF1638,SQ8";
faiss::Index* ori_index = faiss::index_factory(dim, index_description, faiss::METRIC_L2);
faiss::gpu::StandardGpuResources res;
auto device_index = faiss::gpu::index_cpu_to_gpu(&res, device_id, ori_index);
@ -532,7 +534,7 @@ TEST_F(GPURESTEST, gpuivfsq) {
device_index->add(nb, xb.data());
auto cpu_index = faiss::gpu::index_gpu_to_cpu(device_index);
auto idx = dynamic_cast<faiss::IndexIVF *>(cpu_index);
auto idx = dynamic_cast<faiss::IndexIVF*>(cpu_index);
if (idx != nullptr) {
idx->to_readonly();
}
@ -542,8 +544,8 @@ TEST_F(GPURESTEST, gpuivfsq) {
faiss::gpu::GpuClonerOptions option;
option.allInGpu = true;
TimeRecorder tc("ori GPUSQ8");
faiss::Index *search_idx = faiss::gpu::index_cpu_to_gpu(&res, device_id, cpu_index, &option);
kn::TimeRecorder tc("ori GPUSQ8");
faiss::Index* search_idx = faiss::gpu::index_cpu_to_gpu(&res, device_id, cpu_index, &option);
tc.RecordSection("Copy to gpu");
for (int i = 0; i < search_count; ++i) {
search_idx->search(nq, xq.data(), k, dis, ids);
@ -554,7 +556,6 @@ TEST_F(GPURESTEST, gpuivfsq) {
delete cpu_index;
delete search_idx;
}
}
TEST_F(GPURESTEST, copyandsearch) {
@ -564,11 +565,11 @@ TEST_F(GPURESTEST, copyandsearch) {
index_type = "GPUIVFSQ";
index_ = IndexFactory(index_type);
auto conf = std::make_shared<IVFSQCfg>();
auto conf = std::make_shared<kn::IVFSQCfg>();
conf->nlist = 1638;
conf->d = dim;
conf->gpu_id = device_id;
conf->metric_type = METRICTYPE::L2;
conf->metric_type = kn::METRICTYPE::L2;
conf->k = k;
conf->nbits = 8;
conf->nprobe = 1;
@ -581,32 +582,32 @@ TEST_F(GPURESTEST, copyandsearch) {
auto result = index_->Search(query_dataset, conf);
AssertAnns(result, nq, k);
auto cpu_idx = CopyGpuToCpu(index_, Config());
auto cpu_idx = kn::cloner::CopyGpuToCpu(index_, kn::Config());
cpu_idx->Seal();
auto search_idx = CopyCpuToGpu(cpu_idx, device_id, Config());
auto search_idx = kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config());
auto search_func = [&] {
//TimeRecorder tc("search&load");
// TimeRecorder tc("search&load");
for (int i = 0; i < search_count; ++i) {
search_idx->Search(query_dataset, conf);
//if (i > search_count - 6 || i == 0)
// if (i > search_count - 6 || i == 0)
// tc.RecordSection("search once");
}
//tc.ElapseFromBegin("search finish");
// tc.ElapseFromBegin("search finish");
};
auto load_func = [&] {
//TimeRecorder tc("search&load");
// TimeRecorder tc("search&load");
for (int i = 0; i < load_count; ++i) {
CopyCpuToGpu(cpu_idx, device_id, Config());
//if (i > load_count -5 || i < 5)
//tc.RecordSection("Copy to gpu");
kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config());
// if (i > load_count -5 || i < 5)
// tc.RecordSection("Copy to gpu");
}
//tc.ElapseFromBegin("load finish");
// tc.ElapseFromBegin("load finish");
};
TimeRecorder tc("basic");
CopyCpuToGpu(cpu_idx, device_id, Config());
kn::TimeRecorder tc("basic");
kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config());
tc.RecordSection("Copy to gpu once");
search_idx->Search(query_dataset, conf);
tc.RecordSection("search once");
@ -626,11 +627,11 @@ TEST_F(GPURESTEST, TrainAndSearch) {
index_type = "GPUIVFSQ";
index_ = IndexFactory(index_type);
auto conf = std::make_shared<IVFSQCfg>();
auto conf = std::make_shared<kn::IVFSQCfg>();
conf->nlist = 1638;
conf->d = dim;
conf->gpu_id = device_id;
conf->metric_type = METRICTYPE::L2;
conf->metric_type = kn::METRICTYPE::L2;
conf->k = k;
conf->nbits = 8;
conf->nprobe = 1;
@ -641,9 +642,9 @@ TEST_F(GPURESTEST, TrainAndSearch) {
auto new_index = IndexFactory(index_type);
new_index->set_index_model(model);
new_index->Add(base_dataset, conf);
auto cpu_idx = CopyGpuToCpu(new_index, Config());
auto cpu_idx = kn::cloner::CopyGpuToCpu(new_index, kn::Config());
cpu_idx->Seal();
auto search_idx = CopyCpuToGpu(cpu_idx, device_id, Config());
auto search_idx = kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config());
constexpr int train_count = 1;
constexpr int search_count = 5000;
@ -655,18 +656,18 @@ TEST_F(GPURESTEST, TrainAndSearch) {
test_idx->Add(base_dataset, conf);
}
};
auto search_stage = [&](VectorIndexPtr& search_idx) {
auto search_stage = [&](kn::VectorIndexPtr& search_idx) {
for (int i = 0; i < search_count; ++i) {
auto result = search_idx->Search(query_dataset, conf);
AssertAnns(result, nq, k);
}
};
//TimeRecorder tc("record");
//train_stage();
//tc.RecordSection("train cost");
//search_stage(search_idx);
//tc.RecordSection("search cost");
// TimeRecorder tc("record");
// train_stage();
// tc.RecordSection("train cost");
// search_stage(search_idx);
// tc.RecordSection("search cost");
{
// search and build parallel
@ -684,7 +685,7 @@ TEST_F(GPURESTEST, TrainAndSearch) {
}
{
// search parallel
auto search_idx_2 = CopyCpuToGpu(cpu_idx, device_id, Config());
auto search_idx_2 = kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config());
std::thread search_1(search_stage, std::ref(search_idx));
std::thread search_2(search_stage, std::ref(search_idx_2));
search_1.join();
@ -692,6 +693,4 @@ TEST_F(GPURESTEST, TrainAndSearch) {
}
}
// TODO(lxj): Add exception test

View File

@ -15,13 +15,17 @@
// specific language governing permissions and limitations
// under the License.
#include "knowhere/common/config.h"
using namespace zilliz::knowhere;
namespace {
int main(){
Config cfg;
namespace kn = zilliz::knowhere;
} // namespace
int
main() {
kn::Config cfg;
cfg["size"] = size_t(199);
auto size = cfg.get_with_default("size", 123);
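// Hedged expectation: get_with_default presumably returns the stored value when the key is
// present, so size should come back as 199 here; the 123 default would only apply if "size"
// were missing (exact coercion between size_t and int depends on the Config implementation).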

View File

@ -15,35 +15,36 @@
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <iostream>
#include <sstream>
#include "knowhere/adapter/SptagAdapter.h"
#include "knowhere/adapter/Structure.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexKDT.h"
#include "knowhere/index/vector_index/helpers/Definitions.h"
#include "knowhere/adapter/SptagAdapter.h"
#include "knowhere/adapter/Structure.h"
#include "utils.h"
#include "test/utils.h"
namespace {
using namespace zilliz::knowhere;
namespace kn = zilliz::knowhere;
} // namespace
using ::testing::TestWithParam;
using ::testing::Values;
using ::testing::Combine;
class KDTTest
: public DataGen, public ::testing::Test {
class KDTTest : public DataGen, public ::testing::Test {
protected:
void SetUp() override {
index_ = std::make_shared<CPUKDTRNG>();
void
SetUp() override {
index_ = std::make_shared<kn::CPUKDTRNG>();
auto tempconf = std::make_shared<KDTCfg>();
auto tempconf = std::make_shared<kn::KDTCfg>();
tempconf->tptnubmber = 1;
tempconf->k = 10;
conf = tempconf;
@ -52,22 +53,20 @@ class KDTTest
}
protected:
Config conf;
std::shared_ptr<CPUKDTRNG> index_ = nullptr;
kn::Config conf;
std::shared_ptr<kn::CPUKDTRNG> index_ = nullptr;
};
void AssertAnns(const DatasetPtr &result,
const int &nq,
const int &k) {
void
AssertAnns(const kn::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
for (auto i = 0; i < nq; i++) {
EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
}
}
void PrintResult(const DatasetPtr &result,
const int &nq,
const int &k) {
void
PrintResult(const kn::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
auto dists = result->array()[1];
@ -124,25 +123,25 @@ TEST_F(KDTTest, kdt_serialize) {
index_->set_preprocessor(preprocessor);
auto model = index_->Train(base_dataset, conf);
//index_->Add(base_dataset, conf);
// index_->Add(base_dataset, conf);
auto binaryset = index_->Serialize();
auto new_index = std::make_shared<CPUKDTRNG>();
auto new_index = std::make_shared<kn::CPUKDTRNG>();
new_index->Load(binaryset);
auto result = new_index->Search(query_dataset, conf);
AssertAnns(result, nq, k);
PrintResult(result, nq, k);
ASSERT_EQ(new_index->Count(), nb);
ASSERT_EQ(new_index->Dimension(), dim);
ASSERT_THROW({new_index->Clone();}, zilliz::knowhere::KnowhereException);
ASSERT_NO_THROW({new_index->Seal();});
ASSERT_THROW({ new_index->Clone(); }, zilliz::knowhere::KnowhereException);
ASSERT_NO_THROW({ new_index->Seal(); });
{
int fileno = 0;
const std::string &base_name = "/tmp/kdt_serialize_test_bin_";
const std::string& base_name = "/tmp/kdt_serialize_test_bin_";
std::vector<std::string> filename_list;
std::vector<std::pair<std::string, size_t >> meta_list;
for (auto &iter: binaryset.binary_map_) {
const std::string &filename = base_name + std::to_string(fileno);
std::vector<std::pair<std::string, size_t>> meta_list;
for (auto& iter : binaryset.binary_map_) {
const std::string& filename = base_name + std::to_string(fileno);
FileIOWriter writer(filename);
writer(iter.second->data.get(), iter.second->size);
@ -151,7 +150,7 @@ TEST_F(KDTTest, kdt_serialize) {
++fileno;
}
BinarySet load_data_list;
kn::BinarySet load_data_list;
for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) {
auto bin_size = meta_list[i].second;
FileIOReader reader(filename_list[i]);
@ -163,7 +162,7 @@ TEST_F(KDTTest, kdt_serialize) {
load_data_list.Append(meta_list[i].first, data, bin_size);
}
auto new_index = std::make_shared<CPUKDTRNG>();
auto new_index = std::make_shared<kn::CPUKDTRNG>();
new_index->Load(load_data_list);
auto result = new_index->Search(query_dataset, conf);
AssertAnns(result, nq, k);

View File

@ -15,46 +15,46 @@
// specific language governing permissions and limitations
// under the License.
#include <fstream>
#include <iostream>
#include <utils.h>
#include "index.h"
#include "knowhere/index/index.h"
#include "test/utils.h"
//#include <gperftools/profiler.h>
using namespace zilliz::knowhere;
void load_data(std::string &filename, float *&data, unsigned &num,
unsigned &dim) { // load data with sift10K pattern
void
load_data(std::string& filename, float*& data, unsigned& num,
unsigned& dim) { // load data with sift10K pattern
std::ifstream in(filename, std::ios::binary);
if (!in.is_open()) {
std::cout << "open file error" << std::endl;
exit(-1);
}
in.read((char *) &dim, 4);
in.read((char*)&dim, 4);
in.seekg(0, std::ios::end);
std::ios::pos_type ss = in.tellg();
size_t fsize = (size_t) ss;
num = (unsigned) (fsize / (dim + 1) / 4);
data = new float[(size_t) num * (size_t) dim];
size_t fsize = (size_t)ss;
num = (unsigned)(fsize / (dim + 1) / 4);
data = new float[(size_t)num * (size_t)dim];
in.seekg(0, std::ios::beg);
for (size_t i = 0; i < num; i++) {
in.seekg(4, std::ios::cur);
in.read((char *) (data + i * dim), dim * 4);
in.read((char*)(data + i * dim), dim * 4);
}
in.close();
}
void test_distance() {
void
test_distance() {
std::vector<float> xb{1, 2, 3, 4};
std::vector<float> xq{2, 2, 3, 4};
float r = calculate(xb.data(), xq.data(), 4);
std::cout << r << std::endl;
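// For xb = {1, 2, 3, 4} and xq = {2, 2, 3, 4} the vectors differ only in the first
// component, so an L2-style metric (squared or not) gives 1; r is expected to print as 1,
// assuming calculate() implements such a distance.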
}
int main() {
int
main() {
test_distance();
BuildParams params;
@ -62,16 +62,16 @@ int main() {
params.candidate_pool_size = 100;
params.out_degree = 50;
float *data = nullptr;
long *ids = nullptr;
float* data = nullptr;
int64_t* ids = nullptr;
unsigned ntotal, dim;
std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/siftsmall/siftsmall_base.fvecs";
//std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/sift/sift_base.fvecs";
// std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/sift/sift_base.fvecs";
load_data(filename, data, ntotal, dim);
assert(data);
//float x = calculate(data + dim * 0, data + dim * 62, dim);
//std::cout << x << std::endl;
// float x = calculate(data + dim * 0, data + dim * 62, dim);
// std::cout << x << std::endl;
NsgIndex index(dim, ntotal);
@ -81,24 +81,23 @@ int main() {
std::chrono::duration<double> diff = e - s;
std::cout << "indexing time: " << diff.count() << "\n";
int k = 10;
int nq = 1000;
SearchParams s_params;
s_params.search_length = 50;
auto dist = new float[nq*k];
auto ids_b = new long[nq*k];
auto dist = new float[nq * k];
auto ids_b = new int64_t[nq * k];
s = std::chrono::high_resolution_clock::now();
//ProfilerStart("xx.prof");
// ProfilerStart("xx.prof");
index.Search(data, nq, dim, k, dist, ids_b, s_params);
//ProfilerStop();
// ProfilerStop();
e = std::chrono::high_resolution_clock::now();
diff = e - s;
std::cout << "search time: " << diff.count() << "\n";
for (int i = 0; i < k; ++i) {
std::cout << "id " << ids_b[i] << std::endl;
//std::cout << "dist " << dist[i] << std::endl;
// std::cout << "dist " << dist[i] << std::endl;
}
delete[] dist;
@ -106,5 +105,3 @@ int main() {
return 0;
}

View File

@ -15,35 +15,39 @@
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <memory>
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/FaissBaseIndex.h"
#include "knowhere/index/vector_index/IndexNSG.h"
#include "knowhere/index/vector_index/nsg/NSGIO.h"
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
#include "knowhere/index/vector_index/nsg/NSGIO.h"
#include "../utils.h"
#include "test/utils.h"
namespace {
namespace kn = zilliz::knowhere;
} // namespace
using namespace zilliz::knowhere;
using ::testing::TestWithParam;
using ::testing::Values;
using ::testing::Combine;
constexpr int64_t DEVICE_ID = 1;
class NSGInterfaceTest : public DataGen, public ::testing::Test {
class NSGInterfaceTest : public DataGen, public ::testing::Test {
protected:
void SetUp() override {
//Init_with_default();
FaissGpuResourceMgr::GetInstance().InitDevice(DEVICE_ID, 1024*1024*200, 1024*1024*600, 2);
void
SetUp() override {
// Init_with_default();
kn::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICE_ID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
Generate(256, 1000000, 1);
index_ = std::make_shared<NSG>();
index_ = std::make_shared<kn::NSG>();
auto tmp_conf = std::make_shared<NSGCfg>();
auto tmp_conf = std::make_shared<kn::NSGCfg>();
tmp_conf->gpu_id = DEVICE_ID;
tmp_conf->knng = 100;
tmp_conf->nprobe = 32;
@ -51,28 +55,28 @@ constexpr int64_t DEVICE_ID = 1;
tmp_conf->search_length = 60;
tmp_conf->out_degree = 70;
tmp_conf->candidate_pool_size = 500;
tmp_conf->metric_type = METRICTYPE::L2;
tmp_conf->metric_type = kn::METRICTYPE::L2;
train_conf = tmp_conf;
auto tmp2_conf = std::make_shared<NSGCfg>();
auto tmp2_conf = std::make_shared<kn::NSGCfg>();
tmp2_conf->k = k;
tmp2_conf->search_length = 30;
search_conf = tmp2_conf;
}
void TearDown() override {
FaissGpuResourceMgr::GetInstance().Free();
void
TearDown() override {
kn::FaissGpuResourceMgr::GetInstance().Free();
}
protected:
std::shared_ptr<NSG> index_;
Config train_conf;
Config search_conf;
std::shared_ptr<kn::NSG> index_;
kn::Config train_conf;
kn::Config search_conf;
};
void AssertAnns(const DatasetPtr &result,
const int &nq,
const int &k) {
void
AssertAnns(const kn::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
for (auto i = 0; i < nq; i++) {
EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
@ -87,33 +91,32 @@ TEST_F(NSGInterfaceTest, basic_test) {
AssertAnns(result, nq, k);
auto binaryset = index_->Serialize();
auto new_index = std::make_shared<NSG>();
auto new_index = std::make_shared<kn::NSG>();
new_index->Load(binaryset);
auto new_result = new_index->Search(query_dataset, search_conf);
AssertAnns(result, nq, k);
ASSERT_EQ(index_->Count(), nb);
ASSERT_EQ(index_->Dimension(), dim);
ASSERT_THROW({index_->Clone();}, zilliz::knowhere::KnowhereException);
ASSERT_THROW({ index_->Clone(); }, zilliz::knowhere::KnowhereException);
ASSERT_NO_THROW({
index_->Add(base_dataset, Config());
index_->Add(base_dataset, kn::Config());
index_->Seal();
});
{
//std::cout << "k = 1" << std::endl;
//new_index->Search(GenQuery(1), Config::object{{"k", 1}});
//new_index->Search(GenQuery(10), Config::object{{"k", 1}});
//new_index->Search(GenQuery(100), Config::object{{"k", 1}});
//new_index->Search(GenQuery(1000), Config::object{{"k", 1}});
//new_index->Search(GenQuery(10000), Config::object{{"k", 1}});
// std::cout << "k = 1" << std::endl;
// new_index->Search(GenQuery(1), Config::object{{"k", 1}});
// new_index->Search(GenQuery(10), Config::object{{"k", 1}});
// new_index->Search(GenQuery(100), Config::object{{"k", 1}});
// new_index->Search(GenQuery(1000), Config::object{{"k", 1}});
// new_index->Search(GenQuery(10000), Config::object{{"k", 1}});
//std::cout << "k = 5" << std::endl;
//new_index->Search(GenQuery(1), Config::object{{"k", 5}});
//new_index->Search(GenQuery(20), Config::object{{"k", 5}});
//new_index->Search(GenQuery(100), Config::object{{"k", 5}});
//new_index->Search(GenQuery(300), Config::object{{"k", 5}});
//new_index->Search(GenQuery(500), Config::object{{"k", 5}});
// std::cout << "k = 5" << std::endl;
// new_index->Search(GenQuery(1), Config::object{{"k", 5}});
// new_index->Search(GenQuery(20), Config::object{{"k", 5}});
// new_index->Search(GenQuery(100), Config::object{{"k", 5}});
// new_index->Search(GenQuery(300), Config::object{{"k", 5}});
// new_index->Search(GenQuery(500), Config::object{{"k", 5}});
}
}

View File

@ -15,24 +15,35 @@
// specific language governing permissions and limitations
// under the License.
#include "test/utils.h"
#include "utils.h"
#include <memory>
#include <string>
#include <utility>
INITIALIZE_EASYLOGGINGPP
void InitLog() {
namespace {
namespace kn = zilliz::knowhere;
} // namespace
void
InitLog() {
el::Configurations defaultConf;
defaultConf.setToDefault();
defaultConf.set(el::Level::Debug,
el::ConfigurationType::Format, "[%thread-%datetime-%level]: %msg (%fbase:%line)");
defaultConf.set(el::Level::Debug, el::ConfigurationType::Format, "[%thread-%datetime-%level]: %msg (%fbase:%line)");
el::Loggers::reconfigureLogger("default", defaultConf);
}
void DataGen::Init_with_default() {
void
DataGen::Init_with_default() {
Generate(dim, nb, nq);
}
void DataGen::Generate(const int &dim, const int &nb, const int &nq) {
void
DataGen::Generate(const int& dim, const int& nb, const int& nq) {
this->nb = nb;
this->nq = nq;
this->dim = dim;
@ -43,9 +54,10 @@ void DataGen::Generate(const int &dim, const int &nb, const int &nq) {
base_dataset = generate_dataset(nb, dim, xb.data(), ids.data());
query_dataset = generate_query_dataset(nq, dim, xq.data());
}
zilliz::knowhere::DatasetPtr DataGen::GenQuery(const int &nq) {
zilliz::knowhere::DatasetPtr
DataGen::GenQuery(const int& nq) {
xq.resize(nq * dim);
for (int i = 0; i < nq * dim; ++i) {
xq[i] = xb[i];
@ -53,37 +65,28 @@ zilliz::knowhere::DatasetPtr DataGen::GenQuery(const int &nq) {
return generate_query_dataset(nq, dim, xq.data());
}
void GenAll(const int64_t dim,
const int64_t &nb,
std::vector<float> &xb,
std::vector<int64_t> &ids,
const int64_t &nq,
std::vector<float> &xq) {
void
GenAll(const int64_t dim, const int64_t& nb, std::vector<float>& xb, std::vector<int64_t>& ids, const int64_t& nq,
std::vector<float>& xq) {
xb.resize(nb * dim);
xq.resize(nq * dim);
ids.resize(nb);
GenAll(dim, nb, xb.data(), ids.data(), nq, xq.data());
}
void GenAll(const int64_t &dim,
const int64_t &nb,
float *xb,
int64_t *ids,
const int64_t &nq,
float *xq) {
void
GenAll(const int64_t& dim, const int64_t& nb, float* xb, int64_t* ids, const int64_t& nq, float* xq) {
GenBase(dim, nb, xb, ids);
for (int64_t i = 0; i < nq * dim; ++i) {
xq[i] = xb[i];
}
}
void GenBase(const int64_t &dim,
const int64_t &nb,
float *xb,
int64_t *ids) {
void
GenBase(const int64_t& dim, const int64_t& nb, float* xb, int64_t* ids) {
for (auto i = 0; i < nb; ++i) {
for (auto j = 0; j < dim; ++j) {
//p_data[i * d + j] = float(base + i);
// p_data[i * d + j] = float(base + i);
xb[i * dim + j] = drand48();
}
xb[dim * i] += i / 1000.;
@ -91,7 +94,7 @@ void GenBase(const int64_t &dim,
}
}
FileIOReader::FileIOReader(const std::string &fname) {
FileIOReader::FileIOReader(const std::string& fname) {
name = fname;
fs = std::fstream(name, std::ios::in | std::ios::binary);
}
@ -100,12 +103,13 @@ FileIOReader::~FileIOReader() {
fs.close();
}
size_t FileIOReader::operator()(void *ptr, size_t size) {
fs.read(reinterpret_cast<char *>(ptr), size);
size_t
FileIOReader::operator()(void* ptr, size_t size) {
fs.read(reinterpret_cast<char*>(ptr), size);
return size;
}
FileIOWriter::FileIOWriter(const std::string &fname) {
FileIOWriter::FileIOWriter(const std::string& fname) {
name = fname;
fs = std::fstream(name, std::ios::out | std::ios::binary);
}
@ -114,39 +118,37 @@ FileIOWriter::~FileIOWriter() {
fs.close();
}
size_t FileIOWriter::operator()(void *ptr, size_t size) {
fs.write(reinterpret_cast<char *>(ptr), size);
size_t
FileIOWriter::operator()(void* ptr, size_t size) {
fs.write(reinterpret_cast<char*>(ptr), size);
return size;
}
using namespace zilliz::knowhere;
DatasetPtr
generate_dataset(int64_t nb, int64_t dim, float *xb, long *ids) {
kn::DatasetPtr
generate_dataset(int64_t nb, int64_t dim, float* xb, int64_t* ids) {
std::vector<int64_t> shape{nb, dim};
auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape);
std::vector<TensorPtr> tensors{tensor};
std::vector<FieldPtr> tensor_fields{ConstructFloatField("data")};
auto tensor_schema = std::make_shared<Schema>(tensor_fields);
auto tensor = kn::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape);
std::vector<kn::TensorPtr> tensors{tensor};
std::vector<kn::FieldPtr> tensor_fields{kn::ConstructFloatField("data")};
auto tensor_schema = std::make_shared<kn::Schema>(tensor_fields);
auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t));
std::vector<ArrayPtr> arrays{id_array};
std::vector<FieldPtr> array_fields{ConstructInt64Field("id")};
auto array_schema = std::make_shared<Schema>(array_fields);
auto id_array = kn::ConstructInt64Array((uint8_t*)ids, nb * sizeof(int64_t));
std::vector<kn::ArrayPtr> arrays{id_array};
std::vector<kn::FieldPtr> array_fields{kn::ConstructInt64Field("id")};
auto array_schema = std::make_shared<kn::Schema>(array_fields);  // the id array should carry the "id" field, not the tensor fields
auto dataset = std::make_shared<Dataset>(std::move(arrays), array_schema,
std::move(tensors), tensor_schema);
auto dataset = std::make_shared<kn::Dataset>(std::move(arrays), array_schema, std::move(tensors), tensor_schema);
return dataset;
}
DatasetPtr
generate_query_dataset(int64_t nb, int64_t dim, float *xb) {
kn::DatasetPtr
generate_query_dataset(int64_t nb, int64_t dim, float* xb) {
std::vector<int64_t> shape{nb, dim};
auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape);
std::vector<TensorPtr> tensors{tensor};
std::vector<FieldPtr> tensor_fields{ConstructFloatField("data")};
auto tensor_schema = std::make_shared<Schema>(tensor_fields);
auto tensor = kn::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape);
std::vector<kn::TensorPtr> tensors{tensor};
std::vector<kn::FieldPtr> tensor_fields{kn::ConstructFloatField("data")};
auto tensor_schema = std::make_shared<kn::Schema>(tensor_fields);
auto dataset = std::make_shared<Dataset>(std::move(tensors), tensor_schema);
auto dataset = std::make_shared<kn::Dataset>(std::move(tensors), tensor_schema);
return dataset;
}

View File

@ -15,24 +15,27 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <vector>
#include <cstdlib>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <string>
#include <vector>
#include "knowhere/adapter/Structure.h"
#include "knowhere/common/Log.h"
class DataGen {
class DataGen {
protected:
void Init_with_default();
void
Init_with_default();
void Generate(const int &dim, const int &nb, const int &nq);
void
Generate(const int& dim, const int& nb, const int& nq);
zilliz::knowhere::DatasetPtr GenQuery(const int&nq);
zilliz::knowhere::DatasetPtr
GenQuery(const int& nq);
protected:
int nb = 10000;
@ -46,49 +49,41 @@ class DataGen {
zilliz::knowhere::DatasetPtr query_dataset = nullptr;
};
extern void
GenAll(const int64_t dim, const int64_t& nb, std::vector<float>& xb, std::vector<int64_t>& ids, const int64_t& nq,
std::vector<float>& xq);
extern void GenAll(const int64_t dim,
const int64_t &nb,
std::vector<float> &xb,
std::vector<int64_t> &ids,
const int64_t &nq,
std::vector<float> &xq);
extern void
GenAll(const int64_t& dim, const int64_t& nb, float* xb, int64_t* ids, const int64_t& nq, float* xq);
extern void GenAll(const int64_t &dim,
const int64_t &nb,
float *xb,
int64_t *ids,
const int64_t &nq,
float *xq);
extern void
GenBase(const int64_t& dim, const int64_t& nb, float* xb, int64_t* ids);
extern void GenBase(const int64_t &dim,
const int64_t &nb,
float *xb,
int64_t *ids);
extern void InitLog();
extern void
InitLog();
zilliz::knowhere::DatasetPtr
generate_dataset(int64_t nb, int64_t dim, float *xb, long *ids);
generate_dataset(int64_t nb, int64_t dim, float* xb, int64_t* ids);
zilliz::knowhere::DatasetPtr
generate_query_dataset(int64_t nb, int64_t dim, float *xb);
generate_query_dataset(int64_t nb, int64_t dim, float* xb);
struct FileIOWriter {
std::fstream fs;
std::string name;
FileIOWriter(const std::string &fname);
explicit FileIOWriter(const std::string& fname);
~FileIOWriter();
size_t operator()(void *ptr, size_t size);
size_t
operator()(void* ptr, size_t size);
};
struct FileIOReader {
std::fstream fs;
std::string name;
FileIOReader(const std::string &fname);
explicit FileIOReader(const std::string& fname);
~FileIOReader();
size_t operator()(void *ptr, size_t size);
size_t
operator()(void* ptr, size_t size);
};