fix hnsw code format (#1987)

Signed-off-by: Zhonglei Song <szl_84@sina.com>
pull/1979/head
smiling849 2020-04-19 12:00:45 +08:00 committed by GitHub
parent e8f759c22b
commit 50ddc49332
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 562 additions and 581 deletions

View File

@ -5,166 +5,160 @@
#include <algorithm>
namespace hnswlib {
template<typename dist_t>
class BruteforceSearch : public AlgorithmInterface<dist_t> {
public:
BruteforceSearch(SpaceInterface <dist_t> *s) {
}
BruteforceSearch(SpaceInterface<dist_t> *s, const std::string &location) {
loadIndex(location, s);
}
template<typename dist_t>
class BruteforceSearch : public AlgorithmInterface<dist_t> {
public:
BruteforceSearch(SpaceInterface <dist_t> *s) {
BruteforceSearch(SpaceInterface <dist_t> *s, size_t maxElements) {
maxelements_ = maxElements;
data_size_ = s->get_data_size();
fstdistfunc_ = s->get_dist_func();
dist_func_param_ = s->get_dist_func_param();
size_per_element_ = data_size_ + sizeof(labeltype);
data_ = (char *) malloc(maxElements * size_per_element_);
if (data_ == nullptr)
std::runtime_error("Not enough memory: BruteforceSearch failed to allocate data");
cur_element_count = 0;
}
}
BruteforceSearch(SpaceInterface<dist_t> *s, const std::string &location) {
loadIndex(location, s);
}
~BruteforceSearch() {
free(data_);
}
BruteforceSearch(SpaceInterface <dist_t> *s, size_t maxElements) {
maxelements_ = maxElements;
data_size_ = s->get_data_size();
fstdistfunc_ = s->get_dist_func();
dist_func_param_ = s->get_dist_func_param();
size_per_element_ = data_size_ + sizeof(labeltype);
data_ = (char *) malloc(maxElements * size_per_element_);
if (data_ == nullptr)
std::runtime_error("Not enough memory: BruteforceSearch failed to allocate data");
cur_element_count = 0;
}
char *data_;
size_t maxelements_;
size_t cur_element_count;
size_t size_per_element_;
~BruteforceSearch() {
free(data_);
}
size_t data_size_;
DISTFUNC <dist_t> fstdistfunc_;
void *dist_func_param_;
std::mutex index_lock;
char *data_;
size_t maxelements_;
size_t cur_element_count;
size_t size_per_element_;
std::unordered_map<labeltype,size_t > dict_external_to_internal;
size_t data_size_;
DISTFUNC <dist_t> fstdistfunc_;
void *dist_func_param_;
std::mutex index_lock;
void addPoint(const void *datapoint, labeltype label) {
std::unordered_map<labeltype,size_t > dict_external_to_internal;
int idx;
{
std::unique_lock<std::mutex> lock(index_lock);
void addPoint(const void *datapoint, labeltype label) {
int idx;
{
std::unique_lock<std::mutex> lock(index_lock);
auto search=dict_external_to_internal.find(label);
if (search != dict_external_to_internal.end()) {
idx=search->second;
}
else{
if (cur_element_count >= maxelements_) {
throw std::runtime_error("The number of elements exceeds the specified limit\n");
}
idx=cur_element_count;
dict_external_to_internal[label] = idx;
cur_element_count++;
auto search=dict_external_to_internal.find(label);
if (search != dict_external_to_internal.end()) {
idx=search->second;
}
else{
if (cur_element_count >= maxelements_) {
throw std::runtime_error("The number of elements exceeds the specified limit\n");
}
idx=cur_element_count;
dict_external_to_internal[label] = idx;
cur_element_count++;
}
memcpy(data_ + size_per_element_ * idx + data_size_, &label, sizeof(labeltype));
memcpy(data_ + size_per_element_ * idx, datapoint, data_size_);
};
void removePoint(labeltype cur_external) {
size_t cur_c=dict_external_to_internal[cur_external];
dict_external_to_internal.erase(cur_external);
labeltype label=*((labeltype*)(data_ + size_per_element_ * (cur_element_count-1) + data_size_));
dict_external_to_internal[label]=cur_c;
memcpy(data_ + size_per_element_ * cur_c,
data_ + size_per_element_ * (cur_element_count-1),
data_size_+sizeof(labeltype));
cur_element_count--;
}
memcpy(data_ + size_per_element_ * idx + data_size_, &label, sizeof(labeltype));
memcpy(data_ + size_per_element_ * idx, datapoint, data_size_);
std::priority_queue<std::pair<dist_t, labeltype >>
searchKnn(const void *query_data, size_t k) const {
std::priority_queue<std::pair<dist_t, labeltype >> topResults;
if (cur_element_count == 0) return topResults;
for (int i = 0; i < k; i++) {
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
data_size_))));
}
dist_t lastdist = topResults.top().first;
for (int i = k; i < cur_element_count; i++) {
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
if (dist <= lastdist) {
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
data_size_))));
if (topResults.size() > k)
topResults.pop();
lastdist = topResults.top().first;
}
}
return topResults;
};
template <typename Comp>
std::vector<std::pair<dist_t, labeltype>>
searchKnn(const void* query_data, size_t k, Comp comp) {
std::vector<std::pair<dist_t, labeltype>> result;
if (cur_element_count == 0) return result;
auto ret = searchKnn(query_data, k);
while (!ret.empty()) {
result.push_back(ret.top());
ret.pop();
}
std::sort(result.begin(), result.end(), comp);
return result;
}
void saveIndex(const std::string &location) {
std::ofstream output(location, std::ios::binary);
std::streampos position;
writeBinaryPOD(output, maxelements_);
writeBinaryPOD(output, size_per_element_);
writeBinaryPOD(output, cur_element_count);
output.write(data_, maxelements_ * size_per_element_);
output.close();
}
void loadIndex(const std::string &location, SpaceInterface<dist_t> *s) {
std::ifstream input(location, std::ios::binary);
std::streampos position;
readBinaryPOD(input, maxelements_);
readBinaryPOD(input, size_per_element_);
readBinaryPOD(input, cur_element_count);
data_size_ = s->get_data_size();
fstdistfunc_ = s->get_dist_func();
dist_func_param_ = s->get_dist_func_param();
size_per_element_ = data_size_ + sizeof(labeltype);
data_ = (char *) malloc(maxelements_ * size_per_element_);
if (data_ == nullptr)
std::runtime_error("Not enough memory: loadIndex failed to allocate data");
input.read(data_, maxelements_ * size_per_element_);
input.close();
}
};
void removePoint(labeltype cur_external) {
size_t cur_c=dict_external_to_internal[cur_external];
dict_external_to_internal.erase(cur_external);
labeltype label=*((labeltype*)(data_ + size_per_element_ * (cur_element_count-1) + data_size_));
dict_external_to_internal[label]=cur_c;
memcpy(data_ + size_per_element_ * cur_c,
data_ + size_per_element_ * (cur_element_count-1),
data_size_+sizeof(labeltype));
cur_element_count--;
}
std::priority_queue<std::pair<dist_t, labeltype >>
searchKnn(const void *query_data, size_t k) const {
std::priority_queue<std::pair<dist_t, labeltype >> topResults;
if (cur_element_count == 0) return topResults;
for (int i = 0; i < k; i++) {
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
data_size_))));
}
dist_t lastdist = topResults.top().first;
for (int i = k; i < cur_element_count; i++) {
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
if (dist <= lastdist) {
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
data_size_))));
if (topResults.size() > k)
topResults.pop();
lastdist = topResults.top().first;
}
}
return topResults;
};
template <typename Comp>
std::vector<std::pair<dist_t, labeltype>>
searchKnn(const void* query_data, size_t k, Comp comp) {
std::vector<std::pair<dist_t, labeltype>> result;
if (cur_element_count == 0) return result;
auto ret = searchKnn(query_data, k);
while (!ret.empty()) {
result.push_back(ret.top());
ret.pop();
}
std::sort(result.begin(), result.end(), comp);
return result;
}
void saveIndex(const std::string &location) {
std::ofstream output(location, std::ios::binary);
std::streampos position;
writeBinaryPOD(output, maxelements_);
writeBinaryPOD(output, size_per_element_);
writeBinaryPOD(output, cur_element_count);
output.write(data_, maxelements_ * size_per_element_);
output.close();
}
void loadIndex(const std::string &location, SpaceInterface<dist_t> *s) {
std::ifstream input(location, std::ios::binary);
std::streampos position;
readBinaryPOD(input, maxelements_);
readBinaryPOD(input, size_per_element_);
readBinaryPOD(input, cur_element_count);
data_size_ = s->get_data_size();
fstdistfunc_ = s->get_dist_func();
dist_func_param_ = s->get_dist_func_param();
size_per_element_ = data_size_ + sizeof(labeltype);
data_ = (char *) malloc(maxelements_ * size_per_element_);
if (data_ == nullptr)
std::runtime_error("Not enough memory: loadIndex failed to allocate data");
input.read(data_, maxelements_ * size_per_element_);
input.close();
}
};
}

View File

@ -16,8 +16,7 @@ typedef unsigned int linklistsizeint;
template<typename dist_t>
class HierarchicalNSW : public AlgorithmInterface<dist_t> {
public:
public:
HierarchicalNSW(SpaceInterface<dist_t> *s) {
}

View File

@ -89,8 +89,6 @@ namespace hnswlib {
virtual ~AlgorithmInterface(){
}
};
}
#include "space_l2.h"

View File

@ -3,246 +3,243 @@
namespace hnswlib {
static float
InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
float res = 0;
for (unsigned i = 0; i < qty; i++) {
res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
}
return (1.0f - res);
static float
InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
float res = 0;
for (unsigned i = 0; i < qty; i++) {
res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
}
return (1.0f - res);
}
#if defined(USE_AVX)
// Favor using AVX if available.
static float
InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
static float
InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
size_t qty16 = qty / 16;
size_t qty4 = qty / 4;
size_t qty16 = qty / 16;
size_t qty4 = qty / 4;
const float *pEnd1 = pVect1 + 16 * qty16;
const float *pEnd2 = pVect1 + 4 * qty4;
const float *pEnd1 = pVect1 + 16 * qty16;
const float *pEnd2 = pVect1 + 4 * qty4;
__m256 sum256 = _mm256_set1_ps(0);
__m256 sum256 = _mm256_set1_ps(0);
while (pVect1 < pEnd1) {
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
while (pVect1 < pEnd1) {
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
__m256 v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
__m256 v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
__m256 v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
__m256 v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
}
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
}
__m128 v1, v2;
__m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
__m128 v1, v2;
__m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
while (pVect1 < pEnd2) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}
while (pVect1 < pEnd2) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
return 1.0f - sum;
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
return 1.0f - sum;
}
#elif defined(USE_SSE)
static float
InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
static float
InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
size_t qty16 = qty / 16;
size_t qty4 = qty / 4;
size_t qty16 = qty / 16;
size_t qty4 = qty / 4;
const float *pEnd1 = pVect1 + 16 * qty16;
const float *pEnd2 = pVect1 + 4 * qty4;
const float *pEnd1 = pVect1 + 16 * qty16;
const float *pEnd2 = pVect1 + 4 * qty4;
__m128 v1, v2;
__m128 sum_prod = _mm_set1_ps(0);
__m128 v1, v2;
__m128 sum_prod = _mm_set1_ps(0);
while (pVect1 < pEnd1) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
while (pVect1 < pEnd1) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}
while (pVect1 < pEnd2) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
return 1.0f - sum;
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}
while (pVect1 < pEnd2) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
return 1.0f - sum;
}
#endif
#if defined(USE_AVX)
static float
InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
static float
InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
size_t qty16 = qty / 16;
size_t qty16 = qty / 16;
const float *pEnd1 = pVect1 + 16 * qty16;
const float *pEnd1 = pVect1 + 16 * qty16;
__m256 sum256 = _mm256_set1_ps(0);
__m256 sum256 = _mm256_set1_ps(0);
while (pVect1 < pEnd1) {
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
while (pVect1 < pEnd1) {
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
__m256 v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
__m256 v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
__m256 v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
__m256 v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
}
_mm256_store_ps(TmpRes, sum256);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
return 1.0f - sum;
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
}
_mm256_store_ps(TmpRes, sum256);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
return 1.0f - sum;
}
#elif defined(USE_SSE)
static float
InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
static float
InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
size_t qty16 = qty / 16;
size_t qty16 = qty / 16;
const float *pEnd1 = pVect1 + 16 * qty16;
const float *pEnd1 = pVect1 + 16 * qty16;
__m128 v1, v2;
__m128 sum_prod = _mm_set1_ps(0);
__m128 v1, v2;
__m128 sum_prod = _mm_set1_ps(0);
while (pVect1 < pEnd1) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
while (pVect1 < pEnd1) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
return 1.0f - sum;
return 1.0f - sum;
}
#endif
class InnerProductSpace : public SpaceInterface<float> {
DISTFUNC<float> fstdistfunc_;
size_t data_size_;
size_t dim_;
public:
InnerProductSpace(size_t dim) {
fstdistfunc_ = InnerProduct;
#if defined(USE_AVX) || defined(USE_SSE)
if (dim % 4 == 0)
fstdistfunc_ = InnerProductSIMD4Ext;
if (dim % 16 == 0)
fstdistfunc_ = InnerProductSIMD16Ext;
#endif
dim_ = dim;
data_size_ = dim * sizeof(float);
}
#endif
size_t get_data_size() {
return data_size_;
}
class InnerProductSpace : public SpaceInterface<float> {
DISTFUNC<float> get_dist_func() {
return fstdistfunc_;
}
DISTFUNC<float> fstdistfunc_;
size_t data_size_;
size_t dim_;
public:
InnerProductSpace(size_t dim) {
fstdistfunc_ = InnerProduct;
#if defined(USE_AVX) || defined(USE_SSE)
if (dim % 4 == 0)
fstdistfunc_ = InnerProductSIMD4Ext;
if (dim % 16 == 0)
fstdistfunc_ = InnerProductSIMD16Ext;
#endif
dim_ = dim;
data_size_ = dim * sizeof(float);
}
size_t get_data_size() {
return data_size_;
}
DISTFUNC<float> get_dist_func() {
return fstdistfunc_;
}
void *get_dist_func_param() {
return &dim_;
}
void *get_dist_func_param() {
return &dim_;
}
~InnerProductSpace() {}
};
};
}

View File

@ -3,242 +3,234 @@
namespace hnswlib {
static float
L2Sqr(const void *pVect1, const void *pVect2, const void *qty_ptr) {
//return *((float *)pVect2);
size_t qty = *((size_t *) qty_ptr);
float res = 0;
for (unsigned i = 0; i < qty; i++) {
float t = ((float *) pVect1)[i] - ((float *) pVect2)[i];
res += t * t;
}
return (res);
static float
L2Sqr(const void *pVect1, const void *pVect2, const void *qty_ptr) {
//return *((float *)pVect2);
size_t qty = *((size_t *) qty_ptr);
float res = 0;
for (unsigned i = 0; i < qty; i++) {
float t = ((float *) pVect1)[i] - ((float *) pVect2)[i];
res += t * t;
}
return (res);
}
#if defined(USE_AVX)
// Favor using AVX if available.
static float
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
float PORTABLE_ALIGN32 TmpRes[8];
size_t qty16 = qty >> 4;
// Favor using AVX if available.
static float
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
float PORTABLE_ALIGN32 TmpRes[8];
size_t qty16 = qty >> 4;
const float *pEnd1 = pVect1 + (qty16 << 4);
const float *pEnd1 = pVect1 + (qty16 << 4);
__m256 diff, v1, v2;
__m256 sum = _mm256_set1_ps(0);
__m256 diff, v1, v2;
__m256 sum = _mm256_set1_ps(0);
while (pVect1 < pEnd1) {
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
diff = _mm256_sub_ps(v1, v2);
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
while (pVect1 < pEnd1) {
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
diff = _mm256_sub_ps(v1, v2);
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
diff = _mm256_sub_ps(v1, v2);
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
}
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
diff = _mm256_sub_ps(v1, v2);
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
}
_mm256_store_ps(TmpRes, sum);
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
_mm256_store_ps(TmpRes, sum);
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
return (res);
return (res);
}
#elif defined(USE_SSE)
static float
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
float PORTABLE_ALIGN32 TmpRes[8];
// size_t qty4 = qty >> 2;
size_t qty16 = qty >> 4;
static float
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
float PORTABLE_ALIGN32 TmpRes[8];
// size_t qty4 = qty >> 2;
size_t qty16 = qty >> 4;
const float *pEnd1 = pVect1 + (qty16 << 4);
// const float* pEnd2 = pVect1 + (qty4 << 2);
// const float* pEnd3 = pVect1 + qty;
const float *pEnd1 = pVect1 + (qty16 << 4);
// const float* pEnd2 = pVect1 + (qty4 << 2);
// const float* pEnd3 = pVect1 + qty;
__m128 diff, v1, v2;
__m128 sum = _mm_set1_ps(0);
__m128 diff, v1, v2;
__m128 sum = _mm_set1_ps(0);
while (pVect1 < pEnd1) {
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
while (pVect1 < pEnd1) {
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
}
_mm_store_ps(TmpRes, sum);
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
return (res);
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
}
_mm_store_ps(TmpRes, sum);
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
return (res);
}
#endif
#ifdef USE_SSE
static float
L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
static float
L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
size_t qty = *((size_t *) qty_ptr);
// size_t qty4 = qty >> 2;
size_t qty16 = qty >> 2;
// size_t qty4 = qty >> 2;
size_t qty16 = qty >> 2;
const float *pEnd1 = pVect1 + (qty16 << 2);
const float *pEnd1 = pVect1 + (qty16 << 2);
__m128 diff, v1, v2;
__m128 sum = _mm_set1_ps(0);
__m128 diff, v1, v2;
__m128 sum = _mm_set1_ps(0);
while (pVect1 < pEnd1) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
}
_mm_store_ps(TmpRes, sum);
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
return (res);
while (pVect1 < pEnd1) {
v1 = _mm_loadu_ps(pVect1);
pVect1 += 4;
v2 = _mm_loadu_ps(pVect2);
pVect2 += 4;
diff = _mm_sub_ps(v1, v2);
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
}
_mm_store_ps(TmpRes, sum);
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
return (res);
}
#endif
class L2Space : public SpaceInterface<float> {
DISTFUNC<float> fstdistfunc_;
size_t data_size_;
size_t dim_;
public:
L2Space(size_t dim) {
fstdistfunc_ = L2Sqr;
#if defined(USE_SSE) || defined(USE_AVX)
if (dim % 4 == 0)
fstdistfunc_ = L2SqrSIMD4Ext;
if (dim % 16 == 0)
fstdistfunc_ = L2SqrSIMD16Ext;
/*else{
throw runtime_error("Data type not supported!");
}*/
#endif
dim_ = dim;
data_size_ = dim * sizeof(float);
}
size_t get_data_size() {
return data_size_;
}
DISTFUNC<float> get_dist_func() {
return fstdistfunc_;
}
void *get_dist_func_param() {
return &dim_;
}
~L2Space() {}
};
static int
L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
int res = 0;
unsigned char *a = (unsigned char *) pVect1;
unsigned char *b = (unsigned char *) pVect2;
/*for (int i = 0; i < qty; i++) {
int t = int((a)[i]) - int((b)[i]);
res += t*t;
class L2Space : public SpaceInterface<float> {
DISTFUNC<float> fstdistfunc_;
size_t data_size_;
size_t dim_;
public:
L2Space(size_t dim) {
fstdistfunc_ = L2Sqr;
#if defined(USE_SSE) || defined(USE_AVX)
if (dim % 4 == 0)
fstdistfunc_ = L2SqrSIMD4Ext;
if (dim % 16 == 0)
fstdistfunc_ = L2SqrSIMD16Ext;
/*else{
throw runtime_error("Data type not supported!");
}*/
qty = qty >> 2;
for (size_t i = 0; i < qty; i++) {
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
}
return (res);
#endif
dim_ = dim;
data_size_ = dim * sizeof(float);
}
class L2SpaceI : public SpaceInterface<int> {
size_t get_data_size() {
return data_size_;
}
DISTFUNC<int> fstdistfunc_;
size_t data_size_;
size_t dim_;
public:
L2SpaceI(size_t dim) {
fstdistfunc_ = L2SqrI;
dim_ = dim;
data_size_ = dim * sizeof(unsigned char);
}
DISTFUNC<float> get_dist_func() {
return fstdistfunc_;
}
size_t get_data_size() {
return data_size_;
}
void *get_dist_func_param() {
return &dim_;
}
DISTFUNC<int> get_dist_func() {
return fstdistfunc_;
}
~L2Space() {}
};
void *get_dist_func_param() {
return &dim_;
}
static int
L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
int res = 0;
unsigned char *a = (unsigned char *) pVect1;
unsigned char *b = (unsigned char *) pVect2;
/*for (int i = 0; i < qty; i++) {
int t = int((a)[i]) - int((b)[i]);
res += t*t;
}*/
~L2SpaceI() {}
};
qty = qty >> 2;
for (size_t i = 0; i < qty; i++) {
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
}
return (res);
}
class L2SpaceI : public SpaceInterface<int> {
DISTFUNC<int> fstdistfunc_;
size_t data_size_;
size_t dim_;
public:
L2SpaceI(size_t dim) {
fstdistfunc_ = L2SqrI;
dim_ = dim;
data_size_ = dim * sizeof(unsigned char);
}
size_t get_data_size() {
return data_size_;
}
DISTFUNC<int> get_dist_func() {
return fstdistfunc_;
}
void *get_dist_func_param() {
return &dim_;
}
~L2SpaceI() {}
};
}

View File

@ -4,75 +4,76 @@
#include <string.h>
namespace hnswlib {
typedef unsigned short int vl_type;
typedef unsigned short int vl_type;
class VisitedList {
public:
vl_type curV;
vl_type *mass;
unsigned int numelements;
class VisitedList {
public:
vl_type curV;
vl_type *mass;
unsigned int numelements;
VisitedList(int numelements1) {
curV = -1;
numelements = numelements1;
mass = new vl_type[numelements];
}
VisitedList(int numelements1) {
curV = -1;
numelements = numelements1;
mass = new vl_type[numelements];
}
void reset() {
void reset() {
curV++;
if (curV == 0) {
memset(mass, 0, sizeof(vl_type) * numelements);
curV++;
if (curV == 0) {
memset(mass, 0, sizeof(vl_type) * numelements);
curV++;
}
};
~VisitedList() { delete[] mass; }
}
};
~VisitedList() { delete[] mass; }
};
///////////////////////////////////////////////////////////
//
// Class for multi-threaded pool-management of VisitedLists
//
/////////////////////////////////////////////////////////
class VisitedListPool {
std::deque<VisitedList *> pool;
std::mutex poolguard;
int numelements;
class VisitedListPool {
std::deque<VisitedList *> pool;
std::mutex poolguard;
int numelements;
public:
VisitedListPool(int initmaxpools, int numelements1) {
numelements = numelements1;
for (int i = 0; i < initmaxpools; i++)
pool.push_front(new VisitedList(numelements));
}
public:
VisitedListPool(int initmaxpools, int numelements1) {
numelements = numelements1;
for (int i = 0; i < initmaxpools; i++)
pool.push_front(new VisitedList(numelements));
}
VisitedList *getFreeVisitedList() {
VisitedList *rez;
{
std::unique_lock <std::mutex> lock(poolguard);
if (pool.size() > 0) {
rez = pool.front();
pool.pop_front();
} else {
rez = new VisitedList(numelements);
}
}
rez->reset();
return rez;
};
void releaseVisitedList(VisitedList *vl) {
VisitedList *getFreeVisitedList() {
VisitedList *rez;
{
std::unique_lock <std::mutex> lock(poolguard);
pool.push_front(vl);
};
~VisitedListPool() {
while (pool.size()) {
VisitedList *rez = pool.front();
if (pool.size() > 0) {
rez = pool.front();
pool.pop_front();
delete rez;
} else {
rez = new VisitedList(numelements);
}
};
}
rez->reset();
return rez;
};
void releaseVisitedList(VisitedList *vl) {
std::unique_lock <std::mutex> lock(poolguard);
pool.push_front(vl);
};
~VisitedListPool() {
while (pool.size()) {
VisitedList *rez = pool.front();
pool.pop_front();
delete rez;
}
};
};
}