mirror of https://github.com/milvus-io/milvus.git
parent
e8f759c22b
commit
50ddc49332
|
@ -5,166 +5,160 @@
|
|||
#include <algorithm>
|
||||
|
||||
namespace hnswlib {
|
||||
template<typename dist_t>
|
||||
class BruteforceSearch : public AlgorithmInterface<dist_t> {
|
||||
public:
|
||||
/**
 * Creates an empty index that owns no storage.
 *
 * Every member is given a defined value so that the destructor's
 * free(data_) is a harmless free(nullptr) and searchKnn() sees an
 * element count of zero. Call loadIndex() to populate the index.
 *
 * @param s  Space descriptor; not used by this constructor.
 */
BruteforceSearch(SpaceInterface <dist_t> *s) {
    (void) s;
    data_ = nullptr;
    maxelements_ = 0;
    cur_element_count = 0;
    size_per_element_ = 0;
    data_size_ = 0;
    fstdistfunc_ = nullptr;
    dist_func_param_ = nullptr;
}
|
||||
// Constructs the index by reading a previously saved file.
// All work is delegated to loadIndex(location, s).
BruteforceSearch(SpaceInterface<dist_t> *s, const std::string &location) {
|
||||
loadIndex(location, s);
|
||||
}
|
||||
template<typename dist_t>
|
||||
class BruteforceSearch : public AlgorithmInterface<dist_t> {
|
||||
public:
|
||||
BruteforceSearch(SpaceInterface <dist_t> *s) {
|
||||
|
||||
/**
 * Creates an empty index with room for maxElements vectors.
 *
 * Each slot stores the raw vector (data_size_ bytes, as reported by the
 * space) immediately followed by its external label.
 *
 * @param s            Space supplying the vector size, distance function
 *                     and distance-function parameter.
 * @param maxElements  Capacity of the index, in elements.
 * @throws std::runtime_error if the element buffer cannot be allocated.
 */
BruteforceSearch(SpaceInterface <dist_t> *s, size_t maxElements) {
    maxelements_ = maxElements;
    data_size_ = s->get_data_size();
    fstdistfunc_ = s->get_dist_func();
    dist_func_param_ = s->get_dist_func_param();
    size_per_element_ = data_size_ + sizeof(labeltype);
    cur_element_count = 0;

    data_ = static_cast<char *>(malloc(maxElements * size_per_element_));
    // malloc(0) is allowed to return nullptr, so only a failed non-empty
    // request is treated as an error.
    if (data_ == nullptr && maxElements != 0)
        throw std::runtime_error("Not enough memory: BruteforceSearch failed to allocate data");
}
|
||||
}
|
||||
// Constructs the index by reading a previously saved file.
// All work is delegated to loadIndex(location, s).
BruteforceSearch(SpaceInterface<dist_t> *s, const std::string &location) {
|
||||
loadIndex(location, s);
|
||||
}
|
||||
|
||||
// Releases the element buffer with free(); the buffer is obtained from
// malloc() in the sized constructor and in loadIndex().
// NOTE(review): assumes data_ is either a malloc'd pointer or null here.
~BruteforceSearch() {
|
||||
free(data_);
|
||||
}
|
||||
/**
 * Creates an empty index with room for maxElements vectors.
 *
 * Each slot stores the raw vector (data_size_ bytes, as reported by the
 * space) immediately followed by its external label.
 *
 * @param s            Space supplying the vector size, distance function
 *                     and distance-function parameter.
 * @param maxElements  Capacity of the index, in elements.
 * @throws std::runtime_error if the element buffer cannot be allocated.
 */
BruteforceSearch(SpaceInterface <dist_t> *s, size_t maxElements) {
    maxelements_ = maxElements;
    data_size_ = s->get_data_size();
    fstdistfunc_ = s->get_dist_func();
    dist_func_param_ = s->get_dist_func_param();
    size_per_element_ = data_size_ + sizeof(labeltype);
    cur_element_count = 0;

    data_ = static_cast<char *>(malloc(maxElements * size_per_element_));
    // malloc(0) is allowed to return nullptr, so only a failed non-empty
    // request is treated as an error.
    if (data_ == nullptr && maxElements != 0)
        throw std::runtime_error("Not enough memory: BruteforceSearch failed to allocate data");
}
|
||||
|
||||
char *data_;
|
||||
size_t maxelements_;
|
||||
size_t cur_element_count;
|
||||
size_t size_per_element_;
|
||||
// Releases the element buffer with free(); the buffer is obtained from
// malloc() in the sized constructor and in loadIndex().
// NOTE(review): assumes data_ is either a malloc'd pointer or null here.
~BruteforceSearch() {
|
||||
free(data_);
|
||||
}
|
||||
|
||||
size_t data_size_;
|
||||
DISTFUNC <dist_t> fstdistfunc_;
|
||||
void *dist_func_param_;
|
||||
std::mutex index_lock;
|
||||
char *data_;
|
||||
size_t maxelements_;
|
||||
size_t cur_element_count;
|
||||
size_t size_per_element_;
|
||||
|
||||
std::unordered_map<labeltype,size_t > dict_external_to_internal;
|
||||
size_t data_size_;
|
||||
DISTFUNC <dist_t> fstdistfunc_;
|
||||
void *dist_func_param_;
|
||||
std::mutex index_lock;
|
||||
|
||||
void addPoint(const void *datapoint, labeltype label) {
|
||||
std::unordered_map<labeltype,size_t > dict_external_to_internal;
|
||||
|
||||
int idx;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(index_lock);
|
||||
void addPoint(const void *datapoint, labeltype label) {
|
||||
|
||||
int idx;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(index_lock);
|
||||
|
||||
|
||||
|
||||
auto search=dict_external_to_internal.find(label);
|
||||
if (search != dict_external_to_internal.end()) {
|
||||
idx=search->second;
|
||||
}
|
||||
else{
|
||||
if (cur_element_count >= maxelements_) {
|
||||
throw std::runtime_error("The number of elements exceeds the specified limit\n");
|
||||
}
|
||||
idx=cur_element_count;
|
||||
dict_external_to_internal[label] = idx;
|
||||
cur_element_count++;
|
||||
auto search=dict_external_to_internal.find(label);
|
||||
if (search != dict_external_to_internal.end()) {
|
||||
idx=search->second;
|
||||
}
|
||||
else{
|
||||
if (cur_element_count >= maxelements_) {
|
||||
throw std::runtime_error("The number of elements exceeds the specified limit\n");
|
||||
}
|
||||
idx=cur_element_count;
|
||||
dict_external_to_internal[label] = idx;
|
||||
cur_element_count++;
|
||||
}
|
||||
memcpy(data_ + size_per_element_ * idx + data_size_, &label, sizeof(labeltype));
|
||||
memcpy(data_ + size_per_element_ * idx, datapoint, data_size_);
|
||||
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
/**
 * Removes the element stored under an external label.
 *
 * The freed slot is refilled with the last stored element so the buffer
 * stays dense; that element's map entry is redirected to the new slot.
 * Removing a label that is not in the index is a no-op.
 *
 * NOTE(review): unlike addPoint(), this does not take index_lock;
 * callers must serialize it against other mutations.
 *
 * @param cur_external  External label of the element to remove.
 */
void removePoint(labeltype cur_external) {
    auto found = dict_external_to_internal.find(cur_external);
    if (found == dict_external_to_internal.end())
        return;  // unknown label: operator[] would have inserted slot 0

    const size_t cur_c = found->second;
    dict_external_to_internal.erase(found);

    const size_t last = cur_element_count - 1;
    // When the removed element already is the last one there is nothing
    // to move, and its label must not be re-inserted into the map.
    if (cur_c != last) {
        char *last_slot = data_ + size_per_element_ * last;
        labeltype moved_label;
        memcpy(&moved_label, last_slot + data_size_, sizeof(labeltype));
        dict_external_to_internal[moved_label] = cur_c;
        memcpy(data_ + size_per_element_ * cur_c,
               last_slot,
               data_size_ + sizeof(labeltype));
    }
    cur_element_count--;
}
|
||||
memcpy(data_ + size_per_element_ * idx + data_size_, &label, sizeof(labeltype));
|
||||
memcpy(data_ + size_per_element_ * idx, datapoint, data_size_);
|
||||
|
||||
|
||||
std::priority_queue<std::pair<dist_t, labeltype >>
|
||||
searchKnn(const void *query_data, size_t k) const {
|
||||
std::priority_queue<std::pair<dist_t, labeltype >> topResults;
|
||||
if (cur_element_count == 0) return topResults;
|
||||
for (int i = 0; i < k; i++) {
|
||||
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
|
||||
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
|
||||
data_size_))));
|
||||
}
|
||||
dist_t lastdist = topResults.top().first;
|
||||
for (int i = k; i < cur_element_count; i++) {
|
||||
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
|
||||
if (dist <= lastdist) {
|
||||
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
|
||||
data_size_))));
|
||||
if (topResults.size() > k)
|
||||
topResults.pop();
|
||||
lastdist = topResults.top().first;
|
||||
}
|
||||
|
||||
}
|
||||
return topResults;
|
||||
};
|
||||
|
||||
template <typename Comp>
|
||||
std::vector<std::pair<dist_t, labeltype>>
|
||||
searchKnn(const void* query_data, size_t k, Comp comp) {
|
||||
std::vector<std::pair<dist_t, labeltype>> result;
|
||||
if (cur_element_count == 0) return result;
|
||||
|
||||
auto ret = searchKnn(query_data, k);
|
||||
|
||||
while (!ret.empty()) {
|
||||
result.push_back(ret.top());
|
||||
ret.pop();
|
||||
}
|
||||
|
||||
std::sort(result.begin(), result.end(), comp);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Writes the index to a binary file.
 *
 * Layout: maxelements_, size_per_element_, cur_element_count (each via
 * writeBinaryPOD), followed by the whole element buffer of
 * maxelements_ * size_per_element_ bytes.
 *
 * @param location  Path of the file to create or overwrite.
 * @throws std::runtime_error if the file cannot be opened for writing.
 */
void saveIndex(const std::string &location) {
    std::ofstream output(location, std::ios::binary);
    if (!output.is_open())
        throw std::runtime_error("Cannot open file");

    writeBinaryPOD(output, maxelements_);
    writeBinaryPOD(output, size_per_element_);
    writeBinaryPOD(output, cur_element_count);

    output.write(data_, maxelements_ * size_per_element_);

    output.close();
}
|
||||
|
||||
void loadIndex(const std::string &location, SpaceInterface<dist_t> *s) {
|
||||
|
||||
|
||||
std::ifstream input(location, std::ios::binary);
|
||||
std::streampos position;
|
||||
|
||||
readBinaryPOD(input, maxelements_);
|
||||
readBinaryPOD(input, size_per_element_);
|
||||
readBinaryPOD(input, cur_element_count);
|
||||
|
||||
data_size_ = s->get_data_size();
|
||||
fstdistfunc_ = s->get_dist_func();
|
||||
dist_func_param_ = s->get_dist_func_param();
|
||||
size_per_element_ = data_size_ + sizeof(labeltype);
|
||||
data_ = (char *) malloc(maxelements_ * size_per_element_);
|
||||
if (data_ == nullptr)
|
||||
std::runtime_error("Not enough memory: loadIndex failed to allocate data");
|
||||
|
||||
input.read(data_, maxelements_ * size_per_element_);
|
||||
|
||||
input.close();
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
 * Removes the element stored under an external label.
 *
 * The freed slot is refilled with the last stored element so the buffer
 * stays dense; that element's map entry is redirected to the new slot.
 * Removing a label that is not in the index is a no-op.
 *
 * NOTE(review): unlike addPoint(), this does not take index_lock;
 * callers must serialize it against other mutations.
 *
 * @param cur_external  External label of the element to remove.
 */
void removePoint(labeltype cur_external) {
    auto found = dict_external_to_internal.find(cur_external);
    if (found == dict_external_to_internal.end())
        return;  // unknown label: operator[] would have inserted slot 0

    const size_t cur_c = found->second;
    dict_external_to_internal.erase(found);

    const size_t last = cur_element_count - 1;
    // When the removed element already is the last one there is nothing
    // to move, and its label must not be re-inserted into the map.
    if (cur_c != last) {
        char *last_slot = data_ + size_per_element_ * last;
        labeltype moved_label;
        memcpy(&moved_label, last_slot + data_size_, sizeof(labeltype));
        dict_external_to_internal[moved_label] = cur_c;
        memcpy(data_ + size_per_element_ * cur_c,
               last_slot,
               data_size_ + sizeof(labeltype));
    }
    cur_element_count--;
}
|
||||
|
||||
std::priority_queue<std::pair<dist_t, labeltype >>
|
||||
searchKnn(const void *query_data, size_t k) const {
|
||||
std::priority_queue<std::pair<dist_t, labeltype >> topResults;
|
||||
if (cur_element_count == 0) return topResults;
|
||||
for (int i = 0; i < k; i++) {
|
||||
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
|
||||
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
|
||||
data_size_))));
|
||||
}
|
||||
dist_t lastdist = topResults.top().first;
|
||||
for (int i = k; i < cur_element_count; i++) {
|
||||
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
|
||||
if (dist <= lastdist) {
|
||||
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
|
||||
data_size_))));
|
||||
if (topResults.size() > k)
|
||||
topResults.pop();
|
||||
lastdist = topResults.top().first;
|
||||
}
|
||||
}
|
||||
return topResults;
|
||||
};
|
||||
|
||||
template <typename Comp>
|
||||
std::vector<std::pair<dist_t, labeltype>>
|
||||
searchKnn(const void* query_data, size_t k, Comp comp) {
|
||||
std::vector<std::pair<dist_t, labeltype>> result;
|
||||
if (cur_element_count == 0) return result;
|
||||
|
||||
auto ret = searchKnn(query_data, k);
|
||||
|
||||
while (!ret.empty()) {
|
||||
result.push_back(ret.top());
|
||||
ret.pop();
|
||||
}
|
||||
|
||||
std::sort(result.begin(), result.end(), comp);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Writes the index to a binary file.
 *
 * Layout: maxelements_, size_per_element_, cur_element_count (each via
 * writeBinaryPOD), followed by the whole element buffer of
 * maxelements_ * size_per_element_ bytes.
 *
 * @param location  Path of the file to create or overwrite.
 * @throws std::runtime_error if the file cannot be opened for writing.
 */
void saveIndex(const std::string &location) {
    std::ofstream output(location, std::ios::binary);
    if (!output.is_open())
        throw std::runtime_error("Cannot open file");

    writeBinaryPOD(output, maxelements_);
    writeBinaryPOD(output, size_per_element_);
    writeBinaryPOD(output, cur_element_count);

    output.write(data_, maxelements_ * size_per_element_);

    output.close();
}
|
||||
|
||||
void loadIndex(const std::string &location, SpaceInterface<dist_t> *s) {
|
||||
std::ifstream input(location, std::ios::binary);
|
||||
std::streampos position;
|
||||
|
||||
readBinaryPOD(input, maxelements_);
|
||||
readBinaryPOD(input, size_per_element_);
|
||||
readBinaryPOD(input, cur_element_count);
|
||||
|
||||
data_size_ = s->get_data_size();
|
||||
fstdistfunc_ = s->get_dist_func();
|
||||
dist_func_param_ = s->get_dist_func_param();
|
||||
size_per_element_ = data_size_ + sizeof(labeltype);
|
||||
data_ = (char *) malloc(maxelements_ * size_per_element_);
|
||||
if (data_ == nullptr)
|
||||
std::runtime_error("Not enough memory: loadIndex failed to allocate data");
|
||||
|
||||
input.read(data_, maxelements_ * size_per_element_);
|
||||
|
||||
input.close();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -16,8 +16,7 @@ typedef unsigned int linklistsizeint;
|
|||
|
||||
template<typename dist_t>
|
||||
class HierarchicalNSW : public AlgorithmInterface<dist_t> {
|
||||
|
||||
public:
|
||||
public:
|
||||
HierarchicalNSW(SpaceInterface<dist_t> *s) {
|
||||
}
|
||||
|
||||
|
|
|
@ -89,8 +89,6 @@ namespace hnswlib {
|
|||
virtual ~AlgorithmInterface(){
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#include "space_l2.h"
|
||||
|
|
|
@ -3,246 +3,243 @@
|
|||
|
||||
namespace hnswlib {
|
||||
|
||||
static float
|
||||
InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
float res = 0;
|
||||
for (unsigned i = 0; i < qty; i++) {
|
||||
res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
|
||||
}
|
||||
return (1.0f - res);
|
||||
|
||||
/**
 * Scalar inner-product distance: 1 - <a, b>.
 *
 * @param pVect1   First vector, an array of float.
 * @param pVect2   Second vector, an array of float.
 * @param qty_ptr  Points to a size_t holding the number of components.
 * @return 1.0f minus the dot product, accumulated in index order.
 */
static float
InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
    const size_t qty = *static_cast<const size_t *>(qty_ptr);
    const float *a = static_cast<const float *>(pVect1);
    const float *b = static_cast<const float *>(pVect2);

    float res = 0;
    // size_t index: an unsigned counter would wrap before reaching a
    // dimension larger than UINT_MAX and never terminate.
    for (size_t i = 0; i < qty; i++) {
        res += a[i] * b[i];
    }
    return (1.0f - res);
}
|
||||
|
||||
#if defined(USE_AVX)
|
||||
|
||||
// Favor using AVX if available.
|
||||
static float
|
||||
InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
static float
|
||||
InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
|
||||
size_t qty16 = qty / 16;
|
||||
size_t qty4 = qty / 4;
|
||||
size_t qty16 = qty / 16;
|
||||
size_t qty4 = qty / 4;
|
||||
|
||||
const float *pEnd1 = pVect1 + 16 * qty16;
|
||||
const float *pEnd2 = pVect1 + 4 * qty4;
|
||||
const float *pEnd1 = pVect1 + 16 * qty16;
|
||||
const float *pEnd2 = pVect1 + 4 * qty4;
|
||||
|
||||
__m256 sum256 = _mm256_set1_ps(0);
|
||||
__m256 sum256 = _mm256_set1_ps(0);
|
||||
|
||||
while (pVect1 < pEnd1) {
|
||||
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
|
||||
while (pVect1 < pEnd1) {
|
||||
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
|
||||
|
||||
__m256 v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
__m256 v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
|
||||
__m256 v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
__m256 v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
|
||||
|
||||
v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
|
||||
}
|
||||
v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
|
||||
}
|
||||
|
||||
__m128 v1, v2;
|
||||
__m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
|
||||
__m128 v1, v2;
|
||||
__m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
|
||||
|
||||
while (pVect1 < pEnd2) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
}
|
||||
while (pVect1 < pEnd2) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
}
|
||||
|
||||
_mm_store_ps(TmpRes, sum_prod);
|
||||
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
|
||||
return 1.0f - sum;
|
||||
_mm_store_ps(TmpRes, sum_prod);
|
||||
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
|
||||
return 1.0f - sum;
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE)
|
||||
|
||||
static float
|
||||
InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
static float
|
||||
InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
|
||||
size_t qty16 = qty / 16;
|
||||
size_t qty4 = qty / 4;
|
||||
size_t qty16 = qty / 16;
|
||||
size_t qty4 = qty / 4;
|
||||
|
||||
const float *pEnd1 = pVect1 + 16 * qty16;
|
||||
const float *pEnd2 = pVect1 + 4 * qty4;
|
||||
const float *pEnd1 = pVect1 + 16 * qty16;
|
||||
const float *pEnd2 = pVect1 + 4 * qty4;
|
||||
|
||||
__m128 v1, v2;
|
||||
__m128 sum_prod = _mm_set1_ps(0);
|
||||
__m128 v1, v2;
|
||||
__m128 sum_prod = _mm_set1_ps(0);
|
||||
|
||||
while (pVect1 < pEnd1) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
while (pVect1 < pEnd1) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
}
|
||||
|
||||
while (pVect1 < pEnd2) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
}
|
||||
|
||||
_mm_store_ps(TmpRes, sum_prod);
|
||||
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
||||
|
||||
return 1.0f - sum;
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
}
|
||||
|
||||
while (pVect1 < pEnd2) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
}
|
||||
|
||||
_mm_store_ps(TmpRes, sum_prod);
|
||||
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
||||
|
||||
return 1.0f - sum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(USE_AVX)
|
||||
|
||||
static float
|
||||
InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
static float
|
||||
InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
|
||||
size_t qty16 = qty / 16;
|
||||
size_t qty16 = qty / 16;
|
||||
|
||||
|
||||
const float *pEnd1 = pVect1 + 16 * qty16;
|
||||
const float *pEnd1 = pVect1 + 16 * qty16;
|
||||
|
||||
__m256 sum256 = _mm256_set1_ps(0);
|
||||
__m256 sum256 = _mm256_set1_ps(0);
|
||||
|
||||
while (pVect1 < pEnd1) {
|
||||
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
|
||||
while (pVect1 < pEnd1) {
|
||||
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
|
||||
|
||||
__m256 v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
__m256 v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
|
||||
__m256 v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
__m256 v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
|
||||
|
||||
v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
|
||||
}
|
||||
|
||||
_mm256_store_ps(TmpRes, sum256);
|
||||
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
|
||||
|
||||
return 1.0f - sum;
|
||||
v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
|
||||
}
|
||||
|
||||
_mm256_store_ps(TmpRes, sum256);
|
||||
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
|
||||
|
||||
return 1.0f - sum;
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE)
|
||||
|
||||
static float
|
||||
InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
static float
|
||||
InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
|
||||
size_t qty16 = qty / 16;
|
||||
size_t qty16 = qty / 16;
|
||||
|
||||
const float *pEnd1 = pVect1 + 16 * qty16;
|
||||
const float *pEnd1 = pVect1 + 16 * qty16;
|
||||
|
||||
__m128 v1, v2;
|
||||
__m128 sum_prod = _mm_set1_ps(0);
|
||||
__m128 v1, v2;
|
||||
__m128 sum_prod = _mm_set1_ps(0);
|
||||
|
||||
while (pVect1 < pEnd1) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
while (pVect1 < pEnd1) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
}
|
||||
_mm_store_ps(TmpRes, sum_prod);
|
||||
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
|
||||
}
|
||||
_mm_store_ps(TmpRes, sum_prod);
|
||||
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
||||
|
||||
return 1.0f - sum;
|
||||
return 1.0f - sum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
class InnerProductSpace : public SpaceInterface<float> {
|
||||
DISTFUNC<float> fstdistfunc_;
|
||||
size_t data_size_;
|
||||
size_t dim_;
|
||||
public:
|
||||
InnerProductSpace(size_t dim) {
|
||||
fstdistfunc_ = InnerProduct;
|
||||
#if defined(USE_AVX) || defined(USE_SSE)
|
||||
if (dim % 4 == 0)
|
||||
fstdistfunc_ = InnerProductSIMD4Ext;
|
||||
if (dim % 16 == 0)
|
||||
fstdistfunc_ = InnerProductSIMD16Ext;
|
||||
#endif
|
||||
dim_ = dim;
|
||||
data_size_ = dim * sizeof(float);
|
||||
}
|
||||
|
||||
#endif
|
||||
size_t get_data_size() {
|
||||
return data_size_;
|
||||
}
|
||||
|
||||
class InnerProductSpace : public SpaceInterface<float> {
|
||||
DISTFUNC<float> get_dist_func() {
|
||||
return fstdistfunc_;
|
||||
}
|
||||
|
||||
DISTFUNC<float> fstdistfunc_;
|
||||
size_t data_size_;
|
||||
size_t dim_;
|
||||
public:
|
||||
InnerProductSpace(size_t dim) {
|
||||
fstdistfunc_ = InnerProduct;
|
||||
#if defined(USE_AVX) || defined(USE_SSE)
|
||||
if (dim % 4 == 0)
|
||||
fstdistfunc_ = InnerProductSIMD4Ext;
|
||||
if (dim % 16 == 0)
|
||||
fstdistfunc_ = InnerProductSIMD16Ext;
|
||||
#endif
|
||||
dim_ = dim;
|
||||
data_size_ = dim * sizeof(float);
|
||||
}
|
||||
|
||||
size_t get_data_size() {
|
||||
return data_size_;
|
||||
}
|
||||
|
||||
DISTFUNC<float> get_dist_func() {
|
||||
return fstdistfunc_;
|
||||
}
|
||||
|
||||
void *get_dist_func_param() {
|
||||
return &dim_;
|
||||
}
|
||||
void *get_dist_func_param() {
|
||||
return &dim_;
|
||||
}
|
||||
|
||||
~InnerProductSpace() {}
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
}
|
||||
|
|
|
@ -3,242 +3,234 @@
|
|||
|
||||
namespace hnswlib {
|
||||
|
||||
static float
|
||||
L2Sqr(const void *pVect1, const void *pVect2, const void *qty_ptr) {
|
||||
//return *((float *)pVect2);
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
float res = 0;
|
||||
for (unsigned i = 0; i < qty; i++) {
|
||||
float t = ((float *) pVect1)[i] - ((float *) pVect2)[i];
|
||||
res += t * t;
|
||||
}
|
||||
return (res);
|
||||
|
||||
/**
 * Scalar squared Euclidean distance.
 *
 * @param pVect1   First vector, an array of float.
 * @param pVect2   Second vector, an array of float.
 * @param qty_ptr  Points to a size_t holding the number of components.
 * @return Sum of squared component differences, accumulated in index order.
 */
static float
L2Sqr(const void *pVect1, const void *pVect2, const void *qty_ptr) {
    const size_t qty = *static_cast<const size_t *>(qty_ptr);
    const float *a = static_cast<const float *>(pVect1);
    const float *b = static_cast<const float *>(pVect2);

    float res = 0;
    // size_t index: an unsigned counter would wrap before reaching a
    // dimension larger than UINT_MAX and never terminate.
    for (size_t i = 0; i < qty; i++) {
        const float t = a[i] - b[i];
        res += t * t;
    }
    return (res);
}
|
||||
|
||||
#if defined(USE_AVX)
|
||||
|
||||
// Favor using AVX if available.
|
||||
static float
|
||||
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
size_t qty16 = qty >> 4;
|
||||
// Favor using AVX if available.
|
||||
static float
|
||||
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
size_t qty16 = qty >> 4;
|
||||
|
||||
const float *pEnd1 = pVect1 + (qty16 << 4);
|
||||
const float *pEnd1 = pVect1 + (qty16 << 4);
|
||||
|
||||
__m256 diff, v1, v2;
|
||||
__m256 sum = _mm256_set1_ps(0);
|
||||
__m256 diff, v1, v2;
|
||||
__m256 sum = _mm256_set1_ps(0);
|
||||
|
||||
while (pVect1 < pEnd1) {
|
||||
v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
diff = _mm256_sub_ps(v1, v2);
|
||||
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
|
||||
while (pVect1 < pEnd1) {
|
||||
v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
diff = _mm256_sub_ps(v1, v2);
|
||||
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
|
||||
|
||||
v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
diff = _mm256_sub_ps(v1, v2);
|
||||
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
|
||||
}
|
||||
v1 = _mm256_loadu_ps(pVect1);
|
||||
pVect1 += 8;
|
||||
v2 = _mm256_loadu_ps(pVect2);
|
||||
pVect2 += 8;
|
||||
diff = _mm256_sub_ps(v1, v2);
|
||||
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
|
||||
}
|
||||
|
||||
_mm256_store_ps(TmpRes, sum);
|
||||
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
|
||||
_mm256_store_ps(TmpRes, sum);
|
||||
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
|
||||
|
||||
return (res);
|
||||
return (res);
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE)
|
||||
|
||||
static float
|
||||
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
// size_t qty4 = qty >> 2;
|
||||
size_t qty16 = qty >> 4;
|
||||
static float
|
||||
L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
// size_t qty4 = qty >> 2;
|
||||
size_t qty16 = qty >> 4;
|
||||
|
||||
const float *pEnd1 = pVect1 + (qty16 << 4);
|
||||
// const float* pEnd2 = pVect1 + (qty4 << 2);
|
||||
// const float* pEnd3 = pVect1 + qty;
|
||||
const float *pEnd1 = pVect1 + (qty16 << 4);
|
||||
// const float* pEnd2 = pVect1 + (qty4 << 2);
|
||||
// const float* pEnd3 = pVect1 + qty;
|
||||
|
||||
__m128 diff, v1, v2;
|
||||
__m128 sum = _mm_set1_ps(0);
|
||||
__m128 diff, v1, v2;
|
||||
__m128 sum = _mm_set1_ps(0);
|
||||
|
||||
while (pVect1 < pEnd1) {
|
||||
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
while (pVect1 < pEnd1) {
|
||||
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
}
|
||||
_mm_store_ps(TmpRes, sum);
|
||||
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
||||
|
||||
return (res);
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
}
|
||||
_mm_store_ps(TmpRes, sum);
|
||||
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
||||
|
||||
return (res);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_SSE
|
||||
static float
|
||||
L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
static float
|
||||
L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
|
||||
float PORTABLE_ALIGN32 TmpRes[8];
|
||||
float *pVect1 = (float *) pVect1v;
|
||||
float *pVect2 = (float *) pVect2v;
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
|
||||
|
||||
// size_t qty4 = qty >> 2;
|
||||
size_t qty16 = qty >> 2;
|
||||
// size_t qty4 = qty >> 2;
|
||||
size_t qty16 = qty >> 2;
|
||||
|
||||
const float *pEnd1 = pVect1 + (qty16 << 2);
|
||||
const float *pEnd1 = pVect1 + (qty16 << 2);
|
||||
|
||||
__m128 diff, v1, v2;
|
||||
__m128 sum = _mm_set1_ps(0);
|
||||
__m128 diff, v1, v2;
|
||||
__m128 sum = _mm_set1_ps(0);
|
||||
|
||||
while (pVect1 < pEnd1) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
}
|
||||
_mm_store_ps(TmpRes, sum);
|
||||
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
||||
|
||||
return (res);
|
||||
while (pVect1 < pEnd1) {
|
||||
v1 = _mm_loadu_ps(pVect1);
|
||||
pVect1 += 4;
|
||||
v2 = _mm_loadu_ps(pVect2);
|
||||
pVect2 += 4;
|
||||
diff = _mm_sub_ps(v1, v2);
|
||||
sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
|
||||
}
|
||||
_mm_store_ps(TmpRes, sum);
|
||||
float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
|
||||
|
||||
return (res);
|
||||
}
|
||||
#endif
|
||||
|
||||
class L2Space : public SpaceInterface<float> {
|
||||
|
||||
DISTFUNC<float> fstdistfunc_;
|
||||
size_t data_size_;
|
||||
size_t dim_;
|
||||
public:
|
||||
L2Space(size_t dim) {
|
||||
fstdistfunc_ = L2Sqr;
|
||||
#if defined(USE_SSE) || defined(USE_AVX)
|
||||
if (dim % 4 == 0)
|
||||
fstdistfunc_ = L2SqrSIMD4Ext;
|
||||
if (dim % 16 == 0)
|
||||
fstdistfunc_ = L2SqrSIMD16Ext;
|
||||
/*else{
|
||||
throw runtime_error("Data type not supported!");
|
||||
}*/
|
||||
#endif
|
||||
dim_ = dim;
|
||||
data_size_ = dim * sizeof(float);
|
||||
}
|
||||
|
||||
size_t get_data_size() {
|
||||
return data_size_;
|
||||
}
|
||||
|
||||
DISTFUNC<float> get_dist_func() {
|
||||
return fstdistfunc_;
|
||||
}
|
||||
|
||||
void *get_dist_func_param() {
|
||||
return &dim_;
|
||||
}
|
||||
|
||||
~L2Space() {}
|
||||
};
|
||||
|
||||
static int
|
||||
L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
|
||||
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
int res = 0;
|
||||
unsigned char *a = (unsigned char *) pVect1;
|
||||
unsigned char *b = (unsigned char *) pVect2;
|
||||
/*for (int i = 0; i < qty; i++) {
|
||||
int t = int((a)[i]) - int((b)[i]);
|
||||
res += t*t;
|
||||
class L2Space : public SpaceInterface<float> {
|
||||
DISTFUNC<float> fstdistfunc_;
|
||||
size_t data_size_;
|
||||
size_t dim_;
|
||||
public:
|
||||
L2Space(size_t dim) {
|
||||
fstdistfunc_ = L2Sqr;
|
||||
#if defined(USE_SSE) || defined(USE_AVX)
|
||||
if (dim % 4 == 0)
|
||||
fstdistfunc_ = L2SqrSIMD4Ext;
|
||||
if (dim % 16 == 0)
|
||||
fstdistfunc_ = L2SqrSIMD16Ext;
|
||||
/*else{
|
||||
throw runtime_error("Data type not supported!");
|
||||
}*/
|
||||
|
||||
qty = qty >> 2;
|
||||
for (size_t i = 0; i < qty; i++) {
|
||||
|
||||
res += ((*a) - (*b)) * ((*a) - (*b));
|
||||
a++;
|
||||
b++;
|
||||
res += ((*a) - (*b)) * ((*a) - (*b));
|
||||
a++;
|
||||
b++;
|
||||
res += ((*a) - (*b)) * ((*a) - (*b));
|
||||
a++;
|
||||
b++;
|
||||
res += ((*a) - (*b)) * ((*a) - (*b));
|
||||
a++;
|
||||
b++;
|
||||
|
||||
|
||||
}
|
||||
|
||||
return (res);
|
||||
|
||||
#endif
|
||||
dim_ = dim;
|
||||
data_size_ = dim * sizeof(float);
|
||||
}
|
||||
|
||||
class L2SpaceI : public SpaceInterface<int> {
|
||||
size_t get_data_size() {
|
||||
return data_size_;
|
||||
}
|
||||
|
||||
DISTFUNC<int> fstdistfunc_;
|
||||
size_t data_size_;
|
||||
size_t dim_;
|
||||
public:
|
||||
L2SpaceI(size_t dim) {
|
||||
fstdistfunc_ = L2SqrI;
|
||||
dim_ = dim;
|
||||
data_size_ = dim * sizeof(unsigned char);
|
||||
}
|
||||
DISTFUNC<float> get_dist_func() {
|
||||
return fstdistfunc_;
|
||||
}
|
||||
|
||||
size_t get_data_size() {
|
||||
return data_size_;
|
||||
}
|
||||
void *get_dist_func_param() {
|
||||
return &dim_;
|
||||
}
|
||||
|
||||
DISTFUNC<int> get_dist_func() {
|
||||
return fstdistfunc_;
|
||||
}
|
||||
~L2Space() {}
|
||||
};
|
||||
|
||||
void *get_dist_func_param() {
|
||||
return &dim_;
|
||||
}
|
||||
static int
|
||||
L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
|
||||
size_t qty = *((size_t *) qty_ptr);
|
||||
int res = 0;
|
||||
unsigned char *a = (unsigned char *) pVect1;
|
||||
unsigned char *b = (unsigned char *) pVect2;
|
||||
/*for (int i = 0; i < qty; i++) {
|
||||
int t = int((a)[i]) - int((b)[i]);
|
||||
res += t*t;
|
||||
}*/
|
||||
|
||||
~L2SpaceI() {}
|
||||
};
|
||||
qty = qty >> 2;
|
||||
for (size_t i = 0; i < qty; i++) {
|
||||
|
||||
res += ((*a) - (*b)) * ((*a) - (*b));
|
||||
a++;
|
||||
b++;
|
||||
res += ((*a) - (*b)) * ((*a) - (*b));
|
||||
a++;
|
||||
b++;
|
||||
res += ((*a) - (*b)) * ((*a) - (*b));
|
||||
a++;
|
||||
b++;
|
||||
res += ((*a) - (*b)) * ((*a) - (*b));
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
|
||||
return (res);
|
||||
}
|
||||
|
||||
class L2SpaceI : public SpaceInterface<int> {
|
||||
DISTFUNC<int> fstdistfunc_;
|
||||
size_t data_size_;
|
||||
size_t dim_;
|
||||
public:
|
||||
L2SpaceI(size_t dim) {
|
||||
fstdistfunc_ = L2SqrI;
|
||||
dim_ = dim;
|
||||
data_size_ = dim * sizeof(unsigned char);
|
||||
}
|
||||
|
||||
size_t get_data_size() {
|
||||
return data_size_;
|
||||
}
|
||||
|
||||
DISTFUNC<int> get_dist_func() {
|
||||
return fstdistfunc_;
|
||||
}
|
||||
|
||||
void *get_dist_func_param() {
|
||||
return &dim_;
|
||||
}
|
||||
|
||||
~L2SpaceI() {}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -4,75 +4,76 @@
|
|||
#include <string.h>
|
||||
|
||||
namespace hnswlib {
|
||||
typedef unsigned short int vl_type;
|
||||
typedef unsigned short int vl_type;
|
||||
|
||||
class VisitedList {
|
||||
public:
|
||||
vl_type curV;
|
||||
vl_type *mass;
|
||||
unsigned int numelements;
|
||||
class VisitedList {
|
||||
public:
|
||||
vl_type curV;
|
||||
vl_type *mass;
|
||||
unsigned int numelements;
|
||||
|
||||
VisitedList(int numelements1) {
|
||||
curV = -1;
|
||||
numelements = numelements1;
|
||||
mass = new vl_type[numelements];
|
||||
}
|
||||
VisitedList(int numelements1) {
|
||||
curV = -1;
|
||||
numelements = numelements1;
|
||||
mass = new vl_type[numelements];
|
||||
}
|
||||
|
||||
void reset() {
|
||||
void reset() {
|
||||
curV++;
|
||||
if (curV == 0) {
|
||||
memset(mass, 0, sizeof(vl_type) * numelements);
|
||||
curV++;
|
||||
if (curV == 0) {
|
||||
memset(mass, 0, sizeof(vl_type) * numelements);
|
||||
curV++;
|
||||
}
|
||||
};
|
||||
|
||||
~VisitedList() { delete[] mass; }
|
||||
}
|
||||
};
|
||||
|
||||
~VisitedList() { delete[] mass; }
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
//
|
||||
// Class for multi-threaded pool-management of VisitedLists
|
||||
//
|
||||
/////////////////////////////////////////////////////////
|
||||
|
||||
class VisitedListPool {
|
||||
std::deque<VisitedList *> pool;
|
||||
std::mutex poolguard;
|
||||
int numelements;
|
||||
class VisitedListPool {
|
||||
std::deque<VisitedList *> pool;
|
||||
std::mutex poolguard;
|
||||
int numelements;
|
||||
|
||||
public:
|
||||
VisitedListPool(int initmaxpools, int numelements1) {
|
||||
numelements = numelements1;
|
||||
for (int i = 0; i < initmaxpools; i++)
|
||||
pool.push_front(new VisitedList(numelements));
|
||||
}
|
||||
public:
|
||||
VisitedListPool(int initmaxpools, int numelements1) {
|
||||
numelements = numelements1;
|
||||
for (int i = 0; i < initmaxpools; i++)
|
||||
pool.push_front(new VisitedList(numelements));
|
||||
}
|
||||
|
||||
VisitedList *getFreeVisitedList() {
|
||||
VisitedList *rez;
|
||||
{
|
||||
std::unique_lock <std::mutex> lock(poolguard);
|
||||
if (pool.size() > 0) {
|
||||
rez = pool.front();
|
||||
pool.pop_front();
|
||||
} else {
|
||||
rez = new VisitedList(numelements);
|
||||
}
|
||||
}
|
||||
rez->reset();
|
||||
return rez;
|
||||
};
|
||||
|
||||
void releaseVisitedList(VisitedList *vl) {
|
||||
VisitedList *getFreeVisitedList() {
|
||||
VisitedList *rez;
|
||||
{
|
||||
std::unique_lock <std::mutex> lock(poolguard);
|
||||
pool.push_front(vl);
|
||||
};
|
||||
|
||||
~VisitedListPool() {
|
||||
while (pool.size()) {
|
||||
VisitedList *rez = pool.front();
|
||||
if (pool.size() > 0) {
|
||||
rez = pool.front();
|
||||
pool.pop_front();
|
||||
delete rez;
|
||||
} else {
|
||||
rez = new VisitedList(numelements);
|
||||
}
|
||||
};
|
||||
}
|
||||
rez->reset();
|
||||
return rez;
|
||||
};
|
||||
|
||||
void releaseVisitedList(VisitedList *vl) {
|
||||
std::unique_lock <std::mutex> lock(poolguard);
|
||||
pool.push_front(vl);
|
||||
};
|
||||
|
||||
~VisitedListPool() {
|
||||
while (pool.size()) {
|
||||
VisitedList *rez = pool.front();
|
||||
pool.pop_front();
|
||||
delete rez;
|
||||
}
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue