diff --git a/core/src/index/thirdparty/hnswlib/bruteforce.h b/core/src/index/thirdparty/hnswlib/bruteforce.h index 5b1bd655ac..3f20856a54 100644 --- a/core/src/index/thirdparty/hnswlib/bruteforce.h +++ b/core/src/index/thirdparty/hnswlib/bruteforce.h @@ -5,166 +5,160 @@ #include namespace hnswlib { - template - class BruteforceSearch : public AlgorithmInterface { - public: - BruteforceSearch(SpaceInterface *s) { - } - BruteforceSearch(SpaceInterface *s, const std::string &location) { - loadIndex(location, s); - } +template +class BruteforceSearch : public AlgorithmInterface { + public: + BruteforceSearch(SpaceInterface *s) { - BruteforceSearch(SpaceInterface *s, size_t maxElements) { - maxelements_ = maxElements; - data_size_ = s->get_data_size(); - fstdistfunc_ = s->get_dist_func(); - dist_func_param_ = s->get_dist_func_param(); - size_per_element_ = data_size_ + sizeof(labeltype); - data_ = (char *) malloc(maxElements * size_per_element_); - if (data_ == nullptr) - std::runtime_error("Not enough memory: BruteforceSearch failed to allocate data"); - cur_element_count = 0; - } + } + BruteforceSearch(SpaceInterface *s, const std::string &location) { + loadIndex(location, s); + } - ~BruteforceSearch() { - free(data_); - } + BruteforceSearch(SpaceInterface *s, size_t maxElements) { + maxelements_ = maxElements; + data_size_ = s->get_data_size(); + fstdistfunc_ = s->get_dist_func(); + dist_func_param_ = s->get_dist_func_param(); + size_per_element_ = data_size_ + sizeof(labeltype); + data_ = (char *) malloc(maxElements * size_per_element_); + if (data_ == nullptr) + std::runtime_error("Not enough memory: BruteforceSearch failed to allocate data"); + cur_element_count = 0; + } - char *data_; - size_t maxelements_; - size_t cur_element_count; - size_t size_per_element_; + ~BruteforceSearch() { + free(data_); + } - size_t data_size_; - DISTFUNC fstdistfunc_; - void *dist_func_param_; - std::mutex index_lock; + char *data_; + size_t maxelements_; + size_t cur_element_count; + size_t size_per_element_; - std::unordered_map dict_external_to_internal; + size_t data_size_; + DISTFUNC fstdistfunc_; + void *dist_func_param_; + std::mutex index_lock; - void addPoint(const void *datapoint, labeltype label) { + std::unordered_map dict_external_to_internal; - int idx; - { - std::unique_lock lock(index_lock); + void addPoint(const void *datapoint, labeltype label) { + + int idx; + { + std::unique_lock lock(index_lock); - auto search=dict_external_to_internal.find(label); - if (search != dict_external_to_internal.end()) { - idx=search->second; - } - else{ - if (cur_element_count >= maxelements_) { - throw std::runtime_error("The number of elements exceeds the specified limit\n"); - } - idx=cur_element_count; - dict_external_to_internal[label] = idx; - cur_element_count++; + auto search=dict_external_to_internal.find(label); + if (search != dict_external_to_internal.end()) { + idx=search->second; + } + else{ + if (cur_element_count >= maxelements_) { + throw std::runtime_error("The number of elements exceeds the specified limit\n"); } + idx=cur_element_count; + dict_external_to_internal[label] = idx; + cur_element_count++; } - memcpy(data_ + size_per_element_ * idx + data_size_, &label, sizeof(labeltype)); - memcpy(data_ + size_per_element_ * idx, datapoint, data_size_); - - - - - }; - - void removePoint(labeltype cur_external) { - size_t cur_c=dict_external_to_internal[cur_external]; - - dict_external_to_internal.erase(cur_external); - - labeltype label=*((labeltype*)(data_ + size_per_element_ * (cur_element_count-1) + data_size_)); - dict_external_to_internal[label]=cur_c; - memcpy(data_ + size_per_element_ * cur_c, - data_ + size_per_element_ * (cur_element_count-1), - data_size_+sizeof(labeltype)); - cur_element_count--; - } + memcpy(data_ + size_per_element_ * idx + data_size_, &label, sizeof(labeltype)); + memcpy(data_ + size_per_element_ * idx, datapoint, data_size_); - std::priority_queue> - searchKnn(const void *query_data, size_t k) const { - std::priority_queue> topResults; - if (cur_element_count == 0) return topResults; - for (int i = 0; i < k; i++) { - dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); - topResults.push(std::pair(dist, *((labeltype *) (data_ + size_per_element_ * i + - data_size_)))); - } - dist_t lastdist = topResults.top().first; - for (int i = k; i < cur_element_count; i++) { - dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); - if (dist <= lastdist) { - topResults.push(std::pair(dist, *((labeltype *) (data_ + size_per_element_ * i + - data_size_)))); - if (topResults.size() > k) - topResults.pop(); - lastdist = topResults.top().first; - } - } - return topResults; - }; - - template - std::vector> - searchKnn(const void* query_data, size_t k, Comp comp) { - std::vector> result; - if (cur_element_count == 0) return result; - - auto ret = searchKnn(query_data, k); - - while (!ret.empty()) { - result.push_back(ret.top()); - ret.pop(); - } - - std::sort(result.begin(), result.end(), comp); - - return result; - } - - void saveIndex(const std::string &location) { - std::ofstream output(location, std::ios::binary); - std::streampos position; - - writeBinaryPOD(output, maxelements_); - writeBinaryPOD(output, size_per_element_); - writeBinaryPOD(output, cur_element_count); - - output.write(data_, maxelements_ * size_per_element_); - - output.close(); - } - - void loadIndex(const std::string &location, SpaceInterface *s) { - - - std::ifstream input(location, std::ios::binary); - std::streampos position; - - readBinaryPOD(input, maxelements_); - readBinaryPOD(input, size_per_element_); - readBinaryPOD(input, cur_element_count); - - data_size_ = s->get_data_size(); - fstdistfunc_ = s->get_dist_func(); - dist_func_param_ = s->get_dist_func_param(); - size_per_element_ = data_size_ + sizeof(labeltype); - data_ = (char *) malloc(maxelements_ * size_per_element_); - if (data_ == nullptr) - std::runtime_error("Not enough memory: loadIndex failed to allocate data"); - - input.read(data_, maxelements_ * size_per_element_); - - input.close(); - - } }; + + void removePoint(labeltype cur_external) { + size_t cur_c=dict_external_to_internal[cur_external]; + + dict_external_to_internal.erase(cur_external); + + labeltype label=*((labeltype*)(data_ + size_per_element_ * (cur_element_count-1) + data_size_)); + dict_external_to_internal[label]=cur_c; + memcpy(data_ + size_per_element_ * cur_c, + data_ + size_per_element_ * (cur_element_count-1), + data_size_+sizeof(labeltype)); + cur_element_count--; + } + + std::priority_queue> + searchKnn(const void *query_data, size_t k) const { + std::priority_queue> topResults; + if (cur_element_count == 0) return topResults; + for (int i = 0; i < k; i++) { + dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); + topResults.push(std::pair(dist, *((labeltype *) (data_ + size_per_element_ * i + + data_size_)))); + } + dist_t lastdist = topResults.top().first; + for (int i = k; i < cur_element_count; i++) { + dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); + if (dist <= lastdist) { + topResults.push(std::pair(dist, *((labeltype *) (data_ + size_per_element_ * i + + data_size_)))); + if (topResults.size() > k) + topResults.pop(); + lastdist = topResults.top().first; + } + } + return topResults; + }; + + template + std::vector> + searchKnn(const void* query_data, size_t k, Comp comp) { + std::vector> result; + if (cur_element_count == 0) return result; + + auto ret = searchKnn(query_data, k); + + while (!ret.empty()) { + result.push_back(ret.top()); + ret.pop(); + } + + std::sort(result.begin(), result.end(), comp); + + return result; + } + + void saveIndex(const std::string &location) { + std::ofstream output(location, std::ios::binary); + std::streampos position; + + writeBinaryPOD(output, maxelements_); + writeBinaryPOD(output, size_per_element_); + writeBinaryPOD(output, cur_element_count); + + output.write(data_, maxelements_ * size_per_element_); + + output.close(); + } + + void loadIndex(const std::string &location, SpaceInterface *s) { + std::ifstream input(location, std::ios::binary); + std::streampos position; + + readBinaryPOD(input, maxelements_); + readBinaryPOD(input, size_per_element_); + readBinaryPOD(input, cur_element_count); + + data_size_ = s->get_data_size(); + fstdistfunc_ = s->get_dist_func(); + dist_func_param_ = s->get_dist_func_param(); + size_per_element_ = data_size_ + sizeof(labeltype); + data_ = (char *) malloc(maxelements_ * size_per_element_); + if (data_ == nullptr) + std::runtime_error("Not enough memory: loadIndex failed to allocate data"); + + input.read(data_, maxelements_ * size_per_element_); + + input.close(); + } +}; } diff --git a/core/src/index/thirdparty/hnswlib/hnswalg.h b/core/src/index/thirdparty/hnswlib/hnswalg.h index 96c42e6bd4..244fea6cc1 100644 --- a/core/src/index/thirdparty/hnswlib/hnswalg.h +++ b/core/src/index/thirdparty/hnswlib/hnswalg.h @@ -16,8 +16,7 @@ typedef unsigned int linklistsizeint; template class HierarchicalNSW : public AlgorithmInterface { - -public: + public: HierarchicalNSW(SpaceInterface *s) { } diff --git a/core/src/index/thirdparty/hnswlib/hnswlib.h b/core/src/index/thirdparty/hnswlib/hnswlib.h index ca69daedf3..f394247369 100644 --- a/core/src/index/thirdparty/hnswlib/hnswlib.h +++ b/core/src/index/thirdparty/hnswlib/hnswlib.h @@ -89,8 +89,6 @@ namespace hnswlib { virtual ~AlgorithmInterface(){ } }; - - } #include "space_l2.h" diff --git a/core/src/index/thirdparty/hnswlib/space_ip.h b/core/src/index/thirdparty/hnswlib/space_ip.h index e94674730c..4e0770a3c5 100644 --- a/core/src/index/thirdparty/hnswlib/space_ip.h +++ b/core/src/index/thirdparty/hnswlib/space_ip.h @@ -3,246 +3,243 @@ namespace hnswlib { - static float - InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) { - size_t qty = *((size_t *) qty_ptr); - float res = 0; - for (unsigned i = 0; i < qty; i++) { - res += ((float *) pVect1)[i] * ((float *) pVect2)[i]; - } - return (1.0f - res); - +static float +InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) { + size_t qty = *((size_t *) qty_ptr); + float res = 0; + for (unsigned i = 0; i < qty; i++) { + res += ((float *) pVect1)[i] * ((float *) pVect2)[i]; } + return (1.0f - res); + +} #if defined(USE_AVX) // Favor using AVX if available. - static float - InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { - float PORTABLE_ALIGN32 TmpRes[8]; - float *pVect1 = (float *) pVect1v; - float *pVect2 = (float *) pVect2v; - size_t qty = *((size_t *) qty_ptr); +static float +InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float PORTABLE_ALIGN32 TmpRes[8]; + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); - size_t qty16 = qty / 16; - size_t qty4 = qty / 4; + size_t qty16 = qty / 16; + size_t qty4 = qty / 4; - const float *pEnd1 = pVect1 + 16 * qty16; - const float *pEnd2 = pVect1 + 4 * qty4; + const float *pEnd1 = pVect1 + 16 * qty16; + const float *pEnd2 = pVect1 + 4 * qty4; - __m256 sum256 = _mm256_set1_ps(0); + __m256 sum256 = _mm256_set1_ps(0); - while (pVect1 < pEnd1) { - //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); + while (pVect1 < pEnd1) { + //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); - __m256 v1 = _mm256_loadu_ps(pVect1); - pVect1 += 8; - __m256 v2 = _mm256_loadu_ps(pVect2); - pVect2 += 8; - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); + __m256 v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + __m256 v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); - v1 = _mm256_loadu_ps(pVect1); - pVect1 += 8; - v2 = _mm256_loadu_ps(pVect2); - pVect2 += 8; - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); - } + v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); + } - __m128 v1, v2; - __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); + __m128 v1, v2; + __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); - while (pVect1 < pEnd2) { - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - } + while (pVect1 < pEnd2) { + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + } - _mm_store_ps(TmpRes, sum_prod); - float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];; - return 1.0f - sum; + _mm_store_ps(TmpRes, sum_prod); + float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];; + return 1.0f - sum; } #elif defined(USE_SSE) - static float - InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { - float PORTABLE_ALIGN32 TmpRes[8]; - float *pVect1 = (float *) pVect1v; - float *pVect2 = (float *) pVect2v; - size_t qty = *((size_t *) qty_ptr); +static float +InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float PORTABLE_ALIGN32 TmpRes[8]; + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); - size_t qty16 = qty / 16; - size_t qty4 = qty / 4; + size_t qty16 = qty / 16; + size_t qty4 = qty / 4; - const float *pEnd1 = pVect1 + 16 * qty16; - const float *pEnd2 = pVect1 + 4 * qty4; + const float *pEnd1 = pVect1 + 16 * qty16; + const float *pEnd2 = pVect1 + 4 * qty4; - __m128 v1, v2; - __m128 sum_prod = _mm_set1_ps(0); + __m128 v1, v2; + __m128 sum_prod = _mm_set1_ps(0); - while (pVect1 < pEnd1) { - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + while (pVect1 < pEnd1) { + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - } - - while (pVect1 < pEnd2) { - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - } - - _mm_store_ps(TmpRes, sum_prod); - float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; - - return 1.0f - sum; + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); } + while (pVect1 < pEnd2) { + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + } + + _mm_store_ps(TmpRes, sum_prod); + float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; + + return 1.0f - sum; +} + #endif #if defined(USE_AVX) - static float - InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { - float PORTABLE_ALIGN32 TmpRes[8]; - float *pVect1 = (float *) pVect1v; - float *pVect2 = (float *) pVect2v; - size_t qty = *((size_t *) qty_ptr); +static float +InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float PORTABLE_ALIGN32 TmpRes[8]; + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); - size_t qty16 = qty / 16; + size_t qty16 = qty / 16; - const float *pEnd1 = pVect1 + 16 * qty16; + const float *pEnd1 = pVect1 + 16 * qty16; - __m256 sum256 = _mm256_set1_ps(0); + __m256 sum256 = _mm256_set1_ps(0); - while (pVect1 < pEnd1) { - //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); + while (pVect1 < pEnd1) { + //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); - __m256 v1 = _mm256_loadu_ps(pVect1); - pVect1 += 8; - __m256 v2 = _mm256_loadu_ps(pVect2); - pVect2 += 8; - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); + __m256 v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + __m256 v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); - v1 = _mm256_loadu_ps(pVect1); - pVect1 += 8; - v2 = _mm256_loadu_ps(pVect2); - pVect2 += 8; - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); - } - - _mm256_store_ps(TmpRes, sum256); - float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; - - return 1.0f - sum; + v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); } + _mm256_store_ps(TmpRes, sum256); + float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; + + return 1.0f - sum; +} + #elif defined(USE_SSE) - static float - InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { - float PORTABLE_ALIGN32 TmpRes[8]; - float *pVect1 = (float *) pVect1v; - float *pVect2 = (float *) pVect2v; - size_t qty = *((size_t *) qty_ptr); +static float +InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float PORTABLE_ALIGN32 TmpRes[8]; + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); - size_t qty16 = qty / 16; + size_t qty16 = qty / 16; - const float *pEnd1 = pVect1 + 16 * qty16; + const float *pEnd1 = pVect1 + 16 * qty16; - __m128 v1, v2; - __m128 sum_prod = _mm_set1_ps(0); + __m128 v1, v2; + __m128 sum_prod = _mm_set1_ps(0); - while (pVect1 < pEnd1) { - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + while (pVect1 < pEnd1) { + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); - } - _mm_store_ps(TmpRes, sum_prod); - float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); + } + _mm_store_ps(TmpRes, sum_prod); + float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; - return 1.0f - sum; + return 1.0f - sum; +} + +#endif + +class InnerProductSpace : public SpaceInterface { + DISTFUNC fstdistfunc_; + size_t data_size_; + size_t dim_; + public: + InnerProductSpace(size_t dim) { + fstdistfunc_ = InnerProduct; +#if defined(USE_AVX) || defined(USE_SSE) + if (dim % 4 == 0) + fstdistfunc_ = InnerProductSIMD4Ext; + if (dim % 16 == 0) + fstdistfunc_ = InnerProductSIMD16Ext; +#endif + dim_ = dim; + data_size_ = dim * sizeof(float); } -#endif + size_t get_data_size() { + return data_size_; + } - class InnerProductSpace : public SpaceInterface { + DISTFUNC get_dist_func() { + return fstdistfunc_; + } - DISTFUNC fstdistfunc_; - size_t data_size_; - size_t dim_; - public: - InnerProductSpace(size_t dim) { - fstdistfunc_ = InnerProduct; - #if defined(USE_AVX) || defined(USE_SSE) - if (dim % 4 == 0) - fstdistfunc_ = InnerProductSIMD4Ext; - if (dim % 16 == 0) - fstdistfunc_ = InnerProductSIMD16Ext; -#endif - dim_ = dim; - data_size_ = dim * sizeof(float); - } - - size_t get_data_size() { - return data_size_; - } - - DISTFUNC get_dist_func() { - return fstdistfunc_; - } - - void *get_dist_func_param() { - return &dim_; - } + void *get_dist_func_param() { + return &dim_; + } ~InnerProductSpace() {} - }; - - +}; } diff --git a/core/src/index/thirdparty/hnswlib/space_l2.h b/core/src/index/thirdparty/hnswlib/space_l2.h index 4d3ac69ac4..7e276c5518 100644 --- a/core/src/index/thirdparty/hnswlib/space_l2.h +++ b/core/src/index/thirdparty/hnswlib/space_l2.h @@ -3,242 +3,234 @@ namespace hnswlib { - static float - L2Sqr(const void *pVect1, const void *pVect2, const void *qty_ptr) { - //return *((float *)pVect2); - size_t qty = *((size_t *) qty_ptr); - float res = 0; - for (unsigned i = 0; i < qty; i++) { - float t = ((float *) pVect1)[i] - ((float *) pVect2)[i]; - res += t * t; - } - return (res); - +static float +L2Sqr(const void *pVect1, const void *pVect2, const void *qty_ptr) { + //return *((float *)pVect2); + size_t qty = *((size_t *) qty_ptr); + float res = 0; + for (unsigned i = 0; i < qty; i++) { + float t = ((float *) pVect1)[i] - ((float *) pVect2)[i]; + res += t * t; } + return (res); +} #if defined(USE_AVX) - // Favor using AVX if available. - static float - L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { - float *pVect1 = (float *) pVect1v; - float *pVect2 = (float *) pVect2v; - size_t qty = *((size_t *) qty_ptr); - float PORTABLE_ALIGN32 TmpRes[8]; - size_t qty16 = qty >> 4; +// Favor using AVX if available. +static float +L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); + float PORTABLE_ALIGN32 TmpRes[8]; + size_t qty16 = qty >> 4; - const float *pEnd1 = pVect1 + (qty16 << 4); + const float *pEnd1 = pVect1 + (qty16 << 4); - __m256 diff, v1, v2; - __m256 sum = _mm256_set1_ps(0); + __m256 diff, v1, v2; + __m256 sum = _mm256_set1_ps(0); - while (pVect1 < pEnd1) { - v1 = _mm256_loadu_ps(pVect1); - pVect1 += 8; - v2 = _mm256_loadu_ps(pVect2); - pVect2 += 8; - diff = _mm256_sub_ps(v1, v2); - sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); + while (pVect1 < pEnd1) { + v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + diff = _mm256_sub_ps(v1, v2); + sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); - v1 = _mm256_loadu_ps(pVect1); - pVect1 += 8; - v2 = _mm256_loadu_ps(pVect2); - pVect2 += 8; - diff = _mm256_sub_ps(v1, v2); - sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); - } + v1 = _mm256_loadu_ps(pVect1); + pVect1 += 8; + v2 = _mm256_loadu_ps(pVect2); + pVect2 += 8; + diff = _mm256_sub_ps(v1, v2); + sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); + } - _mm256_store_ps(TmpRes, sum); - float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; + _mm256_store_ps(TmpRes, sum); + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; - return (res); + return (res); } #elif defined(USE_SSE) - static float - L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { - float *pVect1 = (float *) pVect1v; - float *pVect2 = (float *) pVect2v; - size_t qty = *((size_t *) qty_ptr); - float PORTABLE_ALIGN32 TmpRes[8]; - // size_t qty4 = qty >> 2; - size_t qty16 = qty >> 4; +static float +L2SqrSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); + float PORTABLE_ALIGN32 TmpRes[8]; + // size_t qty4 = qty >> 2; + size_t qty16 = qty >> 4; - const float *pEnd1 = pVect1 + (qty16 << 4); - // const float* pEnd2 = pVect1 + (qty4 << 2); - // const float* pEnd3 = pVect1 + qty; + const float *pEnd1 = pVect1 + (qty16 << 4); + // const float* pEnd2 = pVect1 + (qty4 << 2); + // const float* pEnd3 = pVect1 + qty; - __m128 diff, v1, v2; - __m128 sum = _mm_set1_ps(0); + __m128 diff, v1, v2; + __m128 sum = _mm_set1_ps(0); - while (pVect1 < pEnd1) { - //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + while (pVect1 < pEnd1) { + //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - } - _mm_store_ps(TmpRes, sum); - float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; - - return (res); + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); } + _mm_store_ps(TmpRes, sum); + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; + + return (res); +} #endif #ifdef USE_SSE - static float - L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { - float PORTABLE_ALIGN32 TmpRes[8]; - float *pVect1 = (float *) pVect1v; - float *pVect2 = (float *) pVect2v; - size_t qty = *((size_t *) qty_ptr); +static float +L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + float PORTABLE_ALIGN32 TmpRes[8]; + float *pVect1 = (float *) pVect1v; + float *pVect2 = (float *) pVect2v; + size_t qty = *((size_t *) qty_ptr); - // size_t qty4 = qty >> 2; - size_t qty16 = qty >> 2; + // size_t qty4 = qty >> 2; + size_t qty16 = qty >> 2; - const float *pEnd1 = pVect1 + (qty16 << 2); + const float *pEnd1 = pVect1 + (qty16 << 2); - __m128 diff, v1, v2; - __m128 sum = _mm_set1_ps(0); + __m128 diff, v1, v2; + __m128 sum = _mm_set1_ps(0); - while (pVect1 < pEnd1) { - v1 = _mm_loadu_ps(pVect1); - pVect1 += 4; - v2 = _mm_loadu_ps(pVect2); - pVect2 += 4; - diff = _mm_sub_ps(v1, v2); - sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); - } - _mm_store_ps(TmpRes, sum); - float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; - - return (res); + while (pVect1 < pEnd1) { + v1 = _mm_loadu_ps(pVect1); + pVect1 += 4; + v2 = _mm_loadu_ps(pVect2); + pVect2 += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); } + _mm_store_ps(TmpRes, sum); + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; + + return (res); +} #endif - class L2Space : public SpaceInterface { - - DISTFUNC fstdistfunc_; - size_t data_size_; - size_t dim_; - public: - L2Space(size_t dim) { - fstdistfunc_ = L2Sqr; - #if defined(USE_SSE) || defined(USE_AVX) - if (dim % 4 == 0) - fstdistfunc_ = L2SqrSIMD4Ext; - if (dim % 16 == 0) - fstdistfunc_ = L2SqrSIMD16Ext; - /*else{ - throw runtime_error("Data type not supported!"); - }*/ - #endif - dim_ = dim; - data_size_ = dim * sizeof(float); - } - - size_t get_data_size() { - return data_size_; - } - - DISTFUNC get_dist_func() { - return fstdistfunc_; - } - - void *get_dist_func_param() { - return &dim_; - } - - ~L2Space() {} - }; - - static int - L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) { - - size_t qty = *((size_t *) qty_ptr); - int res = 0; - unsigned char *a = (unsigned char *) pVect1; - unsigned char *b = (unsigned char *) pVect2; - /*for (int i = 0; i < qty; i++) { - int t = int((a)[i]) - int((b)[i]); - res += t*t; +class L2Space : public SpaceInterface { + DISTFUNC fstdistfunc_; + size_t data_size_; + size_t dim_; + public: + L2Space(size_t dim) { + fstdistfunc_ = L2Sqr; +#if defined(USE_SSE) || defined(USE_AVX) + if (dim % 4 == 0) + fstdistfunc_ = L2SqrSIMD4Ext; + if (dim % 16 == 0) + fstdistfunc_ = L2SqrSIMD16Ext; + /*else{ + throw runtime_error("Data type not supported!"); }*/ - - qty = qty >> 2; - for (size_t i = 0; i < qty; i++) { - - res += ((*a) - (*b)) * ((*a) - (*b)); - a++; - b++; - res += ((*a) - (*b)) * ((*a) - (*b)); - a++; - b++; - res += ((*a) - (*b)) * ((*a) - (*b)); - a++; - b++; - res += ((*a) - (*b)) * ((*a) - (*b)); - a++; - b++; - - - } - - return (res); - +#endif + dim_ = dim; + data_size_ = dim * sizeof(float); } - class L2SpaceI : public SpaceInterface { + size_t get_data_size() { + return data_size_; + } - DISTFUNC fstdistfunc_; - size_t data_size_; - size_t dim_; - public: - L2SpaceI(size_t dim) { - fstdistfunc_ = L2SqrI; - dim_ = dim; - data_size_ = dim * sizeof(unsigned char); - } + DISTFUNC get_dist_func() { + return fstdistfunc_; + } - size_t get_data_size() { - return data_size_; - } + void *get_dist_func_param() { + return &dim_; + } - DISTFUNC get_dist_func() { - return fstdistfunc_; - } + ~L2Space() {} +}; - void *get_dist_func_param() { - return &dim_; - } +static int +L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) { + size_t qty = *((size_t *) qty_ptr); + int res = 0; + unsigned char *a = (unsigned char *) pVect1; + unsigned char *b = (unsigned char *) pVect2; + /*for (int i = 0; i < qty; i++) { + int t = int((a)[i]) - int((b)[i]); + res += t*t; + }*/ - ~L2SpaceI() {} - }; + qty = qty >> 2; + for (size_t i = 0; i < qty; i++) { + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; + res += ((*a) - (*b)) * ((*a) - (*b)); + a++; + b++; + } + + return (res); +} + +class L2SpaceI : public SpaceInterface { + DISTFUNC fstdistfunc_; + size_t data_size_; + size_t dim_; + public: + L2SpaceI(size_t dim) { + fstdistfunc_ = L2SqrI; + dim_ = dim; + data_size_ = dim * sizeof(unsigned char); + } + + size_t get_data_size() { + return data_size_; + } + + DISTFUNC get_dist_func() { + return fstdistfunc_; + } + + void *get_dist_func_param() { + return &dim_; + } + + ~L2SpaceI() {} +}; } diff --git a/core/src/index/thirdparty/hnswlib/visited_list_pool.h b/core/src/index/thirdparty/hnswlib/visited_list_pool.h index 6b0f445878..457f73433d 100644 --- a/core/src/index/thirdparty/hnswlib/visited_list_pool.h +++ b/core/src/index/thirdparty/hnswlib/visited_list_pool.h @@ -4,75 +4,76 @@ #include namespace hnswlib { - typedef unsigned short int vl_type; +typedef unsigned short int vl_type; - class VisitedList { - public: - vl_type curV; - vl_type *mass; - unsigned int numelements; +class VisitedList { + public: + vl_type curV; + vl_type *mass; + unsigned int numelements; - VisitedList(int numelements1) { - curV = -1; - numelements = numelements1; - mass = new vl_type[numelements]; - } + VisitedList(int numelements1) { + curV = -1; + numelements = numelements1; + mass = new vl_type[numelements]; + } - void reset() { + void reset() { + curV++; + if (curV == 0) { + memset(mass, 0, sizeof(vl_type) * numelements); curV++; - if (curV == 0) { - memset(mass, 0, sizeof(vl_type) * numelements); - curV++; - } - }; - - ~VisitedList() { delete[] mass; } + } }; + + ~VisitedList() { delete[] mass; } +}; + /////////////////////////////////////////////////////////// // // Class for multi-threaded pool-management of VisitedLists // ///////////////////////////////////////////////////////// - class VisitedListPool { - std::deque pool; - std::mutex poolguard; - int numelements; +class VisitedListPool { + std::deque pool; + std::mutex poolguard; + int numelements; - public: - VisitedListPool(int initmaxpools, int numelements1) { - numelements = numelements1; - for (int i = 0; i < initmaxpools; i++) - pool.push_front(new VisitedList(numelements)); - } + public: + VisitedListPool(int initmaxpools, int numelements1) { + numelements = numelements1; + for (int i = 0; i < initmaxpools; i++) + pool.push_front(new VisitedList(numelements)); + } - VisitedList *getFreeVisitedList() { - VisitedList *rez; - { - std::unique_lock lock(poolguard); - if (pool.size() > 0) { - rez = pool.front(); - pool.pop_front(); - } else { - rez = new VisitedList(numelements); - } - } - rez->reset(); - return rez; - }; - - void releaseVisitedList(VisitedList *vl) { + VisitedList *getFreeVisitedList() { + VisitedList *rez; + { std::unique_lock lock(poolguard); - pool.push_front(vl); - }; - - ~VisitedListPool() { - while (pool.size()) { - VisitedList *rez = pool.front(); + if (pool.size() > 0) { + rez = pool.front(); pool.pop_front(); - delete rez; + } else { + rez = new VisitedList(numelements); } - }; + } + rez->reset(); + return rez; }; + + void releaseVisitedList(VisitedList *vl) { + std::unique_lock lock(poolguard); + pool.push_front(vl); + }; + + ~VisitedListPool() { + while (pool.size()) { + VisitedList *rez = pool.front(); + pool.pop_front(); + delete rez; + } + }; +}; }