mirror of https://github.com/milvus-io/milvus.git
fix performance (#2499)
* optimize sq_get_distance_computer Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * add sq_select_inverted_list_scanner_ref Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * add sq_select_inverted_list_scanner_avx Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * add sq_select_inverted_list_scanner_avx512 Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * optimize Codec Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * optimize ScalarQuantizerCodec_avx.h Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * code format Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * optimize ScalarQuantizerCodec_avx512.h Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * update changelog Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * clean code Signed-off-by: yudong.cai <yudong.cai@zilliz.com>pull/2501/head
parent
9f7995cbba
commit
8254e9ed67
|
@ -14,6 +14,7 @@ Please mark all change in change log and use the issue from GitHub
|
|||
|
||||
## Improvement
|
||||
- \#2381 Upgrade FAISS to 1.6.3
|
||||
- \#2429 Fix Milvus 0.9.1 performance degrade issue
|
||||
- \#2441 Improve Knowhere code coverage
|
||||
- \#2466 optimize k-selection implementation of faiss gpu version
|
||||
- \#2495 Add creating lock file failure reason.
|
||||
|
|
|
@ -26,10 +26,9 @@ fvec_func_ptr fvec_L2sqr = fvec_L2sqr_avx;
|
|||
fvec_func_ptr fvec_L1 = fvec_L1_avx;
|
||||
fvec_func_ptr fvec_Linf = fvec_Linf_avx;
|
||||
|
||||
sq_get_func_ptr sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx;
|
||||
sq_get_func_ptr sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx;
|
||||
sq_sel_func_ptr sq_sel_quantizer = sq_select_quantizer_avx;
|
||||
|
||||
sq_get_distance_computer_func_ptr sq_get_distance_computer = sq_get_distance_computer_avx;
|
||||
sq_sel_quantizer_func_ptr sq_sel_quantizer = sq_select_quantizer_avx;
|
||||
sq_sel_inv_list_scanner_func_ptr sq_sel_inv_list_scanner = sq_select_inverted_list_scanner_avx;
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
|
@ -68,9 +67,9 @@ bool hook_init(std::string& cpu_flag) {
|
|||
fvec_Linf = fvec_Linf_avx512;
|
||||
|
||||
/* for IVFSQ */
|
||||
sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx512;
|
||||
sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx512;
|
||||
sq_get_distance_computer = sq_get_distance_computer_avx512;
|
||||
sq_sel_quantizer = sq_select_quantizer_avx512;
|
||||
sq_sel_inv_list_scanner = sq_select_inverted_list_scanner_avx512;
|
||||
|
||||
cpu_flag = "AVX512";
|
||||
} else if (support_avx2()) {
|
||||
|
@ -81,9 +80,9 @@ bool hook_init(std::string& cpu_flag) {
|
|||
fvec_Linf = fvec_Linf_avx;
|
||||
|
||||
/* for IVFSQ */
|
||||
sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx;
|
||||
sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx;
|
||||
sq_get_distance_computer = sq_get_distance_computer_avx;
|
||||
sq_sel_quantizer = sq_select_quantizer_avx;
|
||||
sq_sel_inv_list_scanner = sq_select_inverted_list_scanner_avx;
|
||||
|
||||
cpu_flag = "AVX2";
|
||||
} else if (support_sse()) {
|
||||
|
@ -94,9 +93,9 @@ bool hook_init(std::string& cpu_flag) {
|
|||
fvec_Linf = fvec_Linf_sse;
|
||||
|
||||
/* for IVFSQ */
|
||||
sq_get_distance_computer_L2 = sq_get_distance_computer_L2_sse;
|
||||
sq_get_distance_computer_IP = sq_get_distance_computer_IP_sse;
|
||||
sq_sel_quantizer = sq_select_quantizer_sse;
|
||||
sq_get_distance_computer = sq_get_distance_computer_ref;
|
||||
sq_sel_quantizer = sq_select_quantizer_ref;
|
||||
sq_sel_inv_list_scanner = sq_select_inverted_list_scanner_ref;
|
||||
|
||||
cpu_flag = "SSE42";
|
||||
} else {
|
||||
|
|
|
@ -6,15 +6,17 @@
|
|||
#include <vector>
|
||||
#include <stddef.h>
|
||||
#include <string>
|
||||
#include <faiss/impl/ScalarQuantizer.h>
|
||||
#include <faiss/impl/ScalarQuantizerOp.h>
|
||||
#include <faiss/MetricType.h>
|
||||
|
||||
namespace faiss {
|
||||
|
||||
typedef float (*fvec_func_ptr)(const float*, const float*, size_t);
|
||||
|
||||
typedef SQDistanceComputer* (*sq_get_func_ptr)(QuantizerType, size_t, const std::vector<float>&);
|
||||
typedef Quantizer* (*sq_sel_func_ptr)(QuantizerType, size_t, const std::vector<float>&);
|
||||
|
||||
typedef SQDistanceComputer* (*sq_get_distance_computer_func_ptr)(MetricType, QuantizerType, size_t, const std::vector<float>&);
|
||||
typedef Quantizer* (*sq_sel_quantizer_func_ptr)(QuantizerType, size_t, const std::vector<float>&);
|
||||
typedef InvertedListScanner* (*sq_sel_inv_list_scanner_func_ptr)(MetricType, const ScalarQuantizer*, const Index*, size_t, bool, bool);
|
||||
|
||||
extern bool faiss_use_avx512;
|
||||
extern bool faiss_use_avx2;
|
||||
|
@ -25,9 +27,9 @@ extern fvec_func_ptr fvec_L2sqr;
|
|||
extern fvec_func_ptr fvec_L1;
|
||||
extern fvec_func_ptr fvec_Linf;
|
||||
|
||||
extern sq_get_func_ptr sq_get_distance_computer_L2;
|
||||
extern sq_get_func_ptr sq_get_distance_computer_IP;
|
||||
extern sq_sel_func_ptr sq_sel_quantizer;
|
||||
extern sq_get_distance_computer_func_ptr sq_get_distance_computer;
|
||||
extern sq_sel_quantizer_func_ptr sq_sel_quantizer;
|
||||
extern sq_sel_inv_list_scanner_func_ptr sq_sel_inv_list_scanner;
|
||||
|
||||
extern bool support_avx512();
|
||||
extern bool support_avx2();
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#include <faiss/FaissHook.h>
|
||||
#include <faiss/utils/utils.h>
|
||||
#include <faiss/impl/FaissAssert.h>
|
||||
#include <faiss/impl/ScalarQuantizerCodec.h>
|
||||
|
||||
namespace faiss {
|
||||
|
||||
|
@ -160,11 +159,7 @@ ScalarQuantizer::get_distance_computer (MetricType metric) const
|
|||
{
|
||||
FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
|
||||
/* use hook to decide use AVX512 or not */
|
||||
if (metric == METRIC_L2) {
|
||||
return sq_get_distance_computer_L2(qtype, d, trained);
|
||||
} else {
|
||||
return sq_get_distance_computer_IP(qtype, d, trained);
|
||||
}
|
||||
return sq_get_distance_computer(metric, qtype, d, trained);
|
||||
}
|
||||
|
||||
|
||||
|
@ -175,276 +170,13 @@ ScalarQuantizer::get_distance_computer (MetricType metric) const
|
|||
* IndexScalarQuantizer as well.
|
||||
********************************************************************/
|
||||
|
||||
namespace {
|
||||
|
||||
template<class DCClass>
|
||||
struct IVFSQScannerIP: InvertedListScanner {
|
||||
DCClass dc;
|
||||
bool store_pairs, by_residual;
|
||||
|
||||
size_t code_size;
|
||||
|
||||
idx_t list_no; /// current list (set to 0 for Flat index
|
||||
float accu0; /// added to all distances
|
||||
|
||||
IVFSQScannerIP(int d, const std::vector<float> & trained,
|
||||
size_t code_size, bool store_pairs,
|
||||
bool by_residual):
|
||||
dc(d, trained), store_pairs(store_pairs),
|
||||
by_residual(by_residual),
|
||||
code_size(code_size), list_no(0), accu0(0)
|
||||
{}
|
||||
|
||||
|
||||
void set_query (const float *query) override {
|
||||
dc.set_query (query);
|
||||
}
|
||||
|
||||
void set_list (idx_t list_no, float coarse_dis) override {
|
||||
this->list_no = list_no;
|
||||
accu0 = by_residual ? coarse_dis : 0;
|
||||
}
|
||||
|
||||
float distance_to_code (const uint8_t *code) const final {
|
||||
return accu0 + dc.query_to_code (code);
|
||||
}
|
||||
|
||||
size_t scan_codes (size_t list_size,
|
||||
const uint8_t *codes,
|
||||
const idx_t *ids,
|
||||
float *simi, idx_t *idxi,
|
||||
size_t k,
|
||||
ConcurrentBitsetPtr bitset) const override
|
||||
{
|
||||
size_t nup = 0;
|
||||
|
||||
for (size_t j = 0; j < list_size; j++) {
|
||||
if(!bitset || !bitset->test(ids[j])){
|
||||
float accu = accu0 + dc.query_to_code (codes);
|
||||
|
||||
if (accu > simi [0]) {
|
||||
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
||||
minheap_swap_top (k, simi, idxi, accu, id);
|
||||
nup++;
|
||||
}
|
||||
}
|
||||
codes += code_size;
|
||||
}
|
||||
return nup;
|
||||
}
|
||||
|
||||
void scan_codes_range (size_t list_size,
|
||||
const uint8_t *codes,
|
||||
const idx_t *ids,
|
||||
float radius,
|
||||
RangeQueryResult & res,
|
||||
ConcurrentBitsetPtr bitset = nullptr) const override
|
||||
{
|
||||
for (size_t j = 0; j < list_size; j++) {
|
||||
float accu = accu0 + dc.query_to_code (codes);
|
||||
if (accu > radius) {
|
||||
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
||||
res.add (accu, id);
|
||||
}
|
||||
codes += code_size;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class DCClass>
|
||||
struct IVFSQScannerL2: InvertedListScanner {
|
||||
DCClass dc;
|
||||
|
||||
bool store_pairs, by_residual;
|
||||
size_t code_size;
|
||||
const Index *quantizer;
|
||||
idx_t list_no; /// current inverted list
|
||||
const float *x; /// current query
|
||||
|
||||
std::vector<float> tmp;
|
||||
|
||||
IVFSQScannerL2(int d, const std::vector<float> & trained,
|
||||
size_t code_size, const Index *quantizer,
|
||||
bool store_pairs, bool by_residual):
|
||||
dc(d, trained), store_pairs(store_pairs), by_residual(by_residual),
|
||||
code_size(code_size), quantizer(quantizer),
|
||||
list_no (0), x (nullptr), tmp (d)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void set_query (const float *query) override {
|
||||
x = query;
|
||||
if (!quantizer) {
|
||||
dc.set_query (query);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void set_list (idx_t list_no, float /*coarse_dis*/) override {
|
||||
if (by_residual) {
|
||||
this->list_no = list_no;
|
||||
// shift of x_in wrt centroid
|
||||
quantizer->Index::compute_residual (x, tmp.data(), list_no);
|
||||
dc.set_query (tmp.data ());
|
||||
} else {
|
||||
dc.set_query (x);
|
||||
}
|
||||
}
|
||||
|
||||
float distance_to_code (const uint8_t *code) const final {
|
||||
return dc.query_to_code (code);
|
||||
}
|
||||
|
||||
size_t scan_codes (size_t list_size,
|
||||
const uint8_t *codes,
|
||||
const idx_t *ids,
|
||||
float *simi, idx_t *idxi,
|
||||
size_t k,
|
||||
ConcurrentBitsetPtr bitset) const override
|
||||
{
|
||||
size_t nup = 0;
|
||||
for (size_t j = 0; j < list_size; j++) {
|
||||
if(!bitset || !bitset->test(ids[j])){
|
||||
float dis = dc.query_to_code (codes);
|
||||
|
||||
if (dis < simi [0]) {
|
||||
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
||||
maxheap_swap_top (k, simi, idxi, dis, id);
|
||||
nup++;
|
||||
}
|
||||
}
|
||||
codes += code_size;
|
||||
}
|
||||
return nup;
|
||||
}
|
||||
|
||||
void scan_codes_range (size_t list_size,
|
||||
const uint8_t *codes,
|
||||
const idx_t *ids,
|
||||
float radius,
|
||||
RangeQueryResult & res,
|
||||
ConcurrentBitsetPtr bitset = nullptr) const override
|
||||
{
|
||||
for (size_t j = 0; j < list_size; j++) {
|
||||
float dis = dc.query_to_code (codes);
|
||||
if (dis < radius) {
|
||||
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
||||
res.add (dis, id);
|
||||
}
|
||||
codes += code_size;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class DCClass>
|
||||
InvertedListScanner* sel2_InvertedListScanner
|
||||
(const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool r)
|
||||
{
|
||||
if (DCClass::Sim::metric_type == METRIC_L2) {
|
||||
return new IVFSQScannerL2<DCClass>(sq->d, sq->trained, sq->code_size,
|
||||
quantizer, store_pairs, r);
|
||||
} else if (DCClass::Sim::metric_type == METRIC_INNER_PRODUCT) {
|
||||
return new IVFSQScannerIP<DCClass>(sq->d, sq->trained, sq->code_size,
|
||||
store_pairs, r);
|
||||
} else {
|
||||
FAISS_THROW_MSG("unsupported metric type");
|
||||
}
|
||||
}
|
||||
|
||||
template<class Similarity, class Codec, bool uniform>
|
||||
InvertedListScanner* sel12_InvertedListScanner
|
||||
(const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool r)
|
||||
{
|
||||
constexpr int SIMDWIDTH = Similarity::simdwidth;
|
||||
using QuantizerClass = QuantizerTemplate<Codec, uniform, SIMDWIDTH>;
|
||||
using DCClass = DCTemplate<QuantizerClass, Similarity, SIMDWIDTH>;
|
||||
return sel2_InvertedListScanner<DCClass> (sq, quantizer, store_pairs, r);
|
||||
}
|
||||
|
||||
|
||||
template<class Similarity>
|
||||
InvertedListScanner* sel1_InvertedListScanner
|
||||
(const ScalarQuantizer *sq, const Index *quantizer,
|
||||
bool store_pairs, bool r)
|
||||
{
|
||||
constexpr int SIMDWIDTH = Similarity::simdwidth;
|
||||
switch(sq->qtype) {
|
||||
case QuantizerType::QT_8bit_uniform:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec8bit, true>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_4bit_uniform:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec4bit, true>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_8bit:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec8bit, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_4bit:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec4bit, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_6bit:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec6bit, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_fp16:
|
||||
return sel2_InvertedListScanner
|
||||
<DCTemplate<QuantizerFP16<SIMDWIDTH>, Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_8bit_direct:
|
||||
if (sq->d % 16 == 0) {
|
||||
return sel2_InvertedListScanner
|
||||
<DistanceComputerByte<Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
} else {
|
||||
return sel2_InvertedListScanner
|
||||
<DCTemplate<Quantizer8bitDirect<SIMDWIDTH>,
|
||||
Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
}
|
||||
}
|
||||
|
||||
FAISS_THROW_MSG ("unknown qtype");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template<int SIMDWIDTH>
|
||||
InvertedListScanner* sel0_InvertedListScanner
|
||||
(MetricType mt, const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool by_residual)
|
||||
{
|
||||
if (mt == METRIC_L2) {
|
||||
return sel1_InvertedListScanner<SimilarityL2<SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, by_residual);
|
||||
} else if (mt == METRIC_INNER_PRODUCT) {
|
||||
return sel1_InvertedListScanner<SimilarityIP<SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, by_residual);
|
||||
} else {
|
||||
FAISS_THROW_MSG("unsupported metric type");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
InvertedListScanner* ScalarQuantizer::select_InvertedListScanner
|
||||
(MetricType mt, const Index *quantizer,
|
||||
bool store_pairs, bool by_residual) const
|
||||
{
|
||||
if (d % 16 == 0 && support_avx512()) {
|
||||
return sel0_InvertedListScanner<16>
|
||||
(mt, this, quantizer, store_pairs, by_residual);
|
||||
} if (d % 8 == 0) {
|
||||
return sel0_InvertedListScanner<8>
|
||||
(mt, this, quantizer, store_pairs, by_residual);
|
||||
} else {
|
||||
return sel0_InvertedListScanner<1>
|
||||
(mt, this, quantizer, store_pairs, by_residual);
|
||||
}
|
||||
/* use hook to decide use AVX512 or not */
|
||||
return sq_sel_inv_list_scanner(mt, this, quantizer, d, store_pairs, by_residual);
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace faiss
|
||||
|
|
|
@ -77,6 +77,166 @@ struct ScalarQuantizer {
|
|||
|
||||
};
|
||||
|
||||
template<class DCClass>
|
||||
struct IVFSQScannerIP: InvertedListScanner {
|
||||
DCClass dc;
|
||||
bool store_pairs, by_residual;
|
||||
|
||||
size_t code_size;
|
||||
|
||||
idx_t list_no; /// current list (set to 0 for Flat index
|
||||
float accu0; /// added to all distances
|
||||
|
||||
IVFSQScannerIP(int d, const std::vector<float> & trained,
|
||||
size_t code_size, bool store_pairs,
|
||||
bool by_residual):
|
||||
dc(d, trained), store_pairs(store_pairs),
|
||||
by_residual(by_residual),
|
||||
code_size(code_size), list_no(0), accu0(0)
|
||||
{}
|
||||
|
||||
|
||||
void set_query (const float *query) override {
|
||||
dc.set_query (query);
|
||||
}
|
||||
|
||||
void set_list (idx_t list_no, float coarse_dis) override {
|
||||
this->list_no = list_no;
|
||||
accu0 = by_residual ? coarse_dis : 0;
|
||||
}
|
||||
|
||||
float distance_to_code (const uint8_t *code) const final {
|
||||
return accu0 + dc.query_to_code (code);
|
||||
}
|
||||
|
||||
size_t scan_codes (size_t list_size,
|
||||
const uint8_t *codes,
|
||||
const idx_t *ids,
|
||||
float *simi, idx_t *idxi,
|
||||
size_t k,
|
||||
ConcurrentBitsetPtr bitset) const override
|
||||
{
|
||||
size_t nup = 0;
|
||||
|
||||
for (size_t j = 0; j < list_size; j++) {
|
||||
if(!bitset || !bitset->test(ids[j])){
|
||||
float accu = accu0 + dc.query_to_code (codes);
|
||||
|
||||
if (accu > simi [0]) {
|
||||
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
||||
minheap_swap_top (k, simi, idxi, accu, id);
|
||||
nup++;
|
||||
}
|
||||
}
|
||||
codes += code_size;
|
||||
}
|
||||
return nup;
|
||||
}
|
||||
|
||||
void scan_codes_range (size_t list_size,
|
||||
const uint8_t *codes,
|
||||
const idx_t *ids,
|
||||
float radius,
|
||||
RangeQueryResult & res,
|
||||
ConcurrentBitsetPtr bitset = nullptr) const override
|
||||
{
|
||||
for (size_t j = 0; j < list_size; j++) {
|
||||
float accu = accu0 + dc.query_to_code (codes);
|
||||
if (accu > radius) {
|
||||
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
||||
res.add (accu, id);
|
||||
}
|
||||
codes += code_size;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class DCClass>
|
||||
struct IVFSQScannerL2: InvertedListScanner {
|
||||
DCClass dc;
|
||||
|
||||
bool store_pairs, by_residual;
|
||||
size_t code_size;
|
||||
const Index *quantizer;
|
||||
idx_t list_no; /// current inverted list
|
||||
const float *x; /// current query
|
||||
|
||||
std::vector<float> tmp;
|
||||
|
||||
IVFSQScannerL2(int d, const std::vector<float> & trained,
|
||||
size_t code_size, const Index *quantizer,
|
||||
bool store_pairs, bool by_residual):
|
||||
dc(d, trained), store_pairs(store_pairs), by_residual(by_residual),
|
||||
code_size(code_size), quantizer(quantizer),
|
||||
list_no (0), x (nullptr), tmp (d)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void set_query (const float *query) override {
|
||||
x = query;
|
||||
if (!quantizer) {
|
||||
dc.set_query (query);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void set_list (idx_t list_no, float /*coarse_dis*/) override {
|
||||
if (by_residual) {
|
||||
this->list_no = list_no;
|
||||
// shift of x_in wrt centroid
|
||||
quantizer->Index::compute_residual (x, tmp.data(), list_no);
|
||||
dc.set_query (tmp.data ());
|
||||
} else {
|
||||
dc.set_query (x);
|
||||
}
|
||||
}
|
||||
|
||||
float distance_to_code (const uint8_t *code) const final {
|
||||
return dc.query_to_code (code);
|
||||
}
|
||||
|
||||
size_t scan_codes (size_t list_size,
|
||||
const uint8_t *codes,
|
||||
const idx_t *ids,
|
||||
float *simi, idx_t *idxi,
|
||||
size_t k,
|
||||
ConcurrentBitsetPtr bitset) const override
|
||||
{
|
||||
size_t nup = 0;
|
||||
for (size_t j = 0; j < list_size; j++) {
|
||||
if(!bitset || !bitset->test(ids[j])){
|
||||
float dis = dc.query_to_code (codes);
|
||||
|
||||
if (dis < simi [0]) {
|
||||
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
||||
maxheap_swap_top (k, simi, idxi, dis, id);
|
||||
nup++;
|
||||
}
|
||||
}
|
||||
codes += code_size;
|
||||
}
|
||||
return nup;
|
||||
}
|
||||
|
||||
void scan_codes_range (size_t list_size,
|
||||
const uint8_t *codes,
|
||||
const idx_t *ids,
|
||||
float radius,
|
||||
RangeQueryResult & res,
|
||||
ConcurrentBitsetPtr bitset = nullptr) const override
|
||||
{
|
||||
for (size_t j = 0; j < list_size; j++) {
|
||||
float dis = dc.query_to_code (codes);
|
||||
if (dis < radius) {
|
||||
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
||||
res.add (dis, id);
|
||||
}
|
||||
codes += code_size;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} // namespace faiss
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include <faiss/utils/utils.h>
|
||||
#include <faiss/impl/FaissAssert.h>
|
||||
#include <faiss/impl/ScalarQuantizer.h>
|
||||
#include <faiss/impl/ScalarQuantizerOp.h>
|
||||
|
||||
namespace faiss {
|
||||
|
@ -311,22 +312,6 @@ struct SimilarityL2<1> {
|
|||
}
|
||||
};
|
||||
|
||||
/* as same as SimilarityL2<1>, let build pass */
|
||||
template<>
|
||||
struct SimilarityL2<8> : SimilarityL2<1> {
|
||||
static constexpr int simdwidth = 1;
|
||||
static constexpr MetricType metric_type = METRIC_L2;
|
||||
explicit SimilarityL2 (const float * y) : SimilarityL2<1>(y) {}
|
||||
};
|
||||
|
||||
/* as same as SimilarityL2<1>, let build pass */
|
||||
template<>
|
||||
struct SimilarityL2<16> : SimilarityL2<1> {
|
||||
static constexpr int simdwidth = 1;
|
||||
static constexpr MetricType metric_type = METRIC_L2;
|
||||
explicit SimilarityL2 (const float * y) : SimilarityL2<1>(y) {}
|
||||
};
|
||||
|
||||
|
||||
template<int SIMDWIDTH>
|
||||
struct SimilarityIP {};
|
||||
|
@ -360,22 +345,6 @@ struct SimilarityIP<1> {
|
|||
}
|
||||
};
|
||||
|
||||
/* as same as SimilarityIP<1>, let build pass */
|
||||
template<>
|
||||
struct SimilarityIP<8> : SimilarityIP<1> {
|
||||
static constexpr int simdwidth = 1;
|
||||
static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
|
||||
explicit SimilarityIP (const float * y) : SimilarityIP<1>(y) {}
|
||||
};
|
||||
|
||||
/* as same as SimilarityIP<1>, let build pass */
|
||||
template<>
|
||||
struct SimilarityIP<16> : SimilarityIP<1> {
|
||||
static constexpr int simdwidth = 1;
|
||||
static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
|
||||
explicit SimilarityIP (const float * y) : SimilarityIP<1>(y) {}
|
||||
};
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
* DistanceComputer: combines a similarity and a quantizer to do
|
||||
|
@ -544,5 +513,91 @@ SQDistanceComputer *select_distance_computer (
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
template<class DCClass>
|
||||
InvertedListScanner* sel2_InvertedListScanner (
|
||||
const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool r)
|
||||
{
|
||||
if (DCClass::Sim::metric_type == METRIC_L2) {
|
||||
return new IVFSQScannerL2<DCClass>(sq->d, sq->trained, sq->code_size,
|
||||
quantizer, store_pairs, r);
|
||||
} else if (DCClass::Sim::metric_type == METRIC_INNER_PRODUCT) {
|
||||
return new IVFSQScannerIP<DCClass>(sq->d, sq->trained, sq->code_size,
|
||||
store_pairs, r);
|
||||
} else {
|
||||
FAISS_THROW_MSG("unsupported metric type");
|
||||
}
|
||||
}
|
||||
|
||||
template<class Similarity, class Codec, bool uniform>
|
||||
InvertedListScanner* sel12_InvertedListScanner (
|
||||
const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool r)
|
||||
{
|
||||
constexpr int SIMDWIDTH = Similarity::simdwidth;
|
||||
using QuantizerClass = QuantizerTemplate<Codec, uniform, SIMDWIDTH>;
|
||||
using DCClass = DCTemplate<QuantizerClass, Similarity, SIMDWIDTH>;
|
||||
return sel2_InvertedListScanner<DCClass> (sq, quantizer, store_pairs, r);
|
||||
}
|
||||
|
||||
|
||||
template<class Similarity>
|
||||
InvertedListScanner* sel1_InvertedListScanner (
|
||||
const ScalarQuantizer *sq, const Index *quantizer,
|
||||
bool store_pairs, bool r)
|
||||
{
|
||||
constexpr int SIMDWIDTH = Similarity::simdwidth;
|
||||
switch(sq->qtype) {
|
||||
case QuantizerType::QT_8bit_uniform:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec8bit, true>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_4bit_uniform:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec4bit, true>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_8bit:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec8bit, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_4bit:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec4bit, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_6bit:
|
||||
return sel12_InvertedListScanner
|
||||
<Similarity, Codec6bit, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_fp16:
|
||||
return sel2_InvertedListScanner
|
||||
<DCTemplate<QuantizerFP16<SIMDWIDTH>, Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_8bit_direct:
|
||||
if (sq->d % 16 == 0) {
|
||||
return sel2_InvertedListScanner
|
||||
<DistanceComputerByte<Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
} else {
|
||||
return sel2_InvertedListScanner
|
||||
<DCTemplate<Quantizer8bitDirect<SIMDWIDTH>,
|
||||
Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
}
|
||||
}
|
||||
|
||||
FAISS_THROW_MSG ("unknown qtype");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template<int SIMDWIDTH>
|
||||
InvertedListScanner* sel0_InvertedListScanner (
|
||||
MetricType mt, const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool by_residual)
|
||||
{
|
||||
if (mt == METRIC_L2) {
|
||||
return sel1_InvertedListScanner<SimilarityL2<SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, by_residual);
|
||||
} else if (mt == METRIC_INNER_PRODUCT) {
|
||||
return sel1_InvertedListScanner<SimilarityIP<SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, by_residual);
|
||||
} else {
|
||||
FAISS_THROW_MSG("unsupported metric type");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace faiss
|
||||
|
|
|
@ -11,41 +11,24 @@
|
|||
|
||||
#include <cstdio>
|
||||
#include <algorithm>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
#ifdef __SSE__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include <faiss/utils/utils.h>
|
||||
#include <faiss/impl/FaissAssert.h>
|
||||
#include <faiss/impl/ScalarQuantizer.h>
|
||||
#include <faiss/impl/ScalarQuantizerCodec.h>
|
||||
#include <faiss/impl/ScalarQuantizerOp.h>
|
||||
|
||||
namespace faiss {
|
||||
|
||||
|
||||
#ifdef __AVX__
|
||||
#define USE_AVX
|
||||
#endif
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
* Codec: converts between values in [0, 1] and an index in a code
|
||||
* array. The "i" parameter is the vector component index (not byte
|
||||
* index).
|
||||
*/
|
||||
|
||||
struct Codec8bit_avx {
|
||||
static void encode_component (float x, uint8_t *code, int i) {
|
||||
code[i] = (int)(255 * x);
|
||||
}
|
||||
|
||||
static float decode_component (const uint8_t *code, int i) {
|
||||
return (code[i] + 0.5f) / 255.0f;
|
||||
}
|
||||
|
||||
#ifdef USE_AVX
|
||||
struct Codec8bit_avx : public Codec8bit {
|
||||
static __m256 decode_8_components (const uint8_t *code, int i) {
|
||||
uint64_t c8 = *(uint64_t*)(code + i);
|
||||
__m128i c4lo = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8));
|
||||
|
@ -59,20 +42,9 @@ struct Codec8bit_avx {
|
|||
__m256 one_255 = _mm256_set1_ps (1.f / 255.f);
|
||||
return f8 * one_255;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
struct Codec4bit_avx {
|
||||
static void encode_component (float x, uint8_t *code, int i) {
|
||||
code [i / 2] |= (int)(x * 15.0) << ((i & 1) << 2);
|
||||
}
|
||||
|
||||
static float decode_component (const uint8_t *code, int i) {
|
||||
return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f;
|
||||
}
|
||||
|
||||
#ifdef USE_AVX
|
||||
struct Codec4bit_avx : public Codec4bit {
|
||||
static __m256 decode_8_components (const uint8_t *code, int i) {
|
||||
uint32_t c4 = *(uint32_t*)(code + (i >> 1));
|
||||
uint32_t mask = 0x0f0f0f0f;
|
||||
|
@ -92,54 +64,9 @@ struct Codec4bit_avx {
|
|||
__m256 one_255 = _mm256_set1_ps (1.f / 15.f);
|
||||
return f8 * one_255;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
struct Codec6bit_avx {
|
||||
static void encode_component (float x, uint8_t *code, int i) {
|
||||
int bits = (int)(x * 63.0);
|
||||
code += (i >> 2) * 3;
|
||||
switch(i & 3) {
|
||||
case 0:
|
||||
code[0] |= bits;
|
||||
break;
|
||||
case 1:
|
||||
code[0] |= bits << 6;
|
||||
code[1] |= bits >> 2;
|
||||
break;
|
||||
case 2:
|
||||
code[1] |= bits << 4;
|
||||
code[2] |= bits >> 4;
|
||||
break;
|
||||
case 3:
|
||||
code[2] |= bits << 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static float decode_component (const uint8_t *code, int i) {
|
||||
uint8_t bits;
|
||||
code += (i >> 2) * 3;
|
||||
switch(i & 3) {
|
||||
case 0:
|
||||
bits = code[0] & 0x3f;
|
||||
break;
|
||||
case 1:
|
||||
bits = code[0] >> 6;
|
||||
bits |= (code[1] & 0xf) << 2;
|
||||
break;
|
||||
case 2:
|
||||
bits = code[1] >> 4;
|
||||
bits |= (code[2] & 3) << 4;
|
||||
break;
|
||||
case 3:
|
||||
bits = code[2] >> 2;
|
||||
break;
|
||||
}
|
||||
return (bits + 0.5f) / 63.0f;
|
||||
}
|
||||
|
||||
#ifdef USE_AVX
|
||||
struct Codec6bit_avx : public Codec6bit {
|
||||
static __m256 decode_8_components (const uint8_t *code, int i) {
|
||||
return _mm256_set_ps
|
||||
(decode_component(code, i + 7),
|
||||
|
@ -151,127 +78,51 @@ struct Codec6bit_avx {
|
|||
decode_component(code, i + 1),
|
||||
decode_component(code, i + 0));
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
* Quantizer: normalizes scalar vector components, then passes them
|
||||
* through a codec
|
||||
*******************************************************************/
|
||||
|
||||
|
||||
template<class Codec, bool uniform, int SIMD>
|
||||
struct QuantizerTemplate_avx {};
|
||||
|
||||
|
||||
template<class Codec>
|
||||
struct QuantizerTemplate_avx<Codec, true, 1>: Quantizer {
|
||||
const size_t d;
|
||||
const float vmin, vdiff;
|
||||
|
||||
QuantizerTemplate_avx(size_t d, const std::vector<float> &trained):
|
||||
d(d), vmin(trained[0]), vdiff(trained[1])
|
||||
{
|
||||
}
|
||||
|
||||
void encode_vector(const float* x, uint8_t* code) const final {
|
||||
for (size_t i = 0; i < d; i++) {
|
||||
float xi = (x[i] - vmin) / vdiff;
|
||||
if (xi < 0) {
|
||||
xi = 0;
|
||||
}
|
||||
if (xi > 1.0) {
|
||||
xi = 1.0;
|
||||
}
|
||||
Codec::encode_component(xi, code, i);
|
||||
}
|
||||
}
|
||||
|
||||
void decode_vector(const uint8_t* code, float* x) const final {
|
||||
for (size_t i = 0; i < d; i++) {
|
||||
float xi = Codec::decode_component(code, i);
|
||||
x[i] = vmin + xi * vdiff;
|
||||
}
|
||||
}
|
||||
|
||||
float reconstruct_component (const uint8_t * code, int i) const
|
||||
{
|
||||
float xi = Codec::decode_component (code, i);
|
||||
return vmin + xi * vdiff;
|
||||
}
|
||||
struct QuantizerTemplate_avx<Codec, true, 1> : public QuantizerTemplate<Codec, true, 1> {
|
||||
QuantizerTemplate_avx(size_t d, const std::vector<float> &trained) :
|
||||
QuantizerTemplate<Codec, true, 1> (d, trained) {}
|
||||
};
|
||||
|
||||
|
||||
|
||||
#ifdef USE_AVX
|
||||
|
||||
template<class Codec>
|
||||
struct QuantizerTemplate_avx<Codec, true, 8>: QuantizerTemplate_avx<Codec, true, 1> {
|
||||
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained):
|
||||
QuantizerTemplate_avx<Codec, true, 1> (d, trained) {}
|
||||
struct QuantizerTemplate_avx<Codec, true, 8> : public QuantizerTemplate<Codec, true, 1> {
|
||||
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained) :
|
||||
QuantizerTemplate<Codec, true, 1> (d, trained) {}
|
||||
|
||||
__m256 reconstruct_8_components (const uint8_t * code, int i) const
|
||||
{
|
||||
__m256 reconstruct_8_components (const uint8_t * code, int i) const {
|
||||
__m256 xi = Codec::decode_8_components (code, i);
|
||||
return _mm256_set1_ps(this->vmin) + xi * _mm256_set1_ps (this->vdiff);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
template<class Codec>
|
||||
struct QuantizerTemplate_avx<Codec, false, 1>: Quantizer {
|
||||
const size_t d;
|
||||
const float *vmin, *vdiff;
|
||||
|
||||
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained):
|
||||
d(d), vmin(trained.data()), vdiff(trained.data() + d) {}
|
||||
|
||||
void encode_vector(const float* x, uint8_t* code) const final {
|
||||
for (size_t i = 0; i < d; i++) {
|
||||
float xi = (x[i] - vmin[i]) / vdiff[i];
|
||||
if (xi < 0)
|
||||
xi = 0;
|
||||
if (xi > 1.0)
|
||||
xi = 1.0;
|
||||
Codec::encode_component(xi, code, i);
|
||||
}
|
||||
}
|
||||
|
||||
void decode_vector(const uint8_t* code, float* x) const final {
|
||||
for (size_t i = 0; i < d; i++) {
|
||||
float xi = Codec::decode_component(code, i);
|
||||
x[i] = vmin[i] + xi * vdiff[i];
|
||||
}
|
||||
}
|
||||
|
||||
float reconstruct_component (const uint8_t * code, int i) const
|
||||
{
|
||||
float xi = Codec::decode_component (code, i);
|
||||
return vmin[i] + xi * vdiff[i];
|
||||
}
|
||||
struct QuantizerTemplate_avx<Codec, false, 1> : public QuantizerTemplate<Codec, false, 1> {
|
||||
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained) :
|
||||
QuantizerTemplate<Codec, false, 1> (d, trained) {}
|
||||
};
|
||||
|
||||
|
||||
#ifdef USE_AVX
|
||||
|
||||
template<class Codec>
|
||||
struct QuantizerTemplate_avx<Codec, false, 8>: QuantizerTemplate_avx<Codec, false, 1> {
|
||||
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained):
|
||||
QuantizerTemplate_avx<Codec, false, 1> (d, trained) {}
|
||||
struct QuantizerTemplate_avx<Codec, false, 8>: public QuantizerTemplate<Codec, false, 1> {
|
||||
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained) :
|
||||
QuantizerTemplate<Codec, false, 1> (d, trained) {}
|
||||
|
||||
__m256 reconstruct_8_components (const uint8_t * code, int i) const
|
||||
{
|
||||
__m256 reconstruct_8_components (const uint8_t * code, int i) const {
|
||||
__m256 xi = Codec::decode_8_components (code, i);
|
||||
return _mm256_loadu_ps (this->vmin + i) + xi * _mm256_loadu_ps (this->vdiff + i);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/*******************************************************************
|
||||
* FP16 quantizer
|
||||
|
@ -281,45 +132,22 @@ template<int SIMDWIDTH>
|
|||
struct QuantizerFP16_avx {};
|
||||
|
||||
template<>
|
||||
struct QuantizerFP16_avx<1>: Quantizer {
|
||||
const size_t d;
|
||||
|
||||
QuantizerFP16_avx(size_t d, const std::vector<float> & /* unused */):
|
||||
d(d) {}
|
||||
|
||||
void encode_vector(const float* x, uint8_t* code) const final {
|
||||
for (size_t i = 0; i < d; i++) {
|
||||
((uint16_t*)code)[i] = encode_fp16(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void decode_vector(const uint8_t* code, float* x) const final {
|
||||
for (size_t i = 0; i < d; i++) {
|
||||
x[i] = decode_fp16(((uint16_t*)code)[i]);
|
||||
}
|
||||
}
|
||||
|
||||
float reconstruct_component (const uint8_t * code, int i) const
|
||||
{
|
||||
return decode_fp16(((uint16_t*)code)[i]);
|
||||
}
|
||||
struct QuantizerFP16_avx<1> : public QuantizerFP16<1> {
|
||||
QuantizerFP16_avx (size_t d, const std::vector<float> &unused) :
|
||||
QuantizerFP16<1> (d, unused) {}
|
||||
};
|
||||
|
||||
#ifdef USE_AVX
|
||||
|
||||
template<>
|
||||
struct QuantizerFP16_avx<8>: QuantizerFP16_avx<1> {
|
||||
struct QuantizerFP16_avx<8>: public QuantizerFP16<1> {
|
||||
QuantizerFP16_avx (size_t d, const std::vector<float> &trained):
|
||||
QuantizerFP16_avx<1> (d, trained) {}
|
||||
QuantizerFP16<1> (d, trained) {}
|
||||
|
||||
__m256 reconstruct_8_components (const uint8_t * code, int i) const
|
||||
{
|
||||
__m256 reconstruct_8_components (const uint8_t * code, int i) const {
|
||||
__m128i codei = _mm_loadu_si128 ((const __m128i*)(code + 2 * i));
|
||||
return _mm256_cvtph_ps (codei);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/*******************************************************************
|
||||
* 8bit_direct quantizer
|
||||
|
@ -329,75 +157,46 @@ template<int SIMDWIDTH>
|
|||
struct Quantizer8bitDirect_avx {};
|
||||
|
||||
template<>
|
||||
struct Quantizer8bitDirect_avx<1>: Quantizer {
|
||||
const size_t d;
|
||||
|
||||
Quantizer8bitDirect_avx(size_t d, const std::vector<float> & /* unused */):
|
||||
d(d) {}
|
||||
|
||||
|
||||
void encode_vector(const float* x, uint8_t* code) const final {
|
||||
for (size_t i = 0; i < d; i++) {
|
||||
code[i] = (uint8_t)x[i];
|
||||
}
|
||||
}
|
||||
|
||||
void decode_vector(const uint8_t* code, float* x) const final {
|
||||
for (size_t i = 0; i < d; i++) {
|
||||
x[i] = code[i];
|
||||
}
|
||||
}
|
||||
|
||||
float reconstruct_component (const uint8_t * code, int i) const
|
||||
{
|
||||
return code[i];
|
||||
}
|
||||
struct Quantizer8bitDirect_avx<1> : public Quantizer8bitDirect<1> {
|
||||
Quantizer8bitDirect_avx (size_t d, const std::vector<float> &unused) :
|
||||
Quantizer8bitDirect(d, unused) {}
|
||||
};
|
||||
|
||||
#ifdef USE_AVX
|
||||
|
||||
template<>
|
||||
struct Quantizer8bitDirect_avx<8>: Quantizer8bitDirect_avx<1> {
|
||||
Quantizer8bitDirect_avx (size_t d, const std::vector<float> &trained):
|
||||
Quantizer8bitDirect_avx<1> (d, trained) {}
|
||||
struct Quantizer8bitDirect_avx<8>: public Quantizer8bitDirect<1> {
|
||||
Quantizer8bitDirect_avx (size_t d, const std::vector<float> &trained) :
|
||||
Quantizer8bitDirect<1> (d, trained) {}
|
||||
|
||||
__m256 reconstruct_8_components (const uint8_t * code, int i) const
|
||||
{
|
||||
__m256 reconstruct_8_components (const uint8_t * code, int i) const {
|
||||
__m128i x8 = _mm_loadl_epi64((__m128i*)(code + i)); // 8 * int8
|
||||
__m256i y8 = _mm256_cvtepu8_epi32 (x8); // 8 * int32
|
||||
return _mm256_cvtepi32_ps (y8); // 8 * float32
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
template<int SIMDWIDTH>
|
||||
Quantizer *select_quantizer_1_avx (
|
||||
QuantizerType qtype,
|
||||
size_t d, const std::vector<float> & trained)
|
||||
{
|
||||
Quantizer *select_quantizer_1_avx (QuantizerType qtype, size_t d,
|
||||
const std::vector<float> & trained) {
|
||||
switch(qtype) {
|
||||
case QuantizerType::QT_8bit:
|
||||
return new QuantizerTemplate_avx<Codec8bit_avx, false, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_6bit:
|
||||
return new QuantizerTemplate_avx<Codec6bit_avx, false, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_4bit:
|
||||
return new QuantizerTemplate_avx<Codec4bit_avx, false, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_8bit_uniform:
|
||||
return new QuantizerTemplate_avx<Codec8bit_avx, true, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_4bit_uniform:
|
||||
return new QuantizerTemplate_avx<Codec4bit_avx, true, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_fp16:
|
||||
return new QuantizerFP16_avx<SIMDWIDTH> (d, trained);
|
||||
case QuantizerType::QT_8bit_direct:
|
||||
return new Quantizer8bitDirect_avx<SIMDWIDTH> (d, trained);
|
||||
case QuantizerType::QT_8bit:
|
||||
return new QuantizerTemplate_avx<Codec8bit_avx, false, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_6bit:
|
||||
return new QuantizerTemplate_avx<Codec6bit_avx, false, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_4bit:
|
||||
return new QuantizerTemplate_avx<Codec4bit_avx, false, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_8bit_uniform:
|
||||
return new QuantizerTemplate_avx<Codec8bit_avx, true, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_4bit_uniform:
|
||||
return new QuantizerTemplate_avx<Codec4bit_avx, true, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_fp16:
|
||||
return new QuantizerFP16_avx<SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_8bit_direct:
|
||||
return new Quantizer8bitDirect_avx<SIMDWIDTH>(d, trained);
|
||||
}
|
||||
FAISS_THROW_MSG ("unknown qtype");
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
* Similarity: gets vector components and computes a similarity wrt. a
|
||||
* query vector stored in the object. The data fields just encapsulate
|
||||
|
@ -407,42 +206,14 @@ Quantizer *select_quantizer_1_avx (
|
|||
template<int SIMDWIDTH>
|
||||
struct SimilarityL2_avx {};
|
||||
|
||||
|
||||
template<>
|
||||
struct SimilarityL2_avx<1> {
|
||||
struct SimilarityL2_avx<1> : public SimilarityL2<1> {
|
||||
static constexpr int simdwidth = 1;
|
||||
static constexpr MetricType metric_type = METRIC_L2;
|
||||
|
||||
const float *y, *yi;
|
||||
|
||||
explicit SimilarityL2_avx (const float * y): y(y) {}
|
||||
|
||||
/******* scalar accumulator *******/
|
||||
|
||||
float accu;
|
||||
|
||||
void begin () {
|
||||
accu = 0;
|
||||
yi = y;
|
||||
}
|
||||
|
||||
void add_component (float x) {
|
||||
float tmp = *yi++ - x;
|
||||
accu += tmp * tmp;
|
||||
}
|
||||
|
||||
void add_component_2 (float x1, float x2) {
|
||||
float tmp = x1 - x2;
|
||||
accu += tmp * tmp;
|
||||
}
|
||||
|
||||
float result () {
|
||||
return accu;
|
||||
}
|
||||
explicit SimilarityL2_avx (const float * y) : SimilarityL2<1>(y) {}
|
||||
};
|
||||
|
||||
|
||||
#ifdef USE_AVX
|
||||
template<>
|
||||
struct SimilarityL2_avx<8> {
|
||||
static constexpr int simdwidth = 8;
|
||||
|
@ -480,51 +251,18 @@ struct SimilarityL2_avx<8> {
|
|||
}
|
||||
};
|
||||
|
||||
/* as same as SimilarityL2<8>, let build pass */
|
||||
template<>
|
||||
struct SimilarityL2_avx<16> : SimilarityL2_avx<8>{
|
||||
static constexpr int simdwidth = 8;
|
||||
static constexpr MetricType metric_type = METRIC_L2;
|
||||
explicit SimilarityL2_avx (const float * y) : SimilarityL2_avx<8>(y) {}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
template<int SIMDWIDTH>
|
||||
struct SimilarityIP_avx {};
|
||||
|
||||
|
||||
template<>
|
||||
struct SimilarityIP_avx<1> {
|
||||
struct SimilarityIP_avx<1> : public SimilarityIP<1> {
|
||||
static constexpr int simdwidth = 1;
|
||||
static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
|
||||
const float *y, *yi;
|
||||
|
||||
float accu;
|
||||
|
||||
explicit SimilarityIP_avx (const float * y):
|
||||
y (y) {}
|
||||
|
||||
void begin () {
|
||||
accu = 0;
|
||||
yi = y;
|
||||
}
|
||||
|
||||
void add_component (float x) {
|
||||
accu += *yi++ * x;
|
||||
}
|
||||
|
||||
void add_component_2 (float x1, float x2) {
|
||||
accu += x1 * x2;
|
||||
}
|
||||
|
||||
float result () {
|
||||
return accu;
|
||||
}
|
||||
explicit SimilarityIP_avx (const float * y) : SimilarityIP<1>(y) {}
|
||||
};
|
||||
|
||||
#ifdef USE_AVX
|
||||
|
||||
template<>
|
||||
struct SimilarityIP_avx<8> {
|
||||
static constexpr int simdwidth = 8;
|
||||
|
@ -534,8 +272,7 @@ struct SimilarityIP_avx<8> {
|
|||
|
||||
float accu;
|
||||
|
||||
explicit SimilarityIP_avx (const float * y):
|
||||
y (y) {}
|
||||
explicit SimilarityIP_avx (const float * y): y (y) {}
|
||||
|
||||
__m256 accu8;
|
||||
|
||||
|
@ -564,15 +301,6 @@ struct SimilarityIP_avx<8> {
|
|||
}
|
||||
};
|
||||
|
||||
/* as same as SimilarityIP<8>, let build pass */
|
||||
template<>
|
||||
struct SimilarityIP_avx<16> : SimilarityIP_avx<8> {
|
||||
static constexpr int simdwidth = 8;
|
||||
static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
|
||||
explicit SimilarityIP_avx (const float * y) : SimilarityIP_avx<8>(y) {}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
* DistanceComputer: combines a similarity and a quantizer to do
|
||||
|
@ -583,69 +311,19 @@ template<class Quantizer, class Similarity, int SIMDWIDTH>
|
|||
struct DCTemplate_avx : SQDistanceComputer {};
|
||||
|
||||
template<class Quantizer, class Similarity>
|
||||
struct DCTemplate_avx<Quantizer, Similarity, 1> : SQDistanceComputer
|
||||
{
|
||||
using Sim = Similarity;
|
||||
|
||||
Quantizer quant;
|
||||
|
||||
DCTemplate_avx(size_t d, const std::vector<float> &trained):
|
||||
quant(d, trained)
|
||||
{}
|
||||
|
||||
float compute_distance(const float* x, const uint8_t* code) const {
|
||||
Similarity sim(x);
|
||||
sim.begin();
|
||||
for (size_t i = 0; i < quant.d; i++) {
|
||||
float xi = quant.reconstruct_component(code, i);
|
||||
sim.add_component(xi);
|
||||
}
|
||||
return sim.result();
|
||||
}
|
||||
|
||||
float compute_code_distance(const uint8_t* code1, const uint8_t* code2)
|
||||
const {
|
||||
Similarity sim(nullptr);
|
||||
sim.begin();
|
||||
for (size_t i = 0; i < quant.d; i++) {
|
||||
float x1 = quant.reconstruct_component(code1, i);
|
||||
float x2 = quant.reconstruct_component(code2, i);
|
||||
sim.add_component_2(x1, x2);
|
||||
}
|
||||
return sim.result();
|
||||
}
|
||||
|
||||
void set_query (const float *x) final {
|
||||
q = x;
|
||||
}
|
||||
|
||||
/// compute distance of vector i to current query
|
||||
float operator () (idx_t i) final {
|
||||
return compute_distance (q, codes + i * code_size);
|
||||
}
|
||||
|
||||
float symmetric_dis (idx_t i, idx_t j) override {
|
||||
return compute_code_distance (codes + i * code_size,
|
||||
codes + j * code_size);
|
||||
}
|
||||
|
||||
float query_to_code (const uint8_t * code) const {
|
||||
return compute_distance (q, code);
|
||||
}
|
||||
struct DCTemplate_avx<Quantizer, Similarity, 1> : public DCTemplate<Quantizer, Similarity, 1> {
|
||||
DCTemplate_avx(size_t d, const std::vector<float> &trained) :
|
||||
DCTemplate<Quantizer, Similarity, 1>(d, trained) {}
|
||||
};
|
||||
|
||||
#ifdef USE_AVX
|
||||
|
||||
template<class Quantizer, class Similarity>
|
||||
struct DCTemplate_avx<Quantizer, Similarity, 8> : SQDistanceComputer
|
||||
{
|
||||
struct DCTemplate_avx<Quantizer, Similarity, 8> : SQDistanceComputer {
|
||||
using Sim = Similarity;
|
||||
|
||||
Quantizer quant;
|
||||
|
||||
DCTemplate_avx(size_t d, const std::vector<float> &trained):
|
||||
quant(d, trained)
|
||||
{}
|
||||
quant(d, trained) {}
|
||||
|
||||
float compute_distance(const float* x, const uint8_t* code) const {
|
||||
Similarity sim(x);
|
||||
|
@ -688,9 +366,6 @@ struct DCTemplate_avx<Quantizer, Similarity, 8> : SQDistanceComputer
|
|||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
* DistanceComputerByte: computes distances in the integer domain
|
||||
|
@ -700,58 +375,11 @@ template<class Similarity, int SIMDWIDTH>
|
|||
struct DistanceComputerByte_avx : SQDistanceComputer {};
|
||||
|
||||
template<class Similarity>
|
||||
struct DistanceComputerByte_avx<Similarity, 1> : SQDistanceComputer {
|
||||
using Sim = Similarity;
|
||||
|
||||
int d;
|
||||
std::vector<uint8_t> tmp;
|
||||
|
||||
DistanceComputerByte_avx(int d, const std::vector<float> &): d(d), tmp(d) {
|
||||
}
|
||||
|
||||
int compute_code_distance(const uint8_t* code1, const uint8_t* code2)
|
||||
const {
|
||||
int accu = 0;
|
||||
for (int i = 0; i < d; i++) {
|
||||
if (Sim::metric_type == METRIC_INNER_PRODUCT) {
|
||||
accu += int(code1[i]) * code2[i];
|
||||
} else {
|
||||
int diff = int(code1[i]) - code2[i];
|
||||
accu += diff * diff;
|
||||
}
|
||||
}
|
||||
return accu;
|
||||
}
|
||||
|
||||
void set_query (const float *x) final {
|
||||
for (int i = 0; i < d; i++) {
|
||||
tmp[i] = int(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int compute_distance(const float* x, const uint8_t* code) {
|
||||
set_query(x);
|
||||
return compute_code_distance(tmp.data(), code);
|
||||
}
|
||||
|
||||
/// compute distance of vector i to current query
|
||||
float operator () (idx_t i) final {
|
||||
return compute_distance (q, codes + i * code_size);
|
||||
}
|
||||
|
||||
float symmetric_dis (idx_t i, idx_t j) override {
|
||||
return compute_code_distance (codes + i * code_size,
|
||||
codes + j * code_size);
|
||||
}
|
||||
|
||||
float query_to_code (const uint8_t * code) const {
|
||||
return compute_code_distance (tmp.data(), code);
|
||||
}
|
||||
struct DistanceComputerByte_avx<Similarity, 1> : public DistanceComputerByte<Similarity, 1> {
|
||||
DistanceComputerByte_avx(int d, const std::vector<float> &unused) :
|
||||
DistanceComputerByte<Similarity, 1>(d, unused) {}
|
||||
};
|
||||
|
||||
#ifdef USE_AVX
|
||||
|
||||
|
||||
template<class Similarity>
|
||||
struct DistanceComputerByte_avx<Similarity, 8> : SQDistanceComputer {
|
||||
using Sim = Similarity;
|
||||
|
@ -759,11 +387,9 @@ struct DistanceComputerByte_avx<Similarity, 8> : SQDistanceComputer {
|
|||
int d;
|
||||
std::vector<uint8_t> tmp;
|
||||
|
||||
DistanceComputerByte_avx(int d, const std::vector<float> &): d(d), tmp(d) {
|
||||
}
|
||||
DistanceComputerByte_avx(int d, const std::vector<float> &): d(d), tmp(d) {}
|
||||
|
||||
int compute_code_distance(const uint8_t* code1, const uint8_t* code2)
|
||||
const {
|
||||
int compute_code_distance(const uint8_t* code1, const uint8_t* code2) const {
|
||||
// __m256i accu = _mm256_setzero_ps ();
|
||||
__m256i accu = _mm256_setzero_si256 ();
|
||||
for (int i = 0; i < d; i += 16) {
|
||||
|
@ -819,14 +445,12 @@ struct DistanceComputerByte_avx<Similarity, 8> : SQDistanceComputer {
|
|||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/*******************************************************************
|
||||
* select_distance_computer: runtime selection of template
|
||||
* specialization
|
||||
*******************************************************************/
|
||||
|
||||
|
||||
template<class Sim>
|
||||
SQDistanceComputer *select_distance_computer_avx (
|
||||
QuantizerType qtype,
|
||||
|
@ -834,41 +458,119 @@ SQDistanceComputer *select_distance_computer_avx (
|
|||
{
|
||||
constexpr int SIMDWIDTH = Sim::simdwidth;
|
||||
switch(qtype) {
|
||||
case QuantizerType::QT_8bit_uniform:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec8bit_avx, true, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_8bit_uniform:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec8bit_avx, true, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
|
||||
case QuantizerType::QT_4bit_uniform:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec4bit_avx, true, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_4bit_uniform:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec4bit_avx, true, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
|
||||
case QuantizerType::QT_8bit:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec8bit_avx, false, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_8bit:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec8bit_avx, false, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
|
||||
case QuantizerType::QT_6bit:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec6bit_avx, false, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_6bit:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec6bit_avx, false, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
|
||||
case QuantizerType::QT_4bit:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec4bit_avx, false, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
case QuantizerType::QT_4bit:
|
||||
return new DCTemplate_avx<QuantizerTemplate_avx<Codec4bit_avx, false, SIMDWIDTH>,
|
||||
Sim, SIMDWIDTH>(d, trained);
|
||||
|
||||
case QuantizerType::QT_fp16:
|
||||
return new DCTemplate_avx
|
||||
<QuantizerFP16_avx<SIMDWIDTH>, Sim, SIMDWIDTH>(d, trained);
|
||||
|
||||
case QuantizerType::QT_8bit_direct:
|
||||
if (d % 16 == 0) {
|
||||
return new DistanceComputerByte_avx<Sim, SIMDWIDTH>(d, trained);
|
||||
} else {
|
||||
case QuantizerType::QT_fp16:
|
||||
return new DCTemplate_avx
|
||||
<Quantizer8bitDirect_avx<SIMDWIDTH>, Sim, SIMDWIDTH>(d, trained);
|
||||
}
|
||||
<QuantizerFP16_avx<SIMDWIDTH>, Sim, SIMDWIDTH>(d, trained);
|
||||
|
||||
case QuantizerType::QT_8bit_direct:
|
||||
if (d % 16 == 0) {
|
||||
return new DistanceComputerByte_avx<Sim, SIMDWIDTH>(d, trained);
|
||||
} else {
|
||||
return new DCTemplate_avx
|
||||
<Quantizer8bitDirect_avx<SIMDWIDTH>, Sim, SIMDWIDTH>(d, trained);
|
||||
}
|
||||
}
|
||||
FAISS_THROW_MSG ("unknown qtype");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template<class DCClass>
|
||||
InvertedListScanner* sel2_InvertedListScanner_avx (
|
||||
const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool r)
|
||||
{
|
||||
return sel2_InvertedListScanner<DCClass> (sq, quantizer, store_pairs, r);
|
||||
}
|
||||
|
||||
template<class Similarity, class Codec, bool uniform>
|
||||
InvertedListScanner* sel12_InvertedListScanner_avx (
|
||||
const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool r)
|
||||
{
|
||||
constexpr int SIMDWIDTH = Similarity::simdwidth;
|
||||
using QuantizerClass = QuantizerTemplate_avx<Codec, uniform, SIMDWIDTH>;
|
||||
using DCClass = DCTemplate_avx<QuantizerClass, Similarity, SIMDWIDTH>;
|
||||
return sel2_InvertedListScanner_avx<DCClass> (sq, quantizer, store_pairs, r);
|
||||
}
|
||||
|
||||
|
||||
template<class Similarity>
|
||||
InvertedListScanner* sel1_InvertedListScanner_avx (
|
||||
const ScalarQuantizer *sq, const Index *quantizer,
|
||||
bool store_pairs, bool r)
|
||||
{
|
||||
constexpr int SIMDWIDTH = Similarity::simdwidth;
|
||||
switch(sq->qtype) {
|
||||
case QuantizerType::QT_8bit_uniform:
|
||||
return sel12_InvertedListScanner_avx
|
||||
<Similarity, Codec8bit_avx, true>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_4bit_uniform:
|
||||
return sel12_InvertedListScanner_avx
|
||||
<Similarity, Codec4bit_avx, true>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_8bit:
|
||||
return sel12_InvertedListScanner_avx
|
||||
<Similarity, Codec8bit_avx, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_4bit:
|
||||
return sel12_InvertedListScanner_avx
|
||||
<Similarity, Codec4bit_avx, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_6bit:
|
||||
return sel12_InvertedListScanner_avx
|
||||
<Similarity, Codec6bit_avx, false>(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_fp16:
|
||||
return sel2_InvertedListScanner_avx
|
||||
<DCTemplate_avx<QuantizerFP16_avx<SIMDWIDTH>, Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
case QuantizerType::QT_8bit_direct:
|
||||
if (sq->d % 16 == 0) {
|
||||
return sel2_InvertedListScanner_avx
|
||||
<DistanceComputerByte_avx<Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
} else {
|
||||
return sel2_InvertedListScanner_avx
|
||||
<DCTemplate_avx<Quantizer8bitDirect_avx<SIMDWIDTH>,
|
||||
Similarity, SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, r);
|
||||
}
|
||||
}
|
||||
|
||||
FAISS_THROW_MSG ("unknown qtype");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template<int SIMDWIDTH>
|
||||
InvertedListScanner* sel0_InvertedListScanner_avx (
|
||||
MetricType mt, const ScalarQuantizer *sq,
|
||||
const Index *quantizer, bool store_pairs, bool by_residual)
|
||||
{
|
||||
if (mt == METRIC_L2) {
|
||||
return sel1_InvertedListScanner_avx<SimilarityL2_avx<SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, by_residual);
|
||||
} else if (mt == METRIC_INNER_PRODUCT) {
|
||||
return sel1_InvertedListScanner_avx<SimilarityIP_avx<SIMDWIDTH> >
|
||||
(sq, quantizer, store_pairs, by_residual);
|
||||
} else {
|
||||
FAISS_THROW_MSG("unsupported metric type");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace faiss
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -18,18 +18,22 @@ namespace faiss {
|
|||
|
||||
/* SSE */
|
||||
/// Reference (SIMD width 1) distance-computer factory.
///
/// METRIC_L2 selects SimilarityL2<1>; every other metric value takes the
/// inner-product branch (SimilarityIP<1>). qtype, dim and trained are passed
/// straight through to select_distance_computer.
SQDistanceComputer *
sq_get_distance_computer_ref (MetricType metric, QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    if (metric == METRIC_L2) {
        return select_distance_computer<SimilarityL2<1>>(qtype, dim, trained);
    } else {
        return select_distance_computer<SimilarityIP<1>>(qtype, dim, trained);
    }
}
|
||||
|
||||
/// Reference (SIMD width 1) quantizer factory: forwards to
/// select_quantizer_1<1> with the arguments unchanged.
Quantizer *
sq_select_quantizer_ref (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    return select_quantizer_1<1> (qtype, dim, trained);
}
|
||||
|
||||
/// Reference (SIMD width 1) inverted-list scanner factory.
/// `dim` is part of the signature but is not read here: the width-1 path is
/// chosen unconditionally.
InvertedListScanner*
sq_select_inverted_list_scanner_ref (MetricType mt, const ScalarQuantizer *sq, const Index *quantizer, size_t dim, bool store_pairs, bool by_residual) {
    InvertedListScanner *scanner =
        sel0_InvertedListScanner<1> (mt, sq, quantizer, store_pairs, by_residual);
    return scanner;
}
|
||||
|
||||
} // namespace faiss
|
||||
|
|
|
@ -9,17 +9,33 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <faiss/impl/ScalarQuantizer.h>
|
||||
#include <faiss/impl/ScalarQuantizerOp.h>
|
||||
#include <faiss/MetricType.h>
|
||||
|
||||
namespace faiss {
|
||||
|
||||
SQDistanceComputer *
|
||||
sq_get_distance_computer_L2_sse(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
|
||||
SQDistanceComputer *
|
||||
sq_get_distance_computer_IP_sse(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
sq_get_distance_computer_ref(
|
||||
MetricType metric,
|
||||
QuantizerType qtype,
|
||||
size_t dim,
|
||||
const std::vector<float>& trained);
|
||||
|
||||
Quantizer *
|
||||
sq_select_quantizer_sse(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
sq_select_quantizer_ref(
|
||||
QuantizerType qtype,
|
||||
size_t dim,
|
||||
const std::vector<float>& trained);
|
||||
|
||||
InvertedListScanner*
|
||||
sq_select_inverted_list_scanner_ref(
|
||||
MetricType mt,
|
||||
const ScalarQuantizer *sq,
|
||||
const Index *quantizer,
|
||||
size_t dim,
|
||||
bool store_pairs,
|
||||
bool by_residual);
|
||||
|
||||
} // namespace faiss
|
||||
|
|
|
@ -17,25 +17,24 @@ namespace faiss {
|
|||
********************************************************************/
|
||||
|
||||
/// AVX distance-computer factory.
///
/// METRIC_L2 selects the L2 similarity; every other metric value takes the
/// inner-product branch. Within each branch the 8-wide specialization is
/// used when dim is a multiple of 8, and the width-1 one otherwise.
SQDistanceComputer *
sq_get_distance_computer_avx (MetricType metric, QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    if (metric == METRIC_L2) {
        if (dim % 8 == 0) {
            return select_distance_computer_avx<SimilarityL2_avx<8>>(qtype, dim, trained);
        } else {
            return select_distance_computer_avx<SimilarityL2_avx<1>>(qtype, dim, trained);
        }
    } else {
        if (dim % 8 == 0) {
            return select_distance_computer_avx<SimilarityIP_avx<8>>(qtype, dim, trained);
        } else {
            return select_distance_computer_avx<SimilarityIP_avx<1>>(qtype, dim, trained);
        }
    }
}
|
||||
|
||||
Quantizer *
|
||||
sq_select_quantizer_avx(QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
|
||||
sq_select_quantizer_avx (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
|
||||
if (dim % 8 == 0) {
|
||||
return select_quantizer_1_avx<8>(qtype, dim, trained);
|
||||
} else {
|
||||
|
@ -43,4 +42,13 @@ sq_select_quantizer_avx(QuantizerType qtype, size_t dim, const std::vector<float
|
|||
}
|
||||
}
|
||||
|
||||
/// AVX inverted-list scanner factory: uses the 8-wide selection chain when
/// dim is a multiple of 8 and the width-1 chain otherwise.
InvertedListScanner*
sq_select_inverted_list_scanner_avx (MetricType mt, const ScalarQuantizer *sq, const Index *quantizer, size_t dim, bool store_pairs, bool by_residual) {
    const bool fits_8_lanes = (dim % 8 == 0);
    return fits_8_lanes
        ? sel0_InvertedListScanner_avx<8> (mt, sq, quantizer, store_pairs, by_residual)
        : sel0_InvertedListScanner_avx<1> (mt, sq, quantizer, store_pairs, by_residual);
}
|
||||
|
||||
} // namespace faiss
|
||||
|
|
|
@ -10,18 +10,32 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <faiss/impl/ScalarQuantizer.h>
|
||||
#include <faiss/impl/ScalarQuantizerOp.h>
|
||||
#include <faiss/MetricType.h>
|
||||
|
||||
namespace faiss {
|
||||
|
||||
|
||||
SQDistanceComputer *
|
||||
sq_get_distance_computer_L2_avx(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
|
||||
SQDistanceComputer *
|
||||
sq_get_distance_computer_IP_avx(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
sq_get_distance_computer_avx(
|
||||
MetricType metric,
|
||||
QuantizerType qtype,
|
||||
size_t dim,
|
||||
const std::vector<float>& trained);
|
||||
|
||||
Quantizer *
|
||||
sq_select_quantizer_avx(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
sq_select_quantizer_avx(
|
||||
QuantizerType qtype,
|
||||
size_t dim,
|
||||
const std::vector<float>& trained);
|
||||
|
||||
InvertedListScanner*
|
||||
sq_select_inverted_list_scanner_avx(
|
||||
MetricType mt,
|
||||
const ScalarQuantizer *sq,
|
||||
const Index *quantizer,
|
||||
size_t dim,
|
||||
bool store_pairs,
|
||||
bool by_residual);
|
||||
|
||||
} // namespace faiss
|
||||
|
|
|
@ -17,24 +17,23 @@ namespace faiss {
|
|||
********************************************************************/
|
||||
|
||||
/// AVX-512 distance-computer factory.
///
/// METRIC_L2 selects the L2 similarity; every other metric value takes the
/// inner-product branch. Within each branch the SIMD width follows dim:
/// 16 lanes when dim is a multiple of 16, 8 lanes when it is a multiple of 8,
/// and the width-1 fallback otherwise.
SQDistanceComputer *
sq_get_distance_computer_avx512 (MetricType metric, QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    if (metric == METRIC_L2) {
        if (dim % 16 == 0) {
            return select_distance_computer_avx512<SimilarityL2_avx512<16>>(qtype, dim, trained);
        } else if (dim % 8 == 0) {
            return select_distance_computer_avx512<SimilarityL2_avx512<8>>(qtype, dim, trained);
        } else {
            return select_distance_computer_avx512<SimilarityL2_avx512<1>>(qtype, dim, trained);
        }
    } else {
        if (dim % 16 == 0) {
            // The 16-lane case must use the inner-product similarity like its
            // 8- and 1-lane siblings; instantiating the L2 similarity here
            // made IP queries with dim % 16 == 0 return L2 distances.
            return select_distance_computer_avx512<SimilarityIP_avx512<16>>(qtype, dim, trained);
        } else if (dim % 8 == 0) {
            return select_distance_computer_avx512<SimilarityIP_avx512<8>>(qtype, dim, trained);
        } else {
            return select_distance_computer_avx512<SimilarityIP_avx512<1>>(qtype, dim, trained);
        }
    }
}
|
||||
|
||||
|
@ -49,5 +48,15 @@ sq_select_quantizer_avx512 (QuantizerType qtype, size_t dim, const std::vector<f
|
|||
}
|
||||
}
|
||||
|
||||
/// AVX-512 inverted-list scanner factory: picks the 16-, 8- or 1-wide
/// selection chain depending on whether dim is a multiple of 16, of 8, or
/// of neither.
InvertedListScanner*
sq_select_inverted_list_scanner_avx512 (MetricType mt, const ScalarQuantizer *sq, const Index *quantizer, size_t dim, bool store_pairs, bool by_residual) {
    if (dim % 16 == 0) {
        return sel0_InvertedListScanner_avx512<16> (mt, sq, quantizer, store_pairs, by_residual);
    }
    if (dim % 8 == 0) {
        return sel0_InvertedListScanner_avx512<8> (mt, sq, quantizer, store_pairs, by_residual);
    }
    return sel0_InvertedListScanner_avx512<1> (mt, sq, quantizer, store_pairs, by_residual);
}
|
||||
|
||||
} // namespace faiss
|
||||
|
|
|
@ -10,17 +10,32 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <faiss/impl/ScalarQuantizer.h>
|
||||
#include <faiss/impl/ScalarQuantizerOp.h>
|
||||
#include <faiss/MetricType.h>
|
||||
|
||||
namespace faiss {
|
||||
|
||||
SQDistanceComputer *
|
||||
sq_get_distance_computer_L2_avx512(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
|
||||
SQDistanceComputer *
|
||||
sq_get_distance_computer_IP_avx512(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
sq_get_distance_computer_avx512(
|
||||
MetricType metric,
|
||||
QuantizerType qtype,
|
||||
size_t dim,
|
||||
const std::vector<float>& trained);
|
||||
|
||||
Quantizer *
|
||||
sq_select_quantizer_avx512(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
|
||||
sq_select_quantizer_avx512(
|
||||
QuantizerType qtype,
|
||||
size_t dim,
|
||||
const std::vector<float>& trained);
|
||||
|
||||
InvertedListScanner*
|
||||
sq_select_inverted_list_scanner_avx512(
|
||||
MetricType mt,
|
||||
const ScalarQuantizer *sq,
|
||||
const Index *quantizer,
|
||||
size_t dim,
|
||||
bool store_pairs,
|
||||
bool by_residual);
|
||||
|
||||
} // namespace faiss
|
||||
|
|
Loading…
Reference in New Issue