fix performance (#2499)

* optimize sq_get_distance_computer

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* add sq_select_inverted_list_scanner_ref

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* add sq_select_inverted_list_scanner_avx

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* add sq_select_inverted_list_scanner_avx512

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* optimize Codec

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* optimize ScalarQuantizerCodec_avx.h

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* code format

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* optimize ScalarQuantizerCodec_avx512.h

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update changelog

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* clean code

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
pull/2501/head
Cai Yudong 2020-06-06 15:41:57 +08:00 committed by GitHub
parent 9f7995cbba
commit 8254e9ed67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 741 additions and 1551 deletions

View File

@ -14,6 +14,7 @@ Please mark all change in change log and use the issue from GitHub
## Improvement
- \#2381 Upgrade FAISS to 1.6.3
- \#2429 Fix Milvus 0.9.1 performance degrade issue
- \#2441 Improve Knowhere code coverage
- \#2466 Optimize k-selection implementation of faiss gpu version
- \#2495 Add creating lock file failure reason.

View File

@ -26,10 +26,9 @@ fvec_func_ptr fvec_L2sqr = fvec_L2sqr_avx;
fvec_func_ptr fvec_L1 = fvec_L1_avx;
fvec_func_ptr fvec_Linf = fvec_Linf_avx;
sq_get_func_ptr sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx;
sq_get_func_ptr sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx;
sq_sel_func_ptr sq_sel_quantizer = sq_select_quantizer_avx;
sq_get_distance_computer_func_ptr sq_get_distance_computer = sq_get_distance_computer_avx;
sq_sel_quantizer_func_ptr sq_sel_quantizer = sq_select_quantizer_avx;
sq_sel_inv_list_scanner_func_ptr sq_sel_inv_list_scanner = sq_select_inverted_list_scanner_avx;
/*****************************************************************************/
@ -68,9 +67,9 @@ bool hook_init(std::string& cpu_flag) {
fvec_Linf = fvec_Linf_avx512;
/* for IVFSQ */
sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx512;
sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx512;
sq_get_distance_computer = sq_get_distance_computer_avx512;
sq_sel_quantizer = sq_select_quantizer_avx512;
sq_sel_inv_list_scanner = sq_select_inverted_list_scanner_avx512;
cpu_flag = "AVX512";
} else if (support_avx2()) {
@ -81,9 +80,9 @@ bool hook_init(std::string& cpu_flag) {
fvec_Linf = fvec_Linf_avx;
/* for IVFSQ */
sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx;
sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx;
sq_get_distance_computer = sq_get_distance_computer_avx;
sq_sel_quantizer = sq_select_quantizer_avx;
sq_sel_inv_list_scanner = sq_select_inverted_list_scanner_avx;
cpu_flag = "AVX2";
} else if (support_sse()) {
@ -94,9 +93,9 @@ bool hook_init(std::string& cpu_flag) {
fvec_Linf = fvec_Linf_sse;
/* for IVFSQ */
sq_get_distance_computer_L2 = sq_get_distance_computer_L2_sse;
sq_get_distance_computer_IP = sq_get_distance_computer_IP_sse;
sq_sel_quantizer = sq_select_quantizer_sse;
sq_get_distance_computer = sq_get_distance_computer_ref;
sq_sel_quantizer = sq_select_quantizer_ref;
sq_sel_inv_list_scanner = sq_select_inverted_list_scanner_ref;
cpu_flag = "SSE42";
} else {

View File

@ -6,15 +6,17 @@
#include <vector>
#include <stddef.h>
#include <string>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerOp.h>
#include <faiss/MetricType.h>
namespace faiss {
typedef float (*fvec_func_ptr)(const float*, const float*, size_t);
typedef SQDistanceComputer* (*sq_get_func_ptr)(QuantizerType, size_t, const std::vector<float>&);
typedef Quantizer* (*sq_sel_func_ptr)(QuantizerType, size_t, const std::vector<float>&);
typedef SQDistanceComputer* (*sq_get_distance_computer_func_ptr)(MetricType, QuantizerType, size_t, const std::vector<float>&);
typedef Quantizer* (*sq_sel_quantizer_func_ptr)(QuantizerType, size_t, const std::vector<float>&);
typedef InvertedListScanner* (*sq_sel_inv_list_scanner_func_ptr)(MetricType, const ScalarQuantizer*, const Index*, size_t, bool, bool);
extern bool faiss_use_avx512;
extern bool faiss_use_avx2;
@ -25,9 +27,9 @@ extern fvec_func_ptr fvec_L2sqr;
extern fvec_func_ptr fvec_L1;
extern fvec_func_ptr fvec_Linf;
extern sq_get_func_ptr sq_get_distance_computer_L2;
extern sq_get_func_ptr sq_get_distance_computer_IP;
extern sq_sel_func_ptr sq_sel_quantizer;
extern sq_get_distance_computer_func_ptr sq_get_distance_computer;
extern sq_sel_quantizer_func_ptr sq_sel_quantizer;
extern sq_sel_inv_list_scanner_func_ptr sq_sel_inv_list_scanner;
extern bool support_avx512();
extern bool support_avx2();

View File

@ -16,7 +16,6 @@
#include <faiss/FaissHook.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/ScalarQuantizerCodec.h>
namespace faiss {
@ -160,11 +159,7 @@ ScalarQuantizer::get_distance_computer (MetricType metric) const
{
FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
/* use hook to decide use AVX512 or not */
if (metric == METRIC_L2) {
return sq_get_distance_computer_L2(qtype, d, trained);
} else {
return sq_get_distance_computer_IP(qtype, d, trained);
}
return sq_get_distance_computer(metric, qtype, d, trained);
}
@ -175,276 +170,13 @@ ScalarQuantizer::get_distance_computer (MetricType metric) const
* IndexScalarQuantizer as well.
********************************************************************/
namespace {
/**
 * Inverted-list scanner for inner-product search over scalar-quantized codes.
 *
 * Scores come from the distance computer `dc` (a DCClass) and are offset by
 * `accu0`. Higher scores are better: scan_codes() replaces the heap top when
 * a score exceeds simi[0], scan_codes_range() reports scores above `radius`.
 */
template<class DCClass>
struct IVFSQScannerIP: InvertedListScanner {
    DCClass dc;
    bool store_pairs, by_residual;

    size_t code_size;

    idx_t list_no;  /// current list (set to 0 for Flat index)
    float accu0;    /// added to all distances

    IVFSQScannerIP(int d, const std::vector<float> & trained,
                   size_t code_size, bool store_pairs,
                   bool by_residual):
        dc(d, trained), store_pairs(store_pairs),
        by_residual(by_residual),
        code_size(code_size), list_no(0), accu0(0)
    {}

    /// Forward the query vector to the distance computer.
    void set_query (const float *query) override {
        dc.set_query (query);
    }

    /// Select the list to scan. With by_residual the coarse distance becomes
    /// the constant offset added to every score of that list.
    void set_list (idx_t list_no, float coarse_dis) override {
        this->list_no = list_no;
        accu0 = by_residual ? coarse_dis : 0;
    }

    /// Score of a single code against the current query (offset included).
    float distance_to_code (const uint8_t *code) const final {
        return accu0 + dc.query_to_code (code);
    }

    /**
     * Scan `list_size` consecutive codes and push improving scores onto the
     * result heap (simi / idxi, capacity k).
     *
     * Entries whose id is set in `bitset` are skipped. The reported id is
     * `list_no << 32 | j` when store_pairs is set, otherwise ids[j].
     * NOTE(review): ids[j] is read whenever a bitset is supplied, so ids is
     * assumed to be non-null in that case -- confirm against callers.
     *
     * @return number of heap updates performed
     */
    size_t scan_codes (size_t list_size,
                       const uint8_t *codes,
                       const idx_t *ids,
                       float *simi, idx_t *idxi,
                       size_t k,
                       ConcurrentBitsetPtr bitset) const override
    {
        size_t nup = 0;

        for (size_t j = 0; j < list_size; j++) {
            if (!bitset || !bitset->test(ids[j])) {
                float accu = accu0 + dc.query_to_code (codes);

                if (accu > simi [0]) {
                    int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
                    minheap_swap_top (k, simi, idxi, accu, id);
                    nup++;
                }
            }
            codes += code_size;
        }
        return nup;
    }

    /**
     * Scan `list_size` consecutive codes and add every entry whose score is
     * above `radius` to `res`.
     *
     * Entries whose id is set in `bitset` are skipped, mirroring
     * scan_codes(); previously the bitset argument was accepted but ignored.
     */
    void scan_codes_range (size_t list_size,
                           const uint8_t *codes,
                           const idx_t *ids,
                           float radius,
                           RangeQueryResult & res,
                           ConcurrentBitsetPtr bitset = nullptr) const override
    {
        for (size_t j = 0; j < list_size; j++) {
            if (!bitset || !bitset->test(ids[j])) {
                float accu = accu0 + dc.query_to_code (codes);

                if (accu > radius) {
                    int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
                    res.add (accu, id);
                }
            }
            codes += code_size;
        }
    }
};
/**
 * Inverted-list scanner for L2 search over scalar-quantized codes.
 *
 * Distances come from the distance computer `dc` (a DCClass). Lower distances
 * are better: scan_codes() replaces the heap top when a distance is below
 * simi[0], scan_codes_range() reports distances below `radius`.
 */
template<class DCClass>
struct IVFSQScannerL2: InvertedListScanner {
    DCClass dc;

    bool store_pairs, by_residual;
    size_t code_size;
    const Index *quantizer;
    idx_t list_no;    /// current inverted list
    const float *x;   /// current query

    std::vector<float> tmp;   /// residual buffer (d floats) used by set_list

    IVFSQScannerL2(int d, const std::vector<float> & trained,
                   size_t code_size, const Index *quantizer,
                   bool store_pairs, bool by_residual):
        dc(d, trained), store_pairs(store_pairs), by_residual(by_residual),
        code_size(code_size), quantizer(quantizer),
        list_no (0), x (nullptr), tmp (d)
    {
    }

    /// Remember the query. Without a coarse quantizer it is handed to the
    /// distance computer right away; otherwise set_list() does that.
    void set_query (const float *query) override {
        x = query;
        if (!quantizer) {
            dc.set_query (query);
        }
    }

    /// Select the list to scan. The list number is always recorded so that
    /// store_pairs ids are correct even when by_residual is false (it used to
    /// be recorded only in the by_residual branch).
    void set_list (idx_t list_no, float /*coarse_dis*/) override {
        this->list_no = list_no;
        if (by_residual) {
            // shift of x_in wrt centroid
            quantizer->Index::compute_residual (x, tmp.data(), list_no);
            dc.set_query (tmp.data ());
        } else {
            dc.set_query (x);
        }
    }

    /// Distance of a single code to the query set on the distance computer.
    float distance_to_code (const uint8_t *code) const final {
        return dc.query_to_code (code);
    }

    /**
     * Scan `list_size` consecutive codes and push improving distances onto
     * the result heap (simi / idxi, capacity k).
     *
     * Entries whose id is set in `bitset` are skipped. The reported id is
     * `list_no << 32 | j` when store_pairs is set, otherwise ids[j].
     * NOTE(review): ids[j] is read whenever a bitset is supplied, so ids is
     * assumed to be non-null in that case -- confirm against callers.
     *
     * @return number of heap updates performed
     */
    size_t scan_codes (size_t list_size,
                       const uint8_t *codes,
                       const idx_t *ids,
                       float *simi, idx_t *idxi,
                       size_t k,
                       ConcurrentBitsetPtr bitset) const override
    {
        size_t nup = 0;
        for (size_t j = 0; j < list_size; j++) {
            if (!bitset || !bitset->test(ids[j])) {
                float dis = dc.query_to_code (codes);

                if (dis < simi [0]) {
                    int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
                    maxheap_swap_top (k, simi, idxi, dis, id);
                    nup++;
                }
            }
            codes += code_size;
        }
        return nup;
    }

    /**
     * Scan `list_size` consecutive codes and add every entry whose distance
     * is below `radius` to `res`.
     *
     * Entries whose id is set in `bitset` are skipped, mirroring
     * scan_codes(); previously the bitset argument was accepted but ignored.
     */
    void scan_codes_range (size_t list_size,
                           const uint8_t *codes,
                           const idx_t *ids,
                           float radius,
                           RangeQueryResult & res,
                           ConcurrentBitsetPtr bitset = nullptr) const override
    {
        for (size_t j = 0; j < list_size; j++) {
            if (!bitset || !bitset->test(ids[j])) {
                float dis = dc.query_to_code (codes);

                if (dis < radius) {
                    int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
                    res.add (dis, id);
                }
            }
            codes += code_size;
        }
    }
};
/// Last dispatch stage: instantiate the scanner that matches the metric
/// carried by the distance computer's similarity type (DCClass::Sim).
/// Throws for any metric other than L2 / inner product.
template<class DCClass>
InvertedListScanner* sel2_InvertedListScanner
        (const ScalarQuantizer *sq,
         const Index *quantizer, bool store_pairs, bool r)
{
    constexpr MetricType metric = DCClass::Sim::metric_type;

    if (metric == METRIC_INNER_PRODUCT) {
        return new IVFSQScannerIP<DCClass>(sq->d, sq->trained, sq->code_size,
                                           store_pairs, r);
    }
    if (metric == METRIC_L2) {
        return new IVFSQScannerL2<DCClass>(sq->d, sq->trained, sq->code_size,
                                           quantizer, store_pairs, r);
    }
    FAISS_THROW_MSG("unsupported metric type");
}
/// Dispatch stage for codec-based quantizers: combine the codec, the
/// uniform flag and the similarity's SIMD width into a distance computer
/// type, then hand over to sel2_InvertedListScanner.
template<class Similarity, class Codec, bool uniform>
InvertedListScanner* sel12_InvertedListScanner
        (const ScalarQuantizer *sq,
         const Index *quantizer, bool store_pairs, bool r)
{
    using Quant = QuantizerTemplate<Codec, uniform, Similarity::simdwidth>;
    using Computer = DCTemplate<Quant, Similarity, Similarity::simdwidth>;

    return sel2_InvertedListScanner<Computer> (sq, quantizer, store_pairs, r);
}
/// Dispatch stage on the quantizer type stored in `sq`: pick the codec /
/// quantizer matching sq->qtype for the given similarity and forward to the
/// next stage. Throws "unknown qtype" for unhandled values.
template<class Similarity>
InvertedListScanner* sel1_InvertedListScanner
        (const ScalarQuantizer *sq, const Index *quantizer,
         bool store_pairs, bool r)
{
    constexpr int SIMDWIDTH = Similarity::simdwidth;

    // distance computers that do not go through a Codec
    using HalfDC = DCTemplate<QuantizerFP16<SIMDWIDTH>, Similarity, SIMDWIDTH>;
    using ByteDC = DistanceComputerByte<Similarity, SIMDWIDTH>;
    using DirectDC = DCTemplate<Quantizer8bitDirect<SIMDWIDTH>,
                                Similarity, SIMDWIDTH>;

    switch (sq->qtype) {
        case QuantizerType::QT_fp16:
            return sel2_InvertedListScanner<HalfDC>
                (sq, quantizer, store_pairs, r);

        case QuantizerType::QT_8bit_direct:
            // the byte-wise computer is only used for multiples of 16 dims
            return (sq->d % 16 == 0)
                ? sel2_InvertedListScanner<ByteDC>
                      (sq, quantizer, store_pairs, r)
                : sel2_InvertedListScanner<DirectDC>
                      (sq, quantizer, store_pairs, r);

        case QuantizerType::QT_4bit:
            return sel12_InvertedListScanner
                <Similarity, Codec4bit, false>(sq, quantizer, store_pairs, r);
        case QuantizerType::QT_6bit:
            return sel12_InvertedListScanner
                <Similarity, Codec6bit, false>(sq, quantizer, store_pairs, r);
        case QuantizerType::QT_8bit:
            return sel12_InvertedListScanner
                <Similarity, Codec8bit, false>(sq, quantizer, store_pairs, r);

        case QuantizerType::QT_4bit_uniform:
            return sel12_InvertedListScanner
                <Similarity, Codec4bit, true>(sq, quantizer, store_pairs, r);
        case QuantizerType::QT_8bit_uniform:
            return sel12_InvertedListScanner
                <Similarity, Codec8bit, true>(sq, quantizer, store_pairs, r);
    }

    FAISS_THROW_MSG ("unknown qtype");
    return nullptr;
}
/// First dispatch stage: choose the similarity type for metric `mt` at the
/// given SIMD width and forward to sel1_InvertedListScanner.
/// Throws for any metric other than L2 / inner product.
template<int SIMDWIDTH>
InvertedListScanner* sel0_InvertedListScanner
        (MetricType mt, const ScalarQuantizer *sq,
         const Index *quantizer, bool store_pairs, bool by_residual)
{
    if (mt == METRIC_INNER_PRODUCT) {
        using Sim = SimilarityIP<SIMDWIDTH>;
        return sel1_InvertedListScanner<Sim>
            (sq, quantizer, store_pairs, by_residual);
    }
    if (mt == METRIC_L2) {
        using Sim = SimilarityL2<SIMDWIDTH>;
        return sel1_InvertedListScanner<Sim>
            (sq, quantizer, store_pairs, by_residual);
    }
    FAISS_THROW_MSG("unsupported metric type");
}
} // anonymous namespace
/**
 * Create the inverted-list scanner for this quantizer.
 *
 * The work is delegated to the sq_sel_inv_list_scanner hook, which
 * hook_init() points at the AVX512 / AVX / reference implementation. The
 * former compile-time dispatch on `d` returned on every path and left this
 * hook call unreachable, so it has been dropped.
 *
 * @param mt           metric to search with
 * @param quantizer    coarse quantizer forwarded to the scanner
 * @param store_pairs  forwarded to the scanner
 * @param by_residual  forwarded to the scanner
 * @return scanner created by the hook
 */
InvertedListScanner* ScalarQuantizer::select_InvertedListScanner
        (MetricType mt, const Index *quantizer,
         bool store_pairs, bool by_residual) const
{
    /* use hook to decide use AVX512 or not */
    return sq_sel_inv_list_scanner(mt, this, quantizer, d, store_pairs, by_residual);
}
} // namespace faiss

View File

@ -77,6 +77,166 @@ struct ScalarQuantizer {
};
/**
 * Inverted-list scanner for inner-product search over scalar-quantized codes.
 *
 * Scores come from the distance computer `dc` (a DCClass) and are offset by
 * `accu0`. Higher scores are better: scan_codes() replaces the heap top when
 * a score exceeds simi[0], scan_codes_range() reports scores above `radius`.
 */
template<class DCClass>
struct IVFSQScannerIP: InvertedListScanner {
    DCClass dc;
    bool store_pairs, by_residual;

    size_t code_size;

    idx_t list_no;  /// current list (set to 0 for Flat index)
    float accu0;    /// added to all distances

    IVFSQScannerIP(int d, const std::vector<float> & trained,
                   size_t code_size, bool store_pairs,
                   bool by_residual):
        dc(d, trained), store_pairs(store_pairs),
        by_residual(by_residual),
        code_size(code_size), list_no(0), accu0(0)
    {}

    /// Forward the query vector to the distance computer.
    void set_query (const float *query) override {
        dc.set_query (query);
    }

    /// Select the list to scan. With by_residual the coarse distance becomes
    /// the constant offset added to every score of that list.
    void set_list (idx_t list_no, float coarse_dis) override {
        this->list_no = list_no;
        accu0 = by_residual ? coarse_dis : 0;
    }

    /// Score of a single code against the current query (offset included).
    float distance_to_code (const uint8_t *code) const final {
        return accu0 + dc.query_to_code (code);
    }

    /**
     * Scan `list_size` consecutive codes and push improving scores onto the
     * result heap (simi / idxi, capacity k).
     *
     * Entries whose id is set in `bitset` are skipped. The reported id is
     * `list_no << 32 | j` when store_pairs is set, otherwise ids[j].
     * NOTE(review): ids[j] is read whenever a bitset is supplied, so ids is
     * assumed to be non-null in that case -- confirm against callers.
     *
     * @return number of heap updates performed
     */
    size_t scan_codes (size_t list_size,
                       const uint8_t *codes,
                       const idx_t *ids,
                       float *simi, idx_t *idxi,
                       size_t k,
                       ConcurrentBitsetPtr bitset) const override
    {
        size_t nup = 0;

        for (size_t j = 0; j < list_size; j++) {
            if (!bitset || !bitset->test(ids[j])) {
                float accu = accu0 + dc.query_to_code (codes);

                if (accu > simi [0]) {
                    int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
                    minheap_swap_top (k, simi, idxi, accu, id);
                    nup++;
                }
            }
            codes += code_size;
        }
        return nup;
    }

    /**
     * Scan `list_size` consecutive codes and add every entry whose score is
     * above `radius` to `res`.
     *
     * Entries whose id is set in `bitset` are skipped, mirroring
     * scan_codes(); previously the bitset argument was accepted but ignored.
     */
    void scan_codes_range (size_t list_size,
                           const uint8_t *codes,
                           const idx_t *ids,
                           float radius,
                           RangeQueryResult & res,
                           ConcurrentBitsetPtr bitset = nullptr) const override
    {
        for (size_t j = 0; j < list_size; j++) {
            if (!bitset || !bitset->test(ids[j])) {
                float accu = accu0 + dc.query_to_code (codes);

                if (accu > radius) {
                    int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
                    res.add (accu, id);
                }
            }
            codes += code_size;
        }
    }
};
/**
 * Inverted-list scanner for L2 search over scalar-quantized codes.
 *
 * Distances come from the distance computer `dc` (a DCClass). Lower distances
 * are better: scan_codes() replaces the heap top when a distance is below
 * simi[0], scan_codes_range() reports distances below `radius`.
 */
template<class DCClass>
struct IVFSQScannerL2: InvertedListScanner {
    DCClass dc;

    bool store_pairs, by_residual;
    size_t code_size;
    const Index *quantizer;
    idx_t list_no;    /// current inverted list
    const float *x;   /// current query

    std::vector<float> tmp;   /// residual buffer (d floats) used by set_list

    IVFSQScannerL2(int d, const std::vector<float> & trained,
                   size_t code_size, const Index *quantizer,
                   bool store_pairs, bool by_residual):
        dc(d, trained), store_pairs(store_pairs), by_residual(by_residual),
        code_size(code_size), quantizer(quantizer),
        list_no (0), x (nullptr), tmp (d)
    {
    }

    /// Remember the query. Without a coarse quantizer it is handed to the
    /// distance computer right away; otherwise set_list() does that.
    void set_query (const float *query) override {
        x = query;
        if (!quantizer) {
            dc.set_query (query);
        }
    }

    /// Select the list to scan. The list number is always recorded so that
    /// store_pairs ids are correct even when by_residual is false (it used to
    /// be recorded only in the by_residual branch).
    void set_list (idx_t list_no, float /*coarse_dis*/) override {
        this->list_no = list_no;
        if (by_residual) {
            // shift of x_in wrt centroid
            quantizer->Index::compute_residual (x, tmp.data(), list_no);
            dc.set_query (tmp.data ());
        } else {
            dc.set_query (x);
        }
    }

    /// Distance of a single code to the query set on the distance computer.
    float distance_to_code (const uint8_t *code) const final {
        return dc.query_to_code (code);
    }

    /**
     * Scan `list_size` consecutive codes and push improving distances onto
     * the result heap (simi / idxi, capacity k).
     *
     * Entries whose id is set in `bitset` are skipped. The reported id is
     * `list_no << 32 | j` when store_pairs is set, otherwise ids[j].
     * NOTE(review): ids[j] is read whenever a bitset is supplied, so ids is
     * assumed to be non-null in that case -- confirm against callers.
     *
     * @return number of heap updates performed
     */
    size_t scan_codes (size_t list_size,
                       const uint8_t *codes,
                       const idx_t *ids,
                       float *simi, idx_t *idxi,
                       size_t k,
                       ConcurrentBitsetPtr bitset) const override
    {
        size_t nup = 0;
        for (size_t j = 0; j < list_size; j++) {
            if (!bitset || !bitset->test(ids[j])) {
                float dis = dc.query_to_code (codes);

                if (dis < simi [0]) {
                    int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
                    maxheap_swap_top (k, simi, idxi, dis, id);
                    nup++;
                }
            }
            codes += code_size;
        }
        return nup;
    }

    /**
     * Scan `list_size` consecutive codes and add every entry whose distance
     * is below `radius` to `res`.
     *
     * Entries whose id is set in `bitset` are skipped, mirroring
     * scan_codes(); previously the bitset argument was accepted but ignored.
     */
    void scan_codes_range (size_t list_size,
                           const uint8_t *codes,
                           const idx_t *ids,
                           float radius,
                           RangeQueryResult & res,
                           ConcurrentBitsetPtr bitset = nullptr) const override
    {
        for (size_t j = 0; j < list_size; j++) {
            if (!bitset || !bitset->test(ids[j])) {
                float dis = dc.query_to_code (codes);

                if (dis < radius) {
                    int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
                    res.add (dis, id);
                }
            }
            codes += code_size;
        }
    }
};
} // namespace faiss

View File

@ -15,6 +15,7 @@
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
@ -311,22 +312,6 @@ struct SimilarityL2<1> {
}
};
/* Placeholder specialization so that code instantiated with SIMDWIDTH = 8
 * builds: it inherits everything from SimilarityL2<1> and still reports
 * simdwidth = 1, so users that read Similarity::simdwidth pick the scalar
 * variants. */
template<>
struct SimilarityL2<8> : SimilarityL2<1> {
static constexpr int simdwidth = 1;
static constexpr MetricType metric_type = METRIC_L2;
explicit SimilarityL2 (const float * y) : SimilarityL2<1>(y) {}
};
/* Placeholder specialization so that code instantiated with SIMDWIDTH = 16
 * builds: it inherits everything from SimilarityL2<1> and still reports
 * simdwidth = 1, so users that read Similarity::simdwidth pick the scalar
 * variants. */
template<>
struct SimilarityL2<16> : SimilarityL2<1> {
static constexpr int simdwidth = 1;
static constexpr MetricType metric_type = METRIC_L2;
explicit SimilarityL2 (const float * y) : SimilarityL2<1>(y) {}
};
template<int SIMDWIDTH>
struct SimilarityIP {};
@ -360,22 +345,6 @@ struct SimilarityIP<1> {
}
};
/* Placeholder specialization so that code instantiated with SIMDWIDTH = 8
 * builds: it inherits everything from SimilarityIP<1> and still reports
 * simdwidth = 1, so users that read Similarity::simdwidth pick the scalar
 * variants. */
template<>
struct SimilarityIP<8> : SimilarityIP<1> {
static constexpr int simdwidth = 1;
static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
explicit SimilarityIP (const float * y) : SimilarityIP<1>(y) {}
};
/* Placeholder specialization so that code instantiated with SIMDWIDTH = 16
 * builds: it inherits everything from SimilarityIP<1> and still reports
 * simdwidth = 1, so users that read Similarity::simdwidth pick the scalar
 * variants. */
template<>
struct SimilarityIP<16> : SimilarityIP<1> {
static constexpr int simdwidth = 1;
static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
explicit SimilarityIP (const float * y) : SimilarityIP<1>(y) {}
};
/*******************************************************************
* DistanceComputer: combines a similarity and a quantizer to do
@ -544,5 +513,91 @@ SQDistanceComputer *select_distance_computer (
return nullptr;
}
/// Last dispatch stage: instantiate the scanner that matches the metric
/// carried by the distance computer's similarity type (DCClass::Sim).
/// Throws for any metric other than L2 / inner product.
template<class DCClass>
InvertedListScanner* sel2_InvertedListScanner (
        const ScalarQuantizer *sq,
        const Index *quantizer, bool store_pairs, bool r)
{
    constexpr MetricType metric = DCClass::Sim::metric_type;

    if (metric == METRIC_INNER_PRODUCT) {
        return new IVFSQScannerIP<DCClass>(sq->d, sq->trained, sq->code_size,
                                           store_pairs, r);
    }
    if (metric == METRIC_L2) {
        return new IVFSQScannerL2<DCClass>(sq->d, sq->trained, sq->code_size,
                                           quantizer, store_pairs, r);
    }
    FAISS_THROW_MSG("unsupported metric type");
}
/// Dispatch stage for codec-based quantizers: combine the codec, the
/// uniform flag and the similarity's SIMD width into a distance computer
/// type, then hand over to sel2_InvertedListScanner.
template<class Similarity, class Codec, bool uniform>
InvertedListScanner* sel12_InvertedListScanner (
        const ScalarQuantizer *sq,
        const Index *quantizer, bool store_pairs, bool r)
{
    using Quant = QuantizerTemplate<Codec, uniform, Similarity::simdwidth>;
    using Computer = DCTemplate<Quant, Similarity, Similarity::simdwidth>;

    return sel2_InvertedListScanner<Computer> (sq, quantizer, store_pairs, r);
}
/// Dispatch stage on the quantizer type stored in `sq`: pick the codec /
/// quantizer matching sq->qtype for the given similarity and forward to the
/// next stage. Throws "unknown qtype" for unhandled values.
template<class Similarity>
InvertedListScanner* sel1_InvertedListScanner (
        const ScalarQuantizer *sq, const Index *quantizer,
        bool store_pairs, bool r)
{
    constexpr int SIMDWIDTH = Similarity::simdwidth;

    // distance computers that do not go through a Codec
    using HalfDC = DCTemplate<QuantizerFP16<SIMDWIDTH>, Similarity, SIMDWIDTH>;
    using ByteDC = DistanceComputerByte<Similarity, SIMDWIDTH>;
    using DirectDC = DCTemplate<Quantizer8bitDirect<SIMDWIDTH>,
                                Similarity, SIMDWIDTH>;

    switch (sq->qtype) {
        case QuantizerType::QT_fp16:
            return sel2_InvertedListScanner<HalfDC>
                (sq, quantizer, store_pairs, r);

        case QuantizerType::QT_8bit_direct:
            // the byte-wise computer is only used for multiples of 16 dims
            return (sq->d % 16 == 0)
                ? sel2_InvertedListScanner<ByteDC>
                      (sq, quantizer, store_pairs, r)
                : sel2_InvertedListScanner<DirectDC>
                      (sq, quantizer, store_pairs, r);

        case QuantizerType::QT_4bit:
            return sel12_InvertedListScanner
                <Similarity, Codec4bit, false>(sq, quantizer, store_pairs, r);
        case QuantizerType::QT_6bit:
            return sel12_InvertedListScanner
                <Similarity, Codec6bit, false>(sq, quantizer, store_pairs, r);
        case QuantizerType::QT_8bit:
            return sel12_InvertedListScanner
                <Similarity, Codec8bit, false>(sq, quantizer, store_pairs, r);

        case QuantizerType::QT_4bit_uniform:
            return sel12_InvertedListScanner
                <Similarity, Codec4bit, true>(sq, quantizer, store_pairs, r);
        case QuantizerType::QT_8bit_uniform:
            return sel12_InvertedListScanner
                <Similarity, Codec8bit, true>(sq, quantizer, store_pairs, r);
    }

    FAISS_THROW_MSG ("unknown qtype");
    return nullptr;
}
/// First dispatch stage: choose the similarity type for metric `mt` at the
/// given SIMD width and forward to sel1_InvertedListScanner.
/// Throws for any metric other than L2 / inner product.
template<int SIMDWIDTH>
InvertedListScanner* sel0_InvertedListScanner (
        MetricType mt, const ScalarQuantizer *sq,
        const Index *quantizer, bool store_pairs, bool by_residual)
{
    if (mt == METRIC_INNER_PRODUCT) {
        using Sim = SimilarityIP<SIMDWIDTH>;
        return sel1_InvertedListScanner<Sim>
            (sq, quantizer, store_pairs, by_residual);
    }
    if (mt == METRIC_L2) {
        using Sim = SimilarityL2<SIMDWIDTH>;
        return sel1_InvertedListScanner<Sim>
            (sq, quantizer, store_pairs, by_residual);
    }
    FAISS_THROW_MSG("unsupported metric type");
}
} // namespace faiss

View File

@ -11,41 +11,24 @@
#include <cstdio>
#include <algorithm>
#include <omp.h>
#ifdef __SSE__
#include <immintrin.h>
#endif
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerCodec.h>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
#ifdef __AVX__
#define USE_AVX
#endif
/*******************************************************************
* Codec: converts between values in [0, 1] and an index in a code
* array. The "i" parameter is the vector component index (not byte
* index).
*/
struct Codec8bit_avx {
static void encode_component (float x, uint8_t *code, int i) {
code[i] = (int)(255 * x);
}
static float decode_component (const uint8_t *code, int i) {
return (code[i] + 0.5f) / 255.0f;
}
#ifdef USE_AVX
struct Codec8bit_avx : public Codec8bit {
static __m256 decode_8_components (const uint8_t *code, int i) {
uint64_t c8 = *(uint64_t*)(code + i);
__m128i c4lo = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8));
@ -59,20 +42,9 @@ struct Codec8bit_avx {
__m256 one_255 = _mm256_set1_ps (1.f / 255.f);
return f8 * one_255;
}
#endif
};
struct Codec4bit_avx {
static void encode_component (float x, uint8_t *code, int i) {
code [i / 2] |= (int)(x * 15.0) << ((i & 1) << 2);
}
static float decode_component (const uint8_t *code, int i) {
return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f;
}
#ifdef USE_AVX
struct Codec4bit_avx : public Codec4bit {
static __m256 decode_8_components (const uint8_t *code, int i) {
uint32_t c4 = *(uint32_t*)(code + (i >> 1));
uint32_t mask = 0x0f0f0f0f;
@ -92,54 +64,9 @@ struct Codec4bit_avx {
__m256 one_255 = _mm256_set1_ps (1.f / 15.f);
return f8 * one_255;
}
#endif
};
struct Codec6bit_avx {
static void encode_component (float x, uint8_t *code, int i) {
int bits = (int)(x * 63.0);
code += (i >> 2) * 3;
switch(i & 3) {
case 0:
code[0] |= bits;
break;
case 1:
code[0] |= bits << 6;
code[1] |= bits >> 2;
break;
case 2:
code[1] |= bits << 4;
code[2] |= bits >> 4;
break;
case 3:
code[2] |= bits << 2;
break;
}
}
static float decode_component (const uint8_t *code, int i) {
uint8_t bits;
code += (i >> 2) * 3;
switch(i & 3) {
case 0:
bits = code[0] & 0x3f;
break;
case 1:
bits = code[0] >> 6;
bits |= (code[1] & 0xf) << 2;
break;
case 2:
bits = code[1] >> 4;
bits |= (code[2] & 3) << 4;
break;
case 3:
bits = code[2] >> 2;
break;
}
return (bits + 0.5f) / 63.0f;
}
#ifdef USE_AVX
struct Codec6bit_avx : public Codec6bit {
static __m256 decode_8_components (const uint8_t *code, int i) {
return _mm256_set_ps
(decode_component(code, i + 7),
@ -151,127 +78,51 @@ struct Codec6bit_avx {
decode_component(code, i + 1),
decode_component(code, i + 0));
}
#endif
};
/*******************************************************************
* Quantizer: normalizes scalar vector components, then passes them
* through a codec
*******************************************************************/
template<class Codec, bool uniform, int SIMD>
struct QuantizerTemplate_avx {};
template<class Codec>
struct QuantizerTemplate_avx<Codec, true, 1>: Quantizer {
const size_t d;
const float vmin, vdiff;
QuantizerTemplate_avx(size_t d, const std::vector<float> &trained):
d(d), vmin(trained[0]), vdiff(trained[1])
{
}
void encode_vector(const float* x, uint8_t* code) const final {
for (size_t i = 0; i < d; i++) {
float xi = (x[i] - vmin) / vdiff;
if (xi < 0) {
xi = 0;
}
if (xi > 1.0) {
xi = 1.0;
}
Codec::encode_component(xi, code, i);
}
}
void decode_vector(const uint8_t* code, float* x) const final {
for (size_t i = 0; i < d; i++) {
float xi = Codec::decode_component(code, i);
x[i] = vmin + xi * vdiff;
}
}
float reconstruct_component (const uint8_t * code, int i) const
{
float xi = Codec::decode_component (code, i);
return vmin + xi * vdiff;
}
struct QuantizerTemplate_avx<Codec, true, 1> : public QuantizerTemplate<Codec, true, 1> {
QuantizerTemplate_avx(size_t d, const std::vector<float> &trained) :
QuantizerTemplate<Codec, true, 1> (d, trained) {}
};
#ifdef USE_AVX
template<class Codec>
struct QuantizerTemplate_avx<Codec, true, 8>: QuantizerTemplate_avx<Codec, true, 1> {
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained):
QuantizerTemplate_avx<Codec, true, 1> (d, trained) {}
struct QuantizerTemplate_avx<Codec, true, 8> : public QuantizerTemplate<Codec, true, 1> {
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained) :
QuantizerTemplate<Codec, true, 1> (d, trained) {}
__m256 reconstruct_8_components (const uint8_t * code, int i) const
{
__m256 reconstruct_8_components (const uint8_t * code, int i) const {
__m256 xi = Codec::decode_8_components (code, i);
return _mm256_set1_ps(this->vmin) + xi * _mm256_set1_ps (this->vdiff);
}
};
#endif
template<class Codec>
struct QuantizerTemplate_avx<Codec, false, 1>: Quantizer {
const size_t d;
const float *vmin, *vdiff;
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained):
d(d), vmin(trained.data()), vdiff(trained.data() + d) {}
void encode_vector(const float* x, uint8_t* code) const final {
for (size_t i = 0; i < d; i++) {
float xi = (x[i] - vmin[i]) / vdiff[i];
if (xi < 0)
xi = 0;
if (xi > 1.0)
xi = 1.0;
Codec::encode_component(xi, code, i);
}
}
void decode_vector(const uint8_t* code, float* x) const final {
for (size_t i = 0; i < d; i++) {
float xi = Codec::decode_component(code, i);
x[i] = vmin[i] + xi * vdiff[i];
}
}
float reconstruct_component (const uint8_t * code, int i) const
{
float xi = Codec::decode_component (code, i);
return vmin[i] + xi * vdiff[i];
}
struct QuantizerTemplate_avx<Codec, false, 1> : public QuantizerTemplate<Codec, false, 1> {
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained) :
QuantizerTemplate<Codec, false, 1> (d, trained) {}
};
#ifdef USE_AVX
template<class Codec>
struct QuantizerTemplate_avx<Codec, false, 8>: QuantizerTemplate_avx<Codec, false, 1> {
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained):
QuantizerTemplate_avx<Codec, false, 1> (d, trained) {}
struct QuantizerTemplate_avx<Codec, false, 8>: public QuantizerTemplate<Codec, false, 1> {
QuantizerTemplate_avx (size_t d, const std::vector<float> &trained) :
QuantizerTemplate<Codec, false, 1> (d, trained) {}
__m256 reconstruct_8_components (const uint8_t * code, int i) const
{
__m256 reconstruct_8_components (const uint8_t * code, int i) const {
__m256 xi = Codec::decode_8_components (code, i);
return _mm256_loadu_ps (this->vmin + i) + xi * _mm256_loadu_ps (this->vdiff + i);
}
};
#endif
/*******************************************************************
* FP16 quantizer
@ -281,45 +132,22 @@ template<int SIMDWIDTH>
struct QuantizerFP16_avx {};
template<>
struct QuantizerFP16_avx<1>: Quantizer {
const size_t d;
QuantizerFP16_avx(size_t d, const std::vector<float> & /* unused */):
d(d) {}
void encode_vector(const float* x, uint8_t* code) const final {
for (size_t i = 0; i < d; i++) {
((uint16_t*)code)[i] = encode_fp16(x[i]);
}
}
void decode_vector(const uint8_t* code, float* x) const final {
for (size_t i = 0; i < d; i++) {
x[i] = decode_fp16(((uint16_t*)code)[i]);
}
}
float reconstruct_component (const uint8_t * code, int i) const
{
return decode_fp16(((uint16_t*)code)[i]);
}
struct QuantizerFP16_avx<1> : public QuantizerFP16<1> {
QuantizerFP16_avx (size_t d, const std::vector<float> &unused) :
QuantizerFP16<1> (d, unused) {}
};
#ifdef USE_AVX
template<>
struct QuantizerFP16_avx<8>: QuantizerFP16_avx<1> {
struct QuantizerFP16_avx<8>: public QuantizerFP16<1> {
QuantizerFP16_avx (size_t d, const std::vector<float> &trained):
QuantizerFP16_avx<1> (d, trained) {}
QuantizerFP16<1> (d, trained) {}
__m256 reconstruct_8_components (const uint8_t * code, int i) const
{
__m256 reconstruct_8_components (const uint8_t * code, int i) const {
__m128i codei = _mm_loadu_si128 ((const __m128i*)(code + 2 * i));
return _mm256_cvtph_ps (codei);
}
};
#endif
/*******************************************************************
* 8bit_direct quantizer
@ -329,75 +157,46 @@ template<int SIMDWIDTH>
struct Quantizer8bitDirect_avx {};
template<>
struct Quantizer8bitDirect_avx<1>: Quantizer {
const size_t d;
Quantizer8bitDirect_avx(size_t d, const std::vector<float> & /* unused */):
d(d) {}
void encode_vector(const float* x, uint8_t* code) const final {
for (size_t i = 0; i < d; i++) {
code[i] = (uint8_t)x[i];
}
}
void decode_vector(const uint8_t* code, float* x) const final {
for (size_t i = 0; i < d; i++) {
x[i] = code[i];
}
}
float reconstruct_component (const uint8_t * code, int i) const
{
return code[i];
}
struct Quantizer8bitDirect_avx<1> : public Quantizer8bitDirect<1> {
Quantizer8bitDirect_avx (size_t d, const std::vector<float> &unused) :
Quantizer8bitDirect(d, unused) {}
};
#ifdef USE_AVX
template<>
struct Quantizer8bitDirect_avx<8>: Quantizer8bitDirect_avx<1> {
Quantizer8bitDirect_avx (size_t d, const std::vector<float> &trained):
Quantizer8bitDirect_avx<1> (d, trained) {}
struct Quantizer8bitDirect_avx<8>: public Quantizer8bitDirect<1> {
Quantizer8bitDirect_avx (size_t d, const std::vector<float> &trained) :
Quantizer8bitDirect<1> (d, trained) {}
__m256 reconstruct_8_components (const uint8_t * code, int i) const
{
__m256 reconstruct_8_components (const uint8_t * code, int i) const {
__m128i x8 = _mm_loadl_epi64((__m128i*)(code + i)); // 8 * int8
__m256i y8 = _mm256_cvtepu8_epi32 (x8); // 8 * int32
return _mm256_cvtepi32_ps (y8); // 8 * float32
}
};
#endif
template<int SIMDWIDTH>
Quantizer *select_quantizer_1_avx (
QuantizerType qtype,
size_t d, const std::vector<float> & trained)
{
Quantizer *select_quantizer_1_avx (QuantizerType qtype, size_t d,
const std::vector<float> & trained) {
switch(qtype) {
case QuantizerType::QT_8bit:
return new QuantizerTemplate_avx<Codec8bit_avx, false, SIMDWIDTH>(d, trained);
case QuantizerType::QT_6bit:
return new QuantizerTemplate_avx<Codec6bit_avx, false, SIMDWIDTH>(d, trained);
case QuantizerType::QT_4bit:
return new QuantizerTemplate_avx<Codec4bit_avx, false, SIMDWIDTH>(d, trained);
case QuantizerType::QT_8bit_uniform:
return new QuantizerTemplate_avx<Codec8bit_avx, true, SIMDWIDTH>(d, trained);
case QuantizerType::QT_4bit_uniform:
return new QuantizerTemplate_avx<Codec4bit_avx, true, SIMDWIDTH>(d, trained);
case QuantizerType::QT_fp16:
return new QuantizerFP16_avx<SIMDWIDTH> (d, trained);
case QuantizerType::QT_8bit_direct:
return new Quantizer8bitDirect_avx<SIMDWIDTH> (d, trained);
case QuantizerType::QT_8bit:
return new QuantizerTemplate_avx<Codec8bit_avx, false, SIMDWIDTH>(d, trained);
case QuantizerType::QT_6bit:
return new QuantizerTemplate_avx<Codec6bit_avx, false, SIMDWIDTH>(d, trained);
case QuantizerType::QT_4bit:
return new QuantizerTemplate_avx<Codec4bit_avx, false, SIMDWIDTH>(d, trained);
case QuantizerType::QT_8bit_uniform:
return new QuantizerTemplate_avx<Codec8bit_avx, true, SIMDWIDTH>(d, trained);
case QuantizerType::QT_4bit_uniform:
return new QuantizerTemplate_avx<Codec4bit_avx, true, SIMDWIDTH>(d, trained);
case QuantizerType::QT_fp16:
return new QuantizerFP16_avx<SIMDWIDTH>(d, trained);
case QuantizerType::QT_8bit_direct:
return new Quantizer8bitDirect_avx<SIMDWIDTH>(d, trained);
}
FAISS_THROW_MSG ("unknown qtype");
}
/*******************************************************************
* Similarity: gets vector components and computes a similarity wrt. a
* query vector stored in the object. The data fields just encapsulate
@ -407,42 +206,14 @@ Quantizer *select_quantizer_1_avx (
/* Primary template has an empty body; the explicit specializations of
 * SimilarityL2_avx supply the implementation for each SIMD width. */
template<int SIMDWIDTH>
struct SimilarityL2_avx {};
template<>
struct SimilarityL2_avx<1> {
struct SimilarityL2_avx<1> : public SimilarityL2<1> {
static constexpr int simdwidth = 1;
static constexpr MetricType metric_type = METRIC_L2;
const float *y, *yi;
explicit SimilarityL2_avx (const float * y): y(y) {}
/******* scalar accumulator *******/
float accu;
void begin () {
accu = 0;
yi = y;
}
void add_component (float x) {
float tmp = *yi++ - x;
accu += tmp * tmp;
}
void add_component_2 (float x1, float x2) {
float tmp = x1 - x2;
accu += tmp * tmp;
}
float result () {
return accu;
}
explicit SimilarityL2_avx (const float * y) : SimilarityL2<1>(y) {}
};
#ifdef USE_AVX
template<>
struct SimilarityL2_avx<8> {
static constexpr int simdwidth = 8;
@ -480,51 +251,18 @@ struct SimilarityL2_avx<8> {
}
};
/* Same as SimilarityL2_avx<8>; provided only so the build passes. */
template<>
struct SimilarityL2_avx<16> : SimilarityL2_avx<8>{
static constexpr int simdwidth = 8;
static constexpr MetricType metric_type = METRIC_L2;
explicit SimilarityL2_avx (const float * y) : SimilarityL2_avx<8>(y) {}
};
#endif
/* Primary template has an empty body; the explicit specializations of
 * SimilarityIP_avx supply the implementation for each SIMD width. */
template<int SIMDWIDTH>
struct SimilarityIP_avx {};
template<>
struct SimilarityIP_avx<1> {
struct SimilarityIP_avx<1> : public SimilarityIP<1> {
static constexpr int simdwidth = 1;
static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
const float *y, *yi;
float accu;
explicit SimilarityIP_avx (const float * y):
y (y) {}
void begin () {
accu = 0;
yi = y;
}
void add_component (float x) {
accu += *yi++ * x;
}
void add_component_2 (float x1, float x2) {
accu += x1 * x2;
}
float result () {
return accu;
}
explicit SimilarityIP_avx (const float * y) : SimilarityIP<1>(y) {}
};
#ifdef USE_AVX
template<>
struct SimilarityIP_avx<8> {
static constexpr int simdwidth = 8;
@ -534,8 +272,7 @@ struct SimilarityIP_avx<8> {
float accu;
explicit SimilarityIP_avx (const float * y):
y (y) {}
explicit SimilarityIP_avx (const float * y): y (y) {}
__m256 accu8;
@ -564,15 +301,6 @@ struct SimilarityIP_avx<8> {
}
};
/* Same as SimilarityIP_avx<8>; provided only so the build passes. */
template<>
struct SimilarityIP_avx<16> : SimilarityIP_avx<8> {
static constexpr int simdwidth = 8;
static constexpr MetricType metric_type = METRIC_INNER_PRODUCT;
explicit SimilarityIP_avx (const float * y) : SimilarityIP_avx<8>(y) {}
};
#endif
/*******************************************************************
* DistanceComputer: combines a similarity and a quantizer to do
@ -583,69 +311,19 @@ template<class Quantizer, class Similarity, int SIMDWIDTH>
struct DCTemplate_avx : SQDistanceComputer {};
template<class Quantizer, class Similarity>
struct DCTemplate_avx<Quantizer, Similarity, 1> : SQDistanceComputer
{
using Sim = Similarity;
Quantizer quant;
DCTemplate_avx(size_t d, const std::vector<float> &trained):
quant(d, trained)
{}
float compute_distance(const float* x, const uint8_t* code) const {
Similarity sim(x);
sim.begin();
for (size_t i = 0; i < quant.d; i++) {
float xi = quant.reconstruct_component(code, i);
sim.add_component(xi);
}
return sim.result();
}
float compute_code_distance(const uint8_t* code1, const uint8_t* code2)
const {
Similarity sim(nullptr);
sim.begin();
for (size_t i = 0; i < quant.d; i++) {
float x1 = quant.reconstruct_component(code1, i);
float x2 = quant.reconstruct_component(code2, i);
sim.add_component_2(x1, x2);
}
return sim.result();
}
void set_query (const float *x) final {
q = x;
}
/// compute distance of vector i to current query
float operator () (idx_t i) final {
return compute_distance (q, codes + i * code_size);
}
float symmetric_dis (idx_t i, idx_t j) override {
return compute_code_distance (codes + i * code_size,
codes + j * code_size);
}
float query_to_code (const uint8_t * code) const {
return compute_distance (q, code);
}
struct DCTemplate_avx<Quantizer, Similarity, 1> : public DCTemplate<Quantizer, Similarity, 1> {
DCTemplate_avx(size_t d, const std::vector<float> &trained) :
DCTemplate<Quantizer, Similarity, 1>(d, trained) {}
};
#ifdef USE_AVX
template<class Quantizer, class Similarity>
struct DCTemplate_avx<Quantizer, Similarity, 8> : SQDistanceComputer
{
struct DCTemplate_avx<Quantizer, Similarity, 8> : SQDistanceComputer {
using Sim = Similarity;
Quantizer quant;
DCTemplate_avx(size_t d, const std::vector<float> &trained):
quant(d, trained)
{}
quant(d, trained) {}
float compute_distance(const float* x, const uint8_t* code) const {
Similarity sim(x);
@ -688,9 +366,6 @@ struct DCTemplate_avx<Quantizer, Similarity, 8> : SQDistanceComputer
}
};
#endif
/*******************************************************************
* DistanceComputerByte: computes distances in the integer domain
@ -700,58 +375,11 @@ template<class Similarity, int SIMDWIDTH>
struct DistanceComputerByte_avx : SQDistanceComputer {};
template<class Similarity>
struct DistanceComputerByte_avx<Similarity, 1> : SQDistanceComputer {
using Sim = Similarity;
int d;
std::vector<uint8_t> tmp;
DistanceComputerByte_avx(int d, const std::vector<float> &): d(d), tmp(d) {
}
int compute_code_distance(const uint8_t* code1, const uint8_t* code2)
const {
int accu = 0;
for (int i = 0; i < d; i++) {
if (Sim::metric_type == METRIC_INNER_PRODUCT) {
accu += int(code1[i]) * code2[i];
} else {
int diff = int(code1[i]) - code2[i];
accu += diff * diff;
}
}
return accu;
}
void set_query (const float *x) final {
for (int i = 0; i < d; i++) {
tmp[i] = int(x[i]);
}
}
int compute_distance(const float* x, const uint8_t* code) {
set_query(x);
return compute_code_distance(tmp.data(), code);
}
/// compute distance of vector i to current query
float operator () (idx_t i) final {
return compute_distance (q, codes + i * code_size);
}
float symmetric_dis (idx_t i, idx_t j) override {
return compute_code_distance (codes + i * code_size,
codes + j * code_size);
}
float query_to_code (const uint8_t * code) const {
return compute_code_distance (tmp.data(), code);
}
struct DistanceComputerByte_avx<Similarity, 1> : public DistanceComputerByte<Similarity, 1> {
DistanceComputerByte_avx(int d, const std::vector<float> &unused) :
DistanceComputerByte<Similarity, 1>(d, unused) {}
};
#ifdef USE_AVX
template<class Similarity>
struct DistanceComputerByte_avx<Similarity, 8> : SQDistanceComputer {
using Sim = Similarity;
@ -759,11 +387,9 @@ struct DistanceComputerByte_avx<Similarity, 8> : SQDistanceComputer {
int d;
std::vector<uint8_t> tmp;
DistanceComputerByte_avx(int d, const std::vector<float> &): d(d), tmp(d) {
}
DistanceComputerByte_avx(int d, const std::vector<float> &): d(d), tmp(d) {}
int compute_code_distance(const uint8_t* code1, const uint8_t* code2)
const {
int compute_code_distance(const uint8_t* code1, const uint8_t* code2) const {
// __m256i accu = _mm256_setzero_ps ();
__m256i accu = _mm256_setzero_si256 ();
for (int i = 0; i < d; i += 16) {
@ -819,14 +445,12 @@ struct DistanceComputerByte_avx<Similarity, 8> : SQDistanceComputer {
}
};
#endif
/*******************************************************************
* select_distance_computer: runtime selection of template
* specialization
*******************************************************************/
template<class Sim>
SQDistanceComputer *select_distance_computer_avx (
QuantizerType qtype,
@ -834,41 +458,119 @@ SQDistanceComputer *select_distance_computer_avx (
{
constexpr int SIMDWIDTH = Sim::simdwidth;
switch(qtype) {
case QuantizerType::QT_8bit_uniform:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec8bit_avx, true, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_8bit_uniform:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec8bit_avx, true, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_4bit_uniform:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec4bit_avx, true, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_4bit_uniform:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec4bit_avx, true, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_8bit:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec8bit_avx, false, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_8bit:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec8bit_avx, false, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_6bit:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec6bit_avx, false, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_6bit:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec6bit_avx, false, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_4bit:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec4bit_avx, false, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_4bit:
return new DCTemplate_avx<QuantizerTemplate_avx<Codec4bit_avx, false, SIMDWIDTH>,
Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_fp16:
return new DCTemplate_avx
<QuantizerFP16_avx<SIMDWIDTH>, Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_8bit_direct:
if (d % 16 == 0) {
return new DistanceComputerByte_avx<Sim, SIMDWIDTH>(d, trained);
} else {
case QuantizerType::QT_fp16:
return new DCTemplate_avx
<Quantizer8bitDirect_avx<SIMDWIDTH>, Sim, SIMDWIDTH>(d, trained);
}
<QuantizerFP16_avx<SIMDWIDTH>, Sim, SIMDWIDTH>(d, trained);
case QuantizerType::QT_8bit_direct:
if (d % 16 == 0) {
return new DistanceComputerByte_avx<Sim, SIMDWIDTH>(d, trained);
} else {
return new DCTemplate_avx
<Quantizer8bitDirect_avx<SIMDWIDTH>, Sim, SIMDWIDTH>(d, trained);
}
}
FAISS_THROW_MSG ("unknown qtype");
return nullptr;
}
/* AVX-named pass-through: hands all four arguments, unchanged, to
 * sel2_InvertedListScanner instantiated with the same DCClass and returns
 * whatever that call returns. */
template<class DCClass>
InvertedListScanner* sel2_InvertedListScanner_avx (
        const ScalarQuantizer *sq, const Index *quantizer,
        bool store_pairs, bool r)
{
    InvertedListScanner *scanner =
        sel2_InvertedListScanner<DCClass> (sq, quantizer, store_pairs, r);
    return scanner;
}
/* Builds the distance-computer type for a (Similarity, Codec, uniform)
 * combination and forwards to sel2_InvertedListScanner_avx with it.
 * The SIMD width is taken from Similarity::simdwidth. */
template<class Similarity, class Codec, bool uniform>
InvertedListScanner* sel12_InvertedListScanner_avx (
        const ScalarQuantizer *sq, const Index *quantizer,
        bool store_pairs, bool r)
{
    using DistanceComputerT = DCTemplate_avx<
        QuantizerTemplate_avx<Codec, uniform, Similarity::simdwidth>,
        Similarity,
        Similarity::simdwidth>;

    return sel2_InvertedListScanner_avx<DistanceComputerT> (sq, quantizer, store_pairs, r);
}
/* Dispatches on sq->qtype to the scanner built for the matching codec /
 * quantizer, for a fixed Similarity. Codec-based types go through
 * sel12_InvertedListScanner_avx; fp16 and 8bit_direct name their distance
 * computer explicitly. Raises through FAISS_THROW_MSG when no case matches. */
template<class Similarity>
InvertedListScanner* sel1_InvertedListScanner_avx (
        const ScalarQuantizer *sq, const Index *quantizer,
        bool store_pairs, bool r)
{
    constexpr int W = Similarity::simdwidth;

    using Fp16DC   = DCTemplate_avx<QuantizerFP16_avx<W>, Similarity, W>;
    using ByteDC   = DistanceComputerByte_avx<Similarity, W>;
    using DirectDC = DCTemplate_avx<Quantizer8bitDirect_avx<W>, Similarity, W>;

    switch (sq->qtype) {
        case QuantizerType::QT_8bit_uniform:
            return sel12_InvertedListScanner_avx<Similarity, Codec8bit_avx, true>
                (sq, quantizer, store_pairs, r);
        case QuantizerType::QT_4bit_uniform:
            return sel12_InvertedListScanner_avx<Similarity, Codec4bit_avx, true>
                (sq, quantizer, store_pairs, r);
        case QuantizerType::QT_8bit:
            return sel12_InvertedListScanner_avx<Similarity, Codec8bit_avx, false>
                (sq, quantizer, store_pairs, r);
        case QuantizerType::QT_4bit:
            return sel12_InvertedListScanner_avx<Similarity, Codec4bit_avx, false>
                (sq, quantizer, store_pairs, r);
        case QuantizerType::QT_6bit:
            return sel12_InvertedListScanner_avx<Similarity, Codec6bit_avx, false>
                (sq, quantizer, store_pairs, r);
        case QuantizerType::QT_fp16:
            return sel2_InvertedListScanner_avx<Fp16DC> (sq, quantizer, store_pairs, r);
        case QuantizerType::QT_8bit_direct:
            // The byte-domain computer is chosen only when d is a multiple of 16.
            return (sq->d % 16 == 0)
                ? sel2_InvertedListScanner_avx<ByteDC> (sq, quantizer, store_pairs, r)
                : sel2_InvertedListScanner_avx<DirectDC> (sq, quantizer, store_pairs, r);
    }

    FAISS_THROW_MSG ("unknown qtype");
    return nullptr;
}
/* Top of the scanner selection chain: maps the metric type onto the matching
 * similarity class at the given SIMD width and continues with
 * sel1_InvertedListScanner_avx. Only METRIC_L2 and METRIC_INNER_PRODUCT are
 * handled; anything else raises through FAISS_THROW_MSG. */
template<int SIMDWIDTH>
InvertedListScanner* sel0_InvertedListScanner_avx (
        MetricType mt, const ScalarQuantizer *sq,
        const Index *quantizer, bool store_pairs, bool by_residual)
{
    if (mt == METRIC_L2) {
        using L2Sim = SimilarityL2_avx<SIMDWIDTH>;
        return sel1_InvertedListScanner_avx<L2Sim> (sq, quantizer, store_pairs, by_residual);
    }

    if (mt == METRIC_INNER_PRODUCT) {
        using IPSim = SimilarityIP_avx<SIMDWIDTH>;
        return sel1_InvertedListScanner_avx<IPSim> (sq, quantizer, store_pairs, by_residual);
    }

    FAISS_THROW_MSG("unsupported metric type");
}
} // namespace faiss

File diff suppressed because it is too large Load Diff

View File

@ -18,18 +18,22 @@ namespace faiss {
/* SSE */
/**
 * Reference (SIMD width 1) distance-computer selection.
 *
 * @param metric   METRIC_L2 selects SimilarityL2<1>; every other value takes
 *                 the SimilarityIP<1> path
 * @param qtype    forwarded to select_distance_computer
 * @param dim      forwarded to select_distance_computer
 * @param trained  forwarded to select_distance_computer
 * @return         result of select_distance_computer for the chosen similarity
 */
SQDistanceComputer *
sq_get_distance_computer_ref (MetricType metric, QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    if (metric == METRIC_L2) {
        return select_distance_computer<SimilarityL2<1>>(qtype, dim, trained);
    } else {
        return select_distance_computer<SimilarityIP<1>>(qtype, dim, trained);
    }
}
/* Reference quantizer selection: always uses SIMD width 1 and forwards
 * (qtype, dim, trained) to select_quantizer_1<1>. */
Quantizer *
sq_select_quantizer_ref (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    return select_quantizer_1<1> (qtype, dim, trained);
}
/* Reference inverted-list scanner selection: always uses SIMD width 1.
 * `dim` is accepted but not consulted by this variant. */
InvertedListScanner*
sq_select_inverted_list_scanner_ref (MetricType mt, const ScalarQuantizer *sq, const Index *quantizer, size_t dim, bool store_pairs, bool by_residual) {
    InvertedListScanner *scanner =
        sel0_InvertedListScanner<1> (mt, sq, quantizer, store_pairs, by_residual);
    return scanner;
}
} // namespace faiss

View File

@ -9,17 +9,33 @@
#pragma once
#include <vector>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerOp.h>
#include <faiss/MetricType.h>
namespace faiss {
SQDistanceComputer *
sq_get_distance_computer_L2_sse(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
SQDistanceComputer *
sq_get_distance_computer_IP_sse(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
sq_get_distance_computer_ref(
MetricType metric,
QuantizerType qtype,
size_t dim,
const std::vector<float>& trained);
Quantizer *
sq_select_quantizer_sse(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
sq_select_quantizer_ref(
QuantizerType qtype,
size_t dim,
const std::vector<float>& trained);
/* Reference inverted-list scanner selection.
 * The definition forwards mt, sq, quantizer, store_pairs and by_residual to
 * sel0_InvertedListScanner<1>; dim is part of the signature but is not read
 * by this variant. */
InvertedListScanner*
sq_select_inverted_list_scanner_ref(
MetricType mt,
const ScalarQuantizer *sq,
const Index *quantizer,
size_t dim,
bool store_pairs,
bool by_residual);
} // namespace faiss

View File

@ -17,25 +17,24 @@ namespace faiss {
********************************************************************/
/**
 * AVX distance-computer selection.
 *
 * The similarity class is chosen from the metric (METRIC_L2 -> L2, every
 * other value -> inner product) and the SIMD width from the dimension
 * (8 when dim is a multiple of 8, otherwise 1).
 *
 * @param metric   metric type, see above
 * @param qtype    forwarded to select_distance_computer_avx
 * @param dim      vector dimension; also decides the SIMD width
 * @param trained  forwarded to select_distance_computer_avx
 */
SQDistanceComputer *
sq_get_distance_computer_avx (MetricType metric, QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    if (metric == METRIC_L2) {
        if (dim % 8 == 0) {
            return select_distance_computer_avx<SimilarityL2_avx<8>>(qtype, dim, trained);
        } else {
            return select_distance_computer_avx<SimilarityL2_avx<1>>(qtype, dim, trained);
        }
    } else {
        if (dim % 8 == 0) {
            return select_distance_computer_avx<SimilarityIP_avx<8>>(qtype, dim, trained);
        } else {
            return select_distance_computer_avx<SimilarityIP_avx<1>>(qtype, dim, trained);
        }
    }
}
Quantizer *
sq_select_quantizer_avx(QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
sq_select_quantizer_avx (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
if (dim % 8 == 0) {
return select_quantizer_1_avx<8>(qtype, dim, trained);
} else {
@ -43,4 +42,13 @@ sq_select_quantizer_avx(QuantizerType qtype, size_t dim, const std::vector<float
}
}
/* AVX inverted-list scanner selection: SIMD width 8 when dim is a multiple
 * of 8, otherwise width 1. All other arguments are forwarded unchanged. */
InvertedListScanner*
sq_select_inverted_list_scanner_avx (MetricType mt, const ScalarQuantizer *sq, const Index *quantizer, size_t dim, bool store_pairs, bool by_residual) {
    const bool multiple_of_8 = (dim % 8 == 0);
    return multiple_of_8
        ? sel0_InvertedListScanner_avx<8> (mt, sq, quantizer, store_pairs, by_residual)
        : sel0_InvertedListScanner_avx<1> (mt, sq, quantizer, store_pairs, by_residual);
}
} // namespace faiss

View File

@ -10,18 +10,32 @@
#pragma once
#include <vector>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerOp.h>
#include <faiss/MetricType.h>
namespace faiss {
SQDistanceComputer *
sq_get_distance_computer_L2_avx(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
SQDistanceComputer *
sq_get_distance_computer_IP_avx(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
sq_get_distance_computer_avx(
MetricType metric,
QuantizerType qtype,
size_t dim,
const std::vector<float>& trained);
Quantizer *
sq_select_quantizer_avx(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
sq_select_quantizer_avx(
QuantizerType qtype,
size_t dim,
const std::vector<float>& trained);
/* AVX inverted-list scanner selection.
 * The definition picks sel0_InvertedListScanner_avx<8> when dim % 8 == 0 and
 * sel0_InvertedListScanner_avx<1> otherwise; the remaining arguments are
 * passed through unchanged. */
InvertedListScanner*
sq_select_inverted_list_scanner_avx(
MetricType mt,
const ScalarQuantizer *sq,
const Index *quantizer,
size_t dim,
bool store_pairs,
bool by_residual);
} // namespace faiss

View File

@ -17,24 +17,23 @@ namespace faiss {
********************************************************************/
/**
 * AVX-512 distance-computer selection.
 *
 * The similarity class is chosen from the metric (METRIC_L2 -> L2, every
 * other value -> inner product) and the SIMD width from the dimension
 * (16 when dim is a multiple of 16, else 8 when a multiple of 8, else 1).
 *
 * @param metric   metric type, see above
 * @param qtype    forwarded to select_distance_computer_avx512
 * @param dim      vector dimension; also decides the SIMD width
 * @param trained  forwarded to select_distance_computer_avx512
 */
SQDistanceComputer *
sq_get_distance_computer_avx512 (MetricType metric, QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    if (metric == METRIC_L2) {
        if (dim % 16 == 0) {
            return select_distance_computer_avx512<SimilarityL2_avx512<16>>(qtype, dim, trained);
        } else if (dim % 8 == 0) {
            return select_distance_computer_avx512<SimilarityL2_avx512<8>>(qtype, dim, trained);
        } else {
            return select_distance_computer_avx512<SimilarityL2_avx512<1>>(qtype, dim, trained);
        }
    } else {
        if (dim % 16 == 0) {
            // Width 16 must use the inner-product similarity like the other
            // widths in this branch; using the L2 class here would return L2
            // distances for inner-product queries.
            return select_distance_computer_avx512<SimilarityIP_avx512<16>>(qtype, dim, trained);
        } else if (dim % 8 == 0) {
            return select_distance_computer_avx512<SimilarityIP_avx512<8>>(qtype, dim, trained);
        } else {
            return select_distance_computer_avx512<SimilarityIP_avx512<1>>(qtype, dim, trained);
        }
    }
}
@ -49,5 +48,15 @@ sq_select_quantizer_avx512 (QuantizerType qtype, size_t dim, const std::vector<f
}
}
/* AVX-512 inverted-list scanner selection: SIMD width 16 when dim is a
 * multiple of 16, else 8 when a multiple of 8, else 1. All other arguments
 * are forwarded unchanged. */
InvertedListScanner*
sq_select_inverted_list_scanner_avx512 (MetricType mt, const ScalarQuantizer *sq, const Index *quantizer, size_t dim, bool store_pairs, bool by_residual) {
    if (dim % 16 == 0) {
        return sel0_InvertedListScanner_avx512<16> (mt, sq, quantizer, store_pairs, by_residual);
    }
    if (dim % 8 == 0) {
        return sel0_InvertedListScanner_avx512<8> (mt, sq, quantizer, store_pairs, by_residual);
    }
    return sel0_InvertedListScanner_avx512<1> (mt, sq, quantizer, store_pairs, by_residual);
}
} // namespace faiss

View File

@ -10,17 +10,32 @@
#pragma once
#include <vector>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerOp.h>
#include <faiss/MetricType.h>
namespace faiss {
SQDistanceComputer *
sq_get_distance_computer_L2_avx512(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
SQDistanceComputer *
sq_get_distance_computer_IP_avx512(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
sq_get_distance_computer_avx512(
MetricType metric,
QuantizerType qtype,
size_t dim,
const std::vector<float>& trained);
Quantizer *
sq_select_quantizer_avx512(QuantizerType qtype, size_t dim, const std::vector<float>& trained);
sq_select_quantizer_avx512(
QuantizerType qtype,
size_t dim,
const std::vector<float>& trained);
/* AVX-512 inverted-list scanner selection.
 * The definition picks sel0_InvertedListScanner_avx512<16> when
 * dim % 16 == 0, <8> when dim % 8 == 0, and <1> otherwise; the remaining
 * arguments are passed through unchanged. */
InvertedListScanner*
sq_select_inverted_list_scanner_avx512(
MetricType mt,
const ScalarQuantizer *sq,
const Index *quantizer,
size_t dim,
bool store_pairs,
bool by_residual);
} // namespace faiss