Merge remote-tracking branch 'upstream/0.6.0' into 0.6.0-yk-refactor-scheduler

pull/603/head
fishpenguin 2019-11-28 18:54:07 +08:00
commit 8fa80c1e88
498 changed files with 98914 additions and 122 deletions

View File

@ -22,6 +22,7 @@ Please mark all change in change log and use the ticket from JIRA.
- \#458 - Index data is not compatible between 0.5 and 0.6
- \#465 - Server hang caused by searching with nsg index
- \#486 - GPU not used during index building
- \#497 - CPU-version search performance decreased
- \#504 - The code coverage rate of core/src/scheduler/optimizer is too low
- \#509 - IVF_PQ index build trapped in dead loop caused by invalid params
- \#513 - Unittest DELETE_BY_RANGE sometimes failed
@ -31,7 +32,10 @@ Please mark all change in change log and use the ticket from JIRA.
- \#532 - assign value to `table_name` from conftest shell
- \#533 - NSG build failed with MetricType Inner Product
- \#543 - client raises exception in shards when search result is empty
- \#497 - CPU-version search performance decreased
- \#545 - Avoid dead loop of build index thread when error occurs
- \#552 - Server down during building index_type: IVF_PQ using GPU-edition
- \#561 - Milvus server should report exception/error message or terminate on mysql metadata backend error
- \#599 - Build index log is incorrect
## Feature
- \#12 - Pure CPU version for Milvus
@ -48,6 +52,7 @@ Please mark all change in change log and use the ticket from JIRA.
- \#255 - Add ivfsq8 test report detailed version
- \#260 - C++ SDK README
- \#266 - Rpc request source code refactor
- \#274 - Log the time cost during preloading data
- \#275 - Rename C++ SDK IndexType
- \#284 - Change C++ SDK to shared library
- \#306 - Use int64 for all config integer
@ -61,6 +66,7 @@ Please mark all change in change log and use the ticket from JIRA.
- \#433 - C++ SDK query result is not easy to use
- \#449 - Add ShowPartitions example for C++ SDK
- \#470 - Small raw files should not be built into index
- \#584 - Integrate internal FAISS
## Task

View File

@ -17,7 +17,7 @@ pipeline {
}
parameters{
choice choices: ['Release', 'Debug'], description: '', name: 'BUILD_TYPE'
choice choices: ['Release', 'Debug'], description: 'Build Type', name: 'BUILD_TYPE'
string defaultValue: 'registry.zilliz.com', description: 'DOCKER REGISTRY URL', name: 'DOKCER_REGISTRY_URL', trim: true
string defaultValue: 'ba070c98-c8cc-4f7c-b657-897715f359fc', description: 'DOCKER CREDENTIALS ID', name: 'DOCKER_CREDENTIALS_ID', trim: true
string defaultValue: 'http://192.168.1.202/artifactory/milvus', description: 'JFROG ARTFACTORY URL', name: 'JFROG_ARTFACTORY_URL', trim: true
@ -27,9 +27,8 @@ pipeline {
environment {
PROJECT_NAME = "milvus"
LOWER_BUILD_TYPE = params.BUILD_TYPE.toLowerCase()
SEMVER = "${BRANCH_NAME}"
JOBNAMES = env.JOB_NAME.split('/')
PIPELINE_NAME = "${JOBNAMES[0]}"
SEMVER = "${BRANCH_NAME.contains('/') ? BRANCH_NAME.substring(BRANCH_NAME.lastIndexOf('/') + 1) : BRANCH_NAME}"
PIPELINE_NAME = "${env.JOB_NAME.contains('/') ? env.JOB_NAME.getAt(0..(env.JOB_NAME.indexOf('/') - 1)) : env.JOB_NAME}"
}
stages {
@ -102,7 +101,7 @@ pipeline {
stages {
stage('Publish') {
steps {
container('publish-images'){
container('publish-images') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/publishImages.groovy"
}

View File

@ -0,0 +1,477 @@
#!/usr/bin/env groovy
pipeline {
agent none
options {
timestamps()
}
parameters{
choice choices: ['Release', 'Debug'], description: 'Build Type', name: 'BUILD_TYPE'
string defaultValue: 'registry.zilliz.com', description: 'DOCKER REGISTRY URL', name: 'DOKCER_REGISTRY_URL', trim: true
string defaultValue: 'a54e38ef-c424-4ea9-9224-b25fc20e3924', description: 'DOCKER CREDENTIALS ID', name: 'DOCKER_CREDENTIALS_ID', trim: true
string defaultValue: 'http://192.168.1.201/artifactory/milvus', description: 'JFROG ARTFACTORY URL', name: 'JFROG_ARTFACTORY_URL', trim: true
string defaultValue: '76fd48ab-2b8e-4eed-834d-2eefd23bb3a6', description: 'JFROG CREDENTIALS ID', name: 'JFROG_CREDENTIALS_ID', trim: true
}
environment {
PROJECT_NAME = "milvus"
LOWER_BUILD_TYPE = params.BUILD_TYPE.toLowerCase()
SEMVER = "${BRANCH_NAME.contains('/') ? BRANCH_NAME.substring(BRANCH_NAME.lastIndexOf('/') + 1) : BRANCH_NAME}"
PIPELINE_NAME = "${env.JOB_NAME.contains('/') ? env.JOB_NAME.getAt(0..(env.JOB_NAME.indexOf('/') - 1)) : env.JOB_NAME}"
}
stages {
stage("Ubuntu 18.04 x86_64") {
environment {
OS_NAME = "ubuntu18.04"
CPU_ARCH = "amd64"
}
parallel {
stage ("GPU Version") {
environment {
BINRARY_VERSION = "gpu"
PACKAGE_VERSION = VersionNumber([
versionNumberString : '${SEMVER}-gpu-${OS_NAME}-${CPU_ARCH}-${LOWER_BUILD_TYPE}-${BUILD_DATE_FORMATTED, "yyyyMMdd"}-${BUILDS_TODAY}'
]);
DOCKER_VERSION = "${SEMVER}-gpu-${OS_NAME}-${LOWER_BUILD_TYPE}"
}
stages {
stage("Run Build") {
agent {
kubernetes {
label "${env.BINRARY_VERSION}-build"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
name: milvus-gpu-build-env
labels:
app: milvus
componet: gpu-build-env
spec:
containers:
- name: milvus-gpu-build-env
image: registry.zilliz.com/milvus/milvus-gpu-build-env:v0.6.0-ubuntu18.04
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: BUILD_ENV_IMAGE_ID
value: "da9023b0f858f072672f86483a869aa87e90a5140864f89e5a012ec766d96dea"
command:
- cat
tty: true
resources:
limits:
memory: "24Gi"
cpu: "8.0"
nvidia.com/gpu: 1
requests:
memory: "16Gi"
cpu: "4.0"
- name: milvus-mysql
image: mysql:5.6
env:
- name: MYSQL_ROOT_PASSWORD
value: 123456
ports:
- containerPort: 3306
name: mysql
"""
}
}
stages {
stage('Build') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/build.groovy"
}
}
}
}
stage('Code Coverage') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/internalCoverage.groovy"
}
}
}
}
stage('Upload Package') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/package.groovy"
}
}
}
}
}
}
stage("Publish docker images") {
agent {
kubernetes {
label "${env.BINRARY_VERSION}-publish"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: publish
componet: docker
spec:
containers:
- name: publish-images
image: registry.zilliz.com/library/docker:v1.0.0
securityContext:
privileged: true
command:
- cat
tty: true
volumeMounts:
- name: docker-sock
mountPath: /var/run/docker.sock
volumes:
- name: docker-sock
hostPath:
path: /var/run/docker.sock
"""
}
}
stages {
stage('Publish') {
steps {
container('publish-images') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/publishImages.groovy"
}
}
}
}
}
}
stage("Deploy to Development") {
environment {
FROMAT_SEMVER = "${env.SEMVER}".replaceAll("\\.", "-")
HELM_RELEASE_NAME = "${env.PIPELINE_NAME}-${env.FROMAT_SEMVER}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION}".toLowerCase()
}
agent {
kubernetes {
label "${env.BINRARY_VERSION}-dev-test"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: milvus
componet: test-env
spec:
containers:
- name: milvus-test-env
image: registry.zilliz.com/milvus/milvus-test-env:v0.1
command:
- cat
tty: true
volumeMounts:
- name: kubeconf
mountPath: /root/.kube/
readOnly: true
volumes:
- name: kubeconf
secret:
secretName: test-cluster-config
"""
}
}
stages {
stage("Deploy to Dev") {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/deploySingle2Dev.groovy"
}
}
}
}
stage("Dev Test") {
steps {
container('milvus-test-env') {
script {
boolean isNightlyTest = isTimeTriggeredBuild()
if (isNightlyTest) {
load "${env.WORKSPACE}/ci/jenkins/step/singleDevNightlyTest.groovy"
} else {
load "${env.WORKSPACE}/ci/jenkins/step/singleDevTest.groovy"
}
}
}
}
}
stage ("Cleanup Dev") {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
}
}
post {
unsuccessful {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
}
}
}
}
stage ("CPU Version") {
environment {
BINRARY_VERSION = "cpu"
PACKAGE_VERSION = VersionNumber([
versionNumberString : '${SEMVER}-cpu-${OS_NAME}-${CPU_ARCH}-${LOWER_BUILD_TYPE}-${BUILD_DATE_FORMATTED, "yyyyMMdd"}-${BUILDS_TODAY}'
]);
DOCKER_VERSION = "${SEMVER}-cpu-${OS_NAME}-${LOWER_BUILD_TYPE}"
}
stages {
stage("Run Build") {
agent {
kubernetes {
label "${env.BINRARY_VERSION}-build"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
name: milvus-cpu-build-env
labels:
app: milvus
componet: cpu-build-env
spec:
containers:
- name: milvus-cpu-build-env
image: registry.zilliz.com/milvus/milvus-cpu-build-env:v0.6.0-ubuntu18.04
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: BUILD_ENV_IMAGE_ID
value: "23476391bec80c64f10d44a6370c73c71f011a6b95114b10ff82a60e771e11c7"
command:
- cat
tty: true
resources:
limits:
memory: "24Gi"
cpu: "8.0"
requests:
memory: "16Gi"
cpu: "4.0"
- name: milvus-mysql
image: mysql:5.6
env:
- name: MYSQL_ROOT_PASSWORD
value: 123456
ports:
- containerPort: 3306
name: mysql
"""
}
}
stages {
stage('Build') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/build.groovy"
}
}
}
}
stage('Code Coverage') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/internalCoverage.groovy"
}
}
}
}
stage('Upload Package') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/package.groovy"
}
}
}
}
}
}
stage("Publish docker images") {
agent {
kubernetes {
label "${env.BINRARY_VERSION}-publish"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: publish
componet: docker
spec:
containers:
- name: publish-images
image: registry.zilliz.com/library/docker:v1.0.0
securityContext:
privileged: true
command:
- cat
tty: true
volumeMounts:
- name: docker-sock
mountPath: /var/run/docker.sock
volumes:
- name: docker-sock
hostPath:
path: /var/run/docker.sock
"""
}
}
stages {
stage('Publish') {
steps {
container('publish-images'){
script {
load "${env.WORKSPACE}/ci/jenkins/step/publishImages.groovy"
}
}
}
}
}
}
stage("Deploy to Development") {
environment {
FROMAT_SEMVER = "${env.SEMVER}".replaceAll("\\.", "-")
HELM_RELEASE_NAME = "${env.PIPELINE_NAME}-${env.FROMAT_SEMVER}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION}".toLowerCase()
}
agent {
kubernetes {
label "${env.BINRARY_VERSION}-dev-test"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: milvus
componet: test-env
spec:
containers:
- name: milvus-test-env
image: registry.zilliz.com/milvus/milvus-test-env:v0.1
command:
- cat
tty: true
volumeMounts:
- name: kubeconf
mountPath: /root/.kube/
readOnly: true
volumes:
- name: kubeconf
secret:
secretName: test-cluster-config
"""
}
}
stages {
stage("Deploy to Dev") {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/deploySingle2Dev.groovy"
}
}
}
}
stage("Dev Test") {
steps {
container('milvus-test-env') {
script {
boolean isNightlyTest = isTimeTriggeredBuild()
if (isNightlyTest) {
load "${env.WORKSPACE}/ci/jenkins/step/singleDevNightlyTest.groovy"
} else {
load "${env.WORKSPACE}/ci/jenkins/step/singleDevTest.groovy"
}
}
}
}
}
stage ("Cleanup Dev") {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
}
}
post {
unsuccessful {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
}
}
}
}
}
}
}
}
boolean isTimeTriggeredBuild() {
if (currentBuild.getBuildCauses('hudson.triggers.TimerTrigger$TimerTriggerCause').size() != 0) {
return true
}
return false
}

View File

@ -3,9 +3,9 @@ timeout(time: 60, unit: 'MINUTES') {
withCredentials([usernamePassword(credentialsId: "${params.JFROG_CREDENTIALS_ID}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
def checkResult = sh(script: "./check_ccache.sh -l ${params.JFROG_ARTFACTORY_URL}/ccache", returnStatus: true)
if ("${env.BINRARY_VERSION}" == "gpu") {
sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -g -u -c"
sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -g -x -u -c"
} else {
sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -m -u -c"
sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -u -c"
}
sh "./update_ccache.sh -l ${params.JFROG_ARTFACTORY_URL}/ccache -u ${USERNAME} -p ${PASSWORD}"
}

View File

@ -0,0 +1,6 @@
timeout(time: 30, unit: 'MINUTES') {
dir ("ci/scripts") {
sh "./coverage.sh -o /opt/milvus -u root -p 123456 -t \$POD_IP"
}
}

View File

@ -46,7 +46,7 @@ check_ccache() {
echo "fetching ${BRANCH}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz"
wget -q --method HEAD "${ARTIFACTORY_URL}/${BRANCH}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz"
if [[ $? == 0 ]];then
wget "${ARTIFACTORY_URL}/${BRANCH}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz" && \
wget -q "${ARTIFACTORY_URL}/${BRANCH}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz" && \
mkdir -p ${CCACHE_DIRECTORY} && \
tar zxf ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz -C ${CCACHE_DIRECTORY} && \
rm ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz

View File

@ -35,15 +35,15 @@ if (NOT DEFINED CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build.")
endif ()
set (GIT_BRANCH_NAME_REGEX "[0-9]+\\.[0-9]+\\.[0-9]")
set(GIT_BRANCH_NAME_REGEX "[0-9]+\\.[0-9]+\\.[0-9]")
MACRO(GET_GIT_BRANCH_NAME GIT_BRANCH_NAME)
execute_process(COMMAND sh "-c" "git log --decorate | head -n 1 | sed 's/.*(\\(.*\\))/\\1/' | sed 's/.*, //' | sed 's=[a-zA-Z]*\/==g'"
OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
if(NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
if (NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
execute_process(COMMAND "git" rev-parse --abbrev-ref HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
endif ()
if(NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
if (NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
execute_process(COMMAND "git" symbolic-ref --short -q HEAD HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
endif ()
ENDMACRO(GET_GIT_BRANCH_NAME)
@ -79,7 +79,7 @@ if (MILVUS_VERSION_MAJOR STREQUAL ""
OR MILVUS_VERSION_PATCH STREQUAL "")
message(WARNING "Failed to determine Milvus version from git branch name")
set(MILVUS_VERSION "0.6.0")
endif()
endif ()
message(STATUS "Build version = ${MILVUS_VERSION}")
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/version.h.in ${CMAKE_CURRENT_SOURCE_DIR}/src/version.h @ONLY)
@ -141,6 +141,11 @@ if (MILVUS_USE_CCACHE)
endif (CCACHE_FOUND)
endif ()
if (CUSTOMIZATION)
set(MILVUS_GPU_VERSION ON)
add_compile_definitions(CUSTOMIZATION)
endif ()
set(MILVUS_CPU_VERSION false)
if (MILVUS_GPU_VERSION)
message(STATUS "Building Milvus GPU version")
@ -170,10 +175,6 @@ else ()
endif ()
endif ()
if (CUSTOMIZATION)
add_definitions(-DCUSTOMIZATION)
endif (CUSTOMIZATION)
config_summary()
add_subdirectory(src)

View File

@ -41,10 +41,12 @@ macro(define_option_string name description default)
endmacro()
#----------------------------------------------------------------------
set_option_category("GPU version")
set_option_category("Milvus Build Option")
define_option(MILVUS_GPU_VERSION "Build GPU version" OFF)
define_option(CUSTOMIZATION "Build with customized FAISS library" OFF)
#----------------------------------------------------------------------
set_option_category("Thirdparty")

View File

@ -41,6 +41,7 @@
#include <iostream>
#include <set>
#include <thread>
#include <utility>
namespace milvus {
namespace engine {
@ -51,6 +52,8 @@ constexpr uint64_t METRIC_ACTION_INTERVAL = 1;
constexpr uint64_t COMPACT_ACTION_INTERVAL = 1;
constexpr uint64_t INDEX_ACTION_INTERVAL = 1;
constexpr uint64_t INDEX_FAILED_RETRY_TIME = 1;
static const Status SHUTDOWN_ERROR = Status(DB_ERROR, "Milvus server is shutdown!");
void
@ -179,7 +182,7 @@ DBImpl::PreloadTable(const std::string& table_id) {
return SHUTDOWN_ERROR;
}
// get all table files from parent table
// step 1: get all table files from parent table
meta::DatesT dates;
std::vector<size_t> ids;
meta::TableFilesSchema files_array;
@ -188,7 +191,7 @@ DBImpl::PreloadTable(const std::string& table_id) {
return status;
}
// get files from partition tables
// step 2: get files from partition tables
std::vector<meta::TableSchema> partiton_array;
status = meta_ptr_->ShowPartitions(table_id, partiton_array);
for (auto& schema : partiton_array) {
@ -200,6 +203,10 @@ DBImpl::PreloadTable(const std::string& table_id) {
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
int64_t available_size = cache_total - cache_usage;
// step 3: load file one by one
ENGINE_LOG_DEBUG << "Begin pre-load table:" + table_id + ", totally " << files_array.size()
<< " files need to be pre-loaded";
TimeRecorderAuto rc("Pre-load table:" + table_id);
for (auto& file : files_array) {
ExecutionEnginePtr engine = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_,
(MetricType)file.metric_type_, file.nlist_);
@ -210,10 +217,12 @@ DBImpl::PreloadTable(const std::string& table_id) {
size += engine->PhysicalSize();
if (size > available_size) {
ENGINE_LOG_DEBUG << "Pre-load canceled since cache almost full";
return Status(SERVER_CACHE_FULL, "Cache is full");
} else {
try {
// step 1: load index
std::string msg = "Pre-loaded file: " + file.file_id_ + " size: " + std::to_string(file.file_size_);
TimeRecorderAuto rc_1(msg);
engine->Load(true);
} catch (std::exception& ex) {
std::string msg = "Pre-load table encounter exception: " + std::string(ex.what());
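The preload path above wraps both the whole table load and each per-file load in a TimeRecorderAuto, which from its usage reads as an RAII timer that logs its message together with the elapsed time when it goes out of scope. A minimal sketch of that pattern (the simplified class body below is an assumption for illustration, not the Milvus implementation):

#include <chrono>
#include <iostream>
#include <string>

// Hypothetical stand-in for TimeRecorderAuto: start a clock on construction,
// log "<message>: <elapsed> ms" on destruction, so a stack-scoped instance
// times whatever runs between the two.
class TimeRecorderAuto {
 public:
    explicit TimeRecorderAuto(std::string msg)
        : msg_(std::move(msg)), start_(std::chrono::steady_clock::now()) {}
    ~TimeRecorderAuto() {
        auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                      std::chrono::steady_clock::now() - start_).count();
        std::cout << msg_ << ": " << ms << " ms" << std::endl;
    }
 private:
    std::string msg_;
    std::chrono::steady_clock::time_point start_;
};

int main() {
    TimeRecorderAuto rc("Pre-load table:demo_table");  // times the full scope
    for (const char* file : {"file_1", "file_2"}) {
        TimeRecorderAuto rc_1(std::string("Pre-loaded file: ") + file);
        // engine->Load(true) would run here
    }  // rc_1 logs once per file; rc logs the total at end of main
    return 0;
}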
@ -361,6 +370,7 @@ DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) {
WaitMergeFileFinish();
// step 4: wait and build index
status = CleanFailedIndexFileOfTable(table_id);
status = BuildTableIndexRecursively(table_id, index);
return status;
@ -828,22 +838,35 @@ DBImpl::BackgroundBuildIndex() {
std::unique_lock<std::mutex> lock(build_index_mutex_);
meta::TableFilesSchema to_index_files;
meta_ptr_->FilesToIndex(to_index_files);
Status status;
Status status = IgnoreFailedIndexFiles(to_index_files);
if (!to_index_files.empty()) {
scheduler::BuildIndexJobPtr job = std::make_shared<scheduler::BuildIndexJob>(meta_ptr_, options_);
// step 2: put build index task to scheduler
std::map<scheduler::BuildIndexJobPtr, scheduler::TableFileSchemaPtr> job2file_map;
for (auto& file : to_index_files) {
scheduler::BuildIndexJobPtr job = std::make_shared<scheduler::BuildIndexJob>(meta_ptr_, options_);
scheduler::TableFileSchemaPtr file_ptr = std::make_shared<meta::TableFileSchema>(file);
job->AddToIndexFiles(file_ptr);
scheduler::JobMgrInst::GetInstance()->Put(job);
job2file_map.insert(std::make_pair(job, file_ptr));
}
scheduler::JobMgrInst::GetInstance()->Put(job);
job->WaitBuildIndexFinish();
if (!job->GetStatus().ok()) {
Status status = job->GetStatus();
ENGINE_LOG_ERROR << "Building index failed: " << status.ToString();
for (auto iter = job2file_map.begin(); iter != job2file_map.end(); ++iter) {
scheduler::BuildIndexJobPtr job = iter->first;
meta::TableFileSchema& file_schema = *(iter->second.get());
job->WaitBuildIndexFinish();
if (!job->GetStatus().ok()) {
Status status = job->GetStatus();
ENGINE_LOG_ERROR << "Building index job " << job->id() << " failed: " << status.ToString();
MarkFailedIndexFile(file_schema);
} else {
MarkSucceedIndexFile(file_schema);
ENGINE_LOG_DEBUG << "Building index job " << job->id() << " succeed.";
}
}
ENGINE_LOG_DEBUG << "Background build index thread finished";
}
// ENGINE_LOG_TRACE << "Background build index thread exit";
@ -911,6 +934,7 @@ DBImpl::DropTableRecursively(const std::string& table_id, const meta::DatesT& da
if (dates.empty()) {
status = mem_mgr_->EraseMemVector(table_id); // not allow insert
status = meta_ptr_->DropTable(table_id); // soft delete table
CleanFailedIndexFileOfTable(table_id);
// scheduler will determine when to delete table files
auto nres = scheduler::ResMgrInst::GetInstance()->GetNumOfComputeResource();
@ -989,6 +1013,8 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10 * 1000, times * 100)));
GetFilesToBuildIndex(table_id, file_types, table_files);
times++;
IgnoreFailedIndexFiles(table_files);
}
// build index for partition
@ -1001,12 +1027,27 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
}
}
// failed to build index for some files, return error
std::vector<std::string> failed_files;
GetFailedIndexFileOfTable(table_id, failed_files);
if (!failed_files.empty()) {
std::string msg = "Failed to build index for " + std::to_string(failed_files.size()) +
((failed_files.size() == 1) ? " file" : " files");
#ifdef MILVUS_CPU_VERSION
msg += ", please double check index parameters.";
#else
msg += ", file size is too large or gpu memory is not enough.";
#endif
return Status(DB_ERROR, msg);
}
return Status::OK();
}
Status
DBImpl::DropTableIndexRecursively(const std::string& table_id) {
ENGINE_LOG_DEBUG << "Drop index for table: " << table_id;
CleanFailedIndexFileOfTable(table_id);
auto status = meta_ptr_->DropTableIndex(table_id);
if (!status.ok()) {
return status;
@ -1049,5 +1090,86 @@ DBImpl::GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_c
return Status::OK();
}
Status
DBImpl::CleanFailedIndexFileOfTable(const std::string& table_id) {
std::lock_guard<std::mutex> lck(index_failed_mutex_);
index_failed_files_.erase(table_id); // rebuild failed index files for this table
return Status::OK();
}
Status
DBImpl::GetFailedIndexFileOfTable(const std::string& table_id, std::vector<std::string>& failed_files) {
failed_files.clear();
std::lock_guard<std::mutex> lck(index_failed_mutex_);
auto iter = index_failed_files_.find(table_id);
if (iter != index_failed_files_.end()) {
FileID2FailedTimes& failed_map = iter->second;
for (auto it_file = failed_map.begin(); it_file != failed_map.end(); ++it_file) {
failed_files.push_back(it_file->first);
}
}
return Status::OK();
}
Status
DBImpl::MarkFailedIndexFile(const meta::TableFileSchema& file) {
std::lock_guard<std::mutex> lck(index_failed_mutex_);
auto iter = index_failed_files_.find(file.table_id_);
if (iter == index_failed_files_.end()) {
FileID2FailedTimes failed_files;
failed_files.insert(std::make_pair(file.file_id_, 1));
index_failed_files_.insert(std::make_pair(file.table_id_, failed_files));
} else {
auto it_failed_files = iter->second.find(file.file_id_);
if (it_failed_files != iter->second.end()) {
it_failed_files->second++;
} else {
iter->second.insert(std::make_pair(file.file_id_, 1));
}
}
return Status::OK();
}
Status
DBImpl::MarkSucceedIndexFile(const meta::TableFileSchema& file) {
std::lock_guard<std::mutex> lck(index_failed_mutex_);
auto iter = index_failed_files_.find(file.table_id_);
if (iter != index_failed_files_.end()) {
iter->second.erase(file.file_id_);
}
return Status::OK();
}
Status
DBImpl::IgnoreFailedIndexFiles(meta::TableFilesSchema& table_files) {
std::lock_guard<std::mutex> lck(index_failed_mutex_);
// there could be some failed files belonging to different tables.
// some files may have failed several times; no need to build index for those files.
// thus we avoid an endless loop in the build index operation
for (auto it_file = table_files.begin(); it_file != table_files.end();) {
auto it_failed_files = index_failed_files_.find((*it_file).table_id_);
if (it_failed_files != index_failed_files_.end()) {
auto it_failed_file = it_failed_files->second.find((*it_file).file_id_);
if (it_failed_file != it_failed_files->second.end()) {
if (it_failed_file->second >= INDEX_FAILED_RETRY_TIME) {
it_file = table_files.erase(it_file);
continue;
}
}
}
++it_file;
}
return Status::OK();
}
} // namespace engine
} // namespace milvus
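
Taken together, MarkFailedIndexFile, MarkSucceedIndexFile and IgnoreFailedIndexFiles above give each file a retry budget: failure counts live in a per-table map, and a file that has failed INDEX_FAILED_RETRY_TIME times is filtered out of later build rounds, so the background thread cannot spin forever on one bad file. A self-contained sketch of the same bookkeeping (names here are hypothetical; the real code keys on table_id_/file_id_ and serializes access with index_failed_mutex_):

#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

constexpr uint64_t kRetryLimit = 1;  // mirrors INDEX_FAILED_RETRY_TIME

// table id -> (file id -> number of failed build attempts)
std::map<std::string, std::map<std::string, uint64_t>> failed_files;

void MarkFailed(const std::string& table, const std::string& file) {
    ++failed_files[table][file];  // operator[] default-initializes to 0
}

void MarkSucceeded(const std::string& table, const std::string& file) {
    auto it = failed_files.find(table);
    if (it != failed_files.end()) {
        it->second.erase(file);
    }
}

// Drop files whose failure count has reached the retry limit.
void IgnoreFailed(const std::string& table, std::vector<std::string>& files) {
    auto it = failed_files.find(table);
    if (it == failed_files.end()) {
        return;
    }
    for (auto f = files.begin(); f != files.end();) {
        auto rec = it->second.find(*f);
        if (rec != it->second.end() && rec->second >= kRetryLimit) {
            f = files.erase(f);  // give up on this file
        } else {
            ++f;
        }
    }
}

int main() {
    std::vector<std::string> to_index = {"f1", "f2"};
    MarkFailed("t1", "f1");        // one failure hits the limit of 1
    IgnoreFailed("t1", to_index);  // f1 is skipped from now on
    assert(to_index.size() == 1 && to_index[0] == "f2");
    return 0;
}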

View File

@ -25,6 +25,7 @@
#include <atomic>
#include <condition_variable>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <set>
@ -35,8 +36,6 @@
namespace milvus {
namespace engine {
class Env;
namespace meta {
class Meta;
}
@ -179,6 +178,21 @@ class DBImpl : public DB {
Status
GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_count);
Status
CleanFailedIndexFileOfTable(const std::string& table_id);
Status
GetFailedIndexFileOfTable(const std::string& table_id, std::vector<std::string>& failed_files);
Status
MarkFailedIndexFile(const meta::TableFileSchema& file);
Status
MarkSucceedIndexFile(const meta::TableFileSchema& file);
Status
IgnoreFailedIndexFiles(meta::TableFilesSchema& table_files);
private:
const DBOptions options_;
@ -200,7 +214,11 @@ class DBImpl : public DB {
std::list<std::future<void>> index_thread_results_;
std::mutex build_index_mutex_;
}; // DBImpl
std::mutex index_failed_mutex_;
using FileID2FailedTimes = std::map<std::string, uint64_t>;
using Table2FailedFiles = std::map<std::string, FileID2FailedTimes>;
Table2FailedFiles index_failed_files_; // file id mapping to failed times
}; // DBImpl
} // namespace engine
} // namespace milvus

View File

@ -154,7 +154,9 @@ GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file
}
std::string msg = "Table file doesn't exist: " + file_path;
ENGINE_LOG_ERROR << msg << " in path: " << options.path_ << " for table: " << table_file.table_id_;
if (table_file.file_size_ > 0) { // no need to pop error for empty file
ENGINE_LOG_ERROR << msg << " in path: " << options.path_ << " for table: " << table_file.table_id_;
}
return Status(DB_ERROR, msg);
}

View File

@ -290,45 +290,50 @@ MySQLMetaImpl::Initialize() {
// step 4: validate to avoid open old version schema
ValidateMetaSchema();
// step 5: create meta tables
try {
if (mode_ != DBOptions::MODE::CLUSTER_READONLY) {
CleanUpShadowFiles();
}
// step 5: clean shadow files
if (mode_ != DBOptions::MODE::CLUSTER_READONLY) {
CleanUpShadowFiles();
}
{
mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);
// step 6: try connect mysql server
mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);
if (connectionPtr == nullptr) {
return Status(DB_ERROR, "Failed to connect to meta server(mysql)");
}
if (connectionPtr == nullptr) {
std::string msg = "Failed to connect MySQL meta server: " + uri;
ENGINE_LOG_ERROR << msg;
throw Exception(DB_INVALID_META_URI, msg);
}
if (!connectionPtr->thread_aware()) {
ENGINE_LOG_ERROR << "MySQL++ wasn't built with thread awareness! Can't run without it.";
return Status(DB_ERROR, "MySQL++ wasn't built with thread awareness! Can't run without it.");
}
mysqlpp::Query InitializeQuery = connectionPtr->query();
if (!connectionPtr->thread_aware()) {
std::string msg =
"Failed to initialize MySQL meta backend: MySQL client component wasn't built with thread awareness";
ENGINE_LOG_ERROR << msg;
throw Exception(DB_INVALID_META_URI, msg);
}
InitializeQuery << "CREATE TABLE IF NOT EXISTS " << TABLES_SCHEMA.name() << " ("
<< TABLES_SCHEMA.ToString() + ");";
// step 7: create meta table Tables
mysqlpp::Query InitializeQuery = connectionPtr->query();
ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str();
InitializeQuery << "CREATE TABLE IF NOT EXISTS " << TABLES_SCHEMA.name() << " (" << TABLES_SCHEMA.ToString() + ");";
if (!InitializeQuery.exec()) {
return HandleException("Initialization Error", InitializeQuery.error());
}
ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str();
InitializeQuery << "CREATE TABLE IF NOT EXISTS " << TABLEFILES_SCHEMA.name() << " ("
<< TABLEFILES_SCHEMA.ToString() + ");";
if (!InitializeQuery.exec()) {
std::string msg = "Failed to create meta table 'Tables' in MySQL";
ENGINE_LOG_ERROR << msg;
throw Exception(DB_META_TRANSACTION_FAILED, msg);
}
ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str();
// step 8: create meta table TableFiles
InitializeQuery << "CREATE TABLE IF NOT EXISTS " << TABLEFILES_SCHEMA.name() << " ("
<< TABLEFILES_SCHEMA.ToString() + ");";
if (!InitializeQuery.exec()) {
return HandleException("Initialization Error", InitializeQuery.error());
}
} // Scoped Connection
} catch (std::exception& e) {
return HandleException("GENERAL ERROR DURING INITIALIZATION", e.what());
ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str();
if (!InitializeQuery.exec()) {
std::string msg = "Failed to create meta table 'TableFiles' in MySQL";
ENGINE_LOG_ERROR << msg;
throw Exception(DB_META_TRANSACTION_FAILED, msg);
}
return Status::OK();
@ -1610,10 +1615,34 @@ MySQLMetaImpl::FilesByType(const std::string& table_id, const std::vector<int>&
}
}
ENGINE_LOG_DEBUG << "Table " << table_id << " currently has raw files:" << raw_count
<< " new files:" << new_count << " new_merge files:" << new_merge_count
<< " new_index files:" << new_index_count << " to_index files:" << to_index_count
<< " index files:" << index_count << " backup files:" << backup_count;
std::string msg = "Get table files by type.";
for (int file_type : file_types) {
switch (file_type) {
case (int)TableFileSchema::RAW:
msg = msg + " raw files:" + std::to_string(raw_count);
break;
case (int)TableFileSchema::NEW:
msg = msg + " new files:" + std::to_string(new_count);
break;
case (int)TableFileSchema::NEW_MERGE:
msg = msg + " new_merge files:" + std::to_string(new_merge_count);
break;
case (int)TableFileSchema::NEW_INDEX:
msg = msg + " new_index files:" + std::to_string(new_index_count);
break;
case (int)TableFileSchema::TO_INDEX:
msg = msg + " to_index files:" + std::to_string(to_index_count);
break;
case (int)TableFileSchema::INDEX:
msg = msg + " index files:" + std::to_string(index_count);
break;
case (int)TableFileSchema::BACKUP:
msg = msg + " backup files:" + std::to_string(backup_count);
break;
default: break;
}
}
ENGINE_LOG_DEBUG << msg;
}
} catch (std::exception& e) {
return HandleException("GENERAL ERROR WHEN GET FILE BY TYPE", e.what());

View File

@ -1157,10 +1157,34 @@ SqliteMetaImpl::FilesByType(const std::string& table_id,
table_files.emplace_back(file_schema);
}
ENGINE_LOG_DEBUG << "Table " << table_id << " currently has raw files:" << raw_count
<< " new files:" << new_count << " new_merge files:" << new_merge_count
<< " new_index files:" << new_index_count << " to_index files:" << to_index_count
<< " index files:" << index_count << " backup files:" << backup_count;
std::string msg = "Get table files by type.";
for (int file_type : file_types) {
switch (file_type) {
case (int)TableFileSchema::RAW:
msg = msg + " raw files:" + std::to_string(raw_count);
break;
case (int)TableFileSchema::NEW:
msg = msg + " new files:" + std::to_string(new_count);
break;
case (int)TableFileSchema::NEW_MERGE:
msg = msg + " new_merge files:" + std::to_string(new_merge_count);
break;
case (int)TableFileSchema::NEW_INDEX:
msg = msg + " new_index files:" + std::to_string(new_index_count);
break;
case (int)TableFileSchema::TO_INDEX:
msg = msg + " to_index files:" + std::to_string(to_index_count);
break;
case (int)TableFileSchema::INDEX:
msg = msg + " index files:" + std::to_string(index_count);
break;
case (int)TableFileSchema::BACKUP:
msg = msg + " backup files:" + std::to_string(backup_count);
break;
default: break;
}
}
ENGINE_LOG_DEBUG << msg;
}
} catch (std::exception& e) {
return HandleException("Encounter exception when check non index files", e.what());

View File

@ -72,6 +72,11 @@ include(ExternalProject)
include(DefineOptionsCore)
include(BuildUtilsCore)
if (CUSTOMIZATION)
set(MILVUS_GPU_VERSION ON)
add_compile_definitions(CUSTOMIZATION)
endif ()
set(KNOWHERE_CPU_VERSION false)
if (MILVUS_GPU_VERSION OR KNOWHERE_GPU_VERSION)
message(STATUS "Building Knowhere GPU version")

View File

@ -49,6 +49,8 @@ else ()
define_option(KNOWHERE_GPU_VERSION "Build GPU version" OFF)
endif ()
define_option(CUSTOMIZATION "Build with customized FAISS library" OFF)
#----------------------------------------------------------------------
set_option_category("Thirdparty")

View File

@ -225,11 +225,11 @@ foreach (_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
set(${_LIB_NAME} "${_LIB_VERSION}")
endforeach ()
set(FAISS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/faiss)
if (DEFINED ENV{FAISS_SOURCE_URL})
set(FAISS_SOURCE_URL "$ENV{FAISS_SOURCE_URL}")
else ()
set(FAISS_SOURCE_URL "https://github.com/JinHai-CN/faiss/archive/${FAISS_VERSION}.tar.gz")
set(FAISS_MD5 "b02c1a53234f5acc9bea1b0c55524f50")
endif ()
if (DEFINED ENV{KNOWHERE_ARROW_URL})
@ -737,12 +737,12 @@ macro(build_faiss)
set(FAISS_COMPUTE_TYPE "gpu")
else ()
set(FAISS_COMPUTE_TYPE "cpu")
endif()
endif ()
if (FAISS_WITH_MKL)
set(FAISS_CACHE_PACKAGE_NAME "faiss_${FAISS_COMPUTE_TYPE}_mkl_${FAISS_COMBINE_MD5}.tar.gz")
else ()
set(FAISS_CACHE_PACKAGE_NAME "faiss_${FAISS_COMPUTE_TYPE}_openblas_${FAISS_COMBINE_MD5}.tar.gz")
endif()
endif ()
set(FAISS_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${FAISS_CACHE_PACKAGE_NAME}")
set(FAISS_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${FAISS_CACHE_PACKAGE_NAME}")
@ -779,21 +779,41 @@ macro(build_faiss)
endif ()
endif ()
else ()
externalproject_add(faiss_ep
URL
${FAISS_SOURCE_URL}
${EP_LOG_OPTIONS}
CONFIGURE_COMMAND
"./configure"
${FAISS_CONFIGURE_ARGS}
BUILD_COMMAND
${MAKE} ${MAKE_BUILD_ARGS} all
BUILD_IN_SOURCE
1
INSTALL_COMMAND
${MAKE} install
BUILD_BYPRODUCTS
${FAISS_STATIC_LIB})
if (CUSTOMIZATION)
externalproject_add(faiss_ep
DOWNLOAD_COMMAND
""
SOURCE_DIR
${FAISS_SOURCE_DIR}
${EP_LOG_OPTIONS}
CONFIGURE_COMMAND
"./configure"
${FAISS_CONFIGURE_ARGS}
BUILD_COMMAND
${MAKE} ${MAKE_BUILD_ARGS} all
BUILD_IN_SOURCE
1
INSTALL_COMMAND
${MAKE} install
BUILD_BYPRODUCTS
${FAISS_STATIC_LIB})
else ()
externalproject_add(faiss_ep
URL
${FAISS_SOURCE_URL}
${EP_LOG_OPTIONS}
CONFIGURE_COMMAND
"./configure"
${FAISS_CONFIGURE_ARGS}
BUILD_COMMAND
${MAKE} ${MAKE_BUILD_ARGS} all
BUILD_IN_SOURCE
1
INSTALL_COMMAND
${MAKE} install
BUILD_BYPRODUCTS
${FAISS_STATIC_LIB})
endif ()
if (NOT FAISS_WITH_MKL)
ExternalProject_Add_StepDependencies(faiss_ep build openblas_ep lapack_ep)

View File

@ -0,0 +1 @@
sift1M

View File

@ -0,0 +1,21 @@
*.swp
*.swo
*.o
*.a
*.dSYM
*.so
*.dylib
*.pyc
*~
.DS_Store
depend
/config.*
/aclocal.m4
/autom4te.cache/
/makefile.inc
/bin/
/c_api/bin/
/c_api/gpu/bin/
/tests/test
/tests/gtest/
include/

View File

@ -0,0 +1,719 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
/*
* implementation of Hyper-parameter auto-tuning
*/
#include <faiss/AutoTune.h>
#include <cmath>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/utils.h>
#include <faiss/utils/random.h>
#include <faiss/IndexFlat.h>
#include <faiss/VectorTransform.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/IndexLSH.h>
#include <faiss/IndexPQ.h>
#include <faiss/IndexIVF.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexIVFPQR.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/MetaIndexes.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/IndexHNSW.h>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/IndexBinaryHNSW.h>
#include <faiss/IndexBinaryIVF.h>
namespace faiss {
AutoTuneCriterion::AutoTuneCriterion (idx_t nq, idx_t nnn):
nq (nq), nnn (nnn), gt_nnn (0)
{}
void AutoTuneCriterion::set_groundtruth (
int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in)
{
this->gt_nnn = gt_nnn;
if (gt_D_in) { // allow null for this, as it is often not used
gt_D.resize (nq * gt_nnn);
memcpy (gt_D.data(), gt_D_in, sizeof (gt_D[0]) * nq * gt_nnn);
}
gt_I.resize (nq * gt_nnn);
memcpy (gt_I.data(), gt_I_in, sizeof (gt_I[0]) * nq * gt_nnn);
}
OneRecallAtRCriterion::OneRecallAtRCriterion (idx_t nq, idx_t R):
AutoTuneCriterion(nq, R), R(R)
{}
double OneRecallAtRCriterion::evaluate(const float* /*D*/, const idx_t* I)
const {
FAISS_THROW_IF_NOT_MSG(
(gt_I.size() == gt_nnn * nq && gt_nnn >= 1 && nnn >= R),
"ground truth not initialized");
idx_t n_ok = 0;
for (idx_t q = 0; q < nq; q++) {
idx_t gt_nn = gt_I[q * gt_nnn];
const idx_t* I_line = I + q * nnn;
for (int i = 0; i < R; i++) {
if (I_line[i] == gt_nn) {
n_ok++;
break;
}
}
}
return n_ok / double(nq);
}
IntersectionCriterion::IntersectionCriterion (idx_t nq, idx_t R):
AutoTuneCriterion(nq, R), R(R)
{}
double IntersectionCriterion::evaluate(const float* /*D*/, const idx_t* I)
const {
FAISS_THROW_IF_NOT_MSG(
(gt_I.size() == gt_nnn * nq && gt_nnn >= R && nnn >= R),
"ground truth not initialized");
int64_t n_ok = 0;
#pragma omp parallel for reduction(+: n_ok)
for (idx_t q = 0; q < nq; q++) {
n_ok += ranklist_intersection_size (
R, &gt_I [q * gt_nnn],
R, I + q * nnn);
}
return n_ok / double (nq * R);
}
/***************************************************************
* OperatingPoints
***************************************************************/
OperatingPoints::OperatingPoints ()
{
clear();
}
void OperatingPoints::clear ()
{
all_pts.clear();
optimal_pts.clear();
/// default point: doing nothing gives 0 performance and takes 0 time
OperatingPoint op = {0, 0, "", -1};
optimal_pts.push_back(op);
}
/// add a performance measure
bool OperatingPoints::add (double perf, double t, const std::string & key,
size_t cno)
{
OperatingPoint op = {perf, t, key, int64_t(cno)};
all_pts.push_back (op);
if (perf == 0) {
return false; // no method for 0 accuracy is faster than doing nothing
}
std::vector<OperatingPoint> & a = optimal_pts;
if (perf > a.back().perf) {
// keep unconditionally
a.push_back (op);
} else if (perf == a.back().perf) {
if (t < a.back ().t) {
a.back() = op;
} else {
return false;
}
} else {
int i;
// stricto sensu this should be a bisection
for (i = 0; i < a.size(); i++) {
if (a[i].perf >= perf) break;
}
assert (i < a.size());
if (t < a[i].t) {
if (a[i].perf == perf) {
a[i] = op;
} else {
a.insert (a.begin() + i, op);
}
} else {
return false;
}
}
{ // remove non-optimal points from array
int i = a.size() - 1;
while (i > 0) {
if (a[i].t < a[i - 1].t)
a.erase (a.begin() + (i - 1));
i--;
}
}
return true;
}
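// Usage sketch for the bookkeeping above (values are illustrative, not from
// any benchmark): optimal_pts stays a Pareto frontier over (perf, t), so a
// point survives only if no other point reaches at least its perf in no
// more time.
//
//     OperatingPoints ops;
//     ops.add(0.50, 1.0, "nprobe=4",  0);  // kept
//     ops.add(0.70, 2.0, "nprobe=16", 1);  // kept: more accurate but slower
//     ops.add(0.60, 3.0, "nprobe=8",  2);  // dropped: slower and less
//                                          // accurate than "nprobe=16"
//     ops.display();  // prints only the optimal points by default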
int OperatingPoints::merge_with (const OperatingPoints &other,
const std::string & prefix)
{
int n_add = 0;
for (int i = 0; i < other.all_pts.size(); i++) {
const OperatingPoint & op = other.all_pts[i];
if (add (op.perf, op.t, prefix + op.key, op.cno))
n_add++;
}
return n_add;
}
/// get time required to obtain a given performance measure
double OperatingPoints::t_for_perf (double perf) const
{
const std::vector<OperatingPoint> & a = optimal_pts;
if (perf > a.back().perf) return 1e50;
int i0 = -1, i1 = a.size() - 1;
while (i0 + 1 < i1) {
int imed = (i0 + i1 + 1) / 2;
if (a[imed].perf < perf) i0 = imed;
else i1 = imed;
}
return a[i1].t;
}
void OperatingPoints::all_to_gnuplot (const char *fname) const
{
FILE *f = fopen(fname, "w");
if (!f) {
fprintf (stderr, "cannot open %s", fname);
perror("");
abort();
}
for (int i = 0; i < all_pts.size(); i++) {
const OperatingPoint & op = all_pts[i];
fprintf (f, "%g %g %s\n", op.perf, op.t, op.key.c_str());
}
fclose(f);
}
void OperatingPoints::optimal_to_gnuplot (const char *fname) const
{
FILE *f = fopen(fname, "w");
if (!f) {
fprintf (stderr, "cannot open %s", fname);
perror("");
abort();
}
double prev_perf = 0.0;
for (int i = 0; i < optimal_pts.size(); i++) {
const OperatingPoint & op = optimal_pts[i];
fprintf (f, "%g %g\n", prev_perf, op.t);
fprintf (f, "%g %g %s\n", op.perf, op.t, op.key.c_str());
prev_perf = op.perf;
}
fclose(f);
}
void OperatingPoints::display (bool only_optimal) const
{
const std::vector<OperatingPoint> &pts =
only_optimal ? optimal_pts : all_pts;
printf("Tested %ld operating points, %ld ones are optimal:\n",
all_pts.size(), optimal_pts.size());
for (int i = 0; i < pts.size(); i++) {
const OperatingPoint & op = pts[i];
const char *star = "";
if (!only_optimal) {
for (int j = 0; j < optimal_pts.size(); j++) {
if (op.cno == optimal_pts[j].cno) {
star = "*";
break;
}
}
}
printf ("cno=%ld key=%s perf=%.4f t=%.3f %s\n",
op.cno, op.key.c_str(), op.perf, op.t, star);
}
}
/***************************************************************
* ParameterSpace
***************************************************************/
ParameterSpace::ParameterSpace ():
verbose (1), n_experiments (500),
batchsize (1<<30), thread_over_batches (false),
min_test_duration (0)
{
}
/* not keeping this constructor as inheritors will call the parent
initialize()
*/
#if 0
ParameterSpace::ParameterSpace (Index *index):
verbose (1), n_experiments (500),
batchsize (1<<30), thread_over_batches (false)
{
initialize(index);
}
#endif
size_t ParameterSpace::n_combinations () const
{
size_t n = 1;
for (int i = 0; i < parameter_ranges.size(); i++)
n *= parameter_ranges[i].values.size();
return n;
}
/// get string representation of the combination
std::string ParameterSpace::combination_name (size_t cno) const {
char buf[1000], *wp = buf;
*wp = 0;
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange & pr = parameter_ranges[i];
size_t j = cno % pr.values.size();
cno /= pr.values.size();
wp += snprintf (
wp, buf + 1000 - wp, "%s%s=%g", i == 0 ? "" : ",",
pr.name.c_str(), pr.values[j]);
}
return std::string (buf);
}
bool ParameterSpace::combination_ge (size_t c1, size_t c2) const
{
for (int i = 0; i < parameter_ranges.size(); i++) {
int nval = parameter_ranges[i].values.size();
size_t j1 = c1 % nval;
size_t j2 = c2 % nval;
if (!(j1 >= j2)) return false;
c1 /= nval;
c2 /= nval;
}
return true;
}
#define DC(classname) \
const classname *ix = dynamic_cast<const classname *>(index)
static void init_pq_ParameterRange (const ProductQuantizer & pq,
ParameterRange & pr)
{
if (pq.code_size % 4 == 0) {
// Polysemous not supported for code sizes that are not a
// multiple of 4
for (int i = 2; i <= pq.code_size * 8 / 2; i+= 2)
pr.values.push_back(i);
}
pr.values.push_back (pq.code_size * 8);
}
ParameterRange &ParameterSpace::add_range(const char * name)
{
for (auto & pr : parameter_ranges) {
if (pr.name == name) {
return pr;
}
}
parameter_ranges.push_back (ParameterRange ());
parameter_ranges.back ().name = name;
return parameter_ranges.back ();
}
/// initialize with reasonable parameters for the index
void ParameterSpace::initialize (const Index * index)
{
if (DC (IndexPreTransform)) {
index = ix->index;
}
if (DC (IndexRefineFlat)) {
ParameterRange & pr = add_range("k_factor_rf");
for (int i = 0; i <= 6; i++) {
pr.values.push_back (1 << i);
}
index = ix->base_index;
}
if (DC (IndexPreTransform)) {
index = ix->index;
}
if (DC (IndexIVF)) {
{
ParameterRange & pr = add_range("nprobe");
for (int i = 0; i < 13; i++) {
size_t nprobe = 1 << i;
if (nprobe >= ix->nlist) break;
pr.values.push_back (nprobe);
}
}
if (dynamic_cast<const IndexHNSW*>(ix->quantizer)) {
ParameterRange & pr = add_range("efSearch");
for (int i = 2; i <= 9; i++) {
pr.values.push_back (1 << i);
}
}
}
if (DC (IndexPQ)) {
ParameterRange & pr = add_range("ht");
init_pq_ParameterRange (ix->pq, pr);
}
if (DC (IndexIVFPQ)) {
ParameterRange & pr = add_range("ht");
init_pq_ParameterRange (ix->pq, pr);
}
if (DC (IndexIVF)) {
const MultiIndexQuantizer *miq =
dynamic_cast<const MultiIndexQuantizer *> (ix->quantizer);
if (miq) {
ParameterRange & pr_max_codes = add_range("max_codes");
for (int i = 8; i < 20; i++) {
pr_max_codes.values.push_back (1 << i);
}
pr_max_codes.values.push_back (
std::numeric_limits<double>::infinity()
);
}
}
if (DC (IndexIVFPQR)) {
ParameterRange & pr = add_range("k_factor");
for (int i = 0; i <= 6; i++) {
pr.values.push_back (1 << i);
}
}
if (dynamic_cast<const IndexHNSW*>(index)) {
ParameterRange & pr = add_range("efSearch");
for (int i = 2; i <= 9; i++) {
pr.values.push_back (1 << i);
}
}
}
#undef DC
// non-const version
#define DC(classname) classname *ix = dynamic_cast<classname *>(index)
/// set a combination of parameters on an index
void ParameterSpace::set_index_parameters (Index *index, size_t cno) const
{
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange & pr = parameter_ranges[i];
size_t j = cno % pr.values.size();
cno /= pr.values.size();
double val = pr.values [j];
set_index_parameter (index, pr.name, val);
}
}
/// set a combination of parameters on an index
void ParameterSpace::set_index_parameters (
Index *index, const char *description_in) const
{
char description[strlen(description_in) + 1];
char *ptr;
memcpy (description, description_in, strlen(description_in) + 1);
for (char *tok = strtok_r (description, " ,", &ptr);
tok;
tok = strtok_r (nullptr, " ,", &ptr)) {
char name[100];
double val;
int ret = sscanf (tok, "%100[^=]=%lf", name, &val);
FAISS_THROW_IF_NOT_FMT (
ret == 2, "could not interpret parameters %s", tok);
set_index_parameter (index, name, val);
}
}
void ParameterSpace::set_index_parameter (
Index * index, const std::string & name, double val) const
{
if (verbose > 1)
printf(" set %s=%g\n", name.c_str(), val);
if (name == "verbose") {
index->verbose = int(val);
// and fall through to also enable it on sub-indexes
}
if (DC (IndexPreTransform)) {
set_index_parameter (ix->index, name, val);
return;
}
if (DC (IndexShards)) {
// call on all sub-indexes
auto fn =
[this, name, val](int, Index* subIndex) {
set_index_parameter(subIndex, name, val);
};
ix->runOnIndex(fn);
return;
}
if (DC (IndexReplicas)) {
// call on all sub-indexes
auto fn =
[this, name, val](int, Index* subIndex) {
set_index_parameter(subIndex, name, val);
};
ix->runOnIndex(fn);
return;
}
if (DC (IndexRefineFlat)) {
if (name == "k_factor_rf") {
ix->k_factor = int(val);
return;
}
// otherwise it is for the sub-index
set_index_parameter (&ix->refine_index, name, val);
return;
}
if (name == "verbose") {
index->verbose = int(val);
return; // last verbose that we could find
}
if (name == "nprobe") {
if (DC (IndexIDMap)) {
set_index_parameter (ix->index, name, val);
return;
} else if (DC (IndexIVF)) {
ix->nprobe = int(val);
return;
}
}
if (name == "ht") {
if (DC (IndexPQ)) {
if (val >= ix->pq.code_size * 8) {
ix->search_type = IndexPQ::ST_PQ;
} else {
ix->search_type = IndexPQ::ST_polysemous;
ix->polysemous_ht = int(val);
}
return;
} else if (DC (IndexIVFPQ)) {
if (val >= ix->pq.code_size * 8) {
ix->polysemous_ht = 0;
} else {
ix->polysemous_ht = int(val);
}
return;
}
}
if (name == "k_factor") {
if (DC (IndexIVFPQR)) {
ix->k_factor = val;
return;
}
}
if (name == "max_codes") {
if (DC (IndexIVF)) {
ix->max_codes = std::isfinite(val) ? size_t(val) : 0;
return;
}
}
if (name == "efSearch") {
if (DC (IndexHNSW)) {
ix->hnsw.efSearch = int(val);
return;
}
if (DC (IndexIVF)) {
if (IndexHNSW *cq =
dynamic_cast<IndexHNSW *>(ix->quantizer)) {
cq->hnsw.efSearch = int(val);
return;
}
}
}
FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:"
"could not set parameter %s",
name.c_str());
}
void ParameterSpace::display () const
{
printf ("ParameterSpace, %ld parameters, %ld combinations:\n",
parameter_ranges.size (), n_combinations ());
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange & pr = parameter_ranges[i];
printf (" %s: ", pr.name.c_str ());
char sep = '[';
for (int j = 0; j < pr.values.size(); j++) {
printf ("%c %g", sep, pr.values [j]);
sep = ',';
}
printf ("]\n");
}
}
void ParameterSpace::update_bounds (size_t cno, const OperatingPoint & op,
double *upper_bound_perf,
double *lower_bound_t) const
{
if (combination_ge (cno, op.cno)) {
if (op.t > *lower_bound_t) *lower_bound_t = op.t;
}
if (combination_ge (op.cno, cno)) {
if (op.perf < *upper_bound_perf) *upper_bound_perf = op.perf;
}
}
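// Reading of the bounds above (inferred from the code, not stated in the
// source): since each parameter's values are ordered from least to most
// expensive/accurate,
//
//     c1 >= c2 componentwise  =>  perf(c1) >= perf(c2) and t(c1) >= t(c2)
//
// So for a candidate cno, every measured op with cno >= op.cno yields
// t(cno) >= op.t (a lower bound on time), and every op with op.cno >= cno
// yields perf(cno) <= op.perf (an upper bound on performance). explore()
// uses these to skip cno when an already-known point reaches the
// upper-bound performance within the lower-bound time.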
void ParameterSpace::explore (Index *index,
size_t nq, const float *xq,
const AutoTuneCriterion & crit,
OperatingPoints * ops) const
{
FAISS_THROW_IF_NOT_MSG (nq == crit.nq,
"criterion does not have the same nb of queries");
size_t n_comb = n_combinations ();
if (n_experiments == 0) {
for (size_t cno = 0; cno < n_comb; cno++) {
set_index_parameters (index, cno);
std::vector<Index::idx_t> I(nq * crit.nnn);
std::vector<float> D(nq * crit.nnn);
double t0 = getmillisecs ();
index->search (nq, xq, crit.nnn, D.data(), I.data());
double t_search = (getmillisecs() - t0) / 1e3;
double perf = crit.evaluate (D.data(), I.data());
bool keep = ops->add (perf, t_search, combination_name (cno), cno);
if (verbose)
printf(" %ld/%ld: %s perf=%.3f t=%.3f s %s\n", cno, n_comb,
combination_name (cno).c_str(), perf, t_search,
keep ? "*" : "");
}
return;
}
int n_exp = n_experiments;
if (n_exp > n_comb) n_exp = n_comb;
FAISS_THROW_IF_NOT (n_comb == 1 || n_exp > 2);
std::vector<int> perm (n_comb);
// make sure the slowest and fastest experiment are run
perm[0] = 0;
if (n_comb > 1) {
perm[1] = n_comb - 1;
rand_perm (&perm[2], n_comb - 2, 1234);
for (int i = 2; i < perm.size(); i++) perm[i] ++;
}
for (size_t xp = 0; xp < n_exp; xp++) {
size_t cno = perm[xp];
if (verbose)
printf(" %ld/%d: cno=%ld %s ", xp, n_exp, cno,
combination_name (cno).c_str());
{
double lower_bound_t = 0.0;
double upper_bound_perf = 1.0;
for (int i = 0; i < ops->all_pts.size(); i++) {
update_bounds (cno, ops->all_pts[i],
&upper_bound_perf, &lower_bound_t);
}
double best_t = ops->t_for_perf (upper_bound_perf);
if (verbose)
printf ("bounds [perf<=%.3f t>=%.3f] %s",
upper_bound_perf, lower_bound_t,
best_t <= lower_bound_t ? "skip\n" : "");
if (best_t <= lower_bound_t) continue;
}
set_index_parameters (index, cno);
std::vector<Index::idx_t> I(nq * crit.nnn);
std::vector<float> D(nq * crit.nnn);
double t0 = getmillisecs ();
int nrun = 0;
double t_search;
do {
if (thread_over_batches) {
#pragma omp parallel for
for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
size_t q1 = q0 + batchsize;
if (q1 > nq) q1 = nq;
index->search (q1 - q0, xq + q0 * index->d,
crit.nnn,
D.data() + q0 * crit.nnn,
I.data() + q0 * crit.nnn);
}
} else {
for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
size_t q1 = q0 + batchsize;
if (q1 > nq) q1 = nq;
index->search (q1 - q0, xq + q0 * index->d,
crit.nnn,
D.data() + q0 * crit.nnn,
I.data() + q0 * crit.nnn);
}
}
nrun ++;
t_search = (getmillisecs() - t0) / 1e3;
} while (t_search < min_test_duration);
t_search /= nrun;
double perf = crit.evaluate (D.data(), I.data());
bool keep = ops->add (perf, t_search, combination_name (cno), cno);
if (verbose)
printf(" perf %.3f t %.3f (%d runs) %s\n",
perf, t_search, nrun,
keep ? "*" : "");
}
}
} // namespace faiss

View File

@ -0,0 +1,212 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_AUTO_TUNE_H
#define FAISS_AUTO_TUNE_H
#include <vector>
#include <unordered_map>
#include <stdint.h>
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>
namespace faiss {
/**
* Evaluation criterion. Returns a performance measure in [0,1],
* higher is better.
*/
struct AutoTuneCriterion {
typedef Index::idx_t idx_t;
idx_t nq; ///< nb of queries this criterion is evaluated on
idx_t nnn; ///< nb of NNs that the query should request
idx_t gt_nnn; ///< nb of GT NNs required to evaluate criterion
std::vector<float> gt_D; ///< Ground-truth distances (size nq * gt_nnn)
std::vector<idx_t> gt_I; ///< Ground-truth indexes (size nq * gt_nnn)
AutoTuneCriterion (idx_t nq, idx_t nnn);
/** Initializes the gt_D and gt_I vectors. Must be called before evaluating
*
* @param gt_D_in size nq * gt_nnn
* @param gt_I_in size nq * gt_nnn
*/
void set_groundtruth (int gt_nnn, const float *gt_D_in,
const idx_t *gt_I_in);
/** Evaluate the criterion.
*
* @param D size nq * nnn
* @param I size nq * nnn
* @return the criterion, between 0 and 1. Larger is better.
*/
virtual double evaluate (const float *D, const idx_t *I) const = 0;
virtual ~AutoTuneCriterion () {}
};
struct OneRecallAtRCriterion: AutoTuneCriterion {
idx_t R;
OneRecallAtRCriterion (idx_t nq, idx_t R);
double evaluate(const float* D, const idx_t* I) const override;
~OneRecallAtRCriterion() override {}
};
struct IntersectionCriterion: AutoTuneCriterion {
idx_t R;
IntersectionCriterion (idx_t nq, idx_t R);
double evaluate(const float* D, const idx_t* I) const override;
~IntersectionCriterion() override {}
};
/**
* Maintains a list of experimental results. Each operating point is a
* (perf, t, key) triplet, where higher perf and lower t is
* better. The key field is an arbitrary identifier for the operating point
*/
struct OperatingPoint {
double perf; ///< performance measure (output of a Criterion)
double t; ///< corresponding execution time (ms)
std::string key; ///< key that identifies this op pt
int64_t cno; ///< integer identifier
};
struct OperatingPoints {
/// all operating points
std::vector<OperatingPoint> all_pts;
/// optimal operating points, sorted by perf
std::vector<OperatingPoint> optimal_pts;
// begins with a single operating point: t=0, perf=0
OperatingPoints ();
/// add operating points from other to this, with a prefix to the keys
int merge_with (const OperatingPoints &other,
const std::string & prefix = "");
void clear ();
/// add a performance measure. Return whether it is an optimal point
bool add (double perf, double t, const std::string & key, size_t cno = 0);
/// get time required to obtain a given performance measure
double t_for_perf (double perf) const;
/// easy-to-read output
void display (bool only_optimal = true) const;
/// output to a format easy to digest by gnuplot
void all_to_gnuplot (const char *fname) const;
void optimal_to_gnuplot (const char *fname) const;
};
/// possible values of a parameter, sorted from least to most expensive/accurate
struct ParameterRange {
std::string name;
std::vector<double> values;
};
/** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
*/
struct ParameterSpace {
/// all tunable parameters
std::vector<ParameterRange> parameter_ranges;
// exploration parameters
/// verbosity during exploration
int verbose;
/// nb of experiments during optimization (0 = try all combinations)
int n_experiments;
/// maximum number of queries to submit at a time.
size_t batchsize;
/// use multithreading over batches (useful to benchmark
/// independent single-searches)
bool thread_over_batches;
/// run tests several times until they reach at least this
/// duration (to avoid jittering in MT mode)
double min_test_duration;
ParameterSpace ();
/// nb of combinations, = product of values sizes
size_t n_combinations () const;
/// returns whether combinations c1 >= c2 in the tuple sense
bool combination_ge (size_t c1, size_t c2) const;
/// get string representation of the combination
std::string combination_name (size_t cno) const;
/// print a description on stdout
void display () const;
/// add a new parameter (or return it if it exists)
ParameterRange &add_range(const char * name);
/// initialize with reasonable parameters for the index
virtual void initialize (const Index * index);
/// set a combination of parameters on an index
void set_index_parameters (Index *index, size_t cno) const;
/// set a combination of parameters described by a string
void set_index_parameters (Index *index, const char *param_string) const;
/// set one of the parameters
virtual void set_index_parameter (
Index * index, const std::string & name, double val) const;
/** find an upper bound on the performance and a lower bound on t
* for configuration cno given another operating point op */
void update_bounds (size_t cno, const OperatingPoint & op,
double *upper_bound_perf,
double *lower_bound_t) const;
/** explore operating points
* @param index index to run on
* @param xq query vectors (size nq * index.d)
* @param crit selection criterion
* @param ops resulting operating points
*/
void explore (Index *index,
size_t nq, const float *xq,
const AutoTuneCriterion & crit,
OperatingPoints * ops) const;
virtual ~ParameterSpace () {}
};
} // namespace faiss
#endif
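
For context, the intended flow with these classes: build a criterion from ground-truth search results, let ParameterSpace discover the index's tunable parameters, and have explore fill an OperatingPoints set. A hedged end-to-end sketch against the API declared above (random data and parameter choices are placeholders; error handling omitted):

#include <faiss/AutoTune.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <random>
#include <vector>

int main() {
    int d = 32;
    size_t nb = 10000, nq = 100;
    std::mt19937 rng(123);
    std::uniform_real_distribution<float> u(0.0f, 1.0f);
    std::vector<float> xb(nb * d), xq(nq * d);
    for (auto& v : xb) v = u(rng);
    for (auto& v : xq) v = u(rng);

    // index under test: IVF-Flat with 64 inverted lists
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFFlat index(&quantizer, d, 64);
    index.train(nb, xb.data());
    index.add(nb, xb.data());

    // ground truth (1-NN) from brute-force search
    faiss::IndexFlatL2 flat(d);
    flat.add(nb, xb.data());
    std::vector<faiss::Index::idx_t> gt_I(nq);
    std::vector<float> gt_D(nq);
    flat.search(nq, xq.data(), 1, gt_D.data(), gt_I.data());

    faiss::OneRecallAtRCriterion crit(nq, 1);
    crit.set_groundtruth(1, gt_D.data(), gt_I.data());

    faiss::ParameterSpace ps;
    ps.initialize(&index);  // discovers e.g. the nprobe range
    faiss::OperatingPoints ops;
    ps.explore(&index, nq, xq.data(), crit, &ops);
    ops.display();  // Pareto-optimal (recall, time) operating points
    return 0;
}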

View File

@ -0,0 +1,2 @@
# Code of Conduct
Facebook has adopted a Code of Conduct that we expect project participants to adhere to. Please [read the full text](https://code.fb.com/codeofconduct) so that you can understand what actions will and will not be tolerated.

View File

@ -0,0 +1,53 @@
# Contributing to Faiss
We want to make contributing to this project as easy and transparent as
possible.
## Our Development Process
We mainly develop Faiss within Facebook. Sometimes, we will sync the
github version of Faiss with the internal state.
## Pull Requests
We welcome pull requests that add significant value to Faiss. If you plan to do
a major development and contribute it back to Faiss, please contact us first before
putting too much effort into it.
1. Fork the repo and create your branch from `master`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
There is a Facebook internal test suite for Faiss, and we need to run
all changes to Faiss through it.
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Coding Style
* 4 or 2 spaces for indentation in C++ (no tabs)
* 80 character line length (both for C++ and Python)
* C++ language level: C++11
## License
By contributing to Faiss, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.

View File

@ -0,0 +1,261 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/Clustering.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <faiss/utils/utils.h>
#include <faiss/utils/random.h>
#include <faiss/utils/distances.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexFlat.h>
namespace faiss {
ClusteringParameters::ClusteringParameters ():
niter(25),
nredo(1),
verbose(false),
spherical(false),
int_centroids(false),
update_index(false),
frozen_centroids(false),
min_points_per_centroid(39),
max_points_per_centroid(256),
seed(1234)
{}
// 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
Clustering::Clustering (int d, int k):
d(d), k(k) {}
Clustering::Clustering (int d, int k, const ClusteringParameters &cp):
ClusteringParameters (cp), d(d), k(k) {}
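// ratio between the actual and ideal sum of squared inverted-list sizes:
// uf = k * sum_i hist[i]^2 / n^2; a perfectly balanced assignment gives 1.0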
static double imbalance_factor (int n, int k, int64_t *assign) {
std::vector<int> hist(k, 0);
for (int i = 0; i < n; i++)
hist[assign[i]]++;
double tot = 0, uf = 0;
for (int i = 0 ; i < k ; i++) {
tot += hist[i];
uf += hist[i] * (double) hist[i];
}
uf = uf * k / (tot * tot);
return uf;
}
void Clustering::post_process_centroids ()
{
if (spherical) {
fvec_renorm_L2 (d, k, centroids.data());
}
if (int_centroids) {
for (size_t i = 0; i < centroids.size(); i++)
centroids[i] = roundf (centroids[i]);
}
}
void Clustering::train (idx_t nx, const float *x_in, Index & index) {
FAISS_THROW_IF_NOT_FMT (nx >= k,
"Number of training points (%ld) should be at least "
"as large as number of clusters (%ld)", nx, k);
double t0 = getmillisecs();
// yes it is the user's responsibility, but it may spare us some
// hard-to-debug reports.
for (size_t i = 0; i < nx * d; i++) {
FAISS_THROW_IF_NOT_MSG (finite (x_in[i]),
"input contains NaN's or Inf's");
}
const float *x = x_in;
ScopeDeleter<float> del1;
if (nx > k * max_points_per_centroid) {
if (verbose)
printf("Sampling a subset of %ld / %ld for training\n",
k * max_points_per_centroid, nx);
std::vector<int> perm (nx);
rand_perm (perm.data (), nx, seed);
nx = k * max_points_per_centroid;
float * x_new = new float [nx * d];
for (idx_t i = 0; i < nx; i++)
memcpy (x_new + i * d, x + perm[i] * d, sizeof(x_new[0]) * d);
x = x_new;
del1.set (x);
} else if (nx < k * min_points_per_centroid) {
fprintf (stderr,
"WARNING clustering %ld points to %ld centroids: "
"please provide at least %ld training points\n",
nx, k, idx_t(k) * min_points_per_centroid);
}
if (nx == k) {
if (verbose) {
printf("Number of training points (%ld) same as number of "
"clusters, just copying\n", nx);
}
// this is a corner case, just copy training set to clusters
centroids.resize (d * k);
memcpy (centroids.data(), x_in, sizeof (*x_in) * d * k);
index.reset();
index.add(k, x_in);
return;
}
if (verbose)
printf("Clustering %d points in %ldD to %ld clusters, "
"redo %d times, %d iterations\n",
int(nx), d, k, nredo, niter);
idx_t * assign = new idx_t[nx];
ScopeDeleter<idx_t> del (assign);
float * dis = new float[nx];
ScopeDeleter<float> del2(dis);
// for redo
float best_err = HUGE_VALF;
std::vector<float> best_obj;
std::vector<float> best_centroids;
// support input centroids
FAISS_THROW_IF_NOT_MSG (
centroids.size() % d == 0,
"size of provided input centroids not a multiple of dimension");
size_t n_input_centroids = centroids.size() / d;
if (verbose && n_input_centroids > 0) {
printf (" Using %zd centroids provided as input (%sfrozen)\n",
n_input_centroids, frozen_centroids ? "" : "not ");
}
double t_search_tot = 0;
if (verbose) {
printf(" Preprocessing in %.2f s\n",
(getmillisecs() - t0) / 1000.);
}
t0 = getmillisecs();
for (int redo = 0; redo < nredo; redo++) {
if (verbose && nredo > 1) {
printf("Outer iteration %d / %d\n", redo, nredo);
}
// initialize remaining centroids with random points from the dataset
centroids.resize (d * k);
std::vector<int> perm (nx);
rand_perm (perm.data(), nx, seed + 1 + redo * 15486557L);
for (int i = n_input_centroids; i < k ; i++)
memcpy (&centroids[i * d], x + perm[i] * d,
d * sizeof (float));
post_process_centroids ();
if (index.ntotal != 0) {
index.reset();
}
if (!index.is_trained) {
index.train (k, centroids.data());
}
index.add (k, centroids.data());
float err = 0;
for (int i = 0; i < niter; i++) {
double t0s = getmillisecs();
index.search (nx, x, 1, dis, assign);
InterruptCallback::check();
t_search_tot += getmillisecs() - t0s;
err = 0;
for (int j = 0; j < nx; j++)
err += dis[j];
obj.push_back (err);
int nsplit = km_update_centroids (
x, centroids.data(),
assign, d, k, nx, frozen_centroids ? n_input_centroids : 0);
if (verbose) {
printf (" Iteration %d (%.2f s, search %.2f s): "
"objective=%g imbalance=%.3f nsplit=%d \r",
i, (getmillisecs() - t0) / 1000.0,
t_search_tot / 1000,
err, imbalance_factor (nx, k, assign),
nsplit);
fflush (stdout);
}
post_process_centroids ();
index.reset ();
if (update_index)
index.train (k, centroids.data());
assert (index.ntotal == 0);
index.add (k, centroids.data());
InterruptCallback::check ();
}
if (verbose) printf("\n");
if (nredo > 1) {
if (err < best_err) {
if (verbose)
printf ("Objective improved: keep new clusters\n");
best_centroids = centroids;
best_obj = obj;
best_err = err;
}
index.reset ();
}
}
if (nredo > 1) {
centroids = best_centroids;
obj = best_obj;
index.reset();
index.add(k, best_centroids.data());
}
}
float kmeans_clustering (size_t d, size_t n, size_t k,
const float *x,
float *centroids)
{
Clustering clus (d, k);
clus.verbose = d * n * k > (1L << 30);
// display logs if > 1Gflop per iteration
IndexFlatL2 index (d);
clus.train (n, x, index);
memcpy(centroids, clus.centroids.data(), sizeof(*centroids) * d * k);
return clus.obj.back();
}
} // namespace faiss

View File

@ -0,0 +1,101 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_CLUSTERING_H
#define FAISS_CLUSTERING_H
#include <faiss/Index.h>
#include <vector>
namespace faiss {
/** Class for the clustering parameters. Can be passed to the
* constructor of the Clustering object.
*/
struct ClusteringParameters {
int niter; ///< clustering iterations
int nredo; ///< redo clustering this many times and keep best
bool verbose;
bool spherical; ///< do we want normalized centroids?
bool int_centroids; ///< round centroids coordinates to integer
bool update_index; ///< update index after each iteration?
bool frozen_centroids; ///< use the centroids provided as input and do not change them during iterations
int min_points_per_centroid; ///< otherwise you get a warning
int max_points_per_centroid; ///< to limit size of dataset
int seed; ///< seed for the random number generator
/// sets reasonable defaults
ClusteringParameters ();
};
/** clustering based on assignment - centroid update iterations
*
* The clustering is based on an Index object that assigns training
* points to the centroids. Therefore, at each iteration the centroids
* are added to the index.
*
 * On output, the centroids table is set to the latest version
 * of the centroids and they are also added to the index. If the
 * centroids table is not empty on input, it is also used for
* initialization.
*
* To do several clusterings, just call train() several times on
* different training sets, clearing the centroid table in between.
*/
struct Clustering: ClusteringParameters {
typedef Index::idx_t idx_t;
size_t d; ///< dimension of the vectors
size_t k; ///< nb of centroids
/// centroids (k * d)
std::vector<float> centroids;
/// objective values (sum of distances reported by index) over
/// iterations
std::vector<float> obj;
/// the only mandatory parameters are k and d
Clustering (int d, int k);
Clustering (int d, int k, const ClusteringParameters &cp);
/// Index is used during the assignment stage
virtual void train (idx_t n, const float * x, faiss::Index & index);
/// Post-process the centroids after each centroid update.
/// includes optional L2 normalization and nearest integer rounding
void post_process_centroids ();
virtual ~Clustering() {}
};
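/* Illustrative k-means run (a sketch; x is assumed to hold n caller-owned
 * training vectors of dimension d, and IndexFlatL2 comes from IndexFlat.h):
 *
 *   Clustering clus (d, k);
 *   clus.niter = 20;
 *   IndexFlatL2 assigner (d);      // used for the assignment stage
 *   clus.train (n, x, assigner);
 *   // clus.centroids now holds k * d floats; clus.obj holds the
 *   // per-iteration objective values
 */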
/** simplified interface
*
* @param d dimension of the data
* @param n nb of training vectors
* @param k nb of output centroids
* @param x training set (size n * d)
* @param centroids output centroids (size k * d)
* @return final quantization error
*/
float kmeans_clustering (size_t d, size_t n, size_t k,
const float *x,
float *centroids);
}
#endif

View File

@ -0,0 +1,29 @@
FROM nvidia/cuda:8.0-devel-centos7
# Install MKL
RUN yum-config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo
RUN rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
RUN yum install -y intel-mkl-2019.3-062
ENV LD_LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
ENV LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LIBRARY_PATH
ENV LD_PRELOAD /usr/lib64/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:\
/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:\
/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_gnu_thread.so
# Install necessary build tools
RUN yum install -y gcc-c++ make swig3
# Install necessary headers/libs
RUN yum install -y python-devel numpy
COPY . /opt/faiss
WORKDIR /opt/faiss
# --with-cuda=/usr/local/cuda-8.0
RUN ./configure --prefix=/usr --libdir=/usr/lib64 --without-cuda
RUN make -j $(nproc)
RUN make -C python
RUN make test
RUN make install
RUN make -C demos demo_ivfpq_indexing && ./demos/demo_ivfpq_indexing

View File

@ -0,0 +1,353 @@
[//]: # "**********************************************************"
[//]: # "** INSTALL file for Faiss (Fair AI Similarity Search **"
[//]: # "**********************************************************"
INSTALL file for Faiss (Fair AI Similarity Search)
==================================================
Install via Conda
-----------------
The easiest way to install Faiss is from Anaconda. We regularly push stable releases to the pytorch conda channel.
Currently we support faiss-cpu on both Linux and OSX. We also provide faiss-gpu compiled with CUDA8/CUDA9/CUDA10 on Linux systems.
You can install it with:
```
# CPU version only
conda install faiss-cpu -c pytorch
# GPU version
conda install faiss-gpu cudatoolkit=8.0 -c pytorch # For CUDA8
conda install faiss-gpu cudatoolkit=9.0 -c pytorch # For CUDA9
conda install faiss-gpu cudatoolkit=10.0 -c pytorch # For CUDA10
```
Compile from source
-------------------
The Faiss compilation works in 2 steps:
1. compile the C++ core and examples
2. compile the Python interface
Step 2 depends on step 1.
It is also possible to build a pure C interface. This optional process is
described separately (please see the [C interface installation file](c_api/INSTALL.md))
General compilation instructions
================================
TL;DR: `./configure && make (&& make install)` for the C++ library, and then `cd python; make && make install` for the python interface.
1. `./configure`
This generates the system-dependent configuration for the `Makefile`, stored in
a file called `makefile.inc`.
A few useful options:
- `./configure --without-cuda` in order to build the CPU part only.
- `./configure --with-cuda=/path/to/cuda-10.1` in order to hint to the path of
the cudatoolkit.
- `./configure --with-cuda-arch="-gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_72,code=sm_72"` for specifying which GPU architectures to build against.
- `./configure --with-python=/path/to/python3.7` in order to build a python
interface for a different python than the default one.
- `LDFLAGS=-L/path_to_mkl/lib/ ./configure` so that configure detects the MKL BLAS implementation. Note that this may require setting LD_LIBRARY_PATH at runtime.
2. `make`
This builds the C++ library (the whole library if a suitable cuda toolkit was
found, or the CPU part only otherwise).
3. `make install` (optional)
This installs the headers and libraries.
4. `make -C python` (or `make py`)
This builds the python interface.
5. `make -C python install`
This installs the python library.
Faiss has been tested only on x86_64 machines on Linux and Mac OS.
Faiss requires a C++ compiler that understands:
- the Intel intrinsics for SSE instructions,
- the GCC intrinsic for the popcount instruction,
- basic OpenMP.
There are a few examples for makefile.inc in the example_makefiles/
subdirectory. There are also indications for specific configurations in the
troubleshooting section of the wiki.
https://github.com/facebookresearch/faiss/wiki/Troubleshooting
Faiss comes as a .a archive, that can be linked with executables or
dynamic libraries (useful for the Python wrapper).
BLAS/Lapack
-----------
The only variables that need to be configured for the C++ Faiss are
the BLAS/Lapack flags (a linear algebra software package). It needs a
flag telling whether BLAS/Lapack uses 32 or 64 bit integers and the
linking flags. Faiss uses the Fortran 77 interface of BLAS/Lapack and
thus does not need an include path.
There are several BLAS implementations, depending on the OS and
machine. To have reasonable performance, the BLAS library should be
multithreaded. See the example makefile.inc's for hints and examples
on how to set the flags, or simply run the configure script:
`./configure`
To check that the link flags are correct, and verify whether the
implementation uses 32 or 64 bit integers, you can
`make misc/test_blas`
and run
`./misc/test_blas`
Testing Faiss
-------------
A basic usage example is in
`demos/demo_ivfpq_indexing`
which you can build by calling
`make -C demos demo_ivfpq_indexing`
It makes a small index, stores it and performs some searches. A normal
runtime is around 20s. With a fast machine and Intel MKL's BLAS it
runs in 2.5s.
To run the whole test suite:
`make test` (for the CPU part)
`make test_gpu` (for the GPU part)
A real-life benchmark
---------------------
A bit longer example runs and evaluates Faiss on the SIFT1M
dataset. To run it, please download the ANN_SIFT1M dataset from
http://corpus-texmex.irisa.fr/
and unzip it to the subdirectory `sift1M` at the root of the source
directory for this repository.
Then compile and run the following (after ensuring you have installed faiss):
```
make demos
./demos/demo_sift1M
```
This is a demonstration of the high-level auto-tuning API. You can try
setting a different index_key to find the indexing structure that
gives the best performance.
The Python interface
======================================
The Python interface is compiled with
`make -C python` (or `make py`)
How it works
------------
The Python interface is provided via SWIG (Simplified Wrapper and
Interface Generator) and an additional level of manual wrappers (in python/faiss.py).
SWIG generates two wrapper files: a Python file (`python/swigfaiss.py`) and a
C++ file that must be compiled to a dynamic library (`python/_swigfaiss.so`).
Testing the Python wrapper
--------------------------
Often, a successful compile does not mean that the library works,
because missing symbols are detected only at runtime. You should be
able to load the Faiss dynamic library:
`python -c "import faiss"`
In case of failure, it reports the first missing symbol. To see all
missing symbols (on Linux), use
`ldd -r _swigfaiss.so`
Sometimes, problems (e.g. with BLAS libraries) appear only when actually
calling a BLAS function. A simple way to check this is:
```python
python -c "import faiss, numpy
faiss.Kmeans(10, 20).train(numpy.random.rand(1000, 10).astype('float32'))"
```
Real-life test
--------------
The following script extends the demo_sift1M test to several types of
indexes. This must be run from the root of the source directory for this
repository:
```
mkdir tmp # graphs of the output will be written here
PYTHONPATH=. python demos/demo_auto_tune.py
```
It will cycle through a few types of indexes and find optimal
operating points. You can play around with the types of indexes.
Compiling the GPU implementation
================================
The GPU version is a superset of the CPU version. In addition it
requires the CUDA compiler and related libraries (cuBLAS).
The nvcc-specific flags to pass to the compiler, based on your desired
compute capability, can be customized by providing `--with-cuda-arch` to
`./configure`. Only compute capability 3.5+ is supported. For example, we enable
by default:
```
-gencode=arch=compute_35,code=compute_35
-gencode=arch=compute_52,code=compute_52
-gencode=arch=compute_60,code=compute_60
-gencode=arch=compute_61,code=compute_61
-gencode=arch=compute_70,code=compute_70
-gencode=arch=compute_75,code=compute_75
```
However, look at https://developer.nvidia.com/cuda-gpus to determine
what compute capability you need to use, and replace our gencode
specifications with the one(s) you need.
Most other flags are related to the C++11 compiler used by nvcc to
compile the actual C++ code. They are normally passed through by
nvcc, except for flags it does not recognize, which should be
escaped by prefixing them with -Xcompiler. Likewise, link flags
prefixed with -Wl, should be passed with -Xlinker.
You may want to add `-j 10` to use 10 threads during compilation.
Testing the GPU implementation
------------------------------
Compile the example with
`make -C gpu/test demo_ivfpq_indexing_gpu`
This produces the GPU equivalent of the CPU demo_ivfpq_indexing
demo. It also shows how to transfer indexes to/from the GPU.
Python example with GPU support
-------------------------------
The auto-tuning example above also runs on the GPU. Edit
`demos/demo_auto_tune.py` at line 100 with the values
```python
keys_to_test = keys_gpu
use_gpu = True
```
and you can run
```
export PYTHONPATH=.
python demos/demo_auto_tune.py
```
to test the GPU code.
Docker instructions
===================
To use the GPU capabilities of Faiss, you'll need to run "nvidia-docker"
rather than "docker". Make sure that docker
(https://docs.docker.com/engine/installation/) and nvidia-docker
(https://github.com/NVIDIA/nvidia-docker) are installed on your system.
To build the "faiss" image, run
`nvidia-docker build -t faiss .`
or if you don't want/need to clone the sources, just run
`nvidia-docker build -t faiss github.com/facebookresearch/faiss`
If you want to run the tests during the docker build, uncomment the
last 3 "RUN" steps in the Dockerfile. But you might want to run the
tests by yourself, so just run
`nvidia-docker run -ti --name faiss faiss bash`
and run what you want. If you need a dataset (like sift1M), download it
inside the created container, or better, mount a directory from the host:

`nvidia-docker run -ti --name faiss -v /my/host/data/folder/ann_dataset/sift/:/opt/faiss/sift1M faiss bash`
How to use Faiss in your own projects
=====================================
C++
---
The makefile generates a static and a dynamic library
```
libfaiss.a
libfaiss.so (or libfaiss.dylib)
```
The executable should be linked to one of these. If you use
the static version (`.a`), add the LDFLAGS used in the Makefile.
For binary-only distributions, the headers should be under
a `faiss/` directory, so that they can be included as
```c++
#include <faiss/IndexIVFPQ.h>
#include <faiss/gpu/GpuIndexFlat.h>
```
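As a minimal sanity check that compilation and linking work, the following
sketch builds a flat L2 index, adds random vectors and queries them (an
illustrative example only; compile with something like
`g++ -std=c++11 demo.cpp -lfaiss`, depending on how the library was installed):

```c++
#include <faiss/IndexFlat.h>

#include <cstdio>
#include <cstdlib>
#include <vector>

int main() {
    int d = 64, nb = 1000, nq = 5, k = 4;
    std::vector<float> xb(nb * d), xq(nq * d);
    for (auto &v : xb) v = drand48();   // random database vectors
    for (auto &v : xq) v = drand48();   // random queries

    faiss::IndexFlatL2 index(d);        // exact L2 search, no training needed
    index.add(nb, xb.data());           // vectors get labels 0 .. nb - 1

    std::vector<faiss::Index::idx_t> labels(nq * k);
    std::vector<float> distances(nq * k);
    index.search(nq, xq.data(), k, distances.data(), labels.data());

    printf("nearest neighbor of query 0: %ld\n", (long)labels[0]);
    return 0;
}
```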
Python
------
To import Faiss in your own Python project, you need the files
```
__init__.py
swigfaiss.py
_swigfaiss.so
```
to be present in a `faiss/` directory visible in the PYTHONPATH or in the
current directory.
Then Faiss can be used in python with
```python
import faiss
```

View File

@ -0,0 +1,344 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IVFlib.h>
#include <memory>
#include <faiss/IndexPreTransform.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/utils.h>
namespace faiss { namespace ivflib {
void check_compatible_for_merge (const Index * index0,
const Index * index1)
{
const faiss::IndexPreTransform *pt0 =
dynamic_cast<const faiss::IndexPreTransform *>(index0);
if (pt0) {
const faiss::IndexPreTransform *pt1 =
dynamic_cast<const faiss::IndexPreTransform *>(index1);
FAISS_THROW_IF_NOT_MSG (pt1, "both indexes should be pretransforms");
FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
for (int i = 0; i < pt0->chain.size(); i++) {
FAISS_THROW_IF_NOT (typeid(pt0->chain[i]) == typeid(pt1->chain[i]));
}
index0 = pt0->index;
index1 = pt1->index;
}
FAISS_THROW_IF_NOT (typeid(index0) == typeid(index1));
FAISS_THROW_IF_NOT (index0->d == index1->d &&
index0->metric_type == index1->metric_type);
const faiss::IndexIVF *ivf0 = dynamic_cast<const faiss::IndexIVF *>(index0);
if (ivf0) {
const faiss::IndexIVF *ivf1 =
dynamic_cast<const faiss::IndexIVF *>(index1);
FAISS_THROW_IF_NOT (ivf1);
ivf0->check_compatible_for_merge (*ivf1);
}
// TODO: check as thoroughly for other index types
}
const IndexIVF * extract_index_ivf (const Index * index)
{
if (auto *pt =
dynamic_cast<const IndexPreTransform *>(index)) {
index = pt->index;
}
auto *ivf = dynamic_cast<const IndexIVF *>(index);
FAISS_THROW_IF_NOT (ivf);
return ivf;
}
IndexIVF * extract_index_ivf (Index * index) {
return const_cast<IndexIVF*> (extract_index_ivf ((const Index*)(index)));
}
void merge_into(faiss::Index *index0, faiss::Index *index1, bool shift_ids) {
check_compatible_for_merge (index0, index1);
IndexIVF * ivf0 = extract_index_ivf (index0);
IndexIVF * ivf1 = extract_index_ivf (index1);
ivf0->merge_from (*ivf1, shift_ids ? ivf0->ntotal : 0);
// useful for IndexPreTransform
index0->ntotal = ivf0->ntotal;
index1->ntotal = ivf1->ntotal;
}
void search_centroid(faiss::Index *index,
const float* x, int n,
idx_t* centroid_ids)
{
std::unique_ptr<float[]> del;
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
x = index_pre->apply_chain(n, x);
del.reset((float*)x);
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
assert(index_ivf);
index_ivf->quantizer->assign(n, x, centroid_ids);
}
void search_and_return_centroids(faiss::Index *index,
size_t n,
const float* xin,
long k,
float *distances,
idx_t* labels,
idx_t* query_centroid_ids,
idx_t* result_centroid_ids)
{
const float *x = xin;
std::unique_ptr<float []> del;
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
x = index_pre->apply_chain(n, x);
del.reset((float*)x);
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
assert(index_ivf);
size_t nprobe = index_ivf->nprobe;
std::vector<idx_t> cent_nos (n * nprobe);
std::vector<float> cent_dis (n * nprobe);
index_ivf->quantizer->search(
n, x, nprobe, cent_dis.data(), cent_nos.data());
if (query_centroid_ids) {
for (size_t i = 0; i < n; i++)
query_centroid_ids[i] = cent_nos[i * nprobe];
}
index_ivf->search_preassigned (n, x, k,
cent_nos.data(), cent_dis.data(),
distances, labels, true);
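// with store_pairs=true (the last argument), each returned label packs
// the inverted-list number in its high 32 bits and the offset within
// that list in its low 32 bits; the loop below decodes it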
for (size_t i = 0; i < n * k; i++) {
idx_t label = labels[i];
if (label < 0) {
if (result_centroid_ids)
result_centroid_ids[i] = -1;
} else {
long list_no = label >> 32;
long list_index = label & 0xffffffff;
if (result_centroid_ids)
result_centroid_ids[i] = list_no;
labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
}
}
}
SlidingIndexWindow::SlidingIndexWindow (Index *index): index (index) {
n_slice = 0;
IndexIVF* index_ivf = const_cast<IndexIVF*>(extract_index_ivf (index));
ils = dynamic_cast<ArrayInvertedLists *> (index_ivf->invlists);
FAISS_THROW_IF_NOT_MSG (ils,
"only supports indexes with ArrayInvertedLists");
nlist = ils->nlist;
sizes.resize(nlist);
}
template<class T>
static void shift_and_add (std::vector<T> & dst,
size_t remove,
const std::vector<T> & src)
{
if (remove > 0)
memmove (dst.data(), dst.data() + remove,
(dst.size() - remove) * sizeof (T));
size_t insert_point = dst.size() - remove;
dst.resize (insert_point + src.size());
memcpy (dst.data() + insert_point, src.data (), src.size() * sizeof(T));
}
template<class T>
static void remove_from_begin (std::vector<T> & v,
size_t remove)
{
if (remove > 0)
v.erase (v.begin(), v.begin() + remove);
}
void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) {
FAISS_THROW_IF_NOT_MSG (!remove_oldest || n_slice > 0,
"cannot remove slice: there is none");
const ArrayInvertedLists *ils2 = nullptr;
if(sub_index) {
check_compatible_for_merge (index, sub_index);
ils2 = dynamic_cast<const ArrayInvertedLists*>(
extract_index_ivf (sub_index)->invlists);
FAISS_THROW_IF_NOT_MSG (ils2, "supports only ArrayInvertedLists");
}
IndexIVF *index_ivf = extract_index_ivf (index);
if (remove_oldest && ils2) {
for (int i = 0; i < nlist; i++) {
std::vector<size_t> & sizesi = sizes[i];
size_t amount_to_remove = sizesi[0];
index_ivf->ntotal += ils2->ids[i].size() - amount_to_remove;
shift_and_add (ils->ids[i], amount_to_remove, ils2->ids[i]);
shift_and_add (ils->codes[i], amount_to_remove * ils->code_size,
ils2->codes[i]);
for (int j = 0; j + 1 < n_slice; j++) {
sizesi[j] = sizesi[j + 1] - amount_to_remove;
}
sizesi[n_slice - 1] = ils->ids[i].size();
}
} else if (ils2) {
for (int i = 0; i < nlist; i++) {
index_ivf->ntotal += ils2->ids[i].size();
shift_and_add (ils->ids[i], 0, ils2->ids[i]);
shift_and_add (ils->codes[i], 0, ils2->codes[i]);
sizes[i].push_back(ils->ids[i].size());
}
n_slice++;
} else if (remove_oldest) {
for (int i = 0; i < nlist; i++) {
size_t amount_to_remove = sizes[i][0];
index_ivf->ntotal -= amount_to_remove;
remove_from_begin (ils->ids[i], amount_to_remove);
remove_from_begin (ils->codes[i],
amount_to_remove * ils->code_size);
for (int j = 0; j + 1 < n_slice; j++) {
sizes[i][j] = sizes[i][j + 1] - amount_to_remove;
}
sizes[i].pop_back ();
}
n_slice--;
} else {
FAISS_THROW_MSG ("nothing to do???");
}
index->ntotal = index_ivf->ntotal;
}
// Get a subset of inverted lists [i0, i1). Works on IndexIVF's and
// IndexIVF's embedded in a IndexPreTransform
ArrayInvertedLists *
get_invlist_range (const Index *index, long i0, long i1)
{
const IndexIVF *ivf = extract_index_ivf (index);
FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
const InvertedLists *src = ivf->invlists;
ArrayInvertedLists * il = new ArrayInvertedLists(i1 - i0, src->code_size);
for (long i = i0; i < i1; i++) {
il->add_entries(i - i0, src->list_size(i),
InvertedLists::ScopedIds (src, i).get(),
InvertedLists::ScopedCodes (src, i).get());
}
return il;
}
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src)
{
IndexIVF *ivf = extract_index_ivf (index);
FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
ArrayInvertedLists *dst = dynamic_cast<ArrayInvertedLists *>(ivf->invlists);
FAISS_THROW_IF_NOT_MSG (dst, "only ArrayInvertedLists supported");
FAISS_THROW_IF_NOT (src->nlist == i1 - i0 &&
dst->code_size == src->code_size);
size_t ntotal = index->ntotal;
for (long i = i0 ; i < i1; i++) {
ntotal -= dst->list_size (i);
ntotal += src->list_size (i - i0);
std::swap (src->codes[i - i0], dst->codes[i]);
std::swap (src->ids[i - i0], dst->ids[i]);
}
ivf->ntotal = index->ntotal = ntotal;
}
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params,
size_t *nb_dis_ptr)
{
FAISS_THROW_IF_NOT (params);
const float *prev_x = x;
ScopeDeleter<float> del;
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
x = ip->apply_chain (n, x);
if (x != prev_x) {
del.set(x);
}
index = ip->index;
}
std::vector<idx_t> Iq(params->nprobe * n);
std::vector<float> Dq(params->nprobe * n);
const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
FAISS_THROW_IF_NOT (index_ivf);
double t0 = getmillisecs();
index_ivf->quantizer->search(n, x, params->nprobe,
Dq.data(), Iq.data());
double t1 = getmillisecs();
indexIVF_stats.quantization_time += t1 - t0;
if (nb_dis_ptr) {
size_t nb_dis = 0;
const InvertedLists *il = index_ivf->invlists;
for (idx_t i = 0; i < n * params->nprobe; i++) {
if (Iq[i] >= 0) {
nb_dis += il->list_size(Iq[i]);
}
}
*nb_dis_ptr = nb_dis;
}
index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
distances, labels,
false, params);
double t2 = getmillisecs();
indexIVF_stats.search_time += t2 - t1;
}
} } // namespace faiss::ivflib

core/src/index/thirdparty/faiss/IVFlib.h vendored Normal file
View File

@ -0,0 +1,132 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_IVFLIB_H
#define FAISS_IVFLIB_H
/** Since IVF (inverted file) indexes are so useful for
* large-scale use cases, we group a few functions related to them in
* this small library. Most functions work both on IndexIVFs and
* IndexIVFs embedded within an IndexPreTransform.
*/
#include <vector>
#include <faiss/IndexIVF.h>
namespace faiss { namespace ivflib {
/** check if two indexes have the same parameters and are trained in
* the same way, otherwise throw. */
void check_compatible_for_merge (const Index * index1,
const Index * index2);
/** get an IndexIVF from an index. The index may be an IndexIVF or
* some wrapper class that encloses an IndexIVF
*
* throws an exception if this is not the case.
*/
const IndexIVF * extract_index_ivf (const Index * index);
IndexIVF * extract_index_ivf (Index * index);
/** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
 * embedded in an IndexPreTransform. On output, index1 is empty.
*
* @param shift_ids: translate the ids from index1 to index0->prev_ntotal
*/
void merge_into(Index *index0, Index *index1, bool shift_ids);
typedef Index::idx_t idx_t;
/* Returns the cluster the embeddings belong to.
*
* @param index Index, which should be an IVF index
* (otherwise there are no clusters)
 * @param x object descriptors for which the centroids should be found,
 * size n * d
 * @param centroid_ids
 * cluster id each object belongs to, size n
*/
void search_centroid(Index *index,
const float* x, int n,
idx_t* centroid_ids);
/* Returns the cluster the embeddings belong to.
*
* @param index Index, which should be an IVF index
* (otherwise there are no clusters)
* @param query_centroid_ids
* centroid ids corresponding to the query vectors (size n)
* @param result_centroid_ids
* centroid ids corresponding to the results (size n * k)
* other arguments are the same as the standard search function
*/
void search_and_return_centroids(Index *index,
size_t n,
const float* xin,
long k,
float *distances,
idx_t* labels,
idx_t* query_centroid_ids,
idx_t* result_centroid_ids);
/** A set of IndexIVFs concatenated together in a FIFO fashion.
 * At each "step", the oldest index slice is removed and a new index is added.
*/
struct SlidingIndexWindow {
/// common index that contains the sliding window
Index * index;
/// InvertedLists of index
ArrayInvertedLists *ils;
/// number of slices currently in index
int n_slice;
/// same as index->nlist
size_t nlist;
/// cumulative list sizes at each slice
std::vector<std::vector<size_t> > sizes;
/// index should be initially empty and trained
SlidingIndexWindow (Index *index);
/** Add one index to the current index and remove the oldest one.
*
* @param sub_index slice to swap in (can be NULL)
* @param remove_oldest if true, remove the oldest slices */
void step(const Index *sub_index, bool remove_oldest);
};
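/* Illustrative sliding-window loop (a sketch; make_slice is a hypothetical
 * helper returning a freshly filled index compatible with big_index):
 *
 *   SlidingIndexWindow window (big_index);
 *   for (int t = 0; t < nstep; t++) {
 *       Index *slice = make_slice (t);
 *       window.step (slice, window.n_slice >= max_slices);
 *       delete slice;
 *   }
 */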
/// Get a subset of inverted lists [i0, i1)
ArrayInvertedLists * get_invlist_range (const Index *index,
long i0, long i1);
/// Set a subset of inverted lists
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src);
// search an IndexIVF, possibly embedded in an IndexPreTransform with
// given parameters. Optionally returns the number of distances
// computed
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params,
size_t *nb_dis = nullptr);
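/* Illustrative call (a sketch; ivf_index is assumed to be a trained and
 * populated IndexIVF, D and I pre-allocated to nq * k entries):
 *
 *   IVFSearchParameters params;
 *   params.nprobe = 16;
 *   params.max_codes = 0;            // 0 = no limit
 *   size_t ndis = 0;
 *   search_with_parameters (ivf_index, nq, xq, k, D, I, &params, &ndis);
 */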
} } // namespace faiss::ivflib
#endif

View File

@ -0,0 +1,171 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/Index.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>
#include <cstring>
namespace faiss {
Index::~Index ()
{
}
void Index::train(idx_t /*n*/, const float* /*x*/) {
// does nothing by default
}
void Index::range_search (idx_t , const float *, float,
RangeSearchResult *) const
{
FAISS_THROW_MSG ("range search not implemented");
}
void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k)
{
float * distances = new float[n * k];
ScopeDeleter<float> del(distances);
search (n, x, k, distances, labels);
}
void Index::add_with_ids(
idx_t /*n*/,
const float* /*x*/,
const idx_t* /*xids*/) {
FAISS_THROW_MSG ("add_with_ids not implemented for this type of index");
}
size_t Index::remove_ids(const IDSelector& /*sel*/) {
FAISS_THROW_MSG ("remove_ids not implemented for this type of index");
return -1;
}
void Index::reconstruct (idx_t, float * ) const {
FAISS_THROW_MSG ("reconstruct not implemented for this type of index");
}
void Index::reconstruct_n (idx_t i0, idx_t ni, float *recons) const {
for (idx_t i = 0; i < ni; i++) {
reconstruct (i0 + i, recons + i * d);
}
}
void Index::search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const {
search (n, x, k, distances, labels);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
float* reconstructed = recons + ij * d;
if (key < 0) {
// Fill with NaNs
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
reconstruct (key, reconstructed);
}
}
}
}
void Index::compute_residual (const float * x,
float * residual, idx_t key) const {
reconstruct (key, residual);
for (size_t i = 0; i < d; i++) {
residual[i] = x[i] - residual[i];
}
}
void Index::compute_residual_n (idx_t n, const float* xs,
float* residuals,
const idx_t* keys) const {
#pragma omp parallel for
for (idx_t i = 0; i < n; ++i) {
compute_residual(&xs[i * d], &residuals[i * d], keys[i]);
}
}
size_t Index::sa_code_size () const
{
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}
void Index::sa_encode (idx_t, const float *,
uint8_t *) const
{
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}
void Index::sa_decode (idx_t, const uint8_t *,
float *) const
{
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}
namespace {
// storage that explicitly reconstructs vectors before computing distances
struct GenericDistanceComputer : DistanceComputer {
size_t d;
const Index& storage;
std::vector<float> buf;
const float *q;
explicit GenericDistanceComputer(const Index& storage)
: storage(storage) {
d = storage.d;
buf.resize(d * 2);
}
float operator () (idx_t i) override {
storage.reconstruct(i, buf.data());
return fvec_L2sqr(q, buf.data(), d);
}
float symmetric_dis(idx_t i, idx_t j) override {
storage.reconstruct(i, buf.data());
storage.reconstruct(j, buf.data() + d);
return fvec_L2sqr(buf.data() + d, buf.data(), d);
}
void set_query(const float *x) override {
q = x;
}
};
} // namespace
DistanceComputer * Index::get_distance_computer() const {
if (metric_type == METRIC_L2) {
return new GenericDistanceComputer(*this);
} else {
FAISS_THROW_MSG ("get_distance_computer() not implemented");
}
}
}

core/src/index/thirdparty/faiss/Index.h vendored Normal file
View File

@ -0,0 +1,261 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_H
#define FAISS_INDEX_H
#include <cstdio>
#include <typeinfo>
#include <string>
#include <sstream>
#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 6
#define FAISS_VERSION_PATCH 0
/**
* @namespace faiss
*
* Throughout the library, vectors are provided as float * pointers.
* Most algorithms can be optimized when several vectors are processed
* (added/searched) together in a batch. In this case, they are passed
* in as a matrix. When n vectors of size d are provided as float * x,
* component j of vector i is
*
* x[ i * d + j ]
*
* where 0 <= i < n and 0 <= j < d. In other words, matrices are
* always compact. When specifying the size of the matrix, we call it
* an n*d matrix, which implies a row-major storage.
*/
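/* For example, with n = 2 vectors of dimension d = 3, x is the 6-float
 * array [x0_0, x0_1, x0_2, x1_0, x1_1, x1_2] (row-major, no padding). */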
namespace faiss {
/// Some algorithms support both an inner product version and an L2 search version.
enum MetricType {
METRIC_INNER_PRODUCT = 0, ///< maximum inner product search
METRIC_L2 = 1, ///< squared L2 search
METRIC_L1, ///< L1 (aka cityblock)
METRIC_Linf, ///< infinity distance
METRIC_Lp, ///< L_p distance, p is given by metric_arg
/// some additional metrics defined in scipy.spatial.distance
METRIC_Canberra = 20,
METRIC_BrayCurtis,
METRIC_JensenShannon,
};
/// Forward declarations see AuxIndexStructures.h
struct IDSelector;
struct RangeSearchResult;
struct DistanceComputer;
/** Abstract structure for an index
*
 * Supports adding vectors and searching them.
*
* Currently only asymmetric queries are supported:
* database-to-database queries are not implemented.
*/
struct Index {
using idx_t = int64_t; ///< all indices are this type
using component_t = float;
using distance_t = float;
int d; ///< vector dimension
idx_t ntotal; ///< total nb of indexed vectors
bool verbose; ///< verbosity level
/// set if the Index does not require training, or if training is
/// done already
bool is_trained;
/// type of metric this index uses for search
MetricType metric_type;
float metric_arg; ///< argument of the metric type
explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):
d(d),
ntotal(0),
verbose(false),
is_trained(true),
metric_type (metric),
metric_arg(0) {}
virtual ~Index ();
/** Perform training on a representative set of vectors
*
* @param n nb of training vectors
 * @param x training vectors, size n * d
*/
virtual void train(idx_t n, const float* x);
/** Add n vectors of dimension d to the index.
*
* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
 * This function slices the input vectors into chunks smaller than
* blocksize_add and calls add_core.
* @param x input matrix, size n * d
*/
virtual void add (idx_t n, const float *x) = 0;
/** Same as add, but stores xids instead of sequential ids.
*
* The default implementation fails with an assertion, as it is
* not supported by all indexes.
*
* @param xids if non-null, ids to store for the vectors (size n)
*/
virtual void add_with_ids (idx_t n, const float * x, const idx_t *xids);
/** query n vectors of dimension d to the index.
*
* return at most k vectors. If there are not enough results for a
* query, the result array is padded with -1s.
*
* @param x input vectors to search, size n * d
* @param labels output labels of the NNs, size n*k
* @param distances output pairwise distances, size n*k
*/
virtual void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const = 0;
/** query n vectors of dimension d to the index.
*
* return all vectors with distance < radius. Note that many
* indexes do not implement the range_search (only the k-NN search
* is mandatory).
*
* @param x input vectors to search, size n * d
* @param radius search radius
* @param result result table
*/
virtual void range_search (idx_t n, const float *x, float radius,
RangeSearchResult *result) const;
/** return the indexes of the k vectors closest to the query x.
*
 * This function is identical to search but only returns labels of neighbors.
* @param x input vectors to search, size n * d
* @param labels output labels of the NNs, size n*k
*/
void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
/// removes all elements from the database.
virtual void reset() = 0;
/** removes IDs from the index. Not supported by all
* indexes. Returns the number of elements removed.
*/
virtual size_t remove_ids (const IDSelector & sel);
/** Reconstruct a stored vector (or an approximation if lossy coding)
*
* this function may not be defined for some indexes
* @param key id of the vector to reconstruct
 * @param recons reconstructed vector (size d)
*/
virtual void reconstruct (idx_t key, float * recons) const;
/** Reconstruct vectors i0 to i0 + ni - 1
*
* this function may not be defined for some indexes
 * @param recons reconstructed vector (size ni * d)
*/
virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* If there are not enough results for a query, the resulting arrays
 * are padded with -1s.
*
* @param recons reconstructed vectors size (n, k, d)
**/
virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const;
/** Computes a residual vector after indexing encoding.
*
* The residual vector is the difference between a vector and the
* reconstruction that can be decoded from its representation in
* the index. The residual can be used for multiple-stage indexing
* methods, like IndexIVF's methods.
*
* @param x input vector, size d
* @param residual output residual vector, size d
* @param key encoded index, as returned by search and assign
*/
virtual void compute_residual (const float * x,
float * residual, idx_t key) const;
/** Computes a residual vector after indexing encoding (batch form).
* Equivalent to calling compute_residual for each vector.
*
* The residual vector is the difference between a vector and the
* reconstruction that can be decoded from its representation in
* the index. The residual can be used for multiple-stage indexing
* methods, like IndexIVF's methods.
*
* @param n number of vectors
* @param xs input vectors, size (n x d)
* @param residuals output residual vectors, size (n x d)
* @param keys encoded index, as returned by search and assign
*/
virtual void compute_residual_n (idx_t n, const float* xs,
float* residuals,
const idx_t* keys) const;
/** Get a DistanceComputer (defined in AuxIndexStructures) object
* for this kind of index.
*
* DistanceComputer is implemented for indexes that support random
* access of their vectors.
*/
virtual DistanceComputer * get_distance_computer() const;
/* The standalone codec interface */
/** size of the produced codes in bytes */
virtual size_t sa_code_size () const;
/** encode a set of vectors
*
* @param n number of vectors
* @param x input vectors, size n * d
* @param bytes output encoded vectors, size n * sa_code_size()
*/
virtual void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const;
/** decode a set of vectors
*
* @param n number of vectors
* @param bytes input encoded vectors, size n * sa_code_size()
* @param x output vectors, size n * d
*/
virtual void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const;
};
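/* Illustrative search call (a sketch; xb and xq are caller-owned row-major
 * arrays laid out as described at the top of this file):
 *
 *   index->add (nb, xb);                       // labels 0 .. nb - 1
 *   std::vector<Index::idx_t> I (nq * k);
 *   std::vector<float> D (nq * k);
 *   index->search (nq, xq, k, D.data(), I.data());
 *   // entries of I are -1 wherever fewer than k results exist
 */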
}
#endif

View File

@ -0,0 +1,437 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/Index2Layer.h>
#include <cmath>
#include <cstdio>
#include <cassert>
#include <stdint.h>
#ifdef __SSE__
#include <immintrin.h>
#endif
#include <algorithm>
#include <faiss/IndexIVFPQ.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/IndexFlat.h>
#include <faiss/utils/distances.h>
/*
#include <faiss/utils/Heap.h>
#include <faiss/Clustering.h>
#include <faiss/utils/hamming.h>
*/
namespace faiss {
using idx_t = Index::idx_t;
/*************************************
* Index2Layer implementation
*************************************/
Index2Layer::Index2Layer (Index * quantizer, size_t nlist,
int M, int nbit,
MetricType metric):
Index (quantizer->d, metric),
q1 (quantizer, nlist),
pq (quantizer->d, M, nbit)
{
is_trained = false;
for (int nbyte = 0; nbyte < 7; nbyte++) {
if ((1L << (8 * nbyte)) >= nlist) {
code_size_1 = nbyte;
break;
}
}
code_size_2 = pq.code_size;
code_size = code_size_1 + code_size_2;
}
Index2Layer::Index2Layer ()
{
code_size = code_size_1 = code_size_2 = 0;
}
Index2Layer::~Index2Layer ()
{}
void Index2Layer::train(idx_t n, const float* x)
{
if (verbose) {
printf ("training level-1 quantizer %ld vectors in %dD\n",
n, d);
}
q1.train_q1 (n, x, verbose, metric_type);
if (verbose) {
printf("computing residuals\n");
}
const float * x_in = x;
x = fvecs_maybe_subsample (
d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
x, verbose, pq.cp.seed);
ScopeDeleter<float> del_x (x_in == x ? nullptr : x);
std::vector<idx_t> assign(n); // assignment to coarse centroids
q1.quantizer->assign (n, x, assign.data());
std::vector<float> residuals(n * d);
for (idx_t i = 0; i < n; i++) {
q1.quantizer->compute_residual (
x + i * d, residuals.data() + i * d, assign[i]);
}
if (verbose)
printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
pq.M, pq.ksub, n, d);
pq.verbose = verbose;
pq.train (n, residuals.data());
is_trained = true;
}
void Index2Layer::add(idx_t n, const float* x)
{
idx_t bs = 32768;
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min(i0 + bs, n);
if (verbose) {
printf("Index2Layer::add: adding %ld:%ld / %ld\n",
i0, i1, n);
}
add (i1 - i0, x + i0 * d);
}
return;
}
std::vector<idx_t> codes1 (n);
q1.quantizer->assign (n, x, codes1.data());
std::vector<float> residuals(n * d);
for (idx_t i = 0; i < n; i++) {
q1.quantizer->compute_residual (
x + i * d, residuals.data() + i * d, codes1[i]);
}
std::vector<uint8_t> codes2 (n * code_size_2);
pq.compute_codes (residuals.data(), codes2.data(), n);
codes.resize ((ntotal + n) * code_size);
uint8_t *wp = &codes[ntotal * code_size];
{
int i = 0x11223344;
const char *ip = (char*)&i;
FAISS_THROW_IF_NOT_MSG (ip[0] == 0x44,
"works only on a little-endian CPU");
}
// copy to output table
for (idx_t i = 0; i < n; i++) {
memcpy (wp, &codes1[i], code_size_1);
wp += code_size_1;
memcpy (wp, &codes2[i * code_size_2], code_size_2);
wp += code_size_2;
}
ntotal += n;
}
void Index2Layer::search(
idx_t /*n*/,
const float* /*x*/,
idx_t /*k*/,
float* /*distances*/,
idx_t* /*labels*/) const {
FAISS_THROW_MSG("not implemented");
}
void Index2Layer::reconstruct_n(idx_t i0, idx_t ni, float* recons) const
{
float recons1[d];
FAISS_THROW_IF_NOT (i0 >= 0 && i0 + ni <= ntotal);
const uint8_t *rp = &codes[i0 * code_size];
for (idx_t i = 0; i < ni; i++) {
idx_t key = 0;
memcpy (&key, rp, code_size_1);
q1.quantizer->reconstruct (key, recons1);
rp += code_size_1;
pq.decode (rp, recons);
for (idx_t j = 0; j < d; j++) {
recons[j] += recons1[j];
}
rp += code_size_2;
recons += d;
}
}
void Index2Layer::transfer_to_IVFPQ (IndexIVFPQ & other) const
{
FAISS_THROW_IF_NOT (other.nlist == q1.nlist);
FAISS_THROW_IF_NOT (other.code_size == code_size_2);
FAISS_THROW_IF_NOT (other.ntotal == 0);
const uint8_t *rp = codes.data();
for (idx_t i = 0; i < ntotal; i++) {
idx_t key = 0;
memcpy (&key, rp, code_size_1);
rp += code_size_1;
other.invlists->add_entry (key, i, rp);
rp += code_size_2;
}
other.ntotal = ntotal;
}
void Index2Layer::reconstruct(idx_t key, float* recons) const
{
reconstruct_n (key, 1, recons);
}
void Index2Layer::reset()
{
ntotal = 0;
codes.clear ();
}
namespace {
struct Distance2Level : DistanceComputer {
size_t d;
const Index2Layer& storage;
std::vector<float> buf;
const float *q;
const float *pq_l1_tab, *pq_l2_tab;
explicit Distance2Level(const Index2Layer& storage)
: storage(storage) {
d = storage.d;
FAISS_ASSERT(storage.pq.dsub == 4);
pq_l2_tab = storage.pq.centroids.data();
buf.resize(2 * d);
}
float symmetric_dis(idx_t i, idx_t j) override {
storage.reconstruct(i, buf.data());
storage.reconstruct(j, buf.data() + d);
return fvec_L2sqr(buf.data() + d, buf.data(), d);
}
void set_query(const float *x) override {
q = x;
}
};
// well optimized for xNN+PQNN
struct DistanceXPQ4 : Distance2Level {
int M, k;
explicit DistanceXPQ4(const Index2Layer& storage)
: Distance2Level (storage) {
const IndexFlat *quantizer =
dynamic_cast<IndexFlat*> (storage.q1.quantizer);
FAISS_ASSERT(quantizer);
M = storage.pq.M;
pq_l1_tab = quantizer->xb.data();
}
float operator () (idx_t i) override {
#ifdef __SSE__
const uint8_t *code = storage.codes.data() + i * storage.code_size;
long key = 0;
memcpy (&key, code, storage.code_size_1);
code += storage.code_size_1;
// walking pointers
const float *qa = q;
const __m128 *l1_t = (const __m128 *)(pq_l1_tab + d * key);
const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab;
__m128 accu = _mm_setzero_ps();
for (int m = 0; m < M; m++) {
__m128 qi = _mm_loadu_ps(qa);
__m128 recons = l1_t[m] + pq_l2_t[*code++];
__m128 diff = qi - recons;
accu += diff * diff;
pq_l2_t += 256;
qa += 4;
}
accu = _mm_hadd_ps (accu, accu);
accu = _mm_hadd_ps (accu, accu);
return _mm_cvtss_f32 (accu);
#else
FAISS_THROW_MSG("not implemented for non-x64 platforms");
#endif
}
};
// well optimized for 2xNN+PQNN
struct Distance2xXPQ4 : Distance2Level {
int M_2, mi_nbits;
explicit Distance2xXPQ4(const Index2Layer& storage)
: Distance2Level(storage) {
const MultiIndexQuantizer *mi =
dynamic_cast<MultiIndexQuantizer*> (storage.q1.quantizer);
FAISS_ASSERT(mi);
FAISS_ASSERT(storage.pq.M % 2 == 0);
M_2 = storage.pq.M / 2;
mi_nbits = mi->pq.nbits;
pq_l1_tab = mi->pq.centroids.data();
}
float operator () (idx_t i) override {
const uint8_t *code = storage.codes.data() + i * storage.code_size;
long key01 = 0;
memcpy (&key01, code, storage.code_size_1);
code += storage.code_size_1;
#ifdef __SSE__
// walking pointers
const float *qa = q;
const __m128 *pq_l1_t = (const __m128 *)pq_l1_tab;
const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab;
__m128 accu = _mm_setzero_ps();
for (int mi_m = 0; mi_m < 2; mi_m++) {
long l1_idx = key01 & ((1L << mi_nbits) - 1);
const __m128 * pq_l1 = pq_l1_t + M_2 * l1_idx;
for (int m = 0; m < M_2; m++) {
__m128 qi = _mm_loadu_ps(qa);
__m128 recons = pq_l1[m] + pq_l2_t[*code++];
__m128 diff = qi - recons;
accu += diff * diff;
pq_l2_t += 256;
qa += 4;
}
pq_l1_t += M_2 << mi_nbits;
key01 >>= mi_nbits;
}
accu = _mm_hadd_ps (accu, accu);
accu = _mm_hadd_ps (accu, accu);
return _mm_cvtss_f32 (accu);
#else
FAISS_THROW_MSG("not implemented for non-x64 platforms");
#endif
}
};
} // namespace
DistanceComputer * Index2Layer::get_distance_computer() const {
#ifdef __SSE__
const MultiIndexQuantizer *mi =
dynamic_cast<MultiIndexQuantizer*> (q1.quantizer);
if (mi && pq.M % 2 == 0 && pq.dsub == 4) {
return new Distance2xXPQ4(*this);
}
const IndexFlat *fl =
dynamic_cast<IndexFlat*> (q1.quantizer);
if (fl && pq.dsub == 4) {
return new DistanceXPQ4(*this);
}
#endif
return Index::get_distance_computer();
}
/* The standalone codec interface */
size_t Index2Layer::sa_code_size () const
{
return code_size;
}
void Index2Layer::sa_encode (idx_t n, const float *x, uint8_t *bytes) const
{
FAISS_THROW_IF_NOT (is_trained);
std::unique_ptr<int64_t []> list_nos (new int64_t [n]);
q1.quantizer->assign (n, x, list_nos.get());
std::vector<float> residuals(n * d);
for (idx_t i = 0; i < n; i++) {
q1.quantizer->compute_residual (
x + i * d, residuals.data() + i * d, list_nos[i]);
}
pq.compute_codes (residuals.data(), bytes, n);
for (idx_t i = n - 1; i >= 0; i--) {
uint8_t * code = bytes + i * code_size;
memmove (code + code_size_1,
bytes + i * code_size_2, code_size_2);
q1.encode_listno (list_nos[i], code);
}
}
void Index2Layer::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
{
#pragma omp parallel
{
std::vector<float> residual (d);
#pragma omp for
for (size_t i = 0; i < n; i++) {
const uint8_t *code = bytes + i * code_size;
int64_t list_no = q1.decode_listno (code);
float *xi = x + i * d;
pq.decode (code + code_size_1, xi);
q1.quantizer->reconstruct (list_no, residual.data());
for (size_t j = 0; j < d; j++) {
xi[j] += residual[j];
}
}
}
}
} // namespace faiss

View File

@ -0,0 +1,85 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/IndexPQ.h>
#include <faiss/IndexIVF.h>
namespace faiss {
struct IndexIVFPQ;
/** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially
*
 * The class is mainly intended to store encoded vectors that can be
 * accessed randomly; the search function is not implemented.
*/
struct Index2Layer: Index {
/// first level quantizer
Level1Quantizer q1;
/// second level quantizer is always a PQ
ProductQuantizer pq;
/// Codes. Size ntotal * code_size.
std::vector<uint8_t> codes;
/// size of the code for the first level (ceil(log8(q1.nlist)))
size_t code_size_1;
/// size of the code for the second level
size_t code_size_2;
/// code_size_1 + code_size_2
size_t code_size;
Index2Layer (Index * quantizer, size_t nlist,
int M, int nbit = 8,
MetricType metric = METRIC_L2);
Index2Layer ();
~Index2Layer ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
/// not implemented
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset() override;
DistanceComputer * get_distance_computer() const override;
/// transfer the flat codes to an IVFPQ index
void transfer_to_IVFPQ(IndexIVFPQ & other) const;
/* The standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x, uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes, float *x) const override;
};
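/* Illustrative round-trip (a sketch; coarse is assumed to be a quantizer
 * over the same dimension d, e.g. an IndexFlatL2):
 *
 *   Index2Layer codec (&coarse, nlist, M);
 *   codec.train (n, x);
 *   codec.add (n, x);
 *   std::vector<float> recons (n * codec.d);
 *   codec.reconstruct_n (0, n, recons.data());  // approximate vectors
 */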
} // namespace faiss

View File

@ -0,0 +1,77 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexBinary.h>
#include <faiss/impl/FaissAssert.h>
#include <cstring>
namespace faiss {
IndexBinary::~IndexBinary() {}
void IndexBinary::train(idx_t, const uint8_t *) {
// Does nothing by default.
}
void IndexBinary::range_search(idx_t, const uint8_t *, int,
RangeSearchResult *) const {
FAISS_THROW_MSG("range search not implemented");
}
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) {
int *distances = new int[n * k];
ScopeDeleter<int> del(distances);
search(n, x, k, distances, labels);
}
void IndexBinary::add_with_ids(idx_t, const uint8_t *, const idx_t *) {
FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
}
size_t IndexBinary::remove_ids(const IDSelector&) {
FAISS_THROW_MSG("remove_ids not implemented for this type of index");
return 0;
}
void IndexBinary::reconstruct(idx_t, uint8_t *) const {
FAISS_THROW_MSG("reconstruct not implemented for this type of index");
}
void IndexBinary::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
for (idx_t i = 0; i < ni; i++) {
reconstruct(i0 + i, recons + i * d);
}
}
void IndexBinary::search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const {
search(n, x, k, distances, labels);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
uint8_t *reconstructed = recons + ij * d;
if (key < 0) {
// Fill with 0xFF (binary codes have no NaN)
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
reconstruct(key, reconstructed);
}
}
}
}
void IndexBinary::display() const {
printf("Index: %s -> %ld elements\n", typeid (*this).name(), ntotal);
}
} // namespace faiss

@ -0,0 +1,163 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_H
#define FAISS_INDEX_BINARY_H
#include <cstdio>
#include <typeinfo>
#include <string>
#include <sstream>
#include <faiss/impl/FaissAssert.h>
#include <faiss/Index.h>
namespace faiss {
/// Forward declarations see AuxIndexStructures.h
struct IDSelector;
struct RangeSearchResult;
/** Abstract structure for a binary index.
*
* Supports adding vectors and searching them.
*
* All queries are symmetric because there is no distinction between codes and
* vectors.
*/
struct IndexBinary {
using idx_t = Index::idx_t; ///< all indices are this type
using component_t = uint8_t;
using distance_t = int32_t;
int d; ///< vector dimension
int code_size; ///< number of bytes per vector ( = d / 8 )
idx_t ntotal; ///< total nb of indexed vectors
bool verbose; ///< verbosity level
/// set if the Index does not require training, or if training is done already
bool is_trained;
/// type of metric this index uses for search
MetricType metric_type;
explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
: d(d),
code_size(d / 8),
ntotal(0),
verbose(false),
is_trained(true),
metric_type(metric) {
FAISS_THROW_IF_NOT(d % 8 == 0);
}
virtual ~IndexBinary();
/** Perform training on a representative set of vectors.
*
* @param n nb of training vectors
* @param x training vectors, size n * d / 8
*/
virtual void train(idx_t n, const uint8_t *x);
/** Add n vectors of dimension d to the index.
*
* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
* @param x input matrix, size n * d / 8
*/
virtual void add(idx_t n, const uint8_t *x) = 0;
/** Same as add, but stores xids instead of sequential ids.
*
* The default implementation fails with an assertion, as it is
* not supported by all indexes.
*
* @param xids if non-null, ids to store for the vectors (size n)
*/
virtual void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids);
/** Query n vectors of dimension d to the index.
*
* return at most k vectors. If there are not enough results for a
* query, the result array is padded with -1s.
*
* @param x input vectors to search, size n * d / 8
* @param labels output labels of the NNs, size n*k
* @param distances output pairwise distances, size n*k
*/
virtual void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const = 0;
/** Query n vectors of dimension d to the index.
*
* return all vectors with distance < radius. Note that many
* indexes do not implement the range_search (only the k-NN search
* is mandatory).
*
* @param x input vectors to search, size n * d / 8
* @param radius search radius
* @param result result table
*/
virtual void range_search(idx_t n, const uint8_t *x, int radius,
RangeSearchResult *result) const;
/** Return the indexes of the k vectors closest to the query x.
*
* This function is identical to search but only returns labels of neighbors.
* @param x input vectors to search, size n * d / 8
* @param labels output labels of the NNs, size n*k
*/
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1);
/// Removes all elements from the database.
virtual void reset() = 0;
/** Removes IDs from the index. Not supported by all indexes.
*/
virtual size_t remove_ids(const IDSelector& sel);
/** Reconstruct a stored vector.
*
* This function may not be defined for some indexes.
* @param key id of the vector to reconstruct
* @param recons reconstructed vector (size d / 8)
*/
virtual void reconstruct(idx_t key, uint8_t *recons) const;
/** Reconstruct vectors i0 to i0 + ni - 1.
*
* This function may not be defined for some indexes.
* @param recons reconstructed vectors (size ni * d / 8)
*/
virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* If there are not enough results for a query, the resulting array
* is padded with -1s.
*
* @param recons reconstructed vectors size (n, k, d)
**/
virtual void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const;
/** Display the actual class name and some more info. */
void display() const;
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_H
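A brief sketch of driving this abstract API through a concrete subclass; the sizes and fill pattern are illustrative:

#include <cstdint>
#include <vector>
#include <faiss/IndexBinaryFlat.h>

void binary_api_sketch() {
    int d = 64;                                    // bits; must satisfy d % 8 == 0
    faiss::IndexBinaryFlat index(d);               // code_size == d / 8 == 8 bytes
    std::vector<uint8_t> codes(1000 * index.code_size, 0x5a);
    index.add(1000, codes.data());                 // ids 0 .. 999 assigned
    std::vector<faiss::IndexBinary::idx_t> nn(1000);
    index.assign(1000, codes.data(), nn.data());   // 1-NN id for each input
}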

@ -0,0 +1,83 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexBinaryFlat.h>
#include <cstring>
#include <faiss/utils/hamming.h>
#include <faiss/utils/utils.h>
#include <faiss/utils/Heap.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
IndexBinaryFlat::IndexBinaryFlat(idx_t d)
: IndexBinary(d) {}
void IndexBinaryFlat::add(idx_t n, const uint8_t *x) {
xb.insert(xb.end(), x, x + n * code_size);
ntotal += n;
}
void IndexBinaryFlat::reset() {
xb.clear();
ntotal = 0;
}
void IndexBinaryFlat::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const {
const idx_t block_size = query_batch_size;
for (idx_t s = 0; s < n; s += block_size) {
idx_t nn = block_size;
if (s + block_size > n) {
nn = n - s;
}
if (use_heap) {
// We see the distances and labels as heaps.
int_maxheap_array_t res = {
size_t(nn), size_t(k), labels + s * k, distances + s * k
};
hammings_knn_hc(&res, x + s * code_size, xb.data(), ntotal, code_size,
/* ordered = */ true);
} else {
hammings_knn_mc(x + s * code_size, xb.data(), nn, ntotal, k, code_size,
distances + s * k, labels + s * k);
}
}
}
size_t IndexBinaryFlat::remove_ids(const IDSelector& sel) {
idx_t j = 0;
for (idx_t i = 0; i < ntotal; i++) {
if (sel.is_member(i)) {
// should be removed
} else {
if (i > j) {
memmove(&xb[code_size * j], &xb[code_size * i], sizeof(xb[0]) * code_size);
}
j++;
}
}
long nremove = ntotal - j;
if (nremove > 0) {
ntotal = j;
xb.resize(ntotal * code_size);
}
return nremove;
}
void IndexBinaryFlat::reconstruct(idx_t key, uint8_t *recons) const {
memcpy(recons, &(xb[code_size * key]), sizeof(*recons) * code_size);
}
} // namespace faiss

@ -0,0 +1,54 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef INDEX_BINARY_FLAT_H
#define INDEX_BINARY_FLAT_H
#include <vector>
#include <faiss/IndexBinary.h>
namespace faiss {
/** Index that stores the full vectors and performs exhaustive search. */
struct IndexBinaryFlat : IndexBinary {
/// database vectors, size ntotal * d / 8
std::vector<uint8_t> xb;
/** Select between using a heap or counting to select the k smallest values
* when scanning the database.
*/
bool use_heap = true;
size_t query_batch_size = 32;
explicit IndexBinaryFlat(idx_t d);
void add(idx_t n, const uint8_t *x) override;
void reset() override;
void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void reconstruct(idx_t key, uint8_t *recons) const override;
/** Remove some ids. Note that because of the indexing structure,
* the semantics of this operation are different from the usual ones:
* the new ids are shifted. */
size_t remove_ids(const IDSelector& sel) override;
IndexBinaryFlat() {}
};
} // namespace faiss
#endif // INDEX_BINARY_FLAT_H
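A usage sketch for the flat binary index, including the counting variant toggled by use_heap; the numbers are illustrative:

#include <cstdint>
#include <vector>
#include <faiss/IndexBinaryFlat.h>

void flat_hamming_sketch(size_t nb, size_t nq,
                         const uint8_t* xb, const uint8_t* xq) {
    int d = 256;                              // 32-byte codes
    faiss::IndexBinaryFlat index(d);
    index.add(nb, xb);
    index.use_heap = false;                   // use the counting path instead
    int k = 5;
    std::vector<int32_t> dist(nq * k);        // Hamming distances
    std::vector<faiss::IndexBinary::idx_t> ids(nq * k);
    index.search(nq, xq, k, dist.data(), ids.data());
}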

@ -0,0 +1,78 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexBinaryFromFloat.h>
#include <algorithm>
#include <cmath>
#include <memory>
#include <faiss/utils/utils.h>
namespace faiss {
IndexBinaryFromFloat::IndexBinaryFromFloat() {}
IndexBinaryFromFloat::IndexBinaryFromFloat(Index *index)
: IndexBinary(index->d),
index(index),
own_fields(false) {
is_trained = index->is_trained;
ntotal = index->ntotal;
}
IndexBinaryFromFloat::~IndexBinaryFromFloat() {
if (own_fields) {
delete index;
}
}
void IndexBinaryFromFloat::add(idx_t n, const uint8_t *x) {
constexpr idx_t bs = 32768;
std::unique_ptr<float[]> xf(new float[bs * d]);
for (idx_t b = 0; b < n; b += bs) {
idx_t bn = std::min(bs, n - b);
binary_to_real(bn * d, x + b * code_size, xf.get());
index->add(bn, xf.get());
}
ntotal = index->ntotal;
}
void IndexBinaryFromFloat::reset() {
index->reset();
ntotal = index->ntotal;
}
void IndexBinaryFromFloat::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const {
constexpr idx_t bs = 32768;
std::unique_ptr<float[]> xf(new float[bs * d]);
std::unique_ptr<float[]> df(new float[bs * k]);
for (idx_t b = 0; b < n; b += bs) {
idx_t bn = std::min(bs, n - b);
binary_to_real(bn * d, x + b * code_size, xf.get());
index->search(bn, xf.get(), k, df.get(), labels + b * k);
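// Note: binary_to_real maps bits to +/-1 values, so the squared-L2
// distances returned by the float index are 4x the Hamming distance;
// dividing by 4 recovers an integer-valued distance.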
for (int i = 0; i < bn * k; ++i) {
distances[b * k + i] = int32_t(std::round(df[i] / 4.0));
}
}
}
void IndexBinaryFromFloat::train(idx_t n, const uint8_t *x) {
std::unique_ptr<float[]> xf(new float[n * d]);
binary_to_real(n * d, x, xf.get());
index->train(n, xf.get());
is_trained = true;
ntotal = index->ntotal;
}
} // namespace faiss

@ -0,0 +1,52 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_FROM_FLOAT_H
#define FAISS_INDEX_BINARY_FROM_FLOAT_H
#include <faiss/IndexBinary.h>
namespace faiss {
struct Index;
/** IndexBinary backed by a float Index.
*
* Supports adding vectors and searching them.
*
* All queries are symmetric because there is no distinction between codes and
* vectors.
*/
struct IndexBinaryFromFloat : IndexBinary {
Index *index = nullptr;
bool own_fields = false; ///< Whether object owns the index pointer.
IndexBinaryFromFloat();
explicit IndexBinaryFromFloat(Index *index);
~IndexBinaryFromFloat();
void add(idx_t n, const uint8_t *x) override;
void reset() override;
void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void train(idx_t n, const uint8_t *x) override;
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_FROM_FLOAT_H
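A sketch of wrapping a float index; IndexFlatL2 is one possible backing index:

#include <faiss/IndexBinaryFromFloat.h>
#include <faiss/IndexFlat.h>

void wrap_float_sketch() {
    int d = 64;                                // bits
    faiss::IndexFlatL2 flat(d);                // float index of dimension d
    faiss::IndexBinaryFromFloat index(&flat);  // pointer is borrowed
    // add()/search() now take d/8-byte codes and return distances that
    // approximate Hamming distances (see the /4 rounding in the .cpp).
    // `flat` must outlive `index` unless own_fields is set to true.
}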

@ -0,0 +1,325 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexBinaryHNSW.h>
#include <memory>
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <omp.h>
#include <unordered_set>
#include <queue>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdint.h>
#include <faiss/utils/random.h>
#include <faiss/utils/Heap.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/utils/hamming.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
/**************************************************************
* add / search blocks of descriptors
**************************************************************/
namespace {
void hnsw_add_vertices(IndexBinaryHNSW& index_hnsw,
size_t n0,
size_t n, const uint8_t *x,
bool verbose,
bool preset_levels = false) {
HNSW& hnsw = index_hnsw.hnsw;
size_t ntotal = n0 + n;
double t0 = getmillisecs();
if (verbose) {
printf("hnsw_add_vertices: adding %ld elements on top of %ld "
"(preset_levels=%d)\n",
n, n0, int(preset_levels));
}
int max_level = hnsw.prepare_level_tab(n, preset_levels);
if (verbose) {
printf(" max_level = %d\n", max_level);
}
std::vector<omp_lock_t> locks(ntotal);
for(int i = 0; i < ntotal; i++) {
omp_init_lock(&locks[i]);
}
// add vectors from highest to lowest level
std::vector<int> hist;
std::vector<int> order(n);
{ // make buckets with vectors of the same level
// build histogram
for (int i = 0; i < n; i++) {
HNSW::storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
while (pt_level >= hist.size()) {
hist.push_back(0);
}
hist[pt_level] ++;
}
// accumulate
std::vector<int> offsets(hist.size() + 1, 0);
for (int i = 0; i < hist.size() - 1; i++) {
offsets[i + 1] = offsets[i] + hist[i];
}
// bucket sort
for (int i = 0; i < n; i++) {
HNSW::storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
order[offsets[pt_level]++] = pt_id;
}
}
{ // perform add
RandomGenerator rng2(789);
int i1 = n;
for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
int i0 = i1 - hist[pt_level];
if (verbose) {
printf("Adding %d elements at level %d\n",
i1 - i0, pt_level);
}
// random permutation to get rid of dataset order bias
for (int j = i0; j < i1; j++) {
std::swap(order[j], order[j + rng2.rand_int(i1 - j)]);
}
#pragma omp parallel
{
VisitedTable vt (ntotal);
std::unique_ptr<DistanceComputer> dis(
index_hnsw.get_distance_computer()
);
int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1;
#pragma omp for schedule(dynamic)
for (int i = i0; i < i1; i++) {
HNSW::storage_idx_t pt_id = order[i];
dis->set_query((float *)(x + (pt_id - n0) * index_hnsw.code_size));
hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt);
if (prev_display >= 0 && i - i0 > prev_display + 10000) {
prev_display = i - i0;
printf(" %d / %d\r", i - i0, i1 - i0);
fflush(stdout);
}
}
}
i1 = i0;
}
FAISS_ASSERT(i1 == 0);
}
if (verbose) {
printf("Done in %.3f ms\n", getmillisecs() - t0);
}
for(int i = 0; i < ntotal; i++)
omp_destroy_lock(&locks[i]);
}
} // anonymous namespace
/**************************************************************
* IndexBinaryHNSW implementation
**************************************************************/
IndexBinaryHNSW::IndexBinaryHNSW()
{
is_trained = true;
}
IndexBinaryHNSW::IndexBinaryHNSW(int d, int M)
: IndexBinary(d),
hnsw(M),
own_fields(true),
storage(new IndexBinaryFlat(d))
{
is_trained = true;
}
IndexBinaryHNSW::IndexBinaryHNSW(IndexBinary *storage, int M)
: IndexBinary(storage->d),
hnsw(M),
own_fields(false),
storage(storage)
{
is_trained = true;
}
IndexBinaryHNSW::~IndexBinaryHNSW() {
if (own_fields) {
delete storage;
}
}
void IndexBinaryHNSW::train(idx_t n, const uint8_t *x)
{
// hnsw structure does not require training
storage->train(n, x);
is_trained = true;
}
void IndexBinaryHNSW::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const
{
#pragma omp parallel
{
VisitedTable vt(ntotal);
std::unique_ptr<DistanceComputer> dis(get_distance_computer());
#pragma omp for
for(idx_t i = 0; i < n; i++) {
idx_t *idxi = labels + i * k;
float *simi = (float *)(distances + i * k);
dis->set_query((float *)(x + i * code_size));
maxheap_heapify(k, simi, idxi);
hnsw.search(*dis, k, idxi, simi, vt);
maxheap_reorder(k, simi, idxi);
}
}
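// The search above wrote float distances into the int32_t buffer through
// the reinterpreting casts; the loop below rounds them to integers in place.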
#pragma omp parallel for
for (int i = 0; i < n * k; ++i) {
distances[i] = std::round(((float *)distances)[i]);
}
}
void IndexBinaryHNSW::add(idx_t n, const uint8_t *x)
{
FAISS_THROW_IF_NOT(is_trained);
int n0 = ntotal;
storage->add(n, x);
ntotal = storage->ntotal;
hnsw_add_vertices(*this, n0, n, x, verbose,
hnsw.levels.size() == ntotal);
}
void IndexBinaryHNSW::reset()
{
hnsw.reset();
storage->reset();
ntotal = 0;
}
void IndexBinaryHNSW::reconstruct(idx_t key, uint8_t *recons) const
{
storage->reconstruct(key, recons);
}
namespace {
template<class HammingComputer>
struct FlatHammingDis : DistanceComputer {
const int code_size;
const uint8_t *b;
size_t ndis;
HammingComputer hc;
float operator () (idx_t i) override {
ndis++;
return hc.hamming(b + i * code_size);
}
float symmetric_dis(idx_t i, idx_t j) override {
return HammingComputerDefault(b + j * code_size, code_size)
.hamming(b + i * code_size);
}
explicit FlatHammingDis(const IndexBinaryFlat& storage)
: code_size(storage.code_size),
b(storage.xb.data()),
ndis(0),
hc() {}
// NOTE: Pointers are cast from float in order to reuse the floating-point
// DistanceComputer.
void set_query(const float *x) override {
hc.set((uint8_t *)x, code_size);
}
~FlatHammingDis() override {
#pragma omp critical
{
hnsw_stats.ndis += ndis;
}
}
};
} // namespace
DistanceComputer *IndexBinaryHNSW::get_distance_computer() const {
IndexBinaryFlat *flat_storage = dynamic_cast<IndexBinaryFlat *>(storage);
FAISS_ASSERT(flat_storage != nullptr);
switch(code_size) {
case 4:
return new FlatHammingDis<HammingComputer4>(*flat_storage);
case 8:
return new FlatHammingDis<HammingComputer8>(*flat_storage);
case 16:
return new FlatHammingDis<HammingComputer16>(*flat_storage);
case 20:
return new FlatHammingDis<HammingComputer20>(*flat_storage);
case 32:
return new FlatHammingDis<HammingComputer32>(*flat_storage);
case 64:
return new FlatHammingDis<HammingComputer64>(*flat_storage);
default:
if (code_size % 8 == 0) {
return new FlatHammingDis<HammingComputerM8>(*flat_storage);
} else if (code_size % 4 == 0) {
return new FlatHammingDis<HammingComputerM4>(*flat_storage);
}
}
return new FlatHammingDis<HammingComputerDefault>(*flat_storage);
}
} // namespace faiss

@ -0,0 +1,56 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <faiss/impl/HNSW.h>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/utils/utils.h>
namespace faiss {
/** The HNSW index is a normal random-access index with a HNSW
* link structure built on top */
struct IndexBinaryHNSW : IndexBinary {
typedef HNSW::storage_idx_t storage_idx_t;
// the link structure
HNSW hnsw;
// the sequential storage
bool own_fields;
IndexBinary *storage;
explicit IndexBinaryHNSW();
explicit IndexBinaryHNSW(int d, int M = 32);
explicit IndexBinaryHNSW(IndexBinary *storage, int M = 32);
~IndexBinaryHNSW() override;
DistanceComputer *get_distance_computer() const;
void add(idx_t n, const uint8_t *x) override;
/// Trains the storage if needed
void train(idx_t n, const uint8_t* x) override;
/// entry point for search
void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void reconstruct(idx_t key, uint8_t* recons) const override;
void reset() override;
};
} // namespace faiss
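A usage sketch; M = 16 and the efSearch setting are illustrative choices:

#include <cstdint>
#include <vector>
#include <faiss/IndexBinaryHNSW.h>

void binary_hnsw_sketch(size_t nb, size_t nq,
                        const uint8_t* xb, const uint8_t* xq) {
    int d = 128;                                 // bits per code
    faiss::IndexBinaryHNSW index(d, /*M=*/16);   // flat storage underneath
    index.add(nb, xb);                           // builds the graph
    index.hnsw.efSearch = 64;                    // wider candidate beam
    int k = 10;
    std::vector<int32_t> dist(nq * k);
    std::vector<faiss::IndexBinary::idx_t> ids(nq * k);
    index.search(nq, xq, k, dist.data(), ids.data());
}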

@ -0,0 +1,671 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#include <faiss/IndexBinaryIVF.h>
#include <cstdio>
#include <cstring>
#include <limits>
#include <memory>
#include <faiss/utils/hamming.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexFlat.h>
namespace faiss {
IndexBinaryIVF::IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist)
: IndexBinary(d),
invlists(new ArrayInvertedLists(nlist, code_size)),
own_invlists(true),
nprobe(1),
max_codes(0),
maintain_direct_map(false),
quantizer(quantizer),
nlist(nlist),
own_fields(false),
clustering_index(nullptr)
{
FAISS_THROW_IF_NOT (d == quantizer->d);
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
cp.niter = 10;
}
IndexBinaryIVF::IndexBinaryIVF()
: invlists(nullptr),
own_invlists(false),
nprobe(1),
max_codes(0),
maintain_direct_map(false),
quantizer(nullptr),
nlist(0),
own_fields(false),
clustering_index(nullptr)
{}
void IndexBinaryIVF::add(idx_t n, const uint8_t *x) {
add_with_ids(n, x, nullptr);
}
void IndexBinaryIVF::add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) {
add_core(n, x, xids, nullptr);
}
void IndexBinaryIVF::add_core(idx_t n, const uint8_t *x, const idx_t *xids,
const idx_t *precomputed_idx) {
FAISS_THROW_IF_NOT(is_trained);
assert(invlists);
FAISS_THROW_IF_NOT_MSG(!(maintain_direct_map && xids),
"cannot have direct map and add with ids");
const idx_t * idx;
std::unique_ptr<idx_t[]> scoped_idx;
if (precomputed_idx) {
idx = precomputed_idx;
} else {
scoped_idx.reset(new idx_t[n]);
quantizer->assign(n, x, scoped_idx.get());
idx = scoped_idx.get();
}
long n_add = 0;
for (size_t i = 0; i < n; i++) {
idx_t id = xids ? xids[i] : ntotal + i;
idx_t list_no = idx[i];
if (list_no < 0)
continue;
const uint8_t *xi = x + i * code_size;
size_t offset = invlists->add_entry(list_no, id, xi);
if (maintain_direct_map)
direct_map.push_back(list_no << 32 | offset);
n_add++;
}
if (verbose) {
printf("IndexBinaryIVF::add_with_ids: added %ld / %ld vectors\n",
n_add, n);
}
ntotal += n_add;
}
void IndexBinaryIVF::make_direct_map(bool new_maintain_direct_map) {
// nothing to do
if (new_maintain_direct_map == maintain_direct_map)
return;
if (new_maintain_direct_map) {
direct_map.resize(ntotal, -1);
for (size_t key = 0; key < nlist; key++) {
size_t list_size = invlists->list_size(key);
const idx_t *idlist = invlists->get_ids(key);
for (size_t ofs = 0; ofs < list_size; ofs++) {
FAISS_THROW_IF_NOT_MSG(0 <= idlist[ofs] && idlist[ofs] < ntotal,
"direct map supported only for sequential ids");
direct_map[idlist[ofs]] = key << 32 | ofs;
}
}
} else {
direct_map.clear();
}
maintain_direct_map = new_maintain_direct_map;
}
void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const {
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
double t0 = getmillisecs();
quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
indexIVF_stats.quantization_time += getmillisecs() - t0;
t0 = getmillisecs();
invlists->prefetch_lists(idx.get(), n * nprobe);
search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
distances, labels, false);
indexIVF_stats.search_time += getmillisecs() - t0;
}
void IndexBinaryIVF::reconstruct(idx_t key, uint8_t *recons) const {
FAISS_THROW_IF_NOT_MSG(direct_map.size() == ntotal,
"direct map is not initialized");
idx_t list_no = direct_map[key] >> 32;
idx_t offset = direct_map[key] & 0xffffffff;
reconstruct_from_offset(list_no, offset, recons);
}
void IndexBinaryIVF::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
for (idx_t list_no = 0; list_no < nlist; list_no++) {
size_t list_size = invlists->list_size(list_no);
const Index::idx_t *idlist = invlists->get_ids(list_no);
for (idx_t offset = 0; offset < list_size; offset++) {
idx_t id = idlist[offset];
if (!(id >= i0 && id < i0 + ni)) {
continue;
}
uint8_t *reconstructed = recons + (id - i0) * d;
reconstruct_from_offset(list_no, offset, reconstructed);
}
}
}
void IndexBinaryIVF::search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const {
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
invlists->prefetch_lists(idx.get(), n * nprobe);
// search_preassigned() with `store_pairs` enabled to obtain the list_no
// and offset into `codes` for reconstruction
search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
distances, labels, /* store_pairs */true);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
uint8_t *reconstructed = recons + ij * d;
if (key < 0) {
// Fill with 0xff bytes (binary codes have no NaN) to mark missing results
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
int list_no = key >> 32;
int offset = key & 0xffffffff;
// Update label to the actual id
labels[ij] = invlists->get_single_id(list_no, offset);
reconstruct_from_offset(list_no, offset, reconstructed);
}
}
}
}
void IndexBinaryIVF::reconstruct_from_offset(idx_t list_no, idx_t offset,
uint8_t *recons) const {
memcpy(recons, invlists->get_single_code(list_no, offset), code_size);
}
void IndexBinaryIVF::reset() {
direct_map.clear();
invlists->reset();
ntotal = 0;
}
size_t IndexBinaryIVF::remove_ids(const IDSelector& sel) {
FAISS_THROW_IF_NOT_MSG(!maintain_direct_map,
"direct map remove not implemented");
std::vector<idx_t> toremove(nlist);
#pragma omp parallel for
for (idx_t i = 0; i < nlist; i++) {
idx_t l0 = invlists->list_size (i), l = l0, j = 0;
const idx_t *idsi = invlists->get_ids(i);
while (j < l) {
if (sel.is_member(idsi[j])) {
l--;
invlists->update_entry(
i, j,
invlists->get_single_id(i, l),
invlists->get_single_code(i, l));
} else {
j++;
}
}
toremove[i] = l0 - l;
}
// this will not run well in parallel with on-disk inverted lists because of possible shrinks
size_t nremove = 0;
for (idx_t i = 0; i < nlist; i++) {
if (toremove[i] > 0) {
nremove += toremove[i];
invlists->resize(
i, invlists->list_size(i) - toremove[i]);
}
}
ntotal -= nremove;
return nremove;
}
void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
if (verbose) {
printf("Training quantizer\n");
}
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (verbose) {
printf("IVF quantizer does not need training.\n");
}
} else {
if (verbose) {
printf("Training quantizer on %ld vectors in %dD\n", n, d);
}
Clustering clus(d, nlist, cp);
quantizer->reset();
std::unique_ptr<float[]> x_f(new float[n * d]);
binary_to_real(n * d, x, x_f.get());
IndexFlatL2 index_tmp(d);
if (clustering_index && verbose) {
printf("using clustering_index of dimension %d to do the clustering\n",
clustering_index->d);
}
clus.train(n, x_f.get(), clustering_index ? *clustering_index : index_tmp);
std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());
quantizer->add(clus.k, x_b.get());
quantizer->is_trained = true;
}
is_trained = true;
}
void IndexBinaryIVF::merge_from(IndexBinaryIVF &other, idx_t add_id) {
// minimal sanity checks
FAISS_THROW_IF_NOT(other.d == d);
FAISS_THROW_IF_NOT(other.nlist == nlist);
FAISS_THROW_IF_NOT(other.code_size == code_size);
FAISS_THROW_IF_NOT_MSG((!maintain_direct_map &&
!other.maintain_direct_map),
"direct map copy not implemented");
FAISS_THROW_IF_NOT_MSG(typeid (*this) == typeid (other),
"can only merge indexes of the same type");
invlists->merge_from (other.invlists, add_id);
ntotal += other.ntotal;
other.ntotal = 0;
}
void IndexBinaryIVF::replace_invlists(InvertedLists *il, bool own) {
FAISS_THROW_IF_NOT(il->nlist == nlist &&
il->code_size == code_size);
if (own_invlists) {
delete invlists;
}
invlists = il;
own_invlists = own;
}
namespace {
using idx_t = Index::idx_t;
template<class HammingComputer, bool store_pairs>
struct IVFBinaryScannerL2: BinaryInvertedListScanner {
HammingComputer hc;
size_t code_size;
IVFBinaryScannerL2 (size_t code_size): code_size (code_size)
{}
void set_query (const uint8_t *query_vector) override {
hc.set (query_vector, code_size);
}
idx_t list_no;
void set_list (idx_t list_no, uint8_t /* coarse_dis */) override {
this->list_no = list_no;
}
uint32_t distance_to_code (const uint8_t *code) const override {
return hc.hamming (code);
}
size_t scan_codes (size_t n,
const uint8_t *codes,
const idx_t *ids,
int32_t *simi, idx_t *idxi,
size_t k) const override
{
using C = CMax<int32_t, idx_t>;
size_t nup = 0;
for (size_t j = 0; j < n; j++) {
uint32_t dis = hc.hamming (codes);
if (dis < simi[0]) {
heap_pop<C> (k, simi, idxi);
idx_t id = store_pairs ? (list_no << 32 | j) : ids[j];
heap_push<C> (k, simi, idxi, dis, id);
nup++;
}
codes += code_size;
}
return nup;
}
};
template <bool store_pairs>
BinaryInvertedListScanner *select_IVFBinaryScannerL2 (size_t code_size) {
switch (code_size) {
#define HANDLE_CS(cs) \
case cs: \
return new IVFBinaryScannerL2<HammingComputer ## cs, store_pairs> (cs);
HANDLE_CS(4);
HANDLE_CS(8);
HANDLE_CS(16);
HANDLE_CS(20);
HANDLE_CS(32);
HANDLE_CS(64);
#undef HANDLE_CS
default:
if (code_size % 8 == 0) {
return new IVFBinaryScannerL2<HammingComputerM8,
store_pairs> (code_size);
} else if (code_size % 4 == 0) {
return new IVFBinaryScannerL2<HammingComputerM4,
store_pairs> (code_size);
} else {
return new IVFBinaryScannerL2<HammingComputerDefault,
store_pairs> (code_size);
}
}
}
void search_knn_hamming_heap(const IndexBinaryIVF& ivf,
size_t n,
const uint8_t *x,
idx_t k,
const idx_t *keys,
const int32_t * coarse_dis,
int32_t *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params)
{
long nprobe = params ? params->nprobe : ivf.nprobe;
long max_codes = params ? params->max_codes : ivf.max_codes;
MetricType metric_type = ivf.metric_type;
// almost verbatim copy from IndexIVF::search_preassigned
size_t nlistv = 0, ndis = 0, nheap = 0;
using HeapForIP = CMin<int32_t, idx_t>;
using HeapForL2 = CMax<int32_t, idx_t>;
#pragma omp parallel if(n > 1) reduction(+: nlistv, ndis, nheap)
{
std::unique_ptr<BinaryInvertedListScanner> scanner
(ivf.get_InvertedListScanner (store_pairs));
#pragma omp for
for (size_t i = 0; i < n; i++) {
const uint8_t *xi = x + i * ivf.code_size;
scanner->set_query(xi);
const idx_t * keysi = keys + i * nprobe;
int32_t * simi = distances + k * i;
idx_t * idxi = labels + k * i;
if (metric_type == METRIC_INNER_PRODUCT) {
heap_heapify<HeapForIP> (k, simi, idxi);
} else {
heap_heapify<HeapForL2> (k, simi, idxi);
}
size_t nscan = 0;
for (size_t ik = 0; ik < nprobe; ik++) {
idx_t key = keysi[ik]; /* select the list */
if (key < 0) {
// not enough centroids for multiprobe
continue;
}
FAISS_THROW_IF_NOT_FMT
(key < (idx_t) ivf.nlist,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
key, ik, ivf.nlist);
scanner->set_list (key, coarse_dis[i * nprobe + ik]);
nlistv++;
size_t list_size = ivf.invlists->list_size(key);
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
std::unique_ptr<InvertedLists::ScopedIds> sids;
const Index::idx_t * ids = nullptr;
if (!store_pairs) {
sids.reset (new InvertedLists::ScopedIds (ivf.invlists, key));
ids = sids->get();
}
nheap += scanner->scan_codes (list_size, scodes.get(),
ids, simi, idxi, k);
nscan += list_size;
if (max_codes && nscan >= max_codes)
break;
}
ndis += nscan;
if (metric_type == METRIC_INNER_PRODUCT) {
heap_reorder<HeapForIP> (k, simi, idxi);
} else {
heap_reorder<HeapForL2> (k, simi, idxi);
}
} // parallel for
} // parallel
indexIVF_stats.nq += n;
indexIVF_stats.nlist += nlistv;
indexIVF_stats.ndis += ndis;
indexIVF_stats.nheap_updates += nheap;
}
template<class HammingComputer, bool store_pairs>
void search_knn_hamming_count(const IndexBinaryIVF& ivf,
size_t nx,
const uint8_t *x,
const idx_t *keys,
int k,
int32_t *distances,
idx_t *labels,
const IVFSearchParameters *params) {
const int nBuckets = ivf.d + 1;
std::vector<int> all_counters(nx * nBuckets, 0);
std::unique_ptr<idx_t[]> all_ids_per_dis(new idx_t[nx * nBuckets * k]);
long nprobe = params ? params->nprobe : ivf.nprobe;
long max_codes = params ? params->max_codes : ivf.max_codes;
std::vector<HCounterState<HammingComputer>> cs;
for (size_t i = 0; i < nx; ++i) {
cs.push_back(HCounterState<HammingComputer>(
all_counters.data() + i * nBuckets,
all_ids_per_dis.get() + i * nBuckets * k,
x + i * ivf.code_size,
ivf.d,
k
));
}
size_t nlistv = 0, ndis = 0;
#pragma omp parallel for reduction(+: nlistv, ndis)
for (size_t i = 0; i < nx; i++) {
const idx_t * keysi = keys + i * nprobe;
HCounterState<HammingComputer>& csi = cs[i];
size_t nscan = 0;
for (size_t ik = 0; ik < nprobe; ik++) {
idx_t key = keysi[ik]; /* select the list */
if (key < 0) {
// not enough centroids for multiprobe
continue;
}
FAISS_THROW_IF_NOT_FMT (
key < (idx_t) ivf.nlist,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
key, ik, ivf.nlist);
nlistv++;
size_t list_size = ivf.invlists->list_size(key);
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
const uint8_t *list_vecs = scodes.get();
const Index::idx_t *ids = store_pairs
? nullptr
: ivf.invlists->get_ids(key);
for (size_t j = 0; j < list_size; j++) {
const uint8_t * yj = list_vecs + ivf.code_size * j;
idx_t id = store_pairs ? (key << 32 | j) : ids[j];
csi.update_counter(yj, id);
}
if (ids)
ivf.invlists->release_ids (key, ids);
nscan += list_size;
if (max_codes && nscan >= max_codes)
break;
}
ndis += nscan;
int nres = 0;
for (int b = 0; b < nBuckets && nres < k; b++) {
for (int l = 0; l < csi.counters[b] && nres < k; l++) {
labels[i * k + nres] = csi.ids_per_dis[b * k + l];
distances[i * k + nres] = b;
nres++;
}
}
while (nres < k) {
labels[i * k + nres] = -1;
distances[i * k + nres] = std::numeric_limits<int32_t>::max();
++nres;
}
}
indexIVF_stats.nq += nx;
indexIVF_stats.nlist += nlistv;
indexIVF_stats.ndis += ndis;
}
template<bool store_pairs>
void search_knn_hamming_count_1 (
const IndexBinaryIVF& ivf,
size_t nx,
const uint8_t *x,
const idx_t *keys,
int k,
int32_t *distances,
idx_t *labels,
const IVFSearchParameters *params) {
switch (ivf.code_size) {
#define HANDLE_CS(cs) \
case cs: \
search_knn_hamming_count<HammingComputer ## cs, store_pairs>( \
ivf, nx, x, keys, k, distances, labels, params); \
break;
HANDLE_CS(4);
HANDLE_CS(8);
HANDLE_CS(16);
HANDLE_CS(20);
HANDLE_CS(32);
HANDLE_CS(64);
#undef HANDLE_CS
default:
if (ivf.code_size % 8 == 0) {
search_knn_hamming_count<HammingComputerM8, store_pairs>
(ivf, nx, x, keys, k, distances, labels, params);
} else if (ivf.code_size % 4 == 0) {
search_knn_hamming_count<HammingComputerM4, store_pairs>
(ivf, nx, x, keys, k, distances, labels, params);
} else {
search_knn_hamming_count<HammingComputerDefault, store_pairs>
(ivf, nx, x, keys, k, distances, labels, params);
}
break;
}
}
} // namespace
BinaryInvertedListScanner *IndexBinaryIVF::get_InvertedListScanner
(bool store_pairs) const
{
if (store_pairs) {
return select_IVFBinaryScannerL2<true> (code_size);
} else {
return select_IVFBinaryScannerL2<false> (code_size);
}
}
void IndexBinaryIVF::search_preassigned(idx_t n, const uint8_t *x, idx_t k,
const idx_t *idx,
const int32_t * coarse_dis,
int32_t *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params
) const {
if (use_heap) {
search_knn_hamming_heap (*this, n, x, k, idx, coarse_dis,
distances, labels, store_pairs,
params);
} else {
if (store_pairs) {
search_knn_hamming_count_1<true>
(*this, n, x, idx, k, distances, labels, params);
} else {
search_knn_hamming_count_1<false>
(*this, n, x, idx, k, distances, labels, params);
}
}
}
IndexBinaryIVF::~IndexBinaryIVF() {
if (own_invlists) {
delete invlists;
}
if (own_fields) {
delete quantizer;
}
}
} // namespace faiss
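Since search_preassigned() can return packed (list_no, offset) pairs when store_pairs is set, a small sketch of turning such a label back into a stored id:

#include <faiss/IndexBinaryIVF.h>

// Hypothetical helper mirroring the decoding done in search_and_reconstruct().
faiss::IndexBinary::idx_t unpack_stored_id(const faiss::IndexBinaryIVF& index,
                                           faiss::IndexBinary::idx_t label) {
    if (label < 0) return -1;                  // padding: not enough results
    faiss::IndexBinary::idx_t list_no = label >> 32;
    faiss::IndexBinary::idx_t offset = label & 0xffffffff;
    return index.invlists->get_single_id(list_no, offset);
}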

@ -0,0 +1,211 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_IVF_H
#define FAISS_INDEX_BINARY_IVF_H
#include <vector>
#include <faiss/IndexBinary.h>
#include <faiss/IndexIVF.h>
#include <faiss/Clustering.h>
#include <faiss/utils/Heap.h>
namespace faiss {
struct BinaryInvertedListScanner;
/** Index based on an inverted file (IVF)
*
* In the inverted file, the quantizer (an IndexBinary instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is stored.
*
* Otherwise the object is similar to the IndexIVF
*/
struct IndexBinaryIVF : IndexBinary {
/// Access to the actual data
InvertedLists *invlists;
bool own_invlists;
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
/** Select between using a heap or counting to select the k smallest values
* when scanning inverted lists.
*/
bool use_heap = true;
/// map for direct access to the elements. Enables reconstruct().
bool maintain_direct_map;
std::vector<idx_t> direct_map;
IndexBinary *quantizer; ///< quantizer that maps vectors to inverted lists
size_t nlist; ///< number of possible key values
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering
/** The Inverted file takes a quantizer (an IndexBinary) on input,
* which implements the function mapping a vector to a list
* identifier. The pointer is borrowed: the quantizer should not
* be deleted while the IndexBinaryIVF is in use.
*/
IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist);
IndexBinaryIVF();
~IndexBinaryIVF() override;
void reset() override;
/// Trains the quantizer
void train(idx_t n, const uint8_t *x) override;
void add(idx_t n, const uint8_t *x) override;
void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) override;
/// same as add_with_ids, with precomputed coarse quantizer
void add_core (idx_t n, const uint8_t * x, const idx_t *xids,
const idx_t *precomputed_idx);
/** Search a set of vectors, that are pre-quantized by the IVF
* quantizer. Fill in the corresponding heaps with the query
* results. search() calls this.
*
* @param n nb of vectors to query
* @param x query vectors, size nx * d
* @param assign coarse quantization indices, size nx * nprobe
* @param centroid_dis
* distances to coarse centroids, size nx * nprobe
* @param distances
* output distances, size n * k
* @param labels output labels, size n * k
* @param store_pairs store inv list index + inv list offset in the
* upper/lower 32 bits of the result instead of ids
* (used for reranking).
* @param params used to override the object's search parameters
*/
void search_preassigned(idx_t n, const uint8_t *x, idx_t k,
const idx_t *assign,
const int32_t *centroid_dis,
int32_t *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const;
virtual BinaryInvertedListScanner *get_InvertedListScanner (
bool store_pairs=false) const;
/** assign the vectors, then call search_preassigned */
virtual void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void reconstruct(idx_t key, uint8_t *recons) const override;
/** Reconstruct a subset of the indexed vectors.
*
* Overrides default implementation to bypass reconstruct() which requires
* direct_map to be maintained.
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d / 8
*/
void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* Overrides default implementation to avoid having to maintain direct_map
* and instead fetch the code offsets through the `store_pairs` flag in
* search_preassigned().
*
* @param recons reconstructed vectors size (n, k, d / 8)
*/
void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const override;
/** Reconstruct a vector given the location in terms of (inv list index +
* inv list offset) instead of the id.
*
* Useful for reconstructing when the direct_map is not maintained and
* the inv list offset is computed by search_preassigned() with
* `store_pairs` set.
*/
virtual void reconstruct_from_offset(idx_t list_no, idx_t offset,
uint8_t* recons) const;
/// Dataset manipulation functions
size_t remove_ids(const IDSelector& sel) override;
/** moves the entries from another dataset to self. On output,
* other is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal) */
virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
size_t get_list_size(size_t list_no) const
{ return invlists->list_size(list_no); }
/** initialize a direct map
*
* @param new_maintain_direct_map if true, create a direct map,
* else clear it
*/
void make_direct_map(bool new_maintain_direct_map=true);
void replace_invlists(InvertedLists *il, bool own=false);
};
struct BinaryInvertedListScanner {
using idx_t = Index::idx_t;
/// from now on we handle this query.
virtual void set_query (const uint8_t *query_vector) = 0;
/// following codes come from this inverted list
virtual void set_list (idx_t list_no, uint8_t coarse_dis) = 0;
/// compute a single query-to-code distance
virtual uint32_t distance_to_code (const uint8_t *code) const = 0;
/** compute the distances to codes. (distances, labels) should be
* organized as a min- or max-heap
*
* @param n number of codes to scan
* @param codes codes to scan (n * code_size)
* @param ids corresponding ids (ignored if store_pairs)
* @param distances heap distances (size k)
* @param labels heap labels (size k)
* @param k heap size
*/
virtual size_t scan_codes (size_t n,
const uint8_t *codes,
const idx_t *ids,
int32_t *distances, idx_t *labels,
size_t k) const = 0;
virtual ~BinaryInvertedListScanner () {}
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_IVF_H
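An end-to-end sketch for this class; nlist and nprobe are illustrative settings:

#include <cstdint>
#include <vector>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/IndexBinaryIVF.h>

void binary_ivf_sketch(size_t nb, size_t nq,
                       const uint8_t* xb, const uint8_t* xq) {
    int d = 256;
    size_t nlist = 128;
    faiss::IndexBinaryFlat quantizer(d);       // coarse quantizer
    faiss::IndexBinaryIVF index(&quantizer, d, nlist);
    index.train(nb, xb);                       // clusters in the float domain
    index.add(nb, xb);
    index.nprobe = 8;                          // inverted lists visited per query
    int k = 10;
    std::vector<int32_t> dist(nq * k);
    std::vector<faiss::IndexBinary::idx_t> ids(nq * k);
    index.search(nq, xq, k, dist.data(), ids.data());
}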

@ -0,0 +1,508 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexFlat.h>
#include <cstring>
#include <limits>
#include <faiss/utils/distances.h>
#include <faiss/utils/extra_distances.h>
#include <faiss/utils/utils.h>
#include <faiss/utils/Heap.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
IndexFlat::IndexFlat (idx_t d, MetricType metric):
Index(d, metric)
{
}
void IndexFlat::add (idx_t n, const float *x) {
xb.insert(xb.end(), x, x + n * d);
ntotal += n;
}
void IndexFlat::reset() {
xb.clear();
ntotal = 0;
}
void IndexFlat::search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
// we see the distances and labels as heaps
if (metric_type == METRIC_INNER_PRODUCT) {
float_minheap_array_t res = {
size_t(n), size_t(k), labels, distances};
knn_inner_product (x, xb.data(), d, n, ntotal, &res);
} else if (metric_type == METRIC_L2) {
float_maxheap_array_t res = {
size_t(n), size_t(k), labels, distances};
knn_L2sqr (x, xb.data(), d, n, ntotal, &res);
} else {
float_maxheap_array_t res = {
size_t(n), size_t(k), labels, distances};
knn_extra_metrics (x, xb.data(), d, n, ntotal,
metric_type, metric_arg,
&res);
}
}
void IndexFlat::range_search (idx_t n, const float *x, float radius,
RangeSearchResult *result) const
{
switch (metric_type) {
case METRIC_INNER_PRODUCT:
range_search_inner_product (x, xb.data(), d, n, ntotal,
radius, result);
break;
case METRIC_L2:
range_search_L2sqr (x, xb.data(), d, n, ntotal, radius, result);
break;
default:
FAISS_THROW_MSG("metric type not supported");
}
}
void IndexFlat::compute_distance_subset (
idx_t n,
const float *x,
idx_t k,
float *distances,
const idx_t *labels) const
{
switch (metric_type) {
case METRIC_INNER_PRODUCT:
fvec_inner_products_by_idx (
distances,
x, xb.data(), labels, d, n, k);
break;
case METRIC_L2:
fvec_L2sqr_by_idx (
distances,
x, xb.data(), labels, d, n, k);
break;
default:
FAISS_THROW_MSG("metric type not supported");
}
}
size_t IndexFlat::remove_ids (const IDSelector & sel)
{
idx_t j = 0;
for (idx_t i = 0; i < ntotal; i++) {
if (sel.is_member (i)) {
// should be removed
} else {
if (i > j) {
memmove (&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
}
j++;
}
}
size_t nremove = ntotal - j;
if (nremove > 0) {
ntotal = j;
xb.resize (ntotal * d);
}
return nremove;
}
namespace {
struct FlatL2Dis : DistanceComputer {
size_t d;
Index::idx_t nb;
const float *q;
const float *b;
size_t ndis;
float operator () (idx_t i) override {
ndis++;
return fvec_L2sqr(q, b + i * d, d);
}
float symmetric_dis(idx_t i, idx_t j) override {
return fvec_L2sqr(b + j * d, b + i * d, d);
}
explicit FlatL2Dis(const IndexFlat& storage, const float *q = nullptr)
: d(storage.d),
nb(storage.ntotal),
q(q),
b(storage.xb.data()),
ndis(0) {}
void set_query(const float *x) override {
q = x;
}
};
struct FlatIPDis : DistanceComputer {
size_t d;
Index::idx_t nb;
const float *q;
const float *b;
size_t ndis;
float operator () (idx_t i) override {
ndis++;
return fvec_inner_product (q, b + i * d, d);
}
float symmetric_dis(idx_t i, idx_t j) override {
return fvec_inner_product (b + j * d, b + i * d, d);
}
explicit FlatIPDis(const IndexFlat& storage, const float *q = nullptr)
: d(storage.d),
nb(storage.ntotal),
q(q),
b(storage.xb.data()),
ndis(0) {}
void set_query(const float *x) override {
q = x;
}
};
} // namespace
DistanceComputer * IndexFlat::get_distance_computer() const {
if (metric_type == METRIC_L2) {
return new FlatL2Dis(*this);
} else if (metric_type == METRIC_INNER_PRODUCT) {
return new FlatIPDis(*this);
} else {
return get_extra_distance_computer (d, metric_type, metric_arg,
ntotal, xb.data());
}
}
void IndexFlat::reconstruct (idx_t key, float * recons) const
{
memcpy (recons, &(xb[key * d]), sizeof(*recons) * d);
}
/* The standalone codec interface */
size_t IndexFlat::sa_code_size () const
{
return sizeof(float) * d;
}
void IndexFlat::sa_encode (idx_t n, const float *x, uint8_t *bytes) const
{
memcpy (bytes, x, sizeof(float) * d * n);
}
void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
{
memcpy (x, bytes, sizeof(float) * d * n);
}
/***************************************************
* IndexFlatL2BaseShift
***************************************************/
IndexFlatL2BaseShift::IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift):
IndexFlatL2 (d), shift (nshift)
{
memcpy (this->shift.data(), shift, sizeof(float) * nshift);
}
void IndexFlatL2BaseShift::search (
idx_t n,
const float *x,
idx_t k,
float *distances,
idx_t *labels) const
{
FAISS_THROW_IF_NOT (shift.size() == ntotal);
float_maxheap_array_t res = {
size_t(n), size_t(k), labels, distances};
knn_L2sqr_base_shift (x, xb.data(), d, n, ntotal, &res, shift.data());
}
/***************************************************
* IndexRefineFlat
***************************************************/
IndexRefineFlat::IndexRefineFlat (Index *base_index):
Index (base_index->d, base_index->metric_type),
refine_index (base_index->d, base_index->metric_type),
base_index (base_index), own_fields (false),
k_factor (1)
{
is_trained = base_index->is_trained;
FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0,
"base_index should be empty in the beginning");
}
IndexRefineFlat::IndexRefineFlat () {
base_index = nullptr;
own_fields = false;
k_factor = 1;
}
void IndexRefineFlat::train (idx_t n, const float *x)
{
base_index->train (n, x);
is_trained = true;
}
void IndexRefineFlat::add (idx_t n, const float *x) {
FAISS_THROW_IF_NOT (is_trained);
base_index->add (n, x);
refine_index.add (n, x);
ntotal = refine_index.ntotal;
}
void IndexRefineFlat::reset ()
{
base_index->reset ();
refine_index.reset ();
ntotal = 0;
}
namespace {
typedef faiss::Index::idx_t idx_t;
template<class C>
static void reorder_2_heaps (
idx_t n,
idx_t k, idx_t *labels, float *distances,
idx_t k_base, const idx_t *base_labels, const float *base_distances)
{
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
idx_t *idxo = labels + i * k;
float *diso = distances + i * k;
const idx_t *idxi = base_labels + i * k_base;
const float *disi = base_distances + i * k_base;
heap_heapify<C> (k, diso, idxo, disi, idxi, k);
if (k_base != k) { // add remaining elements
heap_addn<C> (k, diso, idxo, disi + k, idxi + k, k_base - k);
}
heap_reorder<C> (k, diso, idxo);
}
}
}
void IndexRefineFlat::search (
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
FAISS_THROW_IF_NOT (is_trained);
idx_t k_base = idx_t (k * k_factor);
idx_t * base_labels = labels;
float * base_distances = distances;
ScopeDeleter<idx_t> del1;
ScopeDeleter<float> del2;
if (k != k_base) {
base_labels = new idx_t [n * k_base];
del1.set (base_labels);
base_distances = new float [n * k_base];
del2.set (base_distances);
}
base_index->search (n, x, k_base, base_distances, base_labels);
for (int i = 0; i < n * k_base; i++)
assert (base_labels[i] >= -1 &&
base_labels[i] < ntotal);
// compute refined distances
refine_index.compute_distance_subset (
n, x, k_base, base_distances, base_labels);
// sort and store result
if (metric_type == METRIC_L2) {
typedef CMax <float, idx_t> C;
reorder_2_heaps<C> (
n, k, labels, distances,
k_base, base_labels, base_distances);
} else if (metric_type == METRIC_INNER_PRODUCT) {
typedef CMin <float, idx_t> C;
reorder_2_heaps<C> (
n, k, labels, distances,
k_base, base_labels, base_distances);
} else {
FAISS_THROW_MSG("Metric type not supported");
}
}
IndexRefineFlat::~IndexRefineFlat ()
{
if (own_fields) delete base_index;
}
/***************************************************
* IndexFlat1D
***************************************************/
IndexFlat1D::IndexFlat1D (bool continuous_update):
IndexFlatL2 (1),
continuous_update (continuous_update)
{
}
/// if not continuous_update, call this between the last add and
/// the first search
void IndexFlat1D::update_permutation ()
{
perm.resize (ntotal);
if (ntotal < 1000000) {
fvec_argsort (ntotal, xb.data(), (size_t*)perm.data());
} else {
fvec_argsort_parallel (ntotal, xb.data(), (size_t*)perm.data());
}
}
void IndexFlat1D::add (idx_t n, const float *x)
{
IndexFlatL2::add (n, x);
if (continuous_update)
update_permutation();
}
void IndexFlat1D::reset()
{
IndexFlatL2::reset();
perm.clear();
}
void IndexFlat1D::search (
idx_t n,
const float *x,
idx_t k,
float *distances,
idx_t *labels) const
{
FAISS_THROW_IF_NOT_MSG (perm.size() == ntotal,
"Call update_permutation before search");
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
float q = x[i]; // query
float *D = distances + i * k;
idx_t *I = labels + i * k;
// binary search
idx_t i0 = 0, i1 = ntotal;
idx_t wp = 0;
if (xb[perm[i0]] > q) {
i1 = 0;
goto finish_right;
}
if (xb[perm[i1 - 1]] <= q) {
i0 = i1 - 1;
goto finish_left;
}
while (i0 + 1 < i1) {
idx_t imed = (i0 + i1) / 2;
if (xb[perm[imed]] <= q) i0 = imed;
else i1 = imed;
}
// query is between xb[perm[i0]] and xb[perm[i1]]
// expand to nearest neighs
while (wp < k) {
float xleft = xb[perm[i0]];
float xright = xb[perm[i1]];
if (q - xleft < xright - q) {
D[wp] = q - xleft;
I[wp] = perm[i0];
i0--; wp++;
if (i0 < 0) { goto finish_right; }
} else {
D[wp] = xright - q;
I[wp] = perm[i1];
i1++; wp++;
if (i1 >= ntotal) { goto finish_left; }
}
}
goto done;
finish_right:
// grow to the right from i1
while (wp < k) {
if (i1 < ntotal) {
D[wp] = xb[perm[i1]] - q;
I[wp] = perm[i1];
i1++;
} else {
D[wp] = std::numeric_limits<float>::infinity();
I[wp] = -1;
}
wp++;
}
goto done;
finish_left:
// grow to the left from i0
while (wp < k) {
if (i0 >= 0) {
D[wp] = q - xb[perm[i0]];
I[wp] = perm[i0];
i0--;
} else {
D[wp] = std::numeric_limits<float>::infinity();
I[wp] = -1;
}
wp++;
}
done: ;
}
}
} // namespace faiss
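A sketch of the range_search() path implemented above; RangeSearchResult is declared in impl/AuxIndexStructures.h:

#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>

void flat_range_sketch(size_t nb, size_t nq,
                       const float* xb, const float* xq) {
    int d = 64;
    faiss::IndexFlatL2 index(d);
    index.add(nb, xb);
    faiss::RangeSearchResult res(nq);          // owns lims/labels/distances
    index.range_search(nq, xq, /*radius=*/0.5f, &res);  // radius in squared L2
    // Hits of query i are res.labels[res.lims[i] .. res.lims[i+1]),
    // unordered within each query.
}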

@ -0,0 +1,175 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef INDEX_FLAT_H
#define INDEX_FLAT_H
#include <vector>
#include <faiss/Index.h>
namespace faiss {
/** Index that stores the full vectors and performs exhaustive search */
struct IndexFlat: Index {
/// database vectors, size ntotal * d
std::vector<float> xb;
explicit IndexFlat (idx_t d, MetricType metric = METRIC_L2);
void add(idx_t n, const float* x) override;
void reset() override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
void reconstruct(idx_t key, float* recons) const override;
/** compute distance with a subset of vectors
*
* @param x query vectors, size n * d
* @param labels indices of the vectors that should be compared
* for each query vector, size n * k
* @param distances
* corresponding output distances, size n * k
*/
void compute_distance_subset (
idx_t n,
const float *x,
idx_t k,
float *distances,
const idx_t *labels) const;
/** Remove some ids. NB: because of the indexing structure, the
* semantics of this operation are different from the usual ones:
* the new ids are shifted. */
size_t remove_ids(const IDSelector& sel) override;
IndexFlat () {}
DistanceComputer * get_distance_computer() const override;
/* The standalone codec interface (just memcpy in this case) */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
};
struct IndexFlatIP:IndexFlat {
explicit IndexFlatIP (idx_t d): IndexFlat (d, METRIC_INNER_PRODUCT) {}
IndexFlatIP () {}
};
struct IndexFlatL2:IndexFlat {
explicit IndexFlatL2 (idx_t d): IndexFlat (d, METRIC_L2) {}
IndexFlatL2 () {}
};
// same as an IndexFlatL2 but a value is subtracted from each distance
struct IndexFlatL2BaseShift: IndexFlatL2 {
std::vector<float> shift;
IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift);
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
};
/** Index that queries in a base_index (a fast one) and refines the
* results with an exact search, hopefully improving the results.
*/
struct IndexRefineFlat: Index {
/// storage for full vectors
IndexFlat refine_index;
/// faster index to pre-select the vectors that should be filtered
Index *base_index;
bool own_fields; ///< should the base index be deallocated?
/// factor between k requested in search and the k requested from
/// the base_index (should be >= 1)
float k_factor;
explicit IndexRefineFlat (Index *base_index);
IndexRefineFlat ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
void reset() override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
~IndexRefineFlat() override;
};
/// optimized version for 1D "vectors"
struct IndexFlat1D:IndexFlatL2 {
bool continuous_update; ///< is the permutation updated continuously?
std::vector<idx_t> perm; ///< sorted database indices
explicit IndexFlat1D (bool continuous_update=true);
/// if not continuous_update, call this between the last add and
/// the first search
void update_permutation ();
void add(idx_t n, const float* x) override;
void reset() override;
/// Warn: the distances returned are L1 not L2
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
};
}
#endif
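A sketch for the 1D specialization declared above; note the update_permutation() requirement when continuous_update is disabled:

#include <vector>
#include <faiss/IndexFlat.h>

void flat_1d_sketch(size_t nb, const float* values) {
    faiss::IndexFlat1D index(/*continuous_update=*/false);
    index.add(nb, values);             // one float per "vector"
    index.update_permutation();        // required before the first search
    float q = 0.25f;
    int k = 3;
    std::vector<float> dist(k);        // NB: L1 distances, per the header
    std::vector<faiss::Index::idx_t> ids(k);
    index.search(1, &q, k, dist.data(), ids.data());
}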

File diff suppressed because it is too large
@ -0,0 +1,170 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/impl/HNSW.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexPQ.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/utils/utils.h>
namespace faiss {
struct IndexHNSW;
struct ReconstructFromNeighbors {
typedef Index::idx_t idx_t;
typedef HNSW::storage_idx_t storage_idx_t;
const IndexHNSW & index;
size_t M; // number of neighbors
size_t k; // number of codebook entries
size_t nsq; // number of subvectors
size_t code_size;
int k_reorder; // nb to reorder. -1 = all
std::vector<float> codebook; // size nsq * k * (M + 1)
std::vector<uint8_t> codes; // size ntotal * code_size
size_t ntotal;
size_t d, dsub; // derived values
explicit ReconstructFromNeighbors(const IndexHNSW& index,
size_t k=256, size_t nsq=1);
/// codes must be added in the correct order and the IndexHNSW
/// must be populated and sorted
void add_codes(size_t n, const float *x);
size_t compute_distances(size_t n, const idx_t *shortlist,
const float *query, float *distances) const;
/// called by add_codes
void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const;
/// called by compute_distances
void reconstruct(storage_idx_t i, float *x, float *tmp) const;
void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float *x) const;
/// get the (M+1)-by-d table of neighbor coordinates for vector i
void get_neighbor_table(storage_idx_t i, float *out) const;
};
/** The HNSW index is a normal random-access index with a HNSW
* link structure built on top */
struct IndexHNSW : Index {
typedef HNSW::storage_idx_t storage_idx_t;
// the link structure
HNSW hnsw;
// the sequential storage
bool own_fields;
Index *storage;
ReconstructFromNeighbors *reconstruct_from_neighbors;
explicit IndexHNSW (int d = 0, int M = 32);
explicit IndexHNSW (Index *storage, int M = 32);
~IndexHNSW() override;
void add(idx_t n, const float *x) override;
/// Trains the storage if needed
void train(idx_t n, const float* x) override;
/// entry point for search
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset () override;
void shrink_level_0_neighbors(int size);
/** Perform search only on level 0, given the starting points for
* each vertex.
*
* @param search_type 1:perform one search per nprobe, 2: enqueue
* all entry points
*/
void search_level_0(idx_t n, const float *x, idx_t k,
const storage_idx_t *nearest, const float *nearest_d,
float *distances, idx_t *labels, int nprobe = 1,
int search_type = 1) const;
/// alternative graph building
void init_level_0_from_knngraph(
int k, const float *D, const idx_t *I);
/// alternative graph building
void init_level_0_from_entry_points(
int npt, const storage_idx_t *points,
const storage_idx_t *nearests);
// reorder links from nearest to farthest
void reorder_links();
void link_singletons();
};
/** Flat index topped with an HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWFlat : IndexHNSW {
IndexHNSWFlat();
IndexHNSWFlat(int d, int M);
};
/** PQ index topped with an HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWPQ : IndexHNSW {
IndexHNSWPQ();
IndexHNSWPQ(int d, int pq_m, int M);
void train(idx_t n, const float* x) override;
};
/** SQ index topped with an HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWSQ : IndexHNSW {
IndexHNSWSQ();
IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
};
/** 2-level code structure with fast random access
*/
struct IndexHNSW2Level : IndexHNSW {
IndexHNSW2Level();
IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M);
void flip_to_ivf();
/// entry point for search
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
};
} // namespace faiss
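A minimal usage sketch for the HNSW wrappers declared above (standalone example; the dataset size, d = 64 and M = 32 are illustrative, and efConstruction/efSearch are the usual HNSW effort knobs):
#include <random>
#include <vector>
#include <faiss/IndexHNSW.h>

int main() {
    int d = 64;                          // vector dimension (illustrative)
    faiss::IndexHNSWFlat index(d, 32);   // M = 32 links per node
    index.hnsw.efConstruction = 200;     // build-time beam width
    std::mt19937 rng(123);
    std::uniform_real_distribution<float> u(0.f, 1.f);
    std::vector<float> xb(10000 * (size_t)d);
    for (auto& v : xb) v = u(rng);
    index.add(10000, xb.data());         // Flat storage needs no training
    index.hnsw.efSearch = 64;            // search-time beam width
    int k = 5;
    std::vector<float> dist(k);
    std::vector<faiss::Index::idx_t> ids(k);
    index.search(1, xb.data(), k, dist.data(), ids.data());
    return 0;
}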


@ -0,0 +1,966 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVF.h>
#include <omp.h>
#include <cstdio>
#include <memory>
#include <iostream>
#include <faiss/utils/utils.h>
#include <faiss/utils/hamming.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
using ScopedIds = InvertedLists::ScopedIds;
using ScopedCodes = InvertedLists::ScopedCodes;
/*****************************************
* Level1Quantizer implementation
******************************************/
Level1Quantizer::Level1Quantizer (Index * quantizer, size_t nlist):
quantizer (quantizer),
nlist (nlist),
quantizer_trains_alone (0),
own_fields (false),
clustering_index (nullptr)
{
// here we set a low # iterations because this is typically used
// for large clusterings (nb this is not used for the MultiIndex,
// for which quantizer_trains_alone = true)
cp.niter = 10;
}
Level1Quantizer::Level1Quantizer ():
quantizer (nullptr),
nlist (0),
quantizer_trains_alone (0), own_fields (false),
clustering_index (nullptr)
{}
Level1Quantizer::~Level1Quantizer ()
{
if (own_fields) {
if(quantizer == quantizer_backup) {
if(quantizer != nullptr) {
delete quantizer;
}
} else {
if(quantizer != nullptr) {
delete quantizer;
}
if(quantizer_backup != nullptr) {
delete quantizer_backup;
}
}
quantizer = nullptr;
quantizer_backup = nullptr;
}
}
void Level1Quantizer::train_q1 (size_t n, const float *x, bool verbose, MetricType metric_type)
{
size_t d = quantizer->d;
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (verbose)
printf ("IVF quantizer does not need training.\n");
} else if (quantizer_trains_alone == 1) {
if (verbose)
printf ("IVF quantizer trains alone...\n");
quantizer->train (n, x);
quantizer->verbose = verbose;
FAISS_THROW_IF_NOT_MSG (quantizer->ntotal == nlist,
"nlist not consistent with quantizer size");
} else if (quantizer_trains_alone == 0) {
if (verbose)
printf ("Training level-1 quantizer on %ld vectors in %ldD\n",
n, d);
Clustering clus (d, nlist, cp);
quantizer->reset();
if (clustering_index) {
clus.train (n, x, *clustering_index);
quantizer->add (nlist, clus.centroids.data());
} else {
clus.train (n, x, *quantizer);
}
quantizer->is_trained = true;
} else if (quantizer_trains_alone == 2) {
if (verbose)
printf (
"Training L2 quantizer on %ld vectors in %ldD%s\n",
n, d,
clustering_index ? "(user provided index)" : "");
FAISS_THROW_IF_NOT (metric_type == METRIC_L2);
Clustering clus (d, nlist, cp);
if (!clustering_index) {
IndexFlatL2 assigner (d);
clus.train(n, x, assigner);
} else {
clus.train(n, x, *clustering_index);
}
if (verbose)
printf ("Adding centroids to quantizer\n");
quantizer->add (nlist, clus.centroids.data());
}
}
size_t Level1Quantizer::coarse_code_size () const
{
size_t nl = nlist - 1;
size_t nbyte = 0;
while (nl > 0) {
nbyte ++;
nl >>= 8;
}
return nbyte;
}
void Level1Quantizer::encode_listno (Index::idx_t list_no, uint8_t *code) const
{
// little endian
size_t nl = nlist - 1;
while (nl > 0) {
*code++ = list_no & 0xff;
list_no >>= 8;
nl >>= 8;
}
}
Index::idx_t Level1Quantizer::decode_listno (const uint8_t *code) const
{
size_t nl = nlist - 1;
int64_t list_no = 0;
int nbit = 0;
while (nl > 0) {
list_no |= int64_t(*code++) << nbit;
nbit += 8;
nl >>= 8;
}
FAISS_THROW_IF_NOT (list_no >= 0 && list_no < nlist);
return list_no;
}
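The codec above stores a list id in coarse_code_size() = ceil(log256(nlist)) bytes, little endian, so nlist = 1024 needs two bytes. A standalone round-trip sketch of the same logic:
#include <cassert>
#include <cstddef>
#include <cstdint>

int main() {
    size_t nlist = 1024;
    int64_t list_no = 700;
    uint8_t code[8];
    // encode, little endian (mirrors encode_listno)
    size_t nl = nlist - 1;
    int64_t v = list_no;
    uint8_t* p = code;
    while (nl > 0) { *p++ = v & 0xff; v >>= 8; nl >>= 8; }
    // decode (mirrors decode_listno)
    nl = nlist - 1;
    int64_t out = 0;
    int nbit = 0;
    p = code;
    while (nl > 0) { out |= int64_t(*p++) << nbit; nbit += 8; nl >>= 8; }
    assert(out == list_no);   // two bytes were written and read back
    return 0;
}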
/*****************************************
* IndexIVF implementation
******************************************/
IndexIVF::IndexIVF (Index * quantizer, size_t d,
size_t nlist, size_t code_size,
MetricType metric):
Index (d, metric),
Level1Quantizer (quantizer, nlist),
invlists (new ArrayInvertedLists (nlist, code_size)),
own_invlists (true),
code_size (code_size),
nprobe (1),
max_codes (0),
parallel_mode (0),
maintain_direct_map (false)
{
FAISS_THROW_IF_NOT (d == quantizer->d);
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
// Spherical by default if the metric is inner_product
if (metric_type == METRIC_INNER_PRODUCT) {
cp.spherical = true;
}
}
IndexIVF::IndexIVF ():
invlists (nullptr), own_invlists (false),
code_size (0),
nprobe (1), max_codes (0), parallel_mode (0),
maintain_direct_map (false)
{}
void IndexIVF::add (idx_t n, const float * x)
{
add_with_ids (n, x, nullptr);
}
void IndexIVF::add_with_ids (idx_t n, const float * x, const idx_t *xids)
{
// do some blocking to avoid excessive allocs
idx_t bs = 65536;
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min (n, i0 + bs);
if (verbose) {
printf(" IndexIVF::add_with_ids %ld:%ld\n", i0, i1);
}
add_with_ids (i1 - i0, x + i0 * d,
xids ? xids + i0 : nullptr);
}
return;
}
FAISS_THROW_IF_NOT (is_trained);
std::unique_ptr<idx_t []> idx(new idx_t[n]);
quantizer->assign (n, x, idx.get());
size_t nadd = 0, nminus1 = 0;
for (size_t i = 0; i < n; i++) {
if (idx[i] < 0) nminus1++;
}
std::unique_ptr<uint8_t []> flat_codes(new uint8_t [n * code_size]);
encode_vectors (n, x, idx.get(), flat_codes.get());
#pragma omp parallel reduction(+: nadd)
{
int nt = omp_get_num_threads();
int rank = omp_get_thread_num();
// each thread takes care of a subset of lists
for (size_t i = 0; i < n; i++) {
idx_t list_no = idx [i];
if (list_no >= 0 && list_no % nt == rank) {
idx_t id = xids ? xids[i] : ntotal + i;
invlists->add_entry (list_no, id,
flat_codes.get() + i * code_size);
nadd++;
}
}
}
if (verbose) {
printf(" added %ld / %ld vectors (%ld -1s)\n", nadd, n, nminus1);
}
ntotal += n;
}
void IndexIVF::to_readonly() {
if (is_readonly()) return;
auto readonly_lists = this->invlists->to_readonly();
if (!readonly_lists) return;
this->replace_invlists(readonly_lists, true);
}
bool IndexIVF::is_readonly() const {
return this->invlists->is_readonly();
}
void IndexIVF::backup_quantizer() {
this->quantizer_backup = quantizer;
}
void IndexIVF::restore_quantizer() {
if(this->quantizer_backup != nullptr) {
quantizer = this->quantizer_backup;
}
}
void IndexIVF::make_direct_map (bool new_maintain_direct_map)
{
// nothing to do
if (new_maintain_direct_map == maintain_direct_map)
return;
if (new_maintain_direct_map) {
direct_map.resize (ntotal, -1);
for (size_t key = 0; key < nlist; key++) {
size_t list_size = invlists->list_size (key);
ScopedIds idlist (invlists, key);
for (long ofs = 0; ofs < list_size; ofs++) {
FAISS_THROW_IF_NOT_MSG (
0 <= idlist [ofs] && idlist[ofs] < ntotal,
"direct map supported only for seuquential ids");
direct_map [idlist [ofs]] = key << 32 | ofs;
}
}
} else {
direct_map.clear ();
}
maintain_direct_map = new_maintain_direct_map;
}
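Each direct_map entry above packs the location of a vector into one int64: list number in the upper 32 bits, offset within that list in the lower 32. A minimal pack/unpack sketch:
#include <cassert>
#include <cstdint>

int main() {
    int64_t list_no = 42, ofs = 7;
    int64_t packed = list_no << 32 | ofs;     // as stored in direct_map
    assert((packed >> 32) == list_no);        // recovered in reconstruct()
    assert((packed & 0xffffffff) == ofs);
    return 0;
}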
void IndexIVF::search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
double t0 = getmillisecs();
quantizer->search (n, x, nprobe, coarse_dis.get(), idx.get());
indexIVF_stats.quantization_time += getmillisecs() - t0;
t0 = getmillisecs();
invlists->prefetch_lists (idx.get(), n * nprobe);
search_preassigned (n, x, k, idx.get(), coarse_dis.get(),
distances, labels, false);
indexIVF_stats.search_time += getmillisecs() - t0;
}
void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *keys,
const float *coarse_dis ,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params) const
{
long nprobe = params ? params->nprobe : this->nprobe;
long max_codes = params ? params->max_codes : this->max_codes;
size_t nlistv = 0, ndis = 0, nheap = 0;
using HeapForIP = CMin<float, idx_t>;
using HeapForL2 = CMax<float, idx_t>;
bool interrupt = false;
// don't start parallel section if single query
bool do_parallel =
parallel_mode == 0 ? n > 1 :
parallel_mode == 1 ? nprobe > 1 :
nprobe * n > 1;
#pragma omp parallel if(do_parallel) reduction(+: nlistv, ndis, nheap)
{
InvertedListScanner *scanner = get_InvertedListScanner(store_pairs);
ScopeDeleter1<InvertedListScanner> del(scanner);
/*****************************************************
* Depending on parallel_mode, there are two possible ways
* to organize the search. Here we define local functions
* that are in common between the two
******************************************************/
// initialize + reorder a result heap
auto init_result = [&](float *simi, idx_t *idxi) {
if (metric_type == METRIC_INNER_PRODUCT) {
heap_heapify<HeapForIP> (k, simi, idxi);
} else {
heap_heapify<HeapForL2> (k, simi, idxi);
}
};
auto reorder_result = [&] (float *simi, idx_t *idxi) {
if (metric_type == METRIC_INNER_PRODUCT) {
heap_reorder<HeapForIP> (k, simi, idxi);
} else {
heap_reorder<HeapForL2> (k, simi, idxi);
}
};
// single list scan using the current scanner (with query
// set properly) and storing results in simi and idxi
auto scan_one_list = [&] (idx_t key, float coarse_dis_i,
float *simi, idx_t *idxi) {
if (key < 0) {
// not enough centroids for multiprobe
return (size_t)0;
}
FAISS_THROW_IF_NOT_FMT (key < (idx_t) nlist,
"Invalid key=%ld nlist=%ld\n",
key, nlist);
size_t list_size = invlists->list_size(key);
// don't waste time on empty lists
if (list_size == 0) {
return (size_t)0;
}
scanner->set_list (key, coarse_dis_i);
nlistv++;
InvertedLists::ScopedCodes scodes (invlists, key);
std::unique_ptr<InvertedLists::ScopedIds> sids;
const Index::idx_t * ids = nullptr;
if (!store_pairs) {
sids.reset (new InvertedLists::ScopedIds (invlists, key));
ids = sids->get();
}
nheap += scanner->scan_codes (list_size, scodes.get(),
ids, simi, idxi, k);
return list_size;
};
/****************************************************
* Actual loops, depending on parallel_mode
****************************************************/
if (parallel_mode == 0) {
#pragma omp for
for (size_t i = 0; i < n; i++) {
if (interrupt) {
continue;
}
// loop over queries
scanner->set_query (x + i * d);
float * simi = distances + i * k;
idx_t * idxi = labels + i * k;
init_result (simi, idxi);
long nscan = 0;
// loop over probes
for (size_t ik = 0; ik < nprobe; ik++) {
nscan += scan_one_list (
keys [i * nprobe + ik],
coarse_dis[i * nprobe + ik],
simi, idxi
);
if (max_codes && nscan >= max_codes) {
break;
}
}
ndis += nscan;
reorder_result (simi, idxi);
if (InterruptCallback::is_interrupted ()) {
interrupt = true;
}
} // parallel for
} else if (parallel_mode == 1) {
std::vector <idx_t> local_idx (k);
std::vector <float> local_dis (k);
for (size_t i = 0; i < n; i++) {
scanner->set_query (x + i * d);
init_result (local_dis.data(), local_idx.data());
#pragma omp for schedule(dynamic)
for (size_t ik = 0; ik < nprobe; ik++) {
ndis += scan_one_list
(keys [i * nprobe + ik],
coarse_dis[i * nprobe + ik],
local_dis.data(), local_idx.data());
// can't do the test on max_codes
}
// merge thread-local results
float * simi = distances + i * k;
idx_t * idxi = labels + i * k;
#pragma omp single
init_result (simi, idxi);
#pragma omp barrier
#pragma omp critical
{
if (metric_type == METRIC_INNER_PRODUCT) {
heap_addn<HeapForIP>
(k, simi, idxi,
local_dis.data(), local_idx.data(), k);
} else {
heap_addn<HeapForL2>
(k, simi, idxi,
local_dis.data(), local_idx.data(), k);
}
}
#pragma omp barrier
#pragma omp single
reorder_result (simi, idxi);
}
} else {
FAISS_THROW_FMT ("parallel_mode %d not supported\n",
parallel_mode);
}
} // parallel section
if (interrupt) {
FAISS_THROW_MSG ("computation interrupted");
}
indexIVF_stats.nq += n;
indexIVF_stats.nlist += nlistv;
indexIVF_stats.ndis += ndis;
indexIVF_stats.nheap_updates += nheap;
}
void IndexIVF::range_search (idx_t nx, const float *x, float radius,
RangeSearchResult *result) const
{
std::unique_ptr<idx_t[]> keys (new idx_t[nx * nprobe]);
std::unique_ptr<float []> coarse_dis (new float[nx * nprobe]);
double t0 = getmillisecs();
quantizer->search (nx, x, nprobe, coarse_dis.get (), keys.get ());
indexIVF_stats.quantization_time += getmillisecs() - t0;
t0 = getmillisecs();
invlists->prefetch_lists (keys.get(), nx * nprobe);
range_search_preassigned (nx, x, radius, keys.get (), coarse_dis.get (),
result);
indexIVF_stats.search_time += getmillisecs() - t0;
}
void IndexIVF::range_search_preassigned (
idx_t nx, const float *x, float radius,
const idx_t *keys, const float *coarse_dis,
RangeSearchResult *result) const
{
size_t nlistv = 0, ndis = 0;
bool store_pairs = false;
std::vector<RangeSearchPartialResult *> all_pres (omp_get_max_threads());
#pragma omp parallel reduction(+: nlistv, ndis)
{
RangeSearchPartialResult pres(result);
std::unique_ptr<InvertedListScanner> scanner
(get_InvertedListScanner(store_pairs));
FAISS_THROW_IF_NOT (scanner.get ());
all_pres[omp_get_thread_num()] = &pres;
// prepare the list scanning function
auto scan_list_func = [&](size_t i, size_t ik, RangeQueryResult &qres) {
idx_t key = keys[i * nprobe + ik]; /* select the list */
if (key < 0) return;
FAISS_THROW_IF_NOT_FMT (
key < (idx_t) nlist,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
key, ik, nlist);
const size_t list_size = invlists->list_size(key);
if (list_size == 0) return;
InvertedLists::ScopedCodes scodes (invlists, key);
InvertedLists::ScopedIds ids (invlists, key);
scanner->set_list (key, coarse_dis[i * nprobe + ik]);
nlistv++;
ndis += list_size;
scanner->scan_codes_range (list_size, scodes.get(),
ids.get(), radius, qres);
};
if (parallel_mode == 0) {
#pragma omp for
for (size_t i = 0; i < nx; i++) {
scanner->set_query (x + i * d);
RangeQueryResult & qres = pres.new_result (i);
for (size_t ik = 0; ik < nprobe; ik++) {
scan_list_func (i, ik, qres);
}
}
} else if (parallel_mode == 1) {
for (size_t i = 0; i < nx; i++) {
scanner->set_query (x + i * d);
RangeQueryResult & qres = pres.new_result (i);
#pragma omp for schedule(dynamic)
for (size_t ik = 0; ik < nprobe; ik++) {
scan_list_func (i, ik, qres);
}
}
} else if (parallel_mode == 2) {
std::vector<RangeQueryResult *> all_qres (nx);
RangeQueryResult *qres = nullptr;
#pragma omp for schedule(dynamic)
for (size_t iik = 0; iik < nx * nprobe; iik++) {
size_t i = iik / nprobe;
size_t ik = iik % nprobe;
if (qres == nullptr || qres->qno != i) {
FAISS_ASSERT (!qres || i > qres->qno);
qres = &pres.new_result (i);
scanner->set_query (x + i * d);
}
scan_list_func (i, ik, *qres);
}
} else {
FAISS_THROW_FMT ("parallel_mode %d not supported\n", parallel_mode);
}
if (parallel_mode == 0) {
pres.finalize ();
} else {
#pragma omp barrier
#pragma omp single
RangeSearchPartialResult::merge (all_pres, false);
#pragma omp barrier
}
}
indexIVF_stats.nq += nx;
indexIVF_stats.nlist += nlistv;
indexIVF_stats.ndis += ndis;
}
InvertedListScanner *IndexIVF::get_InvertedListScanner (
bool /*store_pairs*/) const
{
return nullptr;
}
void IndexIVF::reconstruct (idx_t key, float* recons) const
{
FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal,
"direct map is not initialized");
FAISS_THROW_IF_NOT_MSG (key >= 0 && key < direct_map.size(),
"invalid key");
idx_t list_no = direct_map[key] >> 32;
idx_t offset = direct_map[key] & 0xffffffff;
reconstruct_from_offset (list_no, offset, recons);
}
void IndexIVF::reconstruct_n (idx_t i0, idx_t ni, float* recons) const
{
FAISS_THROW_IF_NOT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
for (idx_t list_no = 0; list_no < nlist; list_no++) {
size_t list_size = invlists->list_size (list_no);
ScopedIds idlist (invlists, list_no);
for (idx_t offset = 0; offset < list_size; offset++) {
idx_t id = idlist[offset];
if (!(id >= i0 && id < i0 + ni)) {
continue;
}
float* reconstructed = recons + (id - i0) * d;
reconstruct_from_offset (list_no, offset, reconstructed);
}
}
}
/* standalone codec interface */
size_t IndexIVF::sa_code_size () const
{
size_t coarse_size = coarse_code_size();
return code_size + coarse_size;
}
void IndexIVF::sa_encode (idx_t n, const float *x,
uint8_t *bytes) const
{
FAISS_THROW_IF_NOT (is_trained);
std::unique_ptr<int64_t []> idx (new int64_t [n]);
quantizer->assign (n, x, idx.get());
encode_vectors (n, x, idx.get(), bytes, true);
}
void IndexIVF::search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const
{
idx_t * idx = new idx_t [n * nprobe];
ScopeDeleter<idx_t> del (idx);
float * coarse_dis = new float [n * nprobe];
ScopeDeleter<float> del2 (coarse_dis);
quantizer->search (n, x, nprobe, coarse_dis, idx);
invlists->prefetch_lists (idx, n * nprobe);
// search_preassigned() with `store_pairs` enabled to obtain the list_no
// and offset into `codes` for reconstruction
search_preassigned (n, x, k, idx, coarse_dis,
distances, labels, true /* store_pairs */);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
float* reconstructed = recons + ij * d;
if (key < 0) {
// fill with NaNs: memset with -1 sets every byte to 0xff,
// which is a quiet-NaN bit pattern for IEEE-754 floats
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
int list_no = key >> 32;
int offset = key & 0xffffffff;
// Update label to the actual id
labels[ij] = invlists->get_single_id (list_no, offset);
reconstruct_from_offset (list_no, offset, reconstructed);
}
}
}
}
void IndexIVF::reconstruct_from_offset(
int64_t /*list_no*/,
int64_t /*offset*/,
float* /*recons*/) const {
FAISS_THROW_MSG ("reconstruct_from_offset not implemented");
}
void IndexIVF::reset ()
{
direct_map.clear ();
invlists->reset ();
ntotal = 0;
}
size_t IndexIVF::remove_ids (const IDSelector & sel)
{
FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
"direct map remove not implemented");
std::vector<idx_t> toremove(nlist);
#pragma omp parallel for
for (idx_t i = 0; i < nlist; i++) {
idx_t l0 = invlists->list_size (i), l = l0, j = 0;
ScopedIds idsi (invlists, i);
while (j < l) {
if (sel.is_member (idsi[j])) {
l--;
invlists->update_entry (
i, j,
invlists->get_single_id (i, l),
ScopedCodes (invlists, i, l).get());
} else {
j++;
}
}
toremove[i] = l0 - l;
}
// this will not run well in parallel on ondisk because of possible shrinks
size_t nremove = 0;
for (idx_t i = 0; i < nlist; i++) {
if (toremove[i] > 0) {
nremove += toremove[i];
invlists->resize(
i, invlists->list_size(i) - toremove[i]);
}
}
ntotal -= nremove;
return nremove;
}
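A usage sketch for the two-phase removal above, using the IDSelectorRange selector from AuxIndexStructures.h (index parameters are illustrative):
#include <cassert>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/impl/AuxIndexStructures.h>

int main() {
    int d = 16;
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFFlat index(&quantizer, d, 8);
    std::vector<float> xb(512 * (size_t)d);
    for (size_t i = 0; i < xb.size(); i++) xb[i] = float(i % 31);
    index.train(512, xb.data());
    index.add(512, xb.data());                // sequential ids 0..511
    faiss::IDSelectorRange sel(100, 200);     // selects ids in [100, 200)
    size_t nremoved = index.remove_ids(sel);  // compact lists, then shrink
    assert(nremoved == 100 && index.ntotal == 412);
    return 0;
}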
void IndexIVF::train (idx_t n, const float *x)
{
if (verbose)
printf ("Training level-1 quantizer\n");
train_q1 (n, x, verbose, metric_type);
if (verbose)
printf ("Training IVF residual\n");
train_residual (n, x);
is_trained = true;
}
void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) {
if (verbose)
printf("IndexIVF: no residual training\n");
// does nothing by default
}
void IndexIVF::check_compatible_for_merge (const IndexIVF &other) const
{
// minimal sanity checks
FAISS_THROW_IF_NOT (other.d == d);
FAISS_THROW_IF_NOT (other.nlist == nlist);
FAISS_THROW_IF_NOT (other.code_size == code_size);
FAISS_THROW_IF_NOT_MSG (typeid (*this) == typeid (other),
"can only merge indexes of the same type");
}
void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)
{
check_compatible_for_merge (other);
FAISS_THROW_IF_NOT_MSG ((!maintain_direct_map &&
!other.maintain_direct_map),
"direct map copy not implemented");
invlists->merge_from (other.invlists, add_id);
ntotal += other.ntotal;
other.ntotal = 0;
}
void IndexIVF::replace_invlists (InvertedLists *il, bool own)
{
if (own_invlists) {
delete invlists;
}
// FAISS_THROW_IF_NOT (ntotal == 0);
if (il) {
FAISS_THROW_IF_NOT (il->nlist == nlist &&
il->code_size == code_size);
}
invlists = il;
own_invlists = own;
}
void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
idx_t a1, idx_t a2) const
{
FAISS_THROW_IF_NOT (nlist == other.nlist);
FAISS_THROW_IF_NOT (code_size == other.code_size);
FAISS_THROW_IF_NOT (!other.maintain_direct_map);
FAISS_THROW_IF_NOT_FMT (
subset_type == 0 || subset_type == 1 || subset_type == 2,
"subset type %d not implemented", subset_type);
size_t accu_n = 0;
size_t accu_a1 = 0;
size_t accu_a2 = 0;
InvertedLists *oivf = other.invlists;
for (idx_t list_no = 0; list_no < nlist; list_no++) {
size_t n = invlists->list_size (list_no);
ScopedIds ids_in (invlists, list_no);
if (subset_type == 0) {
for (idx_t i = 0; i < n; i++) {
idx_t id = ids_in[i];
if (a1 <= id && id < a2) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
ScopedCodes (invlists, list_no, i).get());
other.ntotal++;
}
}
} else if (subset_type == 1) {
for (idx_t i = 0; i < n; i++) {
idx_t id = ids_in[i];
if (id % a1 == a2) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
ScopedCodes (invlists, list_no, i).get());
other.ntotal++;
}
}
} else if (subset_type == 2) {
// see what is allocated to a1 and to a2
size_t next_accu_n = accu_n + n;
size_t next_accu_a1 = next_accu_n * a1 / ntotal;
size_t i1 = next_accu_a1 - accu_a1;
size_t next_accu_a2 = next_accu_n * a2 / ntotal;
size_t i2 = next_accu_a2 - accu_a2;
for (idx_t i = i1; i < i2; i++) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
ScopedCodes (invlists, list_no, i).get());
}
other.ntotal += i2 - i1;
accu_a1 = next_accu_a1;
accu_a2 = next_accu_a2;
}
accu_n += n;
}
FAISS_ASSERT(accu_n == ntotal);
}
void
IndexIVF::dump() {
for (size_t i = 0; i < invlists->nlist; ++i) {
auto numVecs = invlists->list_size(i);
auto ids = invlists->get_ids(i);
auto codes = invlists->get_codes(i);
int code_size = invlists->code_size;
std::cout << "Bucket ID: " << i << ", with code size: " << code_size << ", vectors number: " << numVecs << std::endl;
if(code_size == 8) {
// int8 types
for (auto j=0; j < numVecs; ++j) {
std::cout << *(ids+j) << ": " << std::endl;
for(int k = 0; k < this->d; ++ k) {
printf("%u ", (uint8_t)(codes[j * d + k]));
}
std::cout << std::endl;
}
}
std::cout << "Bucket End." << std::endl;
}
}
IndexIVF::~IndexIVF()
{
if (own_invlists) {
delete invlists;
}
}
void IndexIVFStats::reset()
{
memset ((void*)this, 0, sizeof (*this));
}
IndexIVFStats indexIVF_stats;
void InvertedListScanner::scan_codes_range (size_t ,
const uint8_t *,
const idx_t *,
float ,
RangeQueryResult &) const
{
FAISS_THROW_MSG ("scan_codes_range not implemented");
}
} // namespace faiss


@ -0,0 +1,363 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_H
#define FAISS_INDEX_IVF_H
#include <vector>
#include <stdint.h>
#include <faiss/Index.h>
#include <faiss/InvertedLists.h>
#include <faiss/Clustering.h>
#include <faiss/utils/Heap.h>
namespace faiss {
/** Encapsulates a quantizer object for the IndexIVF
*
* The class isolates the fields that are independent of the storage
* of the lists (especially training)
*/
struct Level1Quantizer {
Index * quantizer = nullptr; ///< quantizer that maps vectors to inverted lists
Index * quantizer_backup = nullptr; ///< quantizer for backup
size_t nlist; ///< number of possible key values
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
char quantizer_trains_alone;
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering
/// Trains the quantizer and calls train_residual to train sub-quantizers
void train_q1 (size_t n, const float *x, bool verbose,
MetricType metric_type);
/// compute the number of bytes required to store list ids
size_t coarse_code_size () const;
void encode_listno (Index::idx_t list_no, uint8_t *code) const;
Index::idx_t decode_listno (const uint8_t *code) const;
Level1Quantizer (Index * quantizer, size_t nlist);
Level1Quantizer ();
~Level1Quantizer ();
};
struct IVFSearchParameters {
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
virtual ~IVFSearchParameters () {}
};
struct InvertedListScanner;
/** Index based on an inverted file (IVF)
*
* In the inverted file, the quantizer (an Index instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is stored.
*
* The inverted list object is required only after training. If none is
* set externally, an ArrayInvertedLists is used automatically.
*
* At search time, the vector to be searched is also quantized, and
* only the list corresponding to the quantization index is
* searched. This speeds up the search by making it
* non-exhaustive. This can be relaxed using multi-probe search: a few
* (nprobe) quantization indices are selected and several inverted
* lists are visited.
*
* Sub-classes implement a post-filtering of the index that refines
* the distance estimation from the query to database vectors.
*/
struct IndexIVF: Index, Level1Quantizer {
/// Access to the actual data
InvertedLists *invlists;
bool own_invlists;
size_t code_size; ///< code size per vector in bytes
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
/** Parallel mode determines how queries are parallelized with OpenMP
*
* 0 (default): parallelize over queries
* 1: parallelize over inverted lists
* 2: parallelize over both
*/
int parallel_mode;
/// map for direct access to the elements. Enables reconstruct().
bool maintain_direct_map;
std::vector <idx_t> direct_map;
/** The Inverted file takes a quantizer (an Index) on input,
* which implements the function mapping a vector to a list
* identifier. The pointer is borrowed: the quantizer should not
* be deleted while the IndexIVF is in use.
*/
IndexIVF (Index * quantizer, size_t d,
size_t nlist, size_t code_size,
MetricType metric = METRIC_L2);
void reset() override;
/// Trains the quantizer and calls train_residual to train sub-quantizers
void train(idx_t n, const float* x) override;
/// Calls add_with_ids with NULL ids
void add(idx_t n, const float* x) override;
/// default implementation that calls encode_vectors
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
/** Encodes a set of vectors as they would appear in the inverted lists
*
* @param list_nos inverted list ids as returned by the
* quantizer (size n). -1s are ignored.
* @param codes output codes, size n * code_size
* @param include_listno
* include the list ids in the code (in this case add
* ceil(log8(nlist)) to the code size)
*/
virtual void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listno = false) const = 0;
/// Sub-classes that encode the residuals can train their encoders here
/// does nothing by default
virtual void train_residual (idx_t n, const float *x);
/** search a set of vectors, that are pre-quantized by the IVF
* quantizer. Fill in the corresponding heaps with the query
* results. The default implementation uses InvertedListScanners
* to do the search.
*
* @param n nb of vectors to query
* @param x query vectors, size nx * d
* @param assign coarse quantization indices, size nx * nprobe
* @param centroid_dis
* distances to coarse centroids, size nx * nprobe
* @param distances
* output distances, size n * k
* @param labels output labels, size n * k
* @param store_pairs store inv list index + inv list offset
* in the upper/lower 32 bits of the result
* instead of ids (used for reranking).
* @param params used to override the object's search parameters
*/
virtual void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const;
/** assign the vectors, then call search_preassigned */
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
void range_search (idx_t n, const float* x, float radius,
RangeSearchResult* result) const override;
void range_search_preassigned(idx_t nx, const float *x, float radius,
const idx_t *keys, const float *coarse_dis,
RangeSearchResult *result) const;
/// get a scanner for this index (store_pairs means ignore labels)
virtual InvertedListScanner *get_InvertedListScanner (
bool store_pairs=false) const;
void reconstruct (idx_t key, float* recons) const override;
/** Reconstruct a subset of the indexed vectors.
*
* Overrides default implementation to bypass reconstruct() which requires
* direct_map to be maintained.
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d
*/
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* Overrides default implementation to avoid having to maintain direct_map
* and instead fetch the code offsets through the `store_pairs` flag in
* search_preassigned().
*
* @param recons reconstructed vectors size (n, k, d)
*/
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const override;
/** Reconstruct a vector given the location in terms of (inv list index +
* inv list offset) instead of the id.
*
* Useful for reconstructing when the direct_map is not maintained and
* the inv list offset is computed by search_preassigned() with
* `store_pairs` set.
*/
virtual void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const;
/// Dataset manipulation functions
size_t remove_ids(const IDSelector& sel) override;
/** check that the two indexes are compatible (ie, they are
* trained in the same way and have the same
* parameters). Otherwise throw. */
void check_compatible_for_merge (const IndexIVF &other) const;
/** moves the entries from another dataset to self. On output,
* other is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal */
virtual void merge_from (IndexIVF &other, idx_t add_id);
/** copy a subset of the entries index to the other index
*
* if subset_type == 0: copies ids in [a1, a2)
* if subset_type == 1: copies ids if id % a1 == a2
* if subset_type == 2: copies inverted lists such that a1
* elements are left before and a2 elements are after
*/
virtual void copy_subset_to (IndexIVF & other, int subset_type,
idx_t a1, idx_t a2) const;
virtual void to_readonly();
virtual bool is_readonly() const;
virtual void backup_quantizer();
virtual void restore_quantizer();
~IndexIVF() override;
size_t get_list_size (size_t list_no) const
{ return invlists->list_size(list_no); }
/** initialize a direct map
*
* @param new_maintain_direct_map if true, create a direct map,
* else clear it
*/
void make_direct_map (bool new_maintain_direct_map=true);
/// replace the inverted lists, old one is deallocated if own_invlists
void replace_invlists (InvertedLists *il, bool own=false);
/* The standalone codec interface (except sa_decode that is specific) */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void dump();
IndexIVF ();
};
struct RangeQueryResult;
/** Object that handles a query. The inverted lists to scan are
* provided externally. The object has a lot of state, but
* distance_to_code and scan_codes can be called in multiple
* threads */
struct InvertedListScanner {
using idx_t = Index::idx_t;
/// from now on we handle this query.
virtual void set_query (const float *query_vector) = 0;
/// following codes come from this inverted list
virtual void set_list (idx_t list_no, float coarse_dis) = 0;
/// compute a single query-to-code distance
virtual float distance_to_code (const uint8_t *code) const = 0;
/** scan a set of codes, compute distances to current query and
* update heap of results if necessary.
*
* @param n number of codes to scan
* @param codes codes to scan (n * code_size)
* @param ids corresponding ids (ignored if store_pairs)
* @param distances heap distances (size k)
* @param labels heap labels (size k)
* @param k heap size
* @return number of heap updates performed
*/
virtual size_t scan_codes (size_t n,
const uint8_t *codes,
const idx_t *ids,
float *distances, idx_t *labels,
size_t k) const = 0;
/** scan a set of codes, compute distances to current query and
* update results if distances are below radius
*
* (default implementation fails) */
virtual void scan_codes_range (size_t n,
const uint8_t *codes,
const idx_t *ids,
float radius,
RangeQueryResult &result) const;
virtual ~InvertedListScanner () {}
};
struct IndexIVFStats {
size_t nq; // nb of queries run
size_t nlist; // nb of inverted lists scanned
size_t ndis; // nb of distances computed
size_t nheap_updates; // nb of times the heap was updated
double quantization_time; // time spent quantizing vectors (in ms)
double search_time; // time spent searching lists (in ms)
IndexIVFStats () {reset (); }
void reset ();
};
// global var that collects them all
extern IndexIVFStats indexIVF_stats;
} // namespace faiss
#endif
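A sketch of the two-stage search flow described by this header: coarse quantization, then search_preassigned with per-call parameters. It uses IndexIVFFlat (declared in IndexIVFFlat.h further below); nlist/nprobe and the data are illustrative:
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>

int main() {
    int d = 32;
    size_t nlist = 16, nprobe = 4;
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFFlat index(&quantizer, d, nlist);
    std::vector<float> xb(1000 * (size_t)d);
    for (size_t i = 0; i < xb.size(); i++) xb[i] = float(i % 97) / 97.f;
    index.train(1000, xb.data());
    index.add(1000, xb.data());
    // stage 1: coarse quantization (what IndexIVF::search does internally)
    std::vector<faiss::Index::idx_t> keys(nprobe);
    std::vector<float> coarse_dis(nprobe);
    quantizer.search(1, xb.data(), nprobe, coarse_dis.data(), keys.data());
    // stage 2: scan only the preassigned lists, overriding parameters per call
    faiss::IVFSearchParameters params;
    params.nprobe = nprobe;
    params.max_codes = 0;                    // no cap on scanned codes
    int k = 5;
    std::vector<float> dis(k);
    std::vector<faiss::Index::idx_t> labels(k);
    index.search_preassigned(1, xb.data(), k, keys.data(), coarse_dis.data(),
                             dis.data(), labels.data(),
                             false /* store_pairs */, &params);
    return 0;
}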


@ -0,0 +1,502 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFFlat.h>
#include <cstdio>
#include <faiss/IndexFlat.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
/*****************************************
* IndexIVFFlat implementation
******************************************/
IndexIVFFlat::IndexIVFFlat (Index * quantizer,
size_t d, size_t nlist, MetricType metric):
IndexIVF (quantizer, d, nlist, sizeof(float) * d, metric)
{
code_size = sizeof(float) * d;
}
void IndexIVFFlat::add_with_ids (idx_t n, const float * x, const idx_t *xids)
{
add_core (n, x, xids, nullptr);
}
void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids,
const int64_t *precomputed_idx)
{
FAISS_THROW_IF_NOT (is_trained);
assert (invlists);
FAISS_THROW_IF_NOT_MSG (!(maintain_direct_map && xids),
"cannot have direct map and add with ids");
const int64_t * idx;
ScopeDeleter<int64_t> del;
if (precomputed_idx) {
idx = precomputed_idx;
} else {
int64_t * idx0 = new int64_t [n];
del.set (idx0);
quantizer->assign (n, x, idx0);
idx = idx0;
}
int64_t n_add = 0;
for (size_t i = 0; i < n; i++) {
int64_t id = xids ? xids[i] : ntotal + i;
int64_t list_no = idx [i];
if (list_no < 0)
continue;
const float *xi = x + i * d;
size_t offset = invlists->add_entry (
list_no, id, (const uint8_t*) xi);
if (maintain_direct_map)
direct_map.push_back (list_no << 32 | offset);
n_add++;
}
if (verbose) {
printf("IndexIVFFlat::add_core: added %ld / %ld vectors\n",
n_add, n);
}
ntotal += n;
}
void IndexIVFFlat::encode_vectors(idx_t n, const float* x,
const idx_t * list_nos,
uint8_t * codes,
bool include_listnos) const
{
if (!include_listnos) {
memcpy (codes, x, code_size * n);
} else {
size_t coarse_size = coarse_code_size ();
for (size_t i = 0; i < n; i++) {
int64_t list_no = list_nos [i];
uint8_t *code = codes + i * (code_size + coarse_size);
const float *xi = x + i * d;
if (list_no >= 0) {
encode_listno (list_no, code);
memcpy (code + coarse_size, xi, code_size);
} else {
memset (code, 0, code_size + coarse_size);
}
}
}
}
void IndexIVFFlat::sa_decode (idx_t n, const uint8_t *bytes,
float *x) const
{
size_t coarse_size = coarse_code_size ();
for (size_t i = 0; i < n; i++) {
const uint8_t *code = bytes + i * (code_size + coarse_size);
float *xi = x + i * d;
memcpy (xi, code + coarse_size, code_size);
}
}
namespace {
template<MetricType metric, class C>
struct IVFFlatScanner: InvertedListScanner {
size_t d;
bool store_pairs;
IVFFlatScanner(size_t d, bool store_pairs):
d(d), store_pairs(store_pairs) {}
const float *xi;
void set_query (const float *query) override {
this->xi = query;
}
idx_t list_no;
void set_list (idx_t list_no, float /* coarse_dis */) override {
this->list_no = list_no;
}
float distance_to_code (const uint8_t *code) const override {
const float *yj = (float*)code;
float dis = metric == METRIC_INNER_PRODUCT ?
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
return dis;
}
size_t scan_codes (size_t list_size,
const uint8_t *codes,
const idx_t *ids,
float *simi, idx_t *idxi,
size_t k) const override
{
const float *list_vecs = (const float*)codes;
size_t nup = 0;
for (size_t j = 0; j < list_size; j++) {
const float * yj = list_vecs + d * j;
float dis = metric == METRIC_INNER_PRODUCT ?
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
if (C::cmp (simi[0], dis)) {
heap_pop<C> (k, simi, idxi);
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
heap_push<C> (k, simi, idxi, dis, id);
nup++;
}
}
return nup;
}
void scan_codes_range (size_t list_size,
const uint8_t *codes,
const idx_t *ids,
float radius,
RangeQueryResult & res) const override
{
const float *list_vecs = (const float*)codes;
for (size_t j = 0; j < list_size; j++) {
const float * yj = list_vecs + d * j;
float dis = metric == METRIC_INNER_PRODUCT ?
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
if (C::cmp (radius, dis)) {
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
res.add (dis, id);
}
}
}
};
} // anonymous namespace
InvertedListScanner* IndexIVFFlat::get_InvertedListScanner
(bool store_pairs) const
{
if (metric_type == METRIC_INNER_PRODUCT) {
return new IVFFlatScanner<
METRIC_INNER_PRODUCT, CMin<float, int64_t> > (d, store_pairs);
} else if (metric_type == METRIC_L2) {
return new IVFFlatScanner<
METRIC_L2, CMax<float, int64_t> >(d, store_pairs);
} else {
FAISS_THROW_MSG("metric type not supported");
}
return nullptr;
}
void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
{
FAISS_THROW_IF_NOT (maintain_direct_map);
FAISS_THROW_IF_NOT (is_trained);
std::vector<idx_t> assign (n);
quantizer->assign (n, x, assign.data());
for (size_t i = 0; i < n; i++) {
idx_t id = new_ids[i];
FAISS_THROW_IF_NOT_MSG (0 <= id && id < ntotal,
"id to update out of range");
{ // remove old one
int64_t dm = direct_map[id];
int64_t ofs = dm & 0xffffffff;
int64_t il = dm >> 32;
size_t l = invlists->list_size (il);
if (ofs != l - 1) { // move l - 1 to ofs
int64_t id2 = invlists->get_single_id (il, l - 1);
direct_map[id2] = (il << 32) | ofs;
invlists->update_entry (il, ofs, id2,
invlists->get_single_code (il, l - 1));
}
invlists->resize (il, l - 1);
}
{ // insert new one
int64_t il = assign[i];
size_t l = invlists->list_size (il);
int64_t dm = (il << 32) | l;
direct_map[id] = dm;
invlists->add_entry (il, id, (const uint8_t*)(x + i * d));
}
}
}
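update_vectors above relies on the direct map to locate, evict, and re-insert an entry; a minimal sketch of an in-place update (sizes illustrative):
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>

int main() {
    int d = 16;
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFFlat index(&quantizer, d, 4);
    std::vector<float> xb(256 * (size_t)d);
    for (size_t i = 0; i < xb.size(); i++) xb[i] = float(i % 13);
    index.train(256, xb.data());
    index.add(256, xb.data());
    index.make_direct_map();              // required by update_vectors
    std::vector<float> xnew(d, 0.f);      // replacement vector
    faiss::Index::idx_t id = 3;           // id of the entry to overwrite
    index.update_vectors(1, &id, xnew.data());
    return 0;
}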
void IndexIVFFlat::reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const
{
memcpy (recons, invlists->get_single_code (list_no, offset), code_size);
}
/*****************************************
* IndexIVFFlatDedup implementation
******************************************/
IndexIVFFlatDedup::IndexIVFFlatDedup (
Index * quantizer, size_t d, size_t nlist_,
MetricType metric_type):
IndexIVFFlat (quantizer, d, nlist_, metric_type)
{}
void IndexIVFFlatDedup::train(idx_t n, const float* x)
{
std::unordered_map<uint64_t, idx_t> map;
float * x2 = new float [n * d];
ScopeDeleter<float> del (x2);
int64_t n2 = 0;
for (int64_t i = 0; i < n; i++) {
uint64_t hash = hash_bytes((uint8_t *)(x + i * d), code_size);
if (map.count(hash) &&
!memcmp (x2 + map[hash] * d, x + i * d, code_size)) {
// is duplicate, skip
} else {
map [hash] = n2;
memcpy (x2 + n2 * d, x + i * d, code_size);
n2 ++;
}
}
if (verbose) {
printf ("IndexIVFFlatDedup::train: train on %ld points after dedup "
"(was %ld points)\n", n2, n);
}
IndexIVFFlat::train (n2, x2);
}
void IndexIVFFlatDedup::add_with_ids(
idx_t na, const float* x, const idx_t* xids)
{
FAISS_THROW_IF_NOT (is_trained);
assert (invlists);
FAISS_THROW_IF_NOT_MSG (
!maintain_direct_map,
"IVFFlatDedup not implemented with direct_map");
int64_t * idx = new int64_t [na];
ScopeDeleter<int64_t> del (idx);
quantizer->assign (na, x, idx);
int64_t n_add = 0, n_dup = 0;
// TODO make an omp loop with this
for (size_t i = 0; i < na; i++) {
idx_t id = xids ? xids[i] : ntotal + i;
int64_t list_no = idx [i];
if (list_no < 0) {
continue;
}
const float *xi = x + i * d;
// check whether an identical vector is already stored in that list
InvertedLists::ScopedCodes codes (invlists, list_no);
int64_t n = invlists->list_size (list_no);
int64_t offset = -1;
for (int64_t o = 0; o < n; o++) {
if (!memcmp (codes.get() + o * code_size,
xi, code_size)) {
offset = o;
break;
}
}
if (offset == -1) { // not found
invlists->add_entry (list_no, id, (const uint8_t*) xi);
} else {
// mark equivalence
idx_t id2 = invlists->get_single_id (list_no, offset);
std::pair<idx_t, idx_t> pair (id2, id);
instances.insert (pair);
n_dup ++;
}
n_add++;
}
if (verbose) {
printf("IndexIVFFlat::add_with_ids: added %ld / %ld vectors"
" (out of which %ld are duplicates)\n",
n_add, na, n_dup);
}
ntotal += n_add;
}
void IndexIVFFlatDedup::search_preassigned (
idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params) const
{
FAISS_THROW_IF_NOT_MSG (
!store_pairs, "store_pairs not supported in IVFDedup");
IndexIVFFlat::search_preassigned (n, x, k, assign, centroid_dis,
distances, labels, false,
params);
std::vector <idx_t> labels2 (k);
std::vector <float> dis2 (k);
for (int64_t i = 0; i < n; i++) {
idx_t *labels1 = labels + i * k;
float *dis1 = distances + i * k;
int64_t j = 0;
for (; j < k; j++) {
if (instances.find (labels1[j]) != instances.end ()) {
// a duplicate: special handling
break;
}
}
if (j < k) {
// there are duplicates, special handling
int64_t j0 = j;
int64_t rp = j;
while (j < k) {
auto range = instances.equal_range (labels1[rp]);
float dis = dis1[rp];
labels2[j] = labels1[rp];
dis2[j] = dis;
j ++;
for (auto it = range.first; j < k && it != range.second; ++it) {
labels2[j] = it->second;
dis2[j] = dis;
j++;
}
rp++;
}
memcpy (labels1 + j0, labels2.data() + j0,
sizeof(labels1[0]) * (k - j0));
memcpy (dis1 + j0, dis2.data() + j0,
sizeof(dis2[0]) * (k - j0));
}
}
}
size_t IndexIVFFlatDedup::remove_ids(const IDSelector& sel)
{
std::unordered_map<idx_t, idx_t> replace;
std::vector<std::pair<idx_t, idx_t> > toadd;
for (auto it = instances.begin(); it != instances.end(); ) {
if (sel.is_member(it->first)) {
// then we erase this entry
if (!sel.is_member(it->second)) {
// if the second is not erased
if (replace.count(it->first) == 0) {
replace[it->first] = it->second;
} else { // remember we should add an element
std::pair<idx_t, idx_t> new_entry (
replace[it->first], it->second);
toadd.push_back(new_entry);
}
}
it = instances.erase(it);
} else {
if (sel.is_member(it->second)) {
it = instances.erase(it);
} else {
++it;
}
}
}
instances.insert (toadd.begin(), toadd.end());
// mostly copied from IndexIVF.cpp
FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
"direct map remove not implemented");
std::vector<int64_t> toremove(nlist);
#pragma omp parallel for
for (int64_t i = 0; i < nlist; i++) {
int64_t l0 = invlists->list_size (i), l = l0, j = 0;
InvertedLists::ScopedIds idsi (invlists, i);
while (j < l) {
if (sel.is_member (idsi[j])) {
if (replace.count(idsi[j]) == 0) {
l--;
invlists->update_entry (
i, j,
invlists->get_single_id (i, l),
InvertedLists::ScopedCodes (invlists, i, l).get());
} else {
invlists->update_entry (
i, j,
replace[idsi[j]],
InvertedLists::ScopedCodes (invlists, i, j).get());
j++;
}
} else {
j++;
}
}
toremove[i] = l0 - l;
}
// this will not run well in parallel on ondisk because of possible shrinks
int64_t nremove = 0;
for (int64_t i = 0; i < nlist; i++) {
if (toremove[i] > 0) {
nremove += toremove[i];
invlists->resize(
i, invlists->list_size(i) - toremove[i]);
}
}
ntotal -= nremove;
return nremove;
}
void IndexIVFFlatDedup::range_search(
idx_t ,
const float* ,
float ,
RangeSearchResult* ) const
{
FAISS_THROW_MSG ("not implemented");
}
void IndexIVFFlatDedup::update_vectors (int , idx_t *, const float *)
{
FAISS_THROW_MSG ("not implemented");
}
void IndexIVFFlatDedup::reconstruct_from_offset (
int64_t , int64_t , float* ) const
{
FAISS_THROW_MSG ("not implemented");
}
} // namespace faiss


@ -0,0 +1,118 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_FLAT_H
#define FAISS_INDEX_IVF_FLAT_H
#include <unordered_map>
#include <stdint.h>
#include <faiss/IndexIVF.h>
namespace faiss {
/** Inverted file with stored vectors. Here the inverted file
* pre-selects the vectors to be searched, but they are not otherwise
* encoded, the code array just contains the raw float entries.
*/
struct IndexIVFFlat: IndexIVF {
IndexIVFFlat (
Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_L2);
/// same as add_with_ids, with optional precomputed coarse quantization indices
virtual void add_core (idx_t n, const float * x, const int64_t *xids,
const int64_t *precomputed_idx);
/// implemented for all IndexIVF* classes
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos=false) const override;
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override;
/** Update a subset of vectors.
*
* The index must have a direct_map
*
* @param nv nb of vectors to update
* @param idx vector indices to update, size nv
* @param v vectors of new values, size nv*d
*/
virtual void update_vectors (int nv, idx_t *idx, const float *v);
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
IndexIVFFlat () {}
};
struct IndexIVFFlatDedup: IndexIVFFlat {
/** Maps ids stored in the index to the ids of vectors that are
* the same. When a vector is unique, it does not appear in the
* instances map */
std::unordered_multimap <idx_t, idx_t> instances;
IndexIVFFlatDedup (
Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_L2);
/// also dedups the training set
void train(idx_t n, const float* x) override;
/// implemented for all IndexIVF* classes
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
size_t remove_ids(const IDSelector& sel) override;
/// not implemented
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
/// not implemented
void update_vectors (int nv, idx_t *idx, const float *v) override;
/// not implemented
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
IndexIVFFlatDedup () {}
};
} // namespace faiss
#endif
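A sketch of the dedup behavior documented above: two ids that share one stored vector are both reported at search time (sizes and data are illustrative):
#include <algorithm>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>

int main() {
    int d = 8;
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFFlatDedup index(&quantizer, d, 4);
    std::vector<float> xb(64 * (size_t)d);
    for (size_t i = 0; i < xb.size(); i++) xb[i] = float(i % 7);
    index.train(64, xb.data());
    // add the same vector under two explicit ids: one copy is stored,
    // the second id goes into the `instances` multimap
    std::vector<float> dup(2 * (size_t)d);
    std::copy(xb.begin(), xb.begin() + d, dup.begin());
    std::copy(xb.begin(), xb.begin() + d, dup.begin() + d);
    std::vector<faiss::Index::idx_t> ids = {100, 200};
    index.add_with_ids(2, dup.data(), ids.data());
    // searching at that vector reports both ids among the top results
    int k = 2;
    std::vector<float> dis(k);
    std::vector<faiss::Index::idx_t> labels(k);
    index.search(1, dup.data(), k, dis.data(), labels.data());
    return 0;
}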

File diff suppressed because it is too large


@ -0,0 +1,161 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVFPQ_H
#define FAISS_INDEX_IVFPQ_H
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/IndexPQ.h>
namespace faiss {
struct IVFPQSearchParameters: IVFSearchParameters {
size_t scan_table_threshold; ///< use table computation or on-the-fly?
int polysemous_ht; ///< Hamming thresh for polysemous filtering
~IVFPQSearchParameters () {}
};
/** Inverted file with Product Quantizer encoding. Each residual
* vector is encoded as a product quantizer code.
*/
struct IndexIVFPQ: IndexIVF {
bool by_residual; ///< Encode residual or plain vector?
ProductQuantizer pq; ///< produces the codes
bool do_polysemous_training; ///< reorder PQ centroids after training?
PolysemousTraining *polysemous_training; ///< if NULL, use default
// search-time parameters
size_t scan_table_threshold; ///< use table computation or on-the-fly?
int polysemous_ht; ///< Hamming thresh for polysemous filtering
/** Precompute table that speeds up query preprocessing at some
* memory cost
* =-1: force disable
* =0: decide heuristically (default: use tables only if they are
* < precomputed_tables_max_bytes)
* =1: tables that work for all quantizers (size 256 * nlist * M)
* =2: specific version for MultiIndexQuantizer (much more compact)
*/
int use_precomputed_table; ///< if by_residual, build precompute tables
static size_t precomputed_table_max_bytes;
/// if use_precomputed_table
/// size nlist * pq.M * pq.ksub
std::vector <float> precomputed_table;
IndexIVFPQ (
Index * quantizer, size_t d, size_t nlist,
size_t M, size_t nbits_per_idx);
void add_with_ids(idx_t n, const float* x, const idx_t* xids = nullptr)
override;
void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos = false) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
/// same as add_core, also:
/// - output 2nd level residuals if residuals_2 != NULL
/// - use precomputed list numbers if precomputed_idx != NULL
void add_core_o (idx_t n, const float *x,
const idx_t *xids, float *residuals_2,
const idx_t *precomputed_idx = nullptr);
/// trains the product quantizer
void train_residual(idx_t n, const float* x) override;
/// same as train_residual, also output 2nd level residuals
void train_residual_o (idx_t n, const float *x, float *residuals_2);
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
/** Find exact duplicates in the dataset.
*
* the duplicates are returned in pre-allocated arrays (see the
* max sizes).
*
* @param lims limits between groups of duplicates
* (max size ntotal / 2 + 1)
* @param ids ids[lims[i]] : ids[lims[i+1]-1] is a group of
* duplicates (max size ntotal)
* @return n number of groups found
*/
size_t find_duplicates (idx_t *ids, size_t *lims) const;
// map a vector to a binary code knowing the index
void encode (idx_t key, const float * x, uint8_t * code) const;
/** Encode multiple vectors
*
* @param n nb vectors to encode
* @param keys posting list ids for those vectors (size n)
* @param x vectors (size n * d)
* @param codes output codes (size n * code_size)
* @param compute_keys if false, assume keys are precomputed,
* otherwise compute them
*/
void encode_multiple (size_t n, idx_t *keys,
const float * x, uint8_t * codes,
bool compute_keys = false) const;
/// inverse of encode_multiple
void decode_multiple (size_t n, const idx_t *keys,
const uint8_t * xcodes, float * x) const;
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override;
/// build precomputed table
void precompute_table ();
IndexIVFPQ ();
};
/// statistics are robust to internal threading, but not if
/// IndexIVFPQ::search_preassigned is called by multiple threads
struct IndexIVFPQStats {
size_t nrefine; // nb of refines (IVFPQR)
size_t n_hamming_pass;
// nb of passed Hamming distance tests (for polysemous)
// timings measured with the CPU RTC
// on all threads
size_t search_cycles;
size_t refine_cycles; // only for IVFPQR
IndexIVFPQStats () {reset (); }
void reset ();
};
// global var that collects them all
extern IndexIVFPQStats indexIVFPQ_stats;
} // namespace faiss
#endif
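A construction sketch for the PQ variant above. It assumes d divisible by M (a PQ requirement) and builds the per-quantizer precomputed tables explicitly via precompute_table(); all sizes are illustrative:
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>

int main() {
    int d = 64;                               // must be divisible by M
    size_t nlist = 32, M = 8, nbits = 8;      // 8 sub-quantizers, 256 centroids each
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFPQ index(&quantizer, d, nlist, M, nbits);
    std::vector<float> xb(5000 * (size_t)d);
    for (size_t i = 0; i < xb.size(); i++)
        xb[i] = float((i * 2654435761u) % 1000) / 1000.f;
    index.train(5000, xb.data());             // trains coarse quantizer + PQ
    index.add(5000, xb.data());
    index.use_precomputed_table = 1;          // tables that work for all quantizers
    index.precompute_table();                 // build them explicitly
    index.nprobe = 8;
    int k = 4;
    std::vector<float> dis(k);
    std::vector<faiss::Index::idx_t> labels(k);
    index.search(1, xb.data(), k, dis.data(), labels.data());
    return 0;
}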


@ -0,0 +1,219 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFPQR.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/utils.h>
#include <faiss/utils/distances.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
/*****************************************
* IndexIVFPQR implementation
******************************************/
IndexIVFPQR::IndexIVFPQR (
Index * quantizer, size_t d, size_t nlist,
size_t M, size_t nbits_per_idx,
size_t M_refine, size_t nbits_per_idx_refine):
IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx),
refine_pq (d, M_refine, nbits_per_idx_refine),
k_factor (4)
{
by_residual = true;
}
IndexIVFPQR::IndexIVFPQR ():
k_factor (1)
{
by_residual = true;
}
void IndexIVFPQR::reset()
{
IndexIVFPQ::reset();
refine_codes.clear();
}
void IndexIVFPQR::train_residual (idx_t n, const float *x)
{
float * residual_2 = new float [n * d];
ScopeDeleter <float> del(residual_2);
train_residual_o (n, x, residual_2);
if (verbose)
printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n",
refine_pq.M, refine_pq.ksub, n, d);
refine_pq.cp.max_points_per_centroid = 1000;
refine_pq.cp.verbose = verbose;
refine_pq.train (n, residual_2);
}
void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const idx_t *xids) {
add_core (n, x, xids, nullptr);
}
void IndexIVFPQR::add_core (idx_t n, const float *x, const idx_t *xids,
const idx_t *precomputed_idx) {
float * residual_2 = new float [n * d];
ScopeDeleter <float> del(residual_2);
idx_t n0 = ntotal;
add_core_o (n, x, xids, residual_2, precomputed_idx);
refine_codes.resize (ntotal * refine_pq.code_size);
refine_pq.compute_codes (
residual_2, &refine_codes[n0 * refine_pq.code_size], n);
}
#define TIC t0 = get_cycles()
#define TOC get_cycles () - t0
void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *idx,
const float *L1_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params
) const
{
uint64_t t0;
TIC;
size_t k_coarse = long(k * k_factor);
idx_t *coarse_labels = new idx_t [k_coarse * n];
ScopeDeleter<idx_t> del1 (coarse_labels);
{ // query with quantizer levels 1 and 2.
float *coarse_distances = new float [k_coarse * n];
ScopeDeleter<float> del(coarse_distances);
IndexIVFPQ::search_preassigned (
n, x, k_coarse,
idx, L1_dis, coarse_distances, coarse_labels,
true, params);
}
indexIVFPQ_stats.search_cycles += TOC;
TIC;
// 3rd level refinement
size_t n_refine = 0;
#pragma omp parallel reduction(+ : n_refine)
{
// tmp buffers
float *residual_1 = new float [2 * d];
ScopeDeleter<float> del (residual_1);
float *residual_2 = residual_1 + d;
#pragma omp for
for (idx_t i = 0; i < n; i++) {
const float *xq = x + i * d;
const idx_t * shortlist = coarse_labels + k_coarse * i;
float * heap_sim = distances + k * i;
idx_t * heap_ids = labels + k * i;
maxheap_heapify (k, heap_sim, heap_ids);
for (int j = 0; j < k_coarse; j++) {
idx_t sl = shortlist[j];
if (sl == -1) continue;
int list_no = sl >> 32;
int ofs = sl & 0xffffffff;
assert (list_no >= 0 && list_no < nlist);
assert (ofs >= 0 && ofs < invlists->list_size (list_no));
// 1st level residual
quantizer->compute_residual (xq, residual_1, list_no);
// 2nd level residual
const uint8_t * l2code =
invlists->get_single_code (list_no, ofs);
pq.decode (l2code, residual_2);
for (int l = 0; l < d; l++)
residual_2[l] = residual_1[l] - residual_2[l];
// 3rd level residual's approximation
idx_t id = invlists->get_single_id (list_no, ofs);
assert (0 <= id && id < ntotal);
refine_pq.decode (&refine_codes [id * refine_pq.code_size],
residual_1);
float dis = fvec_L2sqr (residual_1, residual_2, d);
if (dis < heap_sim[0]) {
maxheap_pop (k, heap_sim, heap_ids);
idx_t id_or_pair = store_pairs ? sl : id;
maxheap_push (k, heap_sim, heap_ids, dis, id_or_pair);
}
n_refine ++;
}
maxheap_reorder (k, heap_sim, heap_ids);
}
}
indexIVFPQ_stats.nrefine += n_refine;
indexIVFPQ_stats.refine_cycles += TOC;
}
void IndexIVFPQR::reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const
{
IndexIVFPQ::reconstruct_from_offset (list_no, offset, recons);
idx_t id = invlists->get_single_id (list_no, offset);
assert (0 <= id && id < ntotal);
std::vector<float> r3(d);
refine_pq.decode (&refine_codes [id * refine_pq.code_size], r3.data());
for (int i = 0; i < d; ++i) {
recons[i] += r3[i];
}
}
void IndexIVFPQR::merge_from (IndexIVF &other_in, idx_t add_id)
{
IndexIVFPQR *other = dynamic_cast<IndexIVFPQR *> (&other_in);
FAISS_THROW_IF_NOT(other);
IndexIVF::merge_from (other_in, add_id);
refine_codes.insert (refine_codes.end(),
other->refine_codes.begin(),
other->refine_codes.end());
other->refine_codes.clear();
}
size_t IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) {
FAISS_THROW_MSG("not implemented");
return 0;
}
} // namespace faiss

View File

@ -0,0 +1,65 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/IndexIVFPQ.h>
namespace faiss {
/** Index with an additional level of PQ refinement */
struct IndexIVFPQR: IndexIVFPQ {
ProductQuantizer refine_pq; ///< 3rd level quantizer
std::vector <uint8_t> refine_codes; ///< corresponding codes
/// factor between k requested in search and the k requested from the IVFPQ
float k_factor;
IndexIVFPQR (
Index * quantizer, size_t d, size_t nlist,
size_t M, size_t nbits_per_idx,
size_t M_refine, size_t nbits_per_idx_refine);
void reset() override;
size_t remove_ids(const IDSelector& sel) override;
/// trains the two product quantizers
void train_residual(idx_t n, const float* x) override;
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
/// same as add_with_ids, but optionally use the precomputed list ids
void add_core (idx_t n, const float *x, const idx_t *xids,
const idx_t *precomputed_idx = nullptr);
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
void merge_from (IndexIVF &other, idx_t add_id) override;
void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
IndexIVFPQR();
};
} // namespace faiss
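For orientation, a minimal usage sketch of the three-level index declared above: a flat coarse quantizer, an 8x8-bit IVFPQ, and a 16x8-bit refinement PQ. The header path and the toy parameters are illustrative assumptions, not part of this change.

#include <random>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQR.h>  // assumed header path in this tree

int main() {
    int d = 64;
    size_t nb = 10000, nq = 5, k = 10;
    std::vector<float> xb(nb * d), xq(nq * d);
    std::mt19937 rng(123);
    std::uniform_real_distribution<float> u(0, 1);
    for (auto& v : xb) v = u(rng);
    for (auto& v : xq) v = u(rng);

    faiss::IndexFlatL2 coarse(d);  // level-1 quantizer
    // level 2: 8 sub-quantizers x 8 bits; level 3: 16 refinement sub-quantizers
    faiss::IndexIVFPQR index(&coarse, d, /*nlist=*/256, 8, 8, 16, 8);
    index.k_factor = 4;  // rank 4*k IVFPQ candidates, keep the best k after refinement
    index.train(nb, xb.data());
    index.add(nb, xb.data());

    std::vector<float> D(nq * k);
    std::vector<faiss::Index::idx_t> I(nq * k);
    index.search(nq, xq.data(), k, D.data(), I.data());
    return 0;
}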

View File

@ -0,0 +1,331 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFSpectralHash.h>
#include <memory>
#include <algorithm>
#include <stdint.h>
#include <faiss/utils/hamming.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/VectorTransform.h>
namespace faiss {
IndexIVFSpectralHash::IndexIVFSpectralHash (
Index * quantizer, size_t d, size_t nlist,
int nbit, float period):
IndexIVF (quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
nbit (nbit), period (period), threshold_type (Thresh_global)
{
FAISS_THROW_IF_NOT (code_size % 4 == 0);
RandomRotationMatrix *rr = new RandomRotationMatrix (d, nbit);
rr->init (1234);
vt = rr;
own_fields = true;
is_trained = false;
}
IndexIVFSpectralHash::IndexIVFSpectralHash():
IndexIVF(), vt(nullptr), own_fields(false),
nbit(0), period(0), threshold_type(Thresh_global)
{}
IndexIVFSpectralHash::~IndexIVFSpectralHash ()
{
if (own_fields) {
delete vt;
}
}
namespace {
float median (size_t n, float *x) {
std::sort(x, x + n);
if (n % 2 == 1) {
return x [n / 2];
} else {
return (x [n / 2 - 1] + x [n / 2]) / 2;
}
}
}
void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
{
if (!vt->is_trained) {
vt->train (n, x);
}
if (threshold_type == Thresh_global) {
// nothing to do
return;
} else if (threshold_type == Thresh_centroid ||
threshold_type == Thresh_centroid_half) {
// convert all centroids with vt
std::vector<float> centroids (nlist * d);
quantizer->reconstruct_n (0, nlist, centroids.data());
trained.resize(nlist * nbit);
vt->apply_noalloc (nlist, centroids.data(), trained.data());
if (threshold_type == Thresh_centroid_half) {
for (size_t i = 0; i < nlist * nbit; i++) {
trained[i] -= 0.25 * period;
}
}
return;
}
// otherwise train medians
// assign
std::unique_ptr<idx_t []> idx (new idx_t [n]);
quantizer->assign (n, x, idx.get());
std::vector<size_t> sizes(nlist + 1);
for (size_t i = 0; i < n; i++) {
FAISS_THROW_IF_NOT (idx[i] >= 0);
sizes[idx[i]]++;
}
size_t ofs = 0;
for (int j = 0; j < nlist; j++) {
size_t o0 = ofs;
ofs += sizes[j];
sizes[j] = o0;
}
// transform
std::unique_ptr<float []> xt (vt->apply (n, x));
// transpose + reorder
std::unique_ptr<float []> xo (new float[n * nbit]);
for (size_t i = 0; i < n; i++) {
size_t idest = sizes[idx[i]]++;
for (size_t j = 0; j < nbit; j++) {
xo[idest + n * j] = xt[i * nbit + j];
}
}
trained.resize (nlist * nbit);
// compute medians, one per (inverted list, bit) pair
#pragma omp parallel for
for (int i = 0; i < nlist; i++) {
size_t i0 = i == 0 ? 0 : sizes[i - 1];
size_t i1 = sizes[i];
for (int j = 0; j < nbit; j++) {
float *xoi = xo.get() + i0 + n * j;
if (i0 == i1) { // nothing to train
trained[i * nbit + j] = 0.0;
} else if (i1 == i0 + 1) {
trained[i * nbit + j] = xoi[0];
} else {
trained[i * nbit + j] = median(i1 - i0, xoi);
}
}
}
}
namespace {
void binarize_with_freq(size_t nbit, float freq,
const float *x, const float *c,
uint8_t *codes)
{
memset (codes, 0, (nbit + 7) / 8);
for (size_t i = 0; i < nbit; i++) {
float xf = (x[i] - c[i]);
int xi = int(floor(xf * freq));
int bit = xi & 1;
codes[i >> 3] |= bit << (i & 7);
}
}
};
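// Illustrative note (not part of the patch): with period = 4, freq = 2 / 4
// = 0.5, so for a centered coordinate xf = x[i] - c[i]:
//   xf = 0.7 -> floor(0.7 * 0.5) = 0 -> bit 0
//   xf = 2.3 -> floor(2.3 * 0.5) = 1 -> bit 1
//   xf = 4.1 -> floor(4.1 * 0.5) = 2 -> bit 0 again
// i.e. the bit flips every half-period, which is what makes the code
// insensitive to the absolute offset of the data.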
void IndexIVFSpectralHash::encode_vectors(idx_t n, const float* x_in,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos) const
{
FAISS_THROW_IF_NOT (is_trained);
float freq = 2.0 / period;
FAISS_THROW_IF_NOT_MSG (!include_listnos, "listnos encoding not supported");
// transform with vt
std::unique_ptr<float []> x (vt->apply (n, x_in));
#pragma omp parallel
{
std::vector<float> zero (nbit);
// each thread takes care of a subset of the vectors
#pragma omp for
for (size_t i = 0; i < n; i++) {
int64_t list_no = list_nos [i];
if (list_no >= 0) {
const float *c;
if (threshold_type == Thresh_global) {
c = zero.data();
} else {
c = trained.data() + list_no * nbit;
}
binarize_with_freq (nbit, freq,
x.get() + i * nbit, c,
codes + i * code_size) ;
}
}
}
}
namespace {
template<class HammingComputer>
struct IVFScanner: InvertedListScanner {
// copied from index structure
const IndexIVFSpectralHash *index;
size_t code_size;
size_t nbit;
bool store_pairs;
float period, freq;
std::vector<float> q;
std::vector<float> zero;
std::vector<uint8_t> qcode;
HammingComputer hc;
using idx_t = Index::idx_t;
IVFScanner (const IndexIVFSpectralHash * index,
bool store_pairs):
index (index),
code_size(index->code_size),
nbit(index->nbit),
store_pairs(store_pairs),
period(index->period), freq(2.0 / index->period),
q(nbit), zero(nbit), qcode(code_size),
hc(qcode.data(), code_size)
{
}
void set_query (const float *query) override {
FAISS_THROW_IF_NOT(query);
FAISS_THROW_IF_NOT(q.size() == nbit);
index->vt->apply_noalloc (1, query, q.data());
if (index->threshold_type ==
IndexIVFSpectralHash::Thresh_global) {
binarize_with_freq
(nbit, freq, q.data(), zero.data(), qcode.data());
hc.set (qcode.data(), code_size);
}
}
idx_t list_no;
void set_list (idx_t list_no, float /*coarse_dis*/) override {
this->list_no = list_no;
if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
const float *c = index->trained.data() + list_no * nbit;
binarize_with_freq (nbit, freq, q.data(), c, qcode.data());
hc.set (qcode.data(), code_size);
}
}
float distance_to_code (const uint8_t *code) const final {
return hc.hamming (code);
}
size_t scan_codes (size_t list_size,
const uint8_t *codes,
const idx_t *ids,
float *simi, idx_t *idxi,
size_t k) const override
{
size_t nup = 0;
for (size_t j = 0; j < list_size; j++) {
float dis = hc.hamming (codes);
if (dis < simi [0]) {
maxheap_pop (k, simi, idxi);
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
maxheap_push (k, simi, idxi, dis, id);
nup++;
}
codes += code_size;
}
return nup;
}
void scan_codes_range (size_t list_size,
const uint8_t *codes,
const idx_t *ids,
float radius,
RangeQueryResult & res) const override
{
for (size_t j = 0; j < list_size; j++) {
float dis = hc.hamming (codes);
if (dis < radius) {
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
res.add (dis, id);
}
codes += code_size;
}
}
};
} // anonymous namespace
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner
(bool store_pairs) const
{
switch (code_size) {
#define HANDLE_CODE_SIZE(cs) \
case cs: \
return new IVFScanner<HammingComputer ## cs> (this, store_pairs)
HANDLE_CODE_SIZE(4);
HANDLE_CODE_SIZE(8);
HANDLE_CODE_SIZE(16);
HANDLE_CODE_SIZE(20);
HANDLE_CODE_SIZE(32);
HANDLE_CODE_SIZE(64);
#undef HANDLE_CODE_SIZE
default:
if (code_size % 8 == 0) {
return new IVFScanner<HammingComputerM8>(this, store_pairs);
} else if (code_size % 4 == 0) {
return new IVFScanner<HammingComputerM4>(this, store_pairs);
} else {
FAISS_THROW_MSG("not supported");
}
}
}
} // namespace faiss

View File

@ -0,0 +1,75 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVFSH_H
#define FAISS_INDEX_IVFSH_H
#include <vector>
#include <faiss/IndexIVF.h>
namespace faiss {
struct VectorTransform;
/** Inverted list that stores binary codes of size nbit. Before the
* binary conversion, the dimension of the vectors is transformed from
* dim d into dim nbit by vt (a random rotation by default).
*
 * A per-coordinate threshold determined by threshold_type is
 * subtracted from each coordinate, which is then split into intervals
 * of size period. Half of each interval maps to a 0 bit, the other
 * half to a 1.
*/
struct IndexIVFSpectralHash: IndexIVF {
VectorTransform *vt; // transformation from d to nbit dim
bool own_fields;
int nbit;
float period;
enum ThresholdType {
Thresh_global,
Thresh_centroid,
Thresh_centroid_half,
Thresh_median
};
ThresholdType threshold_type;
// size nlist * nbit or 0 if Thresh_global
std::vector<float> trained;
IndexIVFSpectralHash (Index * quantizer, size_t d, size_t nlist,
int nbit, float period);
IndexIVFSpectralHash ();
void train_residual(idx_t n, const float* x) override;
void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos = false) const override;
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override;
~IndexIVFSpectralHash () override;
};
}; // namespace faiss
#endif
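A minimal usage sketch of the spectral-hash index declared above. The toy parameters are assumptions; note that nbit must keep code_size a multiple of 4, hence a multiple of 32 bits here.

#include <random>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFSpectralHash.h>

int main() {
    int d = 32, nlist = 64, nbit = 64;
    float period = 10.0f;  // toy value; tune to the data scale
    size_t nb = 5000, nq = 3, k = 5;
    std::vector<float> xb(nb * d), xq(nq * d);
    std::mt19937 rng(42);
    std::uniform_real_distribution<float> u(-1, 1);
    for (auto& v : xb) v = u(rng);
    for (auto& v : xq) v = u(rng);

    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFSpectralHash index(&quantizer, d, nlist, nbit, period);
    index.threshold_type = faiss::IndexIVFSpectralHash::Thresh_centroid;
    index.train(nb, xb.data());  // trains the coarse quantizer and thresholds
    index.add(nb, xb.data());

    index.nprobe = 8;  // IndexIVF member
    std::vector<float> D(nq * k);
    std::vector<faiss::Index::idx_t> I(nq * k);
    index.search(nq, xq.data(), k, D.data(), I.data());
    return 0;
}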

View File

@ -0,0 +1,225 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexLSH.h>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <faiss/utils/utils.h>
#include <faiss/utils/hamming.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
/***************************************************************
* IndexLSH
***************************************************************/
IndexLSH::IndexLSH (idx_t d, int nbits, bool rotate_data, bool train_thresholds):
Index(d), nbits(nbits), rotate_data(rotate_data),
train_thresholds (train_thresholds), rrot(d, nbits)
{
is_trained = !train_thresholds;
bytes_per_vec = (nbits + 7) / 8;
if (rotate_data) {
rrot.init(5);
} else {
FAISS_THROW_IF_NOT (d >= nbits);
}
}
IndexLSH::IndexLSH ():
nbits (0), bytes_per_vec(0), rotate_data (false), train_thresholds (false)
{
}
const float * IndexLSH::apply_preprocess (idx_t n, const float *x) const
{
float *xt = nullptr;
if (rotate_data) {
// also applies bias if exists
xt = rrot.apply (n, x);
} else if (d != nbits) {
assert (nbits < d);
xt = new float [nbits * n];
float *xp = xt;
for (idx_t i = 0; i < n; i++) {
const float *xl = x + i * d;
for (int j = 0; j < nbits; j++)
*xp++ = xl [j];
}
}
if (train_thresholds) {
if (xt == NULL) {
xt = new float [nbits * n];
memcpy (xt, x, sizeof(*x) * n * nbits);
}
float *xp = xt;
for (idx_t i = 0; i < n; i++)
for (int j = 0; j < nbits; j++)
*xp++ -= thresholds [j];
}
return xt ? xt : x;
}
void IndexLSH::train (idx_t n, const float *x)
{
if (train_thresholds) {
thresholds.resize (nbits);
train_thresholds = false;
const float *xt = apply_preprocess (n, x);
ScopeDeleter<float> del (xt == x ? nullptr : xt);
train_thresholds = true;
float * transposed_x = new float [n * nbits];
ScopeDeleter<float> del2 (transposed_x);
for (idx_t i = 0; i < n; i++)
for (idx_t j = 0; j < nbits; j++)
transposed_x [j * n + i] = xt [i * nbits + j];
for (idx_t i = 0; i < nbits; i++) {
float *xi = transposed_x + i * n;
// std::nth_element
std::sort (xi, xi + n);
if (n % 2 == 1)
thresholds [i] = xi [n / 2];
else
thresholds [i] = (xi [n / 2 - 1] + xi [n / 2]) / 2;
}
}
is_trained = true;
}
void IndexLSH::add (idx_t n, const float *x)
{
FAISS_THROW_IF_NOT (is_trained);
codes.resize ((ntotal + n) * bytes_per_vec);
sa_encode (n, x, &codes[ntotal * bytes_per_vec]);
ntotal += n;
}
void IndexLSH::search (
idx_t n,
const float *x,
idx_t k,
float *distances,
idx_t *labels) const
{
FAISS_THROW_IF_NOT (is_trained);
const float *xt = apply_preprocess (n, x);
ScopeDeleter<float> del (xt == x ? nullptr : xt);
uint8_t * qcodes = new uint8_t [n * bytes_per_vec];
ScopeDeleter<uint8_t> del2 (qcodes);
fvecs2bitvecs (xt, qcodes, nbits, n);
int * idistances = new int [n * k];
ScopeDeleter<int> del3 (idistances);
int_maxheap_array_t res = { size_t(n), size_t(k), labels, idistances};
hammings_knn_hc (&res, qcodes, codes.data(),
ntotal, bytes_per_vec, true);
// convert distances to floats
for (int i = 0; i < k * n; i++)
distances[i] = idistances[i];
}
void IndexLSH::transfer_thresholds (LinearTransform *vt) {
if (!train_thresholds) return;
FAISS_THROW_IF_NOT (nbits == vt->d_out);
if (!vt->have_bias) {
vt->b.resize (nbits, 0);
vt->have_bias = true;
}
for (int i = 0; i < nbits; i++)
vt->b[i] -= thresholds[i];
train_thresholds = false;
thresholds.clear();
}
void IndexLSH::reset() {
codes.clear();
ntotal = 0;
}
size_t IndexLSH::sa_code_size () const
{
return bytes_per_vec;
}
void IndexLSH::sa_encode (idx_t n, const float *x,
uint8_t *bytes) const
{
FAISS_THROW_IF_NOT (is_trained);
const float *xt = apply_preprocess (n, x);
ScopeDeleter<float> del (xt == x ? nullptr : xt);
fvecs2bitvecs (xt, bytes, nbits, n);
}
void IndexLSH::sa_decode (idx_t n, const uint8_t *bytes,
float *x) const
{
float *xt = x;
ScopeDeleter<float> del;
if (rotate_data || nbits != d) {
xt = new float [n * nbits];
del.set(xt);
}
bitvecs2fvecs (bytes, xt, nbits, n);
if (train_thresholds) {
float *xp = xt;
for (idx_t i = 0; i < n; i++) {
for (int j = 0; j < nbits; j++) {
*xp++ += thresholds [j];
}
}
}
if (rotate_data) {
rrot.reverse_transform (n, xt, x);
} else if (nbits != d) {
for (idx_t i = 0; i < n; i++) {
memcpy (x + i * d, xt + i * nbits,
nbits * sizeof(xt[0]));
}
}
}
} // namespace faiss

View File

@ -0,0 +1,87 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef INDEX_LSH_H
#define INDEX_LSH_H
#include <vector>
#include <faiss/Index.h>
#include <faiss/VectorTransform.h>
namespace faiss {
/** The sign of each vector component is put in a binary signature */
struct IndexLSH:Index {
typedef unsigned char uint8_t;
int nbits; ///< nb of bits per vector
int bytes_per_vec; ///< nb of 8-bits per encoded vector
bool rotate_data; ///< whether to apply a random rotation to input
bool train_thresholds; ///< whether we train thresholds or use 0
RandomRotationMatrix rrot; ///< optional random rotation
std::vector <float> thresholds; ///< thresholds to compare with
/// encoded dataset
std::vector<uint8_t> codes;
IndexLSH (
idx_t d, int nbits,
bool rotate_data = true,
bool train_thresholds = false);
/** Preprocesses and resizes the input to the size required to
* binarize the data
*
* @param x input vectors, size n * d
* @return output vectors, size n * nbits. May be the same pointer
* as x, otherwise it should be deleted by the caller
*/
const float *apply_preprocess (idx_t n, const float *x) const;
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reset() override;
/// transfer the thresholds to a pre-processing stage (and unset
/// train_thresholds)
void transfer_thresholds (LinearTransform * vt);
~IndexLSH() override {}
IndexLSH ();
/* standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
};
}
#endif
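A minimal usage sketch of IndexLSH (toy parameters assumed); note that the returned distances are Hamming distances cast to float.

#include <random>
#include <vector>
#include <faiss/IndexLSH.h>

int main() {
    int d = 64, nbits = 128;
    size_t nb = 2000, nq = 2, k = 4;
    std::vector<float> xb(nb * d), xq(nq * d);
    std::mt19937 rng(7);
    std::normal_distribution<float> g(0, 1);
    for (auto& v : xb) v = g(rng);
    for (auto& v : xq) v = g(rng);

    // random rotation on, learned per-bit thresholds on
    faiss::IndexLSH index(d, nbits, /*rotate_data=*/true,
                          /*train_thresholds=*/true);
    index.train(nb, xb.data());
    index.add(nb, xb.data());

    std::vector<float> D(nq * k);  // Hamming distances, as floats
    std::vector<faiss::Index::idx_t> I(nq * k);
    index.search(nq, xq.data(), k, D.data(), I.data());
    return 0;
}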

View File

@ -0,0 +1,143 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexLattice.h>
#include <faiss/utils/hamming.h> // for the bitstring routines
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>
namespace faiss {
IndexLattice::IndexLattice (idx_t d, int nsq, int scale_nbit, int r2):
Index (d),
nsq (nsq),
dsq (d / nsq),
zn_sphere_codec (dsq, r2),
scale_nbit (scale_nbit)
{
FAISS_THROW_IF_NOT (d % nsq == 0);
lattice_nbit = 0;
while (!( ((uint64_t)1 << lattice_nbit) >= zn_sphere_codec.nv)) {
lattice_nbit++;
}
int total_nbit = (lattice_nbit + scale_nbit) * nsq;
code_size = (total_nbit + 7) / 8;
is_trained = false;
}
void IndexLattice::train(idx_t n, const float* x)
{
// compute ranges per sub-block
trained.resize (nsq * 2);
float * mins = trained.data();
float * maxs = trained.data() + nsq;
for (int sq = 0; sq < nsq; sq++) {
mins[sq] = HUGE_VAL;
maxs[sq] = -1;
}
for (idx_t i = 0; i < n; i++) {
for (int sq = 0; sq < nsq; sq++) {
float norm2 = fvec_norm_L2sqr (x + i * d + sq * dsq, dsq);
if (norm2 > maxs[sq]) maxs[sq] = norm2;
if (norm2 < mins[sq]) mins[sq] = norm2;
}
}
for (int sq = 0; sq < nsq; sq++) {
mins[sq] = sqrtf (mins[sq]);
maxs[sq] = sqrtf (maxs[sq]);
}
is_trained = true;
}
/* The standalone codec interface */
size_t IndexLattice::sa_code_size () const
{
return code_size;
}
void IndexLattice::sa_encode (idx_t n, const float *x, uint8_t *codes) const
{
const float * mins = trained.data();
const float * maxs = mins + nsq;
int64_t sc = int64_t(1) << scale_nbit;
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
BitstringWriter wr(codes + i * code_size, code_size);
const float *xi = x + i * d;
for (int j = 0; j < nsq; j++) {
float nj =
(sqrtf(fvec_norm_L2sqr(xi, dsq)) - mins[j])
* sc / (maxs[j] - mins[j]);
if (nj < 0) nj = 0;
if (nj >= sc) nj = sc - 1;
wr.write((int64_t)nj, scale_nbit);
wr.write(zn_sphere_codec.encode(xi), lattice_nbit);
xi += dsq;
}
}
}
void IndexLattice::sa_decode (idx_t n, const uint8_t *codes, float *x) const
{
const float * mins = trained.data();
const float * maxs = mins + nsq;
float sc = int64_t(1) << scale_nbit;
float r = sqrtf(zn_sphere_codec.r2);
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
BitstringReader rd(codes + i * code_size, code_size);
float *xi = x + i * d;
for (int j = 0; j < nsq; j++) {
float norm =
(rd.read (scale_nbit) + 0.5) *
(maxs[j] - mins[j]) / sc + mins[j];
norm /= r;
zn_sphere_codec.decode (rd.read (lattice_nbit), xi);
for (int l = 0; l < dsq; l++) {
xi[l] *= norm;
}
xi += dsq;
}
}
}
void IndexLattice::add(idx_t , const float* )
{
FAISS_THROW_MSG("not implemented");
}
void IndexLattice::search(idx_t , const float* , idx_t ,
float* , idx_t* ) const
{
FAISS_THROW_MSG("not implemented");
}
void IndexLattice::reset()
{
FAISS_THROW_MSG("not implemented");
}
} // namespace faiss

View File

@ -0,0 +1,68 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_LATTICE_H
#define FAISS_INDEX_LATTICE_H
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/impl/lattice_Zn.h>
namespace faiss {
/** Index that encodes a vector with a series of Zn lattice quantizers
*/
struct IndexLattice: Index {
/// number of sub-vectors
int nsq;
/// dimension of sub-vectors
size_t dsq;
/// the lattice quantizer
ZnSphereCodecAlt zn_sphere_codec;
/// nb bits used to encode the scale, per subvector
int scale_nbit, lattice_nbit;
/// total, in bytes
size_t code_size;
/// mins and maxes of the vector norms, per subquantizer
std::vector<float> trained;
IndexLattice (idx_t d, int nsq, int scale_nbit, int r2);
void train(idx_t n, const float* x) override;
/* The standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
/// not implemented
void add(idx_t n, const float* x) override;
void search(idx_t n, const float* x, idx_t k,
float* distances, idx_t* labels) const override;
void reset() override;
};
} // namespace faiss
#endif
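Since add/search deliberately throw above, the index is driven through the standalone codec interface; a sketch under assumed toy parameters (whether a given d / nsq, r2 pair is accepted depends on the Zn sphere codec).

#include <cstdint>
#include <random>
#include <vector>
#include <faiss/IndexLattice.h>

int main() {
    int d = 32, nsq = 4, scale_nbit = 4, r2 = 10;  // sub-vectors of dim 8
    size_t n = 1000;
    std::vector<float> x(n * d);
    std::mt19937 rng(0);
    std::normal_distribution<float> g(0, 1);
    for (auto& v : x) v = g(rng);

    faiss::IndexLattice codec(d, nsq, scale_nbit, r2);
    codec.train(n, x.data());  // records per-sub-vector norm ranges

    std::vector<uint8_t> codes(n * codec.sa_code_size());
    codec.sa_encode(n, x.data(), codes.data());

    std::vector<float> x2(n * d);
    codec.sa_decode(n, codes.data(), x2.data());  // lossy reconstruction
    return 0;
}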

File diff suppressed because it is too large

View File

@ -0,0 +1,199 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_PQ_H
#define FAISS_INDEX_PQ_H
#include <stdint.h>
#include <vector>
#include <faiss/Index.h>
#include <faiss/impl/ProductQuantizer.h>
#include <faiss/impl/PolysemousTraining.h>
namespace faiss {
/** Index based on a product quantizer. Stored vectors are
* approximated by PQ codes. */
struct IndexPQ: Index {
/// The product quantizer used to encode the vectors
ProductQuantizer pq;
/// Codes. Size ntotal * pq.code_size
std::vector<uint8_t> codes;
/** Constructor.
*
* @param d dimensionality of the input vectors
* @param M number of subquantizers
* @param nbits number of bit per subvector index
*/
IndexPQ (int d, ///< dimensionality of the input vectors
size_t M, ///< number of subquantizers
size_t nbits, ///< number of bit per subvector index
MetricType metric = METRIC_L2);
IndexPQ ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reset() override;
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
size_t remove_ids(const IDSelector& sel) override;
/* The standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
DistanceComputer * get_distance_computer() const override;
/******************************************************
* Polysemous codes implementation
******************************************************/
bool do_polysemous_training; ///< false = standard PQ
/// parameters used for the polysemous training
PolysemousTraining polysemous_training;
/// how to perform the search in search_core
enum Search_type_t {
ST_PQ, ///< asymmetric product quantizer (default)
ST_HE, ///< Hamming distance on codes
ST_generalized_HE, ///< generalized Hamming: nb of identical sub-codes
ST_SDC, ///< symmetric product quantizer (SDC)
ST_polysemous, ///< HE filter (using ht) + PQ combination
ST_polysemous_generalize, ///< Filter on generalized Hamming
};
Search_type_t search_type;
// just encode the sign of the components, instead of using the PQ encoder
// used only for the queries
bool encode_signs;
/// Hamming threshold used for polysemy
int polysemous_ht;
// actual polysemous search
void search_core_polysemous (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const;
/// prepare query for a polysemous search, but instead of
/// computing the result, just get the histogram of Hamming
/// distances. May be computed on a provided dataset if xb != NULL
/// @param dist_histogram (M * nbits + 1)
void hamming_distance_histogram (idx_t n, const float *x,
idx_t nb, const float *xb,
int64_t *dist_histogram);
/** compute pairwise distances between queries and database
*
* @param n nb of query vectors
* @param x query vector, size n * d
* @param dis output distances, size n * ntotal
*/
void hamming_distance_table (idx_t n, const float *x,
int32_t *dis) const;
};
/// statistics are robust to internal threading, but not if
/// IndexPQ::search is called by multiple threads
struct IndexPQStats {
size_t nq; // nb of queries run
size_t ncode; // nb of codes visited
size_t n_hamming_pass; // nb of passed Hamming distance tests (for polysemy)
IndexPQStats () {reset (); }
void reset ();
};
extern IndexPQStats indexPQ_stats;
/** Quantizer where centroids are virtual: they are the Cartesian
* product of sub-centroids. */
struct MultiIndexQuantizer: Index {
ProductQuantizer pq;
MultiIndexQuantizer (int d, ///< dimension of the input vectors
size_t M, ///< number of subquantizers
size_t nbits); ///< number of bit per subvector index
void train(idx_t n, const float* x) override;
void search(
idx_t n, const float* x, idx_t k,
float* distances, idx_t* labels) const override;
/// add and reset will crash at runtime
void add(idx_t n, const float* x) override;
void reset() override;
MultiIndexQuantizer () {}
void reconstruct(idx_t key, float* recons) const override;
};
/** MultiIndexQuantizer where the PQ assignment is performed by sub-indexes
*/
struct MultiIndexQuantizer2: MultiIndexQuantizer {
/// M Indexes on d / M dimensions
std::vector<Index*> assign_indexes;
bool own_fields;
MultiIndexQuantizer2 (
int d, size_t M, size_t nbits,
Index **indexes);
MultiIndexQuantizer2 (
int d, size_t nbits,
Index *assign_index_0,
Index *assign_index_1);
void train(idx_t n, const float* x) override;
void search(
idx_t n, const float* x, idx_t k,
float* distances, idx_t* labels) const override;
};
} // namespace faiss
#endif
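A sketch of a polysemous-codes round trip with the members declared above; the toy parameters and the Hamming threshold are illustrative assumptions.

#include <random>
#include <vector>
#include <faiss/IndexPQ.h>

int main() {
    int d = 64;
    size_t M = 8, nbits = 8, nb = 20000, nq = 4, k = 10;
    std::vector<float> xb(nb * d), xq(nq * d);
    std::mt19937 rng(1);
    std::uniform_real_distribution<float> u(0, 1);
    for (auto& v : xb) v = u(rng);
    for (auto& v : xq) v = u(rng);

    faiss::IndexPQ index(d, M, nbits);
    index.do_polysemous_training = true;  // reorder PQ centroids for Hamming filtering
    index.train(nb, xb.data());
    index.add(nb, xb.data());

    index.search_type = faiss::IndexPQ::ST_polysemous;
    index.polysemous_ht = 54;  // Hamming threshold, out of M * nbits = 64 bits
    std::vector<float> D(nq * k);
    std::vector<faiss::Index::idx_t> I(nq * k);
    index.search(nq, xq.data(), k, D.data(), I.data());
    return 0;
}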

View File

@ -0,0 +1,288 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexPreTransform.h>
#include <cstdio>
#include <cmath>
#include <cstring>
#include <memory>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
/*********************************************
* IndexPreTransform
*********************************************/
IndexPreTransform::IndexPreTransform ():
index(nullptr), own_fields (false)
{
}
IndexPreTransform::IndexPreTransform (
Index * index):
Index (index->d, index->metric_type),
index (index), own_fields (false)
{
is_trained = index->is_trained;
ntotal = index->ntotal;
}
IndexPreTransform::IndexPreTransform (
VectorTransform * ltrans,
Index * index):
Index (index->d, index->metric_type),
index (index), own_fields (false)
{
is_trained = index->is_trained;
ntotal = index->ntotal;
prepend_transform (ltrans);
}
void IndexPreTransform::prepend_transform (VectorTransform *ltrans)
{
FAISS_THROW_IF_NOT (ltrans->d_out == d);
is_trained = is_trained && ltrans->is_trained;
chain.insert (chain.begin(), ltrans);
d = ltrans->d_in;
}
IndexPreTransform::~IndexPreTransform ()
{
if (own_fields) {
for (int i = 0; i < chain.size(); i++)
delete chain[i];
delete index;
}
}
void IndexPreTransform::train (idx_t n, const float *x)
{
int last_untrained = 0;
if (!index->is_trained) {
last_untrained = chain.size();
} else {
for (int i = chain.size() - 1; i >= 0; i--) {
if (!chain[i]->is_trained) {
last_untrained = i;
break;
}
}
}
const float *prev_x = x;
ScopeDeleter<float> del;
if (verbose) {
printf("IndexPreTransform::train: training chain 0 to %d\n",
last_untrained);
}
for (int i = 0; i <= last_untrained; i++) {
if (i < chain.size()) {
VectorTransform *ltrans = chain [i];
if (!ltrans->is_trained) {
if (verbose) {
printf(" Training chain component %d/%zd\n",
i, chain.size());
if (OPQMatrix *opqm = dynamic_cast<OPQMatrix*>(ltrans)) {
opqm->verbose = true;
}
}
ltrans->train (n, prev_x);
}
} else {
if (verbose) {
printf(" Training sub-index\n");
}
index->train (n, prev_x);
}
if (i == last_untrained) break;
if (verbose) {
printf(" Applying transform %d/%zd\n",
i, chain.size());
}
float * xt = chain[i]->apply (n, prev_x);
if (prev_x != x) delete [] prev_x;
prev_x = xt;
del.set(xt);
}
is_trained = true;
}
const float *IndexPreTransform::apply_chain (idx_t n, const float *x) const
{
const float *prev_x = x;
ScopeDeleter<float> del;
for (int i = 0; i < chain.size(); i++) {
float * xt = chain[i]->apply (n, prev_x);
ScopeDeleter<float> del2 (xt);
del2.swap (del);
prev_x = xt;
}
del.release ();
return prev_x;
}
void IndexPreTransform::reverse_chain (idx_t n, const float* xt, float* x) const
{
const float* next_x = xt;
ScopeDeleter<float> del;
for (int i = chain.size() - 1; i >= 0; i--) {
float* prev_x = (i == 0) ? x : new float [n * chain[i]->d_in];
ScopeDeleter<float> del2 ((prev_x == x) ? nullptr : prev_x);
chain [i]->reverse_transform (n, next_x, prev_x);
del2.swap (del);
next_x = prev_x;
}
}
void IndexPreTransform::add (idx_t n, const float *x)
{
FAISS_THROW_IF_NOT (is_trained);
const float *xt = apply_chain (n, x);
ScopeDeleter<float> del(xt == x ? nullptr : xt);
index->add (n, xt);
ntotal = index->ntotal;
}
void IndexPreTransform::add_with_ids (idx_t n, const float * x,
const idx_t *xids)
{
FAISS_THROW_IF_NOT (is_trained);
const float *xt = apply_chain (n, x);
ScopeDeleter<float> del(xt == x ? nullptr : xt);
index->add_with_ids (n, xt, xids);
ntotal = index->ntotal;
}
void IndexPreTransform::search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
FAISS_THROW_IF_NOT (is_trained);
const float *xt = apply_chain (n, x);
ScopeDeleter<float> del(xt == x ? nullptr : xt);
index->search (n, xt, k, distances, labels);
}
void IndexPreTransform::range_search (idx_t n, const float* x, float radius,
RangeSearchResult* result) const
{
FAISS_THROW_IF_NOT (is_trained);
const float *xt = apply_chain (n, x);
ScopeDeleter<float> del(xt == x ? nullptr : xt);
index->range_search (n, xt, radius, result);
}
void IndexPreTransform::reset () {
index->reset();
ntotal = 0;
}
size_t IndexPreTransform::remove_ids (const IDSelector & sel) {
size_t nremove = index->remove_ids (sel);
ntotal = index->ntotal;
return nremove;
}
void IndexPreTransform::reconstruct (idx_t key, float * recons) const
{
float *x = chain.empty() ? recons : new float [index->d];
ScopeDeleter<float> del (recons == x ? nullptr : x);
// Initial reconstruction
index->reconstruct (key, x);
// Revert transformations from last to first
reverse_chain (1, x, recons);
}
void IndexPreTransform::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
{
float *x = chain.empty() ? recons : new float [ni * index->d];
ScopeDeleter<float> del (recons == x ? nullptr : x);
// Initial reconstruction
index->reconstruct_n (i0, ni, x);
// Revert transformations from last to first
reverse_chain (ni, x, recons);
}
void IndexPreTransform::search_and_reconstruct (
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels, float* recons) const
{
FAISS_THROW_IF_NOT (is_trained);
const float* xt = apply_chain (n, x);
ScopeDeleter<float> del ((xt == x) ? nullptr : xt);
float* recons_temp = chain.empty() ? recons : new float [n * k * index->d];
ScopeDeleter<float> del2 ((recons_temp == recons) ? nullptr : recons_temp);
index->search_and_reconstruct (n, xt, k, distances, labels, recons_temp);
// Revert transformations from last to first
reverse_chain (n * k, recons_temp, recons);
}
size_t IndexPreTransform::sa_code_size () const
{
return index->sa_code_size ();
}
void IndexPreTransform::sa_encode (idx_t n, const float *x,
uint8_t *bytes) const
{
if (chain.empty()) {
index->sa_encode (n, x, bytes);
} else {
const float *xt = apply_chain (n, x);
ScopeDeleter<float> del(xt == x ? nullptr : xt);
index->sa_encode (n, xt, bytes);
}
}
void IndexPreTransform::sa_decode (idx_t n, const uint8_t *bytes,
float *x) const
{
if (chain.empty()) {
index->sa_decode (n, bytes, x);
} else {
std::unique_ptr<float []> x1 (new float [index->d * n]);
index->sa_decode (n, bytes, x1.get());
// Revert transformations from last to first
reverse_chain (n, x1.get(), x);
}
}
} // namespace faiss

View File

@ -0,0 +1,91 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <faiss/Index.h>
#include <faiss/VectorTransform.h>
namespace faiss {
/** Index that applies a LinearTransform transform on vectors before
* handing them over to a sub-index */
struct IndexPreTransform: Index {
std::vector<VectorTransform *> chain; ///! chain of transforms
Index * index; ///! the sub-index
bool own_fields; ///! whether pointers are deleted in destructor
explicit IndexPreTransform (Index *index);
IndexPreTransform ();
/// ltrans is the last transform before the index
IndexPreTransform (VectorTransform * ltrans, Index * index);
void prepend_transform (VectorTransform * ltrans);
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
void reset() override;
/** removes IDs from the index. Not supported by all indexes.
*/
size_t remove_ids(const IDSelector& sel) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
/* range search, no attempt is done to change the radius */
void range_search (idx_t n, const float* x, float radius,
RangeSearchResult* result) const override;
void reconstruct (idx_t key, float * recons) const override;
void reconstruct_n (idx_t i0, idx_t ni, float *recons)
const override;
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const override;
/// apply the transforms in the chain. The returned float * may be
/// equal to x, otherwise it should be deallocated.
const float * apply_chain (idx_t n, const float *x) const;
/// Reverse the transforms in the chain. May not be implemented for
/// all transforms in the chain or may return approximate results.
void reverse_chain (idx_t n, const float* xt, float* x) const;
/* standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
~IndexPreTransform() override;
};
} // namespace faiss
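A minimal sketch chaining a PCA down-projection in front of a flat sub-index, as described above; the dimensions and data are illustrative.

#include <random>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/VectorTransform.h>

int main() {
    int d_in = 128, d_out = 32;
    size_t nb = 5000, nq = 2, k = 5;
    std::vector<float> xb(nb * d_in), xq(nq * d_in);
    std::mt19937 rng(3);
    std::normal_distribution<float> g(0, 1);
    for (auto& v : xb) v = g(rng);
    for (auto& v : xq) v = g(rng);

    // PCA to 32 dims, then exact L2 search in the reduced space
    auto* pca = new faiss::PCAMatrix(d_in, d_out);
    auto* sub = new faiss::IndexFlatL2(d_out);
    faiss::IndexPreTransform index(pca, sub);
    index.own_fields = true;     // wrapper deletes pca and sub
    index.train(nb, xb.data());  // trains the PCA, then the sub-index
    index.add(nb, xb.data());

    std::vector<float> D(nq * k);
    std::vector<faiss::Index::idx_t> I(nq * k);
    index.search(nq, xq.data(), k, D.data(), I.data());
    return 0;
}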

View File

@ -0,0 +1,123 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexReplicas.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
template <typename IndexT>
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(bool threaded)
: ThreadedIndex<IndexT>(threaded) {
}
template <typename IndexT>
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(idx_t d, bool threaded)
: ThreadedIndex<IndexT>(d, threaded) {
}
template <typename IndexT>
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(int d, bool threaded)
: ThreadedIndex<IndexT>(d, threaded) {
}
template <typename IndexT>
void
IndexReplicasTemplate<IndexT>::onAfterAddIndex(IndexT* index) {
// Make sure that the parameters are the same for all prior indices, unless
// we're the first index to be added
if (this->count() > 0 && this->at(0) != index) {
auto existing = this->at(0);
FAISS_THROW_IF_NOT_FMT(index->ntotal == existing->ntotal,
"IndexReplicas: newly added index does "
"not have same number of vectors as prior index; "
"prior index has %ld vectors, new index has %ld",
existing->ntotal, index->ntotal);
FAISS_THROW_IF_NOT_MSG(index->is_trained == existing->is_trained,
"IndexReplicas: newly added index does "
"not have same train status as prior index");
} else {
// Set our parameters based on the first index we're adding
// (dimension is handled in ThreadedIndex)
this->ntotal = index->ntotal;
this->verbose = index->verbose;
this->is_trained = index->is_trained;
this->metric_type = index->metric_type;
}
}
template <typename IndexT>
void
IndexReplicasTemplate<IndexT>::train(idx_t n, const component_t* x) {
this->runOnIndex([n, x](int, IndexT* index){ index->train(n, x); });
}
template <typename IndexT>
void
IndexReplicasTemplate<IndexT>::add(idx_t n, const component_t* x) {
this->runOnIndex([n, x](int, IndexT* index){ index->add(n, x); });
this->ntotal += n;
}
template <typename IndexT>
void
IndexReplicasTemplate<IndexT>::reconstruct(idx_t n, component_t* x) const {
FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
// Just pass to the first replica
this->at(0)->reconstruct(n, x);
}
template <typename IndexT>
void
IndexReplicasTemplate<IndexT>::search(idx_t n,
const component_t* x,
idx_t k,
distance_t* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
if (n == 0) {
return;
}
auto dim = this->d;
size_t componentsPerVec =
sizeof(component_t) == 1 ? (dim + 7) / 8 : dim;
// Partition the query by the number of indices we have
faiss::Index::idx_t queriesPerIndex =
(faiss::Index::idx_t) (n + this->count() - 1) /
(faiss::Index::idx_t) this->count();
FAISS_ASSERT(n / queriesPerIndex <= this->count());
auto fn =
[queriesPerIndex, componentsPerVec,
n, x, k, distances, labels](int i, const IndexT* index) {
faiss::Index::idx_t base = (faiss::Index::idx_t) i * queriesPerIndex;
if (base < n) {
auto numForIndex = std::min(queriesPerIndex, n - base);
index->search(numForIndex,
x + base * componentsPerVec,
k,
distances + base * k,
labels + base * k);
}
};
this->runOnIndex(fn);
}
// explicit instantiations
template struct IndexReplicasTemplate<Index>;
template struct IndexReplicasTemplate<IndexBinary>;
} // namespace

View File

@ -0,0 +1,76 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>
#include <faiss/impl/ThreadedIndex.h>
namespace faiss {
/// Takes individual faiss::Index instances, and splits queries for
/// sending to each Index instance, and joins the results together
/// when done.
/// Each index is managed by a separate CPU thread.
template <typename IndexT>
class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
public:
using idx_t = typename IndexT::idx_t;
using component_t = typename IndexT::component_t;
using distance_t = typename IndexT::distance_t;
/// The dimension that all sub-indices must share will be the dimension of the
/// first sub-index added
/// @param threaded do we use one thread per sub-index or do queries
/// sequentially?
explicit IndexReplicasTemplate(bool threaded = true);
/// @param d the dimension that all sub-indices must share
/// @param threaded do we use one thread per sub index or do queries
/// sequentially?
explicit IndexReplicasTemplate(idx_t d, bool threaded = true);
/// int version due to the implicit bool conversion ambiguity of int as
/// dimension
explicit IndexReplicasTemplate(int d, bool threaded = true);
/// Alias for addIndex()
void add_replica(IndexT* index) { this->addIndex(index); }
/// Alias for removeIndex()
void remove_replica(IndexT* index) { this->removeIndex(index); }
/// faiss::Index API
/// All indices receive the same call
void train(idx_t n, const component_t* x) override;
/// faiss::Index API
/// All indices receive the same call
void add(idx_t n, const component_t* x) override;
/// faiss::Index API
/// Query is partitioned into a slice for each sub-index
/// split by ceil(n / #indices) for our sub-indices
void search(idx_t n,
const component_t* x,
idx_t k,
distance_t* distances,
idx_t* labels) const override;
/// reconstructs from the first index
void reconstruct(idx_t, component_t *v) const override;
protected:
/// Called just after an index is added
void onAfterAddIndex(IndexT* index) override;
};
using IndexReplicas = IndexReplicasTemplate<Index>;
using IndexBinaryReplicas = IndexReplicasTemplate<IndexBinary>;
} // namespace
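A minimal sketch with two flat replicas; all parameters are illustrative.

#include <random>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexReplicas.h>

int main() {
    int d = 64;
    size_t nb = 1000, nq = 8, k = 5;
    std::vector<float> xb(nb * d), xq(nq * d);
    std::mt19937 rng(9);
    std::uniform_real_distribution<float> u(0, 1);
    for (auto& v : xb) v = u(rng);
    for (auto& v : xq) v = u(rng);

    faiss::IndexFlatL2 r0(d), r1(d);   // two identical replicas
    faiss::IndexReplicas replicas(d);  // one worker thread per replica
    replicas.add_replica(&r0);
    replicas.add_replica(&r1);
    replicas.add(nb, xb.data());       // every replica receives all vectors

    std::vector<float> D(nq * k);
    std::vector<faiss::Index::idx_t> I(nq * k);
    // queries are split across the replicas and run concurrently
    replicas.search(nq, xq.data(), k, D.data(), I.data());
    return 0;
}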

View File

@ -0,0 +1,183 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <cstdio>
#include <algorithm>
#include <omp.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/IndexSQHybrid.h>
namespace faiss {
/*******************************************************************
* IndexIVFSQHybrid implementation
********************************************************************/
IndexIVFSQHybrid::IndexIVFSQHybrid (
Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype,
MetricType metric, bool encode_residual)
: IndexIVF(quantizer, d, nlist, 0, metric),
sq(d, qtype),
by_residual(encode_residual)
{
code_size = sq.code_size;
// was not known at construction time
invlists->code_size = code_size;
is_trained = false;
}
IndexIVFSQHybrid::IndexIVFSQHybrid ():
IndexIVF(),
by_residual(true)
{
}
void IndexIVFSQHybrid::train_residual (idx_t n, const float *x)
{
sq.train_residual(n, x, quantizer, by_residual, verbose);
}
void IndexIVFSQHybrid::encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
size_t coarse_size = include_listnos ? coarse_code_size () : 0;
memset(codes, 0, (code_size + coarse_size) * n);
#pragma omp parallel if(n > 1)
{
std::vector<float> residual (d);
#pragma omp for
for (size_t i = 0; i < n; i++) {
int64_t list_no = list_nos [i];
if (list_no >= 0) {
const float *xi = x + i * d;
uint8_t *code = codes + i * (code_size + coarse_size);
if (by_residual) {
quantizer->compute_residual (
xi, residual.data(), list_no);
xi = residual.data ();
}
if (coarse_size) {
encode_listno (list_no, code);
}
squant->encode_vector (xi, code + coarse_size);
}
}
}
}
void IndexIVFSQHybrid::sa_decode (idx_t n, const uint8_t *codes,
float *x) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
size_t coarse_size = coarse_code_size ();
#pragma omp parallel if(n > 1)
{
std::vector<float> residual (d);
#pragma omp for
for (size_t i = 0; i < n; i++) {
const uint8_t *code = codes + i * (code_size + coarse_size);
int64_t list_no = decode_listno (code);
float *xi = x + i * d;
squant->decode_vector (code + coarse_size, xi);
if (by_residual) {
quantizer->reconstruct (list_no, residual.data());
for (size_t j = 0; j < d; j++) {
xi[j] += residual[j];
}
}
}
}
}
void IndexIVFSQHybrid::add_with_ids
(idx_t n, const float * x, const idx_t *xids)
{
FAISS_THROW_IF_NOT (is_trained);
std::unique_ptr<int64_t []> idx (new int64_t [n]);
quantizer->assign (n, x, idx.get());
size_t nadd = 0;
std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
#pragma omp parallel reduction(+: nadd)
{
std::vector<float> residual (d);
std::vector<uint8_t> one_code (code_size);
int nt = omp_get_num_threads();
int rank = omp_get_thread_num();
// each thread takes care of a subset of lists
for (size_t i = 0; i < n; i++) {
int64_t list_no = idx [i];
if (list_no >= 0 && list_no % nt == rank) {
int64_t id = xids ? xids[i] : ntotal + i;
const float * xi = x + i * d;
if (by_residual) {
quantizer->compute_residual (xi, residual.data(), list_no);
xi = residual.data();
}
memset (one_code.data(), 0, code_size);
squant->encode_vector (xi, one_code.data());
invlists->add_entry (list_no, id, one_code.data());
nadd++;
}
}
}
ntotal += n;
}
InvertedListScanner* IndexIVFSQHybrid::get_InvertedListScanner
(bool store_pairs) const
{
return sq.select_InvertedListScanner (metric_type, quantizer, store_pairs,
by_residual);
}
void IndexIVFSQHybrid::reconstruct_from_offset (int64_t list_no,
int64_t offset,
float* recons) const
{
std::vector<float> centroid(d);
quantizer->reconstruct (list_no, centroid.data());
const uint8_t* code = invlists->get_single_code (list_no, offset);
sq.decode (code, recons, 1);
for (int i = 0; i < d; ++i) {
recons[i] += centroid[i];
}
}
} // namespace faiss

View File

@ -0,0 +1,65 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_SQ_HYBRID_H
#define FAISS_INDEX_SQ_HYBRID_H
#include <stdint.h>
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/impl/ScalarQuantizer.h>
namespace faiss {
/** An IVF implementation where the components of the residuals are
* encoded with a scalar uniform quantizer. All distance computations
* are asymmetric, so the encoded vectors are decoded and approximate
* distances are computed.
*/
struct IndexIVFSQHybrid: IndexIVF {
ScalarQuantizer sq;
bool by_residual;
IndexIVFSQHybrid(Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype,
MetricType metric = METRIC_L2,
bool encode_residual = true);
IndexIVFSQHybrid();
void train_residual(idx_t n, const float* x) override;
void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos=false) const override;
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override;
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
/* standalone codec interface */
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
};
}
#endif
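The CPU-side API mirrors IndexIVFScalarQuantizer; a minimal construction sketch with toy parameters, for illustration only.

#include <faiss/IndexFlat.h>
#include <faiss/IndexSQHybrid.h>

int main() {
    // 64-d vectors, 100 coarse cells, 8-bit codes on the residuals;
    // train / add_with_ids / search then behave as in IndexIVFScalarQuantizer.
    faiss::IndexFlatL2 quantizer(64);
    faiss::IndexIVFSQHybrid index(&quantizer, 64, 100,
                                  faiss::ScalarQuantizer::QT_8bit);
    return 0;
}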

View File

@ -0,0 +1,317 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexScalarQuantizer.h>
#include <cstdio>
#include <algorithm>
#include <omp.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/ScalarQuantizer.h>
namespace faiss {
/*******************************************************************
* IndexScalarQuantizer implementation
********************************************************************/
IndexScalarQuantizer::IndexScalarQuantizer
(int d, ScalarQuantizer::QuantizerType qtype,
MetricType metric):
Index(d, metric),
sq (d, qtype)
{
is_trained =
qtype == ScalarQuantizer::QT_fp16 ||
qtype == ScalarQuantizer::QT_8bit_direct;
code_size = sq.code_size;
}
IndexScalarQuantizer::IndexScalarQuantizer ():
IndexScalarQuantizer(0, ScalarQuantizer::QT_8bit)
{}
void IndexScalarQuantizer::train(idx_t n, const float* x)
{
sq.train(n, x);
is_trained = true;
}
void IndexScalarQuantizer::add(idx_t n, const float* x)
{
FAISS_THROW_IF_NOT (is_trained);
codes.resize ((n + ntotal) * code_size);
sq.compute_codes (x, &codes[ntotal * code_size], n);
ntotal += n;
}
void IndexScalarQuantizer::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const
{
FAISS_THROW_IF_NOT (is_trained);
FAISS_THROW_IF_NOT (metric_type == METRIC_L2 ||
metric_type == METRIC_INNER_PRODUCT);
#pragma omp parallel
{
InvertedListScanner* scanner = sq.select_InvertedListScanner
(metric_type, nullptr, true);
ScopeDeleter1<InvertedListScanner> del(scanner);
#pragma omp for
for (size_t i = 0; i < n; i++) {
float * D = distances + k * i;
idx_t * I = labels + k * i;
// re-order heap
if (metric_type == METRIC_L2) {
maxheap_heapify (k, D, I);
} else {
minheap_heapify (k, D, I);
}
scanner->set_query (x + i * d);
scanner->scan_codes (ntotal, codes.data(),
nullptr, D, I, k);
// re-order heap
if (metric_type == METRIC_L2) {
maxheap_reorder (k, D, I);
} else {
minheap_reorder (k, D, I);
}
}
}
}
DistanceComputer *IndexScalarQuantizer::get_distance_computer () const
{
ScalarQuantizer::SQDistanceComputer *dc =
sq.get_distance_computer (metric_type);
dc->code_size = sq.code_size;
dc->codes = codes.data();
return dc;
}
void IndexScalarQuantizer::reset()
{
codes.clear();
ntotal = 0;
}
void IndexScalarQuantizer::reconstruct_n(
idx_t i0, idx_t ni, float* recons) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
for (size_t i = 0; i < ni; i++) {
squant->decode_vector(&codes[(i + i0) * code_size], recons + i * d);
}
}
void IndexScalarQuantizer::reconstruct(idx_t key, float* recons) const
{
reconstruct_n(key, 1, recons);
}
/* Codec interface */
size_t IndexScalarQuantizer::sa_code_size () const
{
return sq.code_size;
}
void IndexScalarQuantizer::sa_encode (idx_t n, const float *x,
uint8_t *bytes) const
{
FAISS_THROW_IF_NOT (is_trained);
sq.compute_codes (x, bytes, n);
}
void IndexScalarQuantizer::sa_decode (idx_t n, const uint8_t *bytes,
float *x) const
{
FAISS_THROW_IF_NOT (is_trained);
sq.decode(bytes, x, n);
}
/*******************************************************************
* IndexIVFScalarQuantizer implementation
********************************************************************/
IndexIVFScalarQuantizer::IndexIVFScalarQuantizer (
Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype,
MetricType metric, bool encode_residual)
: IndexIVF(quantizer, d, nlist, 0, metric),
sq(d, qtype),
by_residual(encode_residual)
{
code_size = sq.code_size;
// was not known at construction time
invlists->code_size = code_size;
is_trained = false;
}
IndexIVFScalarQuantizer::IndexIVFScalarQuantizer ():
IndexIVF(),
by_residual(true)
{
}
void IndexIVFScalarQuantizer::train_residual (idx_t n, const float *x)
{
sq.train_residual(n, x, quantizer, by_residual, verbose);
}
void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
size_t coarse_size = include_listnos ? coarse_code_size () : 0;
memset(codes, 0, (code_size + coarse_size) * n);
#pragma omp parallel if(n > 1)
{
std::vector<float> residual (d);
#pragma omp for
for (size_t i = 0; i < n; i++) {
int64_t list_no = list_nos [i];
if (list_no >= 0) {
const float *xi = x + i * d;
uint8_t *code = codes + i * (code_size + coarse_size);
if (by_residual) {
quantizer->compute_residual (
xi, residual.data(), list_no);
xi = residual.data ();
}
if (coarse_size) {
encode_listno (list_no, code);
}
squant->encode_vector (xi, code + coarse_size);
}
}
}
}
void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
float *x) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
size_t coarse_size = coarse_code_size ();
#pragma omp parallel if(n > 1)
{
std::vector<float> residual (d);
#pragma omp for
for (size_t i = 0; i < n; i++) {
const uint8_t *code = codes + i * (code_size + coarse_size);
int64_t list_no = decode_listno (code);
float *xi = x + i * d;
squant->decode_vector (code + coarse_size, xi);
if (by_residual) {
quantizer->reconstruct (list_no, residual.data());
for (size_t j = 0; j < d; j++) {
xi[j] += residual[j];
}
}
}
}
}
void IndexIVFScalarQuantizer::add_with_ids
(idx_t n, const float * x, const idx_t *xids)
{
FAISS_THROW_IF_NOT (is_trained);
std::unique_ptr<int64_t []> idx (new int64_t [n]);
quantizer->assign (n, x, idx.get());
size_t nadd = 0;
std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
#pragma omp parallel reduction(+: nadd)
{
std::vector<float> residual (d);
std::vector<uint8_t> one_code (code_size);
int nt = omp_get_num_threads();
int rank = omp_get_thread_num();
// each thread takes care of a subset of lists
for (size_t i = 0; i < n; i++) {
int64_t list_no = idx [i];
if (list_no >= 0 && list_no % nt == rank) {
int64_t id = xids ? xids[i] : ntotal + i;
const float * xi = x + i * d;
if (by_residual) {
quantizer->compute_residual (xi, residual.data(), list_no);
xi = residual.data();
}
memset (one_code.data(), 0, code_size);
squant->encode_vector (xi, one_code.data());
invlists->add_entry (list_no, id, one_code.data());
nadd++;
}
}
}
ntotal += n;
}
InvertedListScanner* IndexIVFScalarQuantizer::get_InvertedListScanner
(bool store_pairs) const
{
return sq.select_InvertedListScanner (metric_type, quantizer, store_pairs,
by_residual);
}
void IndexIVFScalarQuantizer::reconstruct_from_offset (int64_t list_no,
int64_t offset,
float* recons) const
{
std::vector<float> centroid(d);
quantizer->reconstruct (list_no, centroid.data());
const uint8_t* code = invlists->get_single_code (list_no, offset);
sq.decode (code, recons, 1);
for (int i = 0; i < d; ++i) {
recons[i] += centroid[i];
}
}
} // namespace faiss

View File

@ -0,0 +1,127 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_SCALAR_QUANTIZER_H
#define FAISS_INDEX_SCALAR_QUANTIZER_H
#include <stdint.h>
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/impl/ScalarQuantizer.h>
namespace faiss {
/**
* The uniform quantizer has a range [vmin, vmax]. The range can be
* the same for all dimensions (uniform) or specific per dimension
* (default).
*/
struct IndexScalarQuantizer: Index {
/// Used to encode the vectors
ScalarQuantizer sq;
/// Codes. Size ntotal * code_size
std::vector<uint8_t> codes;
size_t code_size;
/** Constructor.
*
* @param d dimensionality of the input vectors
* @param qtype type of scalar quantizer used for each component
* @param metric metric type used for search
*/
IndexScalarQuantizer (int d,
ScalarQuantizer::QuantizerType qtype,
MetricType metric = METRIC_L2);
IndexScalarQuantizer ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reset() override;
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
DistanceComputer *get_distance_computer () const override;
/* standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
};
/** An IVF implementation where the components of the residuals are
* encoded with a scalar uniform quantizer. All distance computations
* are asymmetric, so the encoded vectors are decoded and approximate
* distances are computed.
*/
struct IndexIVFScalarQuantizer: IndexIVF {
ScalarQuantizer sq;
bool by_residual;
IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype,
MetricType metric = METRIC_L2,
bool encode_residual = true);
IndexIVFScalarQuantizer();
void train_residual(idx_t n, const float* x) override;
void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos=false) const override;
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override;
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
/* standalone codec interface */
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
};
}
#endif
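A sketch contrasting the flat and IVF scalar-quantizer variants declared above (toy parameters assumed).

#include <random>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexScalarQuantizer.h>

int main() {
    int d = 64, nlist = 100;
    size_t nb = 10000, nq = 4, k = 10;
    std::vector<float> xb(nb * d), xq(nq * d);
    std::mt19937 rng(5);
    std::uniform_real_distribution<float> u(0, 1);
    for (auto& v : xb) v = u(rng);
    for (auto& v : xq) v = u(rng);

    // flat variant: every vector stored as d 8-bit codes
    faiss::IndexScalarQuantizer flat(d, faiss::ScalarQuantizer::QT_8bit);
    flat.train(nb, xb.data());
    flat.add(nb, xb.data());

    // IVF variant: residuals w.r.t. the coarse centroid are quantized
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFScalarQuantizer ivf(&quantizer, d, nlist,
                                       faiss::ScalarQuantizer::QT_8bit);
    ivf.train(nb, xb.data());
    ivf.add(nb, xb.data());
    ivf.nprobe = 8;

    std::vector<float> D(nq * k);
    std::vector<faiss::Index::idx_t> I(nq * k);
    ivf.search(nq, xq.data(), k, D.data(), I.data());
    return 0;
}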

View File

@ -0,0 +1,317 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexShards.h>
#include <cstdio>
#include <functional>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/WorkerThread.h>
namespace faiss {
// subroutines
namespace {
typedef Index::idx_t idx_t;
// add translation to all valid labels
void translate_labels (long n, idx_t *labels, long translation)
{
if (translation == 0) return;
for (long i = 0; i < n; i++) {
if(labels[i] < 0) continue;
labels[i] += translation;
}
}
/** merge result tables from several shards.
* @param all_distances size nshard * n * k
* @param all_labels idem
* @param translations label translations to apply, size nshard
*/
template <class IndexClass, class C>
void
merge_tables(long n, long k, long nshard,
typename IndexClass::distance_t *distances,
idx_t *labels,
const std::vector<typename IndexClass::distance_t>& all_distances,
const std::vector<idx_t>& all_labels,
const std::vector<long>& translations) {
if (k == 0) {
return;
}
using distance_t = typename IndexClass::distance_t;
long stride = n * k;
#pragma omp parallel
{
std::vector<int> buf (2 * nshard);
int * pointer = buf.data();
int * shard_ids = pointer + nshard;
std::vector<distance_t> buf2 (nshard);
distance_t * heap_vals = buf2.data();
#pragma omp for
for (long i = 0; i < n; i++) {
// the heap maps values to the shard where they are
// produced.
const distance_t *D_in = all_distances.data() + i * k;
const idx_t *I_in = all_labels.data() + i * k;
int heap_size = 0;
for (long s = 0; s < nshard; s++) {
pointer[s] = 0;
if (I_in[stride * s] >= 0) {
heap_push<C> (++heap_size, heap_vals, shard_ids,
D_in[stride * s], s);
}
}
distance_t *D = distances + i * k;
idx_t *I = labels + i * k;
for (int j = 0; j < k; j++) {
if (heap_size == 0) {
I[j] = -1;
D[j] = C::neutral();
} else {
// pop best element
int s = shard_ids[0];
int & p = pointer[s];
D[j] = heap_vals[0];
I[j] = I_in[stride * s + p] + translations[s];
heap_pop<C> (heap_size--, heap_vals, shard_ids);
p++;
if (p < k && I_in[stride * s + p] >= 0) {
heap_push<C> (++heap_size, heap_vals, shard_ids,
D_in[stride * s + p], s);
}
}
}
}
}
}
} // anonymous namespace
template <typename IndexT>
IndexShardsTemplate<IndexT>::IndexShardsTemplate(idx_t d,
bool threaded,
bool successive_ids)
: ThreadedIndex<IndexT>(d, threaded),
successive_ids(successive_ids) {
}
template <typename IndexT>
IndexShardsTemplate<IndexT>::IndexShardsTemplate(int d,
bool threaded,
bool successive_ids)
: ThreadedIndex<IndexT>(d, threaded),
successive_ids(successive_ids) {
}
template <typename IndexT>
IndexShardsTemplate<IndexT>::IndexShardsTemplate(bool threaded,
bool successive_ids)
: ThreadedIndex<IndexT>(threaded),
successive_ids(successive_ids) {
}
template <typename IndexT>
void
IndexShardsTemplate<IndexT>::onAfterAddIndex(IndexT* index /* unused */) {
sync_with_shard_indexes();
}
template <typename IndexT>
void
IndexShardsTemplate<IndexT>::onAfterRemoveIndex(IndexT* index /* unused */) {
sync_with_shard_indexes();
}
template <typename IndexT>
void
IndexShardsTemplate<IndexT>::sync_with_shard_indexes() {
if (!this->count()) {
this->is_trained = false;
this->ntotal = 0;
return;
}
auto firstIndex = this->at(0);
this->metric_type = firstIndex->metric_type;
this->is_trained = firstIndex->is_trained;
this->ntotal = firstIndex->ntotal;
for (int i = 1; i < this->count(); ++i) {
auto index = this->at(i);
FAISS_THROW_IF_NOT(this->metric_type == index->metric_type);
FAISS_THROW_IF_NOT(this->d == index->d);
this->ntotal += index->ntotal;
}
}
template <typename IndexT>
void
IndexShardsTemplate<IndexT>::train(idx_t n,
const component_t *x) {
auto fn =
[n, x](int no, IndexT *index) {
if (index->verbose) {
printf("begin train shard %d on %ld points\n", no, n);
}
index->train(n, x);
if (index->verbose) {
printf("end train shard %d\n", no);
}
};
this->runOnIndex(fn);
sync_with_shard_indexes();
}
template <typename IndexT>
void
IndexShardsTemplate<IndexT>::add(idx_t n,
const component_t *x) {
add_with_ids(n, x, nullptr);
}
template <typename IndexT>
void
IndexShardsTemplate<IndexT>::add_with_ids(idx_t n,
const component_t * x,
const idx_t *xids) {
FAISS_THROW_IF_NOT_MSG(!(successive_ids && xids),
"It makes no sense to pass in ids and "
"request them to be shifted");
if (successive_ids) {
FAISS_THROW_IF_NOT_MSG(!xids,
"It makes no sense to pass in ids and "
"request them to be shifted");
FAISS_THROW_IF_NOT_MSG(this->ntotal == 0,
"when adding to IndexShards with sucessive_ids, "
"only add() in a single pass is supported");
}
idx_t nshard = this->count();
const idx_t *ids = xids;
std::vector<idx_t> aids;
if (!ids && !successive_ids) {
aids.resize(n);
for (idx_t i = 0; i < n; i++) {
aids[i] = this->ntotal + i;
}
ids = aids.data();
}
size_t components_per_vec =
sizeof(component_t) == 1 ? (this->d + 7) / 8 : this->d;
auto fn =
[n, ids, x, nshard, components_per_vec](int no, IndexT *index) {
idx_t i0 = (idx_t) no * n / nshard;
idx_t i1 = ((idx_t) no + 1) * n / nshard;
auto x0 = x + i0 * components_per_vec;
if (index->verbose) {
printf ("begin add shard %d on %ld points\n", no, n);
}
if (ids) {
index->add_with_ids (i1 - i0, x0, ids + i0);
} else {
index->add (i1 - i0, x0);
}
if (index->verbose) {
printf ("end add shard %d on %ld points\n", no, i1 - i0);
}
};
this->runOnIndex(fn);
// This is safe to do here because the current thread controls execution in
// all threads, and nothing else is happening
this->ntotal += n;
}
template <typename IndexT>
void
IndexShardsTemplate<IndexT>::search(idx_t n,
const component_t *x,
idx_t k,
distance_t *distances,
idx_t *labels) const {
long nshard = this->count();
std::vector<distance_t> all_distances(nshard * k * n);
std::vector<idx_t> all_labels(nshard * k * n);
auto fn =
[n, k, x, &all_distances, &all_labels](int no, const IndexT *index) {
if (index->verbose) {
printf ("begin query shard %d on %ld points\n", no, n);
}
index->search (n, x, k,
all_distances.data() + no * k * n,
all_labels.data() + no * k * n);
if (index->verbose) {
printf ("end query shard %d\n", no);
}
};
this->runOnIndex(fn);
std::vector<long> translations(nshard, 0);
// Because we just called runOnIndex above, it is safe to access the sub-index
// ntotal here
if (successive_ids) {
translations[0] = 0;
for (int s = 0; s + 1 < nshard; s++) {
translations[s + 1] = translations[s] + this->at(s)->ntotal;
}
}
if (this->metric_type == METRIC_L2) {
merge_tables<IndexT, CMin<distance_t, int>>(
n, k, nshard, distances, labels,
all_distances, all_labels, translations);
} else {
merge_tables<IndexT, CMax<distance_t, int>>(
n, k, nshard, distances, labels,
all_distances, all_labels, translations);
}
}
// explicit instantiations
template struct IndexShardsTemplate<Index>;
template struct IndexShardsTemplate<IndexBinary>;
} // namespace faiss
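The heap logic inside merge_tables is a standard k-way merge of per-shard sorted result lists. A standalone sketch of the same idea using std::priority_queue (illustration only, not faiss code; shard contents are made up):

#include <cstdio>
#include <functional>
#include <queue>
#include <tuple>
#include <vector>

int main() {
    // two shards, k = 3, each (distance, id) list sorted; L2: smaller is better
    std::vector<std::vector<std::pair<float, long>>> shard = {
        {{0.1f, 4}, {0.5f, 2}, {0.9f, 0}},
        {{0.2f, 1}, {0.3f, 3}, {0.8f, 5}},
    };
    // min-heap over (distance, shard no, position within shard)
    using Entry = std::tuple<float, int, int>;
    std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> heap;
    for (int s = 0; s < 2; s++) heap.emplace(shard[s][0].first, s, 0);
    for (int j = 0; j < 3; j++) {  // emit the merged top-3
        auto [dist, s, p] = heap.top();
        heap.pop();
        printf("rank %d: id %ld dist %g\n", j, shard[s][p].second, dist);
        // refill from the shard that just produced the best entry
        if (p + 1 < 3) heap.emplace(shard[s][p + 1].first, s, p + 1);
    }
    return 0;
}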

100
core/src/index/thirdparty/faiss/IndexShards.h vendored Normal file
View File

@ -0,0 +1,100 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>
#include <faiss/impl/ThreadedIndex.h>
namespace faiss {
/**
* Index that concatenates the results from several sub-indexes
*/
template <typename IndexT>
struct IndexShardsTemplate : public ThreadedIndex<IndexT> {
using idx_t = typename IndexT::idx_t;
using component_t = typename IndexT::component_t;
using distance_t = typename IndexT::distance_t;
/**
* The dimension that all sub-indices must share will be the dimension of the
* first sub-index added
*
* @param threaded do we use one thread per sub_index or do
* queries sequentially?
* @param successive_ids should we shift the returned ids by
* the size of each sub-index or return them
* as they are?
*/
explicit IndexShardsTemplate(bool threaded = false,
bool successive_ids = true);
/**
* @param threaded do we use one thread per sub_index or do
* queries sequentially?
* @param successive_ids should we shift the returned ids by
* the size of each sub-index or return them
* as they are?
*/
explicit IndexShardsTemplate(idx_t d,
bool threaded = false,
bool successive_ids = true);
/// int version, needed because an int dimension would otherwise be
/// ambiguous between the idx_t and bool overloads
explicit IndexShardsTemplate(int d,
bool threaded = false,
bool successive_ids = true);
/// Alias for addIndex()
void add_shard(IndexT* index) { this->addIndex(index); }
/// Alias for removeIndex()
void remove_shard(IndexT* index) { this->removeIndex(index); }
/// supported only for sub-indices that implement add_with_ids
void add(idx_t n, const component_t* x) override;
/**
* Cases (successive_ids, xids):
* - true, non-NULL ERROR: it makes no sense to pass in ids and
* request them to be shifted
* - true, NULL OK, but should be called only once (calls add()
* on sub-indexes).
* - false, non-NULL OK: will call add_with_ids with passed in xids
* distributed evenly over shards
* - false, NULL OK: will call add_with_ids on each sub-index,
* starting at ntotal
*/
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids) override;
void search(idx_t n, const component_t* x, idx_t k,
distance_t* distances, idx_t* labels) const override;
void train(idx_t n, const component_t* x) override;
// update metric_type and ntotal. Call this if you change something in
// the shard indexes.
void sync_with_shard_indexes();
bool successive_ids;
protected:
/// Called just after an index is added
void onAfterAddIndex(IndexT* index) override;
/// Called just after an index is removed
void onAfterRemoveIndex(IndexT* index) override;
};
using IndexShards = IndexShardsTemplate<Index>;
using IndexBinaryShards = IndexShardsTemplate<IndexBinary>;
} // namespace faiss
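An end-to-end sketch of the semantics documented above, assuming a built libfaiss: two flat shards with successive_ids=true, so add() splits the input and search() returns labels translated into one global id space:

#include <faiss/IndexFlat.h>
#include <faiss/IndexShards.h>
#include <vector>

int main() {
    int d = 8;
    faiss::IndexShards shards(d, /*threaded=*/false, /*successive_ids=*/true);
    faiss::IndexFlatL2 shard0(d), shard1(d);  // not owned by `shards` here
    shards.add_shard(&shard0);
    shards.add_shard(&shard1);
    std::vector<float> xb(100 * d, 0.5f);
    shards.add(100, xb.data());  // single pass; vectors split across shards
    std::vector<float> D(4);
    std::vector<faiss::Index::idx_t> I(4);
    // per-shard results are merged by merge_tables(); shard-local labels
    // are shifted by the cumulative ntotal of the preceding shards
    shards.search(1, xb.data(), 4, D.data(), I.data());
    return 0;
}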

805
core/src/index/thirdparty/faiss/InvertedLists.cpp vendored Normal file
View File

@ -0,0 +1,805 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/InvertedLists.h>
#include <cstdio>
#include <numeric>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include "gpu/utils/DeviceUtils.h"
#include "cuda.h"
#include "cuda_runtime.h"
namespace faiss {
PageLockMemory::PageLockMemory(size_t size) : nbytes(size) {
CUDA_VERIFY(cudaHostAlloc(&data, size, 0));
}
PageLockMemory::~PageLockMemory() {
CUDA_VERIFY(cudaFreeHost((void*)data));
}
PageLockMemory::PageLockMemory(const PageLockMemory& other) {
CUDA_VERIFY(cudaHostAlloc(&data, other.nbytes, 0));
memcpy(data, other.data, other.nbytes);
nbytes = other.nbytes;
}
PageLockMemory::PageLockMemory(PageLockMemory &&other) {
data = other.data;
nbytes = other.nbytes;
other.data = nullptr;
other.nbytes = 0;
}
}
namespace faiss {
using ScopedIds = InvertedLists::ScopedIds;
using ScopedCodes = InvertedLists::ScopedCodes;
/*****************************************
* InvertedLists implementation
******************************************/
InvertedLists::InvertedLists (size_t nlist, size_t code_size):
nlist (nlist), code_size (code_size)
{
}
InvertedLists::~InvertedLists ()
{}
InvertedLists::idx_t InvertedLists::get_single_id (
size_t list_no, size_t offset) const
{
assert (offset < list_size (list_no));
return get_ids(list_no)[offset];
}
void InvertedLists::release_codes (size_t, const uint8_t *) const
{}
void InvertedLists::release_ids (size_t, const idx_t *) const
{}
void InvertedLists::prefetch_lists (const idx_t *, int) const
{}
const uint8_t * InvertedLists::get_single_code (
size_t list_no, size_t offset) const
{
assert (offset < list_size (list_no));
return get_codes(list_no) + offset * code_size;
}
size_t InvertedLists::add_entry (size_t list_no, idx_t theid,
const uint8_t *code)
{
return add_entries (list_no, 1, &theid, code);
}
void InvertedLists::update_entry (size_t list_no, size_t offset,
idx_t id, const uint8_t *code)
{
update_entries (list_no, offset, 1, &id, code);
}
InvertedLists* InvertedLists::to_readonly() {
return nullptr;
}
bool InvertedLists::is_readonly() const {
return false;
}
void InvertedLists::reset () {
for (size_t i = 0; i < nlist; i++) {
resize (i, 0);
}
}
void InvertedLists::merge_from (InvertedLists *oivf, size_t add_id) {
#pragma omp parallel for
for (idx_t i = 0; i < nlist; i++) {
size_t list_size = oivf->list_size (i);
ScopedIds ids (oivf, i);
if (add_id == 0) {
add_entries (i, list_size, ids.get (),
ScopedCodes (oivf, i).get());
} else {
std::vector <idx_t> new_ids (list_size);
for (size_t j = 0; j < list_size; j++) {
new_ids [j] = ids[j] + add_id;
}
add_entries (i, list_size, new_ids.data(),
ScopedCodes (oivf, i).get());
}
oivf->resize (i, 0);
}
}
double InvertedLists::imbalance_factor () const {
std::vector<int> hist(nlist);
for (size_t i = 0; i < nlist; i++) {
hist[i] = list_size(i);
}
return faiss::imbalance_factor(nlist, hist.data());
}
void InvertedLists::print_stats () const {
std::vector<int> sizes(40);
for (size_t i = 0; i < nlist; i++) {
for (size_t j = 0; j < sizes.size(); j++) {
if ((list_size(i) >> j) == 0) {
sizes[j]++;
break;
}
}
}
for (size_t i = 0; i < sizes.size(); i++) {
if (sizes[i]) {
printf("list size in < %d: %d instances\n", 1 << i, sizes[i]);
}
}
}
size_t InvertedLists::compute_ntotal () const {
size_t tot = 0;
for (size_t i = 0; i < nlist; i++) {
tot += list_size(i);
}
return tot;
}
/*****************************************
* ArrayInvertedLists implementation
******************************************/
ArrayInvertedLists::ArrayInvertedLists (size_t nlist, size_t code_size):
InvertedLists (nlist, code_size)
{
ids.resize (nlist);
codes.resize (nlist);
}
size_t ArrayInvertedLists::add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids_in, const uint8_t *code)
{
if (n_entry == 0) return 0;
assert (list_no < nlist);
size_t o = ids [list_no].size();
ids [list_no].resize (o + n_entry);
memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
codes [list_no].resize ((o + n_entry) * code_size);
memcpy (&codes[list_no][o * code_size], code, code_size * n_entry);
return o;
}
size_t ArrayInvertedLists::list_size(size_t list_no) const
{
assert (list_no < nlist);
return ids[list_no].size();
}
const uint8_t * ArrayInvertedLists::get_codes (size_t list_no) const
{
assert (list_no < nlist);
return codes[list_no].data();
}
const InvertedLists::idx_t * ArrayInvertedLists::get_ids (size_t list_no) const
{
assert (list_no < nlist);
return ids[list_no].data();
}
void ArrayInvertedLists::resize (size_t list_no, size_t new_size)
{
ids[list_no].resize (new_size);
codes[list_no].resize (new_size * code_size);
}
void ArrayInvertedLists::update_entries (
size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids_in, const uint8_t *codes_in)
{
assert (list_no < nlist);
assert (n_entry + offset <= ids[list_no].size());
memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
}
InvertedLists* ArrayInvertedLists::to_readonly() {
ReadOnlyArrayInvertedLists* readonly = new ReadOnlyArrayInvertedLists(*this);
return readonly;
}
ArrayInvertedLists::~ArrayInvertedLists ()
{}
/*****************************************************************
* ReadOnlyArrayInvertedLists implementations
*****************************************************************/
ReadOnlyArrayInvertedLists::ReadOnlyArrayInvertedLists(size_t nlist,
size_t code_size, const std::vector<size_t>& list_length)
: InvertedLists (nlist, code_size),
readonly_length(list_length) {
valid = readonly_length.size() == nlist;
if (!valid) {
FAISS_THROW_MSG ("Invalid list_length");
return;
}
readonly_offset.reserve(nlist);
size_t offset = 0;
for (size_t i = 0; i < readonly_length.size(); ++i) {
readonly_offset.emplace_back(offset);
offset += readonly_length[i];
}
}
ReadOnlyArrayInvertedLists::ReadOnlyArrayInvertedLists(const ArrayInvertedLists& other)
: InvertedLists (other.nlist, other.code_size) {
std::vector <uint8_t> readonly_codes;
std::vector <idx_t> readonly_ids;
readonly_length.reserve(nlist);
size_t offset = 0;
for (auto& list_ids : other.ids) {
readonly_length.emplace_back(list_ids.size());
readonly_offset.emplace_back(offset);
offset += list_ids.size();
readonly_ids.insert(readonly_ids.end(), list_ids.begin(), list_ids.end());
}
for(auto& list_codes : other.codes) {
readonly_codes.insert(readonly_codes.end(), list_codes.begin(), list_codes.end());
}
// convert to page-lock memory
{
size_t size = readonly_codes.size() * sizeof(uint8_t);
pin_readonly_codes = std::make_shared<PageLockMemory>(size);
memcpy(pin_readonly_codes->data, readonly_codes.data(), size);
}
{
size_t size = readonly_ids.size() * sizeof(idx_t);
pin_readonly_ids = std::make_shared<PageLockMemory>(size);
memcpy(pin_readonly_ids->data, readonly_ids.data(), size);
}
valid = true;
}
//ReadOnlyArrayInvertedLists::ReadOnlyArrayInvertedLists(const ReadOnlyArrayInvertedLists &other)
// : InvertedLists (other.nlist, other.code_size) {
// readonly_length = other.readonly_length;
// readonly_offset = other.readonly_offset;
// pin_readonly_codes = std::make_shared<PageLockMemory>(*other.pin_readonly_codes);
// pin_readonly_ids = std::make_shared<PageLockMemory>(*other.pin_readonly_ids);
// valid = true;
//}
//ReadOnlyArrayInvertedLists::ReadOnlyArrayInvertedLists(ReadOnlyArrayInvertedLists &&other)
// : InvertedLists (other.nlist, other.code_size) {
// readonly_length = std::move(other.readonly_length);
// readonly_offset = std::move(other.readonly_offset);
// pin_readonly_codes = other.pin_readonly_codes;
// pin_readonly_ids = other.pin_readonly_ids;
//
// other.pin_readonly_codes = nullptr;
// other.pin_readonly_ids = nullptr;
// valid = true;
//}
ReadOnlyArrayInvertedLists::~ReadOnlyArrayInvertedLists() {
}
bool
ReadOnlyArrayInvertedLists::is_valid() {
return valid;
}
size_t ReadOnlyArrayInvertedLists::add_entries (
size_t , size_t ,
const idx_t* , const uint8_t *)
{
FAISS_THROW_MSG ("not implemented");
}
void ReadOnlyArrayInvertedLists::update_entries (size_t, size_t , size_t ,
const idx_t *, const uint8_t *)
{
FAISS_THROW_MSG ("not implemented");
}
void ReadOnlyArrayInvertedLists::resize (size_t , size_t )
{
FAISS_THROW_MSG ("not implemented");
}
size_t ReadOnlyArrayInvertedLists::list_size(size_t list_no) const
{
FAISS_ASSERT(list_no < nlist && valid);
return readonly_length[list_no];
}
const uint8_t * ReadOnlyArrayInvertedLists::get_codes (size_t list_no) const
{
FAISS_ASSERT(list_no < nlist && valid);
uint8_t *pcodes = (uint8_t *)(pin_readonly_codes->data);
return pcodes + readonly_offset[list_no] * code_size;
}
const InvertedLists::idx_t* ReadOnlyArrayInvertedLists::get_ids (size_t list_no) const
{
FAISS_ASSERT(list_no < nlist && valid);
idx_t *pids = (idx_t *)pin_readonly_ids->data;
return pids + readonly_offset[list_no];
}
const InvertedLists::idx_t* ReadOnlyArrayInvertedLists::get_all_ids() const {
FAISS_ASSERT(valid);
return (idx_t *)(pin_readonly_ids->data);
}
const uint8_t* ReadOnlyArrayInvertedLists::get_all_codes() const {
FAISS_ASSERT(valid);
return (uint8_t *)(pin_readonly_codes->data);
}
const std::vector<size_t>& ReadOnlyArrayInvertedLists::get_list_length() const {
FAISS_ASSERT(valid);
return readonly_length;
}
bool ReadOnlyArrayInvertedLists::is_readonly() const {
FAISS_ASSERT(valid);
return true;
}
/*****************************************************************
* Meta-inverted list implementations
*****************************************************************/
size_t ReadOnlyInvertedLists::add_entries (
size_t , size_t ,
const idx_t* , const uint8_t *)
{
FAISS_THROW_MSG ("not implemented");
}
void ReadOnlyInvertedLists::update_entries (size_t, size_t , size_t ,
const idx_t *, const uint8_t *)
{
FAISS_THROW_MSG ("not implemented");
}
void ReadOnlyInvertedLists::resize (size_t , size_t )
{
FAISS_THROW_MSG ("not implemented");
}
/*****************************************
* HStackInvertedLists implementation
******************************************/
HStackInvertedLists::HStackInvertedLists (
int nil, const InvertedLists **ils_in):
ReadOnlyInvertedLists (nil > 0 ? ils_in[0]->nlist : 0,
nil > 0 ? ils_in[0]->code_size : 0)
{
FAISS_THROW_IF_NOT (nil > 0);
for (int i = 0; i < nil; i++) {
ils.push_back (ils_in[i]);
FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size &&
ils_in[i]->nlist == nlist);
}
}
size_t HStackInvertedLists::list_size(size_t list_no) const
{
size_t sz = 0;
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
sz += il->list_size (list_no);
}
return sz;
}
const uint8_t * HStackInvertedLists::get_codes (size_t list_no) const
{
uint8_t *codes = new uint8_t [code_size * list_size(list_no)], *c = codes;
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
size_t sz = il->list_size(list_no) * code_size;
if (sz > 0) {
memcpy (c, ScopedCodes (il, list_no).get(), sz);
c += sz;
}
}
return codes;
}
const uint8_t * HStackInvertedLists::get_single_code (
size_t list_no, size_t offset) const
{
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
size_t sz = il->list_size (list_no);
if (offset < sz) {
// here we have to copy the code, otherwise it will crash at dealloc
uint8_t * code = new uint8_t [code_size];
memcpy (code, ScopedCodes (il, list_no, offset).get(), code_size);
return code;
}
offset -= sz;
}
FAISS_THROW_FMT ("offset %ld unknown", offset);
}
void HStackInvertedLists::release_codes (size_t, const uint8_t *codes) const {
delete [] codes;
}
const Index::idx_t * HStackInvertedLists::get_ids (size_t list_no) const
{
idx_t *ids = new idx_t [list_size(list_no)], *c = ids;
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
size_t sz = il->list_size(list_no);
if (sz > 0) {
memcpy (c, ScopedIds (il, list_no).get(), sz * sizeof(idx_t));
c += sz;
}
}
return ids;
}
Index::idx_t HStackInvertedLists::get_single_id (
size_t list_no, size_t offset) const
{
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
size_t sz = il->list_size (list_no);
if (offset < sz) {
return il->get_single_id (list_no, offset);
}
offset -= sz;
}
FAISS_THROW_FMT ("offset %ld unknown", offset);
}
void HStackInvertedLists::release_ids (size_t, const idx_t *ids) const {
delete [] ids;
}
void HStackInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
{
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
il->prefetch_lists (list_nos, nlist);
}
}
/*****************************************
* SliceInvertedLists implementation
******************************************/
namespace {
using idx_t = InvertedLists::idx_t;
idx_t translate_list_no (const SliceInvertedLists *sil,
idx_t list_no) {
FAISS_THROW_IF_NOT (list_no >= 0 && list_no < sil->nlist);
return list_no + sil->i0;
}
} // anonymous namespace
SliceInvertedLists::SliceInvertedLists (
const InvertedLists *il, idx_t i0, idx_t i1):
ReadOnlyInvertedLists (i1 - i0, il->code_size),
il (il), i0(i0), i1(i1)
{
}
size_t SliceInvertedLists::list_size(size_t list_no) const
{
return il->list_size (translate_list_no (this, list_no));
}
const uint8_t * SliceInvertedLists::get_codes (size_t list_no) const
{
return il->get_codes (translate_list_no (this, list_no));
}
const uint8_t * SliceInvertedLists::get_single_code (
size_t list_no, size_t offset) const
{
return il->get_single_code (translate_list_no (this, list_no), offset);
}
void SliceInvertedLists::release_codes (
size_t list_no, const uint8_t *codes) const {
return il->release_codes (translate_list_no (this, list_no), codes);
}
const Index::idx_t * SliceInvertedLists::get_ids (size_t list_no) const
{
return il->get_ids (translate_list_no (this, list_no));
}
Index::idx_t SliceInvertedLists::get_single_id (
size_t list_no, size_t offset) const
{
return il->get_single_id (translate_list_no (this, list_no), offset);
}
void SliceInvertedLists::release_ids (size_t list_no, const idx_t *ids) const {
return il->release_ids (translate_list_no (this, list_no), ids);
}
void SliceInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
{
std::vector<idx_t> translated_list_nos;
for (int j = 0; j < nlist; j++) {
idx_t list_no = list_nos[j];
if (list_no < 0) continue;
translated_list_nos.push_back (translate_list_no (this, list_no));
}
il->prefetch_lists (translated_list_nos.data(),
translated_list_nos.size());
}
/*****************************************
* VStackInvertedLists implementation
******************************************/
namespace {
using idx_t = InvertedLists::idx_t;
// find the invlist this number belongs to
int translate_list_no (const VStackInvertedLists *vil,
idx_t list_no) {
FAISS_THROW_IF_NOT (list_no >= 0 && list_no < vil->nlist);
int i0 = 0, i1 = vil->ils.size();
const idx_t *cumsz = vil->cumsz.data();
while (i0 + 1 < i1) {
int imed = (i0 + i1) / 2;
if (list_no >= cumsz[imed]) {
i0 = imed;
} else {
i1 = imed;
}
}
assert(list_no >= cumsz[i0] && list_no < cumsz[i0 + 1]);
return i0;
}
idx_t sum_il_sizes (int nil, const InvertedLists **ils_in) {
idx_t tot = 0;
for (int i = 0; i < nil; i++) {
tot += ils_in[i]->nlist;
}
return tot;
}
} // anonymous namespace
VStackInvertedLists::VStackInvertedLists (
int nil, const InvertedLists **ils_in):
ReadOnlyInvertedLists (sum_il_sizes(nil, ils_in),
nil > 0 ? ils_in[0]->code_size : 0)
{
FAISS_THROW_IF_NOT (nil > 0);
cumsz.resize (nil + 1);
for (int i = 0; i < nil; i++) {
ils.push_back (ils_in[i]);
FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size);
cumsz[i + 1] = cumsz[i] + ils_in[i]->nlist;
}
}
size_t VStackInvertedLists::list_size(size_t list_no) const
{
int i = translate_list_no (this, list_no);
list_no -= cumsz[i];
return ils[i]->list_size (list_no);
}
const uint8_t * VStackInvertedLists::get_codes (size_t list_no) const
{
int i = translate_list_no (this, list_no);
list_no -= cumsz[i];
return ils[i]->get_codes (list_no);
}
const uint8_t * VStackInvertedLists::get_single_code (
size_t list_no, size_t offset) const
{
int i = translate_list_no (this, list_no);
list_no -= cumsz[i];
return ils[i]->get_single_code (list_no, offset);
}
void VStackInvertedLists::release_codes (
size_t list_no, const uint8_t *codes) const {
int i = translate_list_no (this, list_no);
list_no -= cumsz[i];
return ils[i]->release_codes (list_no, codes);
}
const Index::idx_t * VStackInvertedLists::get_ids (size_t list_no) const
{
int i = translate_list_no (this, list_no);
list_no -= cumsz[i];
return ils[i]->get_ids (list_no);
}
Index::idx_t VStackInvertedLists::get_single_id (
size_t list_no, size_t offset) const
{
int i = translate_list_no (this, list_no);
list_no -= cumsz[i];
return ils[i]->get_single_id (list_no, offset);
}
void VStackInvertedLists::release_ids (size_t list_no, const idx_t *ids) const {
int i = translate_list_no (this, list_no);
list_no -= cumsz[i];
return ils[i]->release_ids (list_no, ids);
}
void VStackInvertedLists::prefetch_lists (
const idx_t *list_nos, int nlist) const
{
std::vector<int> ilno (nlist, -1);
std::vector<int> n_per_il (ils.size(), 0);
for (int j = 0; j < nlist; j++) {
idx_t list_no = list_nos[j];
if (list_no < 0) continue;
int i = ilno[j] = translate_list_no (this, list_no);
n_per_il[i]++;
}
std::vector<int> cum_n_per_il (ils.size() + 1, 0);
for (int j = 0; j < ils.size(); j++) {
cum_n_per_il[j + 1] = cum_n_per_il[j] + n_per_il[j];
}
std::vector<idx_t> sorted_list_nos (cum_n_per_il.back());
for (int j = 0; j < nlist; j++) {
idx_t list_no = list_nos[j];
if (list_no < 0) continue;
int i = ilno[j];
list_no -= cumsz[i];
sorted_list_nos[cum_n_per_il[i]++] = list_no;
}
int i0 = 0;
for (int j = 0; j < ils.size(); j++) {
int i1 = i0 + n_per_il[j];
if (i1 > i0) {
ils[j]->prefetch_lists (sorted_list_nos.data() + i0,
i1 - i0);
}
i0 = i1;
}
}
/*****************************************
* MaskedInvertedLists implementation
******************************************/
MaskedInvertedLists::MaskedInvertedLists (const InvertedLists *il0,
const InvertedLists *il1):
ReadOnlyInvertedLists (il0->nlist, il0->code_size),
il0 (il0), il1 (il1)
{
FAISS_THROW_IF_NOT (il1->nlist == nlist);
FAISS_THROW_IF_NOT (il1->code_size == code_size);
}
size_t MaskedInvertedLists::list_size(size_t list_no) const
{
size_t sz = il0->list_size(list_no);
return sz ? sz : il1->list_size(list_no);
}
const uint8_t * MaskedInvertedLists::get_codes (size_t list_no) const
{
size_t sz = il0->list_size(list_no);
return (sz ? il0 : il1)->get_codes(list_no);
}
const idx_t * MaskedInvertedLists::get_ids (size_t list_no) const
{
size_t sz = il0->list_size (list_no);
return (sz ? il0 : il1)->get_ids (list_no);
}
void MaskedInvertedLists::release_codes (
size_t list_no, const uint8_t *codes) const
{
size_t sz = il0->list_size (list_no);
(sz ? il0 : il1)->release_codes (list_no, codes);
}
void MaskedInvertedLists::release_ids (size_t list_no, const idx_t *ids) const
{
size_t sz = il0->list_size (list_no);
(sz ? il0 : il1)->release_ids (list_no, ids);
}
idx_t MaskedInvertedLists::get_single_id (size_t list_no, size_t offset) const
{
size_t sz = il0->list_size (list_no);
return (sz ? il0 : il1)->get_single_id (list_no, offset);
}
const uint8_t * MaskedInvertedLists::get_single_code (
size_t list_no, size_t offset) const
{
size_t sz = il0->list_size (list_no);
return (sz ? il0 : il1)->get_single_code (list_no, offset);
}
void MaskedInvertedLists::prefetch_lists (
const idx_t *list_nos, int nlist) const
{
std::vector<idx_t> list0, list1;
for (int i = 0; i < nlist; i++) {
idx_t list_no = list_nos[i];
if (list_no < 0) continue;
size_t sz = il0->list_size(list_no);
(sz ? list0 : list1).push_back (list_no);
}
il0->prefetch_lists (list0.data(), list0.size());
il1->prefetch_lists (list1.data(), list1.size());
}
} // namespace faiss
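A short sketch of the invlist API implemented above, assuming the GPU-enabled build this vendored copy targets (to_readonly() pins memory via cudaHostAlloc): entries go into an ArrayInvertedLists, are read back through the Scoped* RAII wrappers, then the lists are frozen into the read-only form:

#include <faiss/InvertedLists.h>
#include <cstdio>

int main() {
    size_t nlist = 4, code_size = 8;
    faiss::ArrayInvertedLists invlists(nlist, code_size);
    faiss::InvertedLists::idx_t id = 42;
    uint8_t code[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    invlists.add_entry(/*list_no=*/1, id, code);
    {
        // release_ids() runs automatically when `ids` leaves scope
        faiss::InvertedLists::ScopedIds ids(&invlists, 1);
        printf("list 1: %zu entries, first id %lld\n",
               invlists.list_size(1), (long long)ids[0]);
    }
    // freeze into the page-locked, read-only representation
    faiss::InvertedLists* ro = invlists.to_readonly();
    printf("readonly: %d\n", (int)ro->is_readonly());
    delete ro;
    return 0;
}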

402
core/src/index/thirdparty/faiss/InvertedLists.h vendored Normal file
View File

@ -0,0 +1,402 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INVERTEDLISTS_IVF_H
#define FAISS_INVERTEDLISTS_IVF_H
/**
* Definition of inverted lists + a few common classes that implement
* the interface.
*/
#include <memory>
#include <vector>
#include <faiss/Index.h>
namespace faiss {
struct PageLockMemory {
public:
PageLockMemory() : data(nullptr), nbytes(0) {}
PageLockMemory(size_t size);
~PageLockMemory();
PageLockMemory(const PageLockMemory& other);
PageLockMemory(PageLockMemory &&other);
inline size_t size() {
return nbytes;
}
void *data;
size_t nbytes;
};
using PageLockMemoryPtr = std::shared_ptr<PageLockMemory>;
}
namespace faiss {
/** Table of inverted lists
* multithreading rules:
* - concurrent read accesses are allowed
* - concurrent update accesses are allowed
* - for resize and add_entries, only concurrent access to different lists
* are allowed
*/
struct InvertedLists {
typedef Index::idx_t idx_t;
size_t nlist; ///< number of possible key values
size_t code_size; ///< code size per vector in bytes
InvertedLists (size_t nlist, size_t code_size);
/*************************
* Read only functions */
/// get the size of a list
virtual size_t list_size(size_t list_no) const = 0;
/** get the codes for an inverted list
* must be released by release_codes
*
* @return codes size list_size * code_size
*/
virtual const uint8_t * get_codes (size_t list_no) const = 0;
/** get the ids for an inverted list
* must be released by release_ids
*
* @return ids size list_size
*/
virtual const idx_t * get_ids (size_t list_no) const = 0;
/// release codes returned by get_codes (default implementation is a no-op)
virtual void release_codes (size_t list_no, const uint8_t *codes) const;
/// release ids returned by get_ids
virtual void release_ids (size_t list_no, const idx_t *ids) const;
/// @return a single id in an inverted list
virtual idx_t get_single_id (size_t list_no, size_t offset) const;
/// @return a single code in an inverted list
/// (should be deallocated with release_codes)
virtual const uint8_t * get_single_code (
size_t list_no, size_t offset) const;
/// prepare the following lists (default does nothing)
/// a list can be -1 hence the signed long
virtual void prefetch_lists (const idx_t *list_nos, int nlist) const;
/*************************
* writing functions */
/// add one entry to an inverted list
virtual size_t add_entry (size_t list_no, idx_t theid,
const uint8_t *code);
virtual size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) = 0;
virtual void update_entry (size_t list_no, size_t offset,
idx_t id, const uint8_t *code);
virtual void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) = 0;
virtual void resize (size_t list_no, size_t new_size) = 0;
virtual void reset ();
virtual InvertedLists* to_readonly();
virtual bool is_readonly() const;
/// move all entries from oivf (empty on output)
void merge_from (InvertedLists *oivf, size_t add_id);
virtual ~InvertedLists ();
/*************************
* statistics */
/// 1= perfectly balanced, >1: imbalanced
double imbalance_factor () const;
/// display some stats about the inverted lists
void print_stats () const;
/// sum up list sizes
size_t compute_ntotal () const;
/**************************************
* Scoped inverted lists (for automatic deallocation)
*
* instead of writing:
*
* uint8_t * codes = invlists->get_codes (10);
* ... use codes
* invlists->release_codes(10, codes)
*
* write:
*
* ScopedCodes codes (invlists, 10);
* ... use codes.get()
* // release called automatically when codes goes out of scope
*
* the following function call also works:
*
* foo (123, ScopedCodes (invlists, 10).get(), 456);
*
*/
struct ScopedIds {
const InvertedLists *il;
const idx_t *ids;
size_t list_no;
ScopedIds (const InvertedLists *il, size_t list_no):
il (il), ids (il->get_ids (list_no)), list_no (list_no)
{}
const idx_t *get() {return ids; }
idx_t operator [] (size_t i) const {
return ids[i];
}
~ScopedIds () {
il->release_ids (list_no, ids);
}
};
struct ScopedCodes {
const InvertedLists *il;
const uint8_t *codes;
size_t list_no;
ScopedCodes (const InvertedLists *il, size_t list_no):
il (il), codes (il->get_codes (list_no)), list_no (list_no)
{}
ScopedCodes (const InvertedLists *il, size_t list_no, size_t offset):
il (il), codes (il->get_single_code (list_no, offset)),
list_no (list_no)
{}
const uint8_t *get() {return codes; }
~ScopedCodes () {
il->release_codes (list_no, codes);
}
};
};
/// simple (default) implementation as an array of inverted lists
struct ArrayInvertedLists: InvertedLists {
std::vector < std::vector<uint8_t> > codes; // binary codes, size nlist
std::vector < std::vector<idx_t> > ids; ///< Inverted lists for indexes
ArrayInvertedLists (size_t nlist, size_t code_size);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
InvertedLists* to_readonly() override;
virtual ~ArrayInvertedLists ();
};
struct ReadOnlyArrayInvertedLists: InvertedLists {
PageLockMemoryPtr pin_readonly_codes;
PageLockMemoryPtr pin_readonly_ids;
// std::vector <uint8_t> readonly_codes;
// std::vector <idx_t> readonly_ids;
std::vector <size_t> readonly_length;
std::vector <size_t> readonly_offset;
bool valid;
ReadOnlyArrayInvertedLists(size_t nlist, size_t code_size, const std::vector<size_t>& list_length);
explicit ReadOnlyArrayInvertedLists(const ArrayInvertedLists& other);
// Use the default copy constructor: it copies the shared_ptr members and
// DOES NOT deep-copy the pin_readonly_codes / pin_readonly_ids buffers
// explicit ReadOnlyArrayInvertedLists(const ReadOnlyArrayInvertedLists &);
// explicit ReadOnlyArrayInvertedLists(ReadOnlyArrayInvertedLists &&);
virtual ~ReadOnlyArrayInvertedLists();
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
const uint8_t * get_all_codes() const;
const idx_t * get_all_ids() const;
const std::vector<size_t>& get_list_length() const;
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
bool is_readonly() const override;
bool is_valid();
};
/*****************************************************************
* Meta-inverted lists
*
* About terminology: the inverted lists are seen as a sparse matrix
* that can be stacked horizontally or vertically, and sliced.
*****************************************************************/
struct ReadOnlyInvertedLists: InvertedLists {
ReadOnlyInvertedLists (size_t nlist, size_t code_size):
InvertedLists (nlist, code_size) {}
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
};
/// Horizontal stack of inverted lists
struct HStackInvertedLists: ReadOnlyInvertedLists {
std::vector<const InvertedLists *>ils;
/// build InvertedLists by concatenating nil of them
HStackInvertedLists (int nil, const InvertedLists **ils);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
void release_codes (size_t list_no, const uint8_t *codes) const override;
void release_ids (size_t list_no, const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
};
using ConcatenatedInvertedLists = HStackInvertedLists;
/// vertical slice of indexes in another InvertedLists
struct SliceInvertedLists: ReadOnlyInvertedLists {
const InvertedLists *il;
idx_t i0, i1;
SliceInvertedLists(const InvertedLists *il, idx_t i0, idx_t i1);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void release_codes (size_t list_no, const uint8_t *codes) const override;
void release_ids (size_t list_no, const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
};
struct VStackInvertedLists: ReadOnlyInvertedLists {
std::vector<const InvertedLists *>ils;
std::vector<idx_t> cumsz;
/// build InvertedLists by concatenating nil of them
VStackInvertedLists (int nil, const InvertedLists **ils);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void release_codes (size_t list_no, const uint8_t *codes) const override;
void release_ids (size_t list_no, const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
};
/** use the first inverted lists if they are non-empty otherwise use the second
*
* This is useful if il1 has a few inverted lists that are too long,
* and that il0 has replacement lists for those, with empty lists for
* the others. */
struct MaskedInvertedLists: ReadOnlyInvertedLists {
const InvertedLists *il0;
const InvertedLists *il1;
MaskedInvertedLists (const InvertedLists *il0,
const InvertedLists *il1);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void release_codes (size_t list_no, const uint8_t *codes) const override;
void release_ids (size_t list_no, const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
};
} // namespace faiss
#endif
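The meta-lists above are read-only views over other lists. A toy sketch, assuming a built libfaiss, that stacks two ArrayInvertedLists horizontally so each logical list is the concatenation of the per-source lists:

#include <faiss/InvertedLists.h>
#include <cstdio>

int main() {
    size_t nlist = 2, code_size = 4;
    faiss::ArrayInvertedLists a(nlist, code_size), b(nlist, code_size);
    uint8_t code[4] = {1, 2, 3, 4};
    faiss::InvertedLists::idx_t ida = 7, idb = 8;
    a.add_entry(0, ida, code);
    b.add_entry(0, idb, code);
    const faiss::InvertedLists* sources[] = {&a, &b};
    faiss::HStackInvertedLists stacked(2, sources);
    // list 0 of the view concatenates list 0 of `a` and list 0 of `b`
    printf("stacked list 0 size: %zu\n", stacked.list_size(0));   // 2
    printf("second id: %lld\n",
           (long long)stacked.get_single_id(0, 1));               // 8
    return 0;
}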

21
core/src/index/thirdparty/faiss/LICENSE vendored Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) Facebook, Inc. and its affiliates.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

113
core/src/index/thirdparty/faiss/Makefile vendored Normal file
View File

@ -0,0 +1,113 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-include makefile.inc
HEADERS = $(wildcard *.h impl/*.h utils/*.h)
SRC = $(wildcard *.cpp impl/*.cpp utils/*.cpp)
OBJ = $(SRC:.cpp=.o)
INSTALLDIRS = $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss
GPU_HEADERS = $(wildcard gpu/*.h gpu/impl/*.h gpu/utils/*.h)
GPU_CPPSRC = $(wildcard gpu/*.cpp gpu/impl/*.cpp gpu/utils/*.cpp)
GPU_CUSRC = $(wildcard gpu/*.cu gpu/impl/*.cu gpu/utils/*.cu \
gpu/utils/nvidia/*.cu gpu/utils/blockselect/*.cu gpu/utils/warpselect/*.cu)
GPU_SRC = $(GPU_CPPSRC) $(GPU_CUSRC)
GPU_CPPOBJ = $(GPU_CPPSRC:.cpp=.o)
GPU_CUOBJ = $(GPU_CUSRC:.cu=.o)
GPU_OBJ = $(GPU_CPPOBJ) $(GPU_CUOBJ)
ifneq ($(strip $(NVCC)),)
OBJ += $(GPU_OBJ)
HEADERS += $(GPU_HEADERS)
endif
CPPFLAGS += -I.
NVCCFLAGS += -I.
############################
# Building
all: libfaiss.a libfaiss.$(SHAREDEXT)
libfaiss.a: $(OBJ)
$(AR) r $@ $^
libfaiss.$(SHAREDEXT): $(OBJ)
$(CXX) $(SHAREDFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
%.o: %.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -c $< -o $@
%.o: %.cu
$(NVCC) $(NVCCFLAGS) -c $< -o $@
clean:
rm -f libfaiss.a libfaiss.$(SHAREDEXT)
rm -f $(OBJ)
############################
# Installing
install: libfaiss.a libfaiss.$(SHAREDEXT) installdirs
cp libfaiss.a libfaiss.$(SHAREDEXT) $(DESTDIR)$(libdir)
tar cf - $(HEADERS) | tar xf - -C $(DESTDIR)$(includedir)/faiss/
installdirs:
$(MKDIR_P) $(INSTALLDIRS)
uninstall:
rm -f $(DESTDIR)$(libdir)/libfaiss.a \
$(DESTDIR)$(libdir)/libfaiss.$(SHAREDEXT)
rm -rf $(DESTDIR)$(includedir)/faiss
#############################
# Dependencies
-include depend
depend: $(SRC) $(GPU_SRC)
for i in $^; do \
$(CXXCPP) $(CPPFLAGS) -DCUDA_VERSION=7050 -x c++ -MM $$i; \
done > depend
#############################
# Python
py: libfaiss.a
$(MAKE) -C python
#############################
# Tests
test: libfaiss.a py
$(MAKE) -C tests run
PYTHONPATH=./python/build/`ls python/build | grep lib` \
$(PYTHON) -m unittest discover tests/ -v
test_gpu: libfaiss.a
$(MAKE) -C gpu/test run
PYTHONPATH=./python/build/`ls python/build | grep lib` \
$(PYTHON) -m unittest discover gpu/test/ -v
#############################
# Demos
demos: libfaiss.a
$(MAKE) -C demos
#############################
# Misc
misc/test_blas: misc/test_blas.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
.PHONY: all clean demos install installdirs py test test_gpu uninstall

252
core/src/index/thirdparty/faiss/MatrixStats.cpp vendored Normal file
View File

@ -0,0 +1,252 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/MatrixStats.h>
#include <stdarg.h> /* va_list, va_start, va_arg, va_end */
#include <cmath>
#include <cstdio>
#include <faiss/utils/utils.h>
namespace faiss {
/*********************************************************************
* MatrixStats
*********************************************************************/
MatrixStats::PerDimStats::PerDimStats():
n(0), n_nan(0), n_inf(0), n0(0),
min(HUGE_VALF), max(-HUGE_VALF),
sum(0), sum2(0),
mean(NAN), stddev(NAN)
{}
void MatrixStats::PerDimStats::add (float x)
{
n++;
if (std::isnan(x)) {
n_nan++;
return;
}
if (!std::isfinite(x)) {
n_inf++;
return;
}
if (x == 0) n0++;
if (x < min) min = x;
if (x > max) max = x;
sum += x;
sum2 += (double)x * (double)x;
}
void MatrixStats::PerDimStats::compute_mean_std ()
{
n_valid = n - n_nan - n_inf;
mean = sum / n_valid;
double var = sum2 / n_valid - mean * mean;
if (var < 0) var = 0;
stddev = sqrt(var);
}
void MatrixStats::do_comment (const char *fmt, ...)
{
va_list ap;
/* Determine required size */
va_start(ap, fmt);
size_t size = vsnprintf(buf, nbuf, fmt, ap);
va_end(ap);
nbuf -= size;
buf += size;
}
MatrixStats::MatrixStats (size_t n, size_t d, const float *x):
n(n), d(d),
n_collision(0), n_valid(0), n0(0),
min_norm2(HUGE_VAL), max_norm2(0)
{
std::vector<char> comment_buf (10000);
buf = comment_buf.data ();
nbuf = comment_buf.size();
do_comment ("analyzing %ld vectors of size %ld\n", n, d);
if (d > 1024) {
do_comment (
"indexing this many dimensions is hard, "
"please consider dimensionality reducution (with PCAMatrix)\n");
}
size_t nbytes = sizeof (x[0]) * d;
per_dim_stats.resize (d);
for (size_t i = 0; i < n; i++) {
const float *xi = x + d * i;
double sum2 = 0;
for (size_t j = 0; j < d; j++) {
per_dim_stats[j].add (xi[j]);
sum2 += xi[j] * (double)xi[j];
}
if (std::isfinite (sum2)) {
n_valid++;
if (sum2 == 0) {
n0 ++;
} else {
if (sum2 < min_norm2) min_norm2 = sum2;
if (sum2 > max_norm2) max_norm2 = sum2;
}
}
{ // check hash
uint64_t hash = hash_bytes((const uint8_t*)xi, nbytes);
auto elt = occurrences.find (hash);
if (elt == occurrences.end()) {
Occurrence occ = {i, 1};
occurrences[hash] = occ;
} else {
if (!memcmp (xi, x + elt->second.first * d, nbytes)) {
elt->second.count ++;
} else {
n_collision ++;
// we should use a list of collisions but overkill
}
}
}
}
// invalid vector stats
if (n_valid == n) {
do_comment ("no NaN or Infs in data\n");
} else {
do_comment ("%ld vectors contain NaN or Inf "
"(or have too large components), "
"expect bad results with indexing!\n", n - n_valid);
}
// copies in dataset
if (occurrences.size() == n) {
do_comment ("all vectors are distinct\n");
} else {
do_comment ("%ld vectors are distinct (%.2f%%)\n",
occurrences.size(),
occurrences.size() * 100.0 / n);
if (n_collision > 0) {
do_comment ("%ld collisions in hash table, "
"counts may be invalid\n", n_collision);
}
Occurrence max = {0, 0};
for (auto it = occurrences.begin();
it != occurrences.end(); ++it) {
if (it->second.count > max.count) {
max = it->second;
}
}
do_comment ("vector %ld has %ld copies\n", max.first, max.count);
}
{ // norm stats
min_norm2 = sqrt (min_norm2);
max_norm2 = sqrt (max_norm2);
do_comment ("range of L2 norms=[%g, %g] (%ld null vectors)\n",
min_norm2, max_norm2, n0);
if (max_norm2 < min_norm2 * 1.0001) {
do_comment ("vectors are normalized, inner product and "
"L2 search are equivalent\n");
}
if (max_norm2 > min_norm2 * 100) {
do_comment ("vectors have very large differences in norms, "
"is this normal?\n");
}
}
{ // per dimension stats
double max_std = 0, min_std = HUGE_VAL;
size_t n_dangerous_range = 0, n_0_range = 0, n0 = 0;
for (size_t j = 0; j < d; j++) {
PerDimStats &st = per_dim_stats[j];
st.compute_mean_std ();
n0 += st.n0;
if (st.max == st.min) {
n_0_range ++;
} else if (st.max < 1.001 * st.min) {
n_dangerous_range ++;
}
if (st.stddev > max_std) max_std = st.stddev;
if (st.stddev < min_std) min_std = st.stddev;
}
if (n0 == 0) {
do_comment ("matrix contains no 0s\n");
} else {
do_comment ("matrix contains %.2f %% 0 entries\n",
n0 * 100.0 / (n * d));
}
if (n_0_range == 0) {
do_comment ("no constant dimensions\n");
} else {
do_comment ("%ld dimensions are constant: they can be removed\n",
n_0_range);
}
if (n_dangerous_range == 0) {
do_comment ("no dimension has a too large mean\n");
} else {
do_comment ("%ld dimensions are too large "
"wrt. their variance, may loose precision "
"in IndexFlatL2 (use CenteringTransform)\n",
n_dangerous_range);
}
do_comment ("stddevs per dimension are in [%g %g]\n", min_std, max_std);
size_t n_small_var = 0;
for (size_t j = 0; j < d; j++) {
const PerDimStats &st = per_dim_stats[j];
if (st.stddev < max_std * 1e-4) {
n_small_var++;
}
}
if (n_small_var > 0) {
do_comment ("%ld dimensions have negligible stddev wrt. "
"the largest dimension, they could be ignored",
n_small_var);
}
}
comments = comment_buf.data ();
buf = nullptr;
nbuf = 0;
}
} // namespace faiss

62
core/src/index/thirdparty/faiss/MatrixStats.h vendored Normal file
View File

@ -0,0 +1,62 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <string>
#include <unordered_map>
#include <stdint.h>
namespace faiss {
/** Reports some statistics on a dataset and comments on them.
*
* It is a class rather than a function so that all stats can also be
* accessed from code */
struct MatrixStats {
MatrixStats (size_t n, size_t d, const float *x);
std::string comments;
// raw statistics
size_t n, d;
size_t n_collision, n_valid, n0;
double min_norm2, max_norm2;
struct PerDimStats {
size_t n, n_nan, n_inf, n0;
float min, max;
double sum, sum2;
size_t n_valid;
double mean, stddev;
PerDimStats();
void add (float x);
void compute_mean_std ();
};
std::vector<PerDimStats> per_dim_stats;
struct Occurrence {
size_t first;
size_t count;
};
std::unordered_map<uint64_t, Occurrence> occurrences;
char *buf;
size_t nbuf;
void do_comment (const char *fmt, ...);
};
} // namespace faiss
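A minimal sketch of how these stats are consumed, assuming a built libfaiss: construct MatrixStats over the raw matrix and print the generated comments:

#include <faiss/MatrixStats.h>
#include <cstdio>
#include <random>
#include <vector>

int main() {
    size_t n = 1000, d = 32;
    std::vector<float> x(n * d);
    std::mt19937 rng(0);
    std::normal_distribution<float> dist(0.f, 1.f);
    for (auto& v : x) v = dist(rng);
    faiss::MatrixStats stats(n, d, x.data());  // analysis runs in the ctor
    printf("%s", stats.comments.c_str());      // human-readable report
    return 0;
}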

351
core/src/index/thirdparty/faiss/MetaIndexes.cpp vendored Normal file
View File

@ -0,0 +1,351 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/MetaIndexes.h>
#include <cstdio>
#include <stdint.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/Heap.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/utils/WorkerThread.h>
namespace faiss {
namespace {
typedef Index::idx_t idx_t;
} // namespace
/*****************************************************
* IndexIDMap implementation
*******************************************************/
template <typename IndexT>
IndexIDMapTemplate<IndexT>::IndexIDMapTemplate (IndexT *index):
index (index),
own_fields (false)
{
FAISS_THROW_IF_NOT_MSG (index->ntotal == 0, "index must be empty on input");
this->is_trained = index->is_trained;
this->metric_type = index->metric_type;
this->verbose = index->verbose;
this->d = index->d;
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::add
(idx_t, const typename IndexT::component_t *)
{
FAISS_THROW_MSG ("add does not make sense with IndexIDMap, "
"use add_with_ids");
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::train
(idx_t n, const typename IndexT::component_t *x)
{
index->train (n, x);
this->is_trained = index->is_trained;
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::reset ()
{
index->reset ();
id_map.clear();
this->ntotal = 0;
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::add_with_ids
(idx_t n, const typename IndexT::component_t * x,
const typename IndexT::idx_t *xids)
{
index->add (n, x);
for (idx_t i = 0; i < n; i++)
id_map.push_back (xids[i]);
this->ntotal = index->ntotal;
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::search
(idx_t n, const typename IndexT::component_t *x, idx_t k,
typename IndexT::distance_t *distances, typename IndexT::idx_t *labels) const
{
index->search (n, x, k, distances, labels);
idx_t *li = labels;
#pragma omp parallel for
for (idx_t i = 0; i < n * k; i++) {
li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
}
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::range_search
(typename IndexT::idx_t n, const typename IndexT::component_t *x,
typename IndexT::distance_t radius, RangeSearchResult *result) const
{
index->range_search(n, x, radius, result);
#pragma omp parallel for
for (idx_t i = 0; i < result->lims[result->nq]; i++) {
result->labels[i] = result->labels[i] < 0 ?
result->labels[i] : id_map[result->labels[i]];
}
}
namespace {
struct IDTranslatedSelector: IDSelector {
const std::vector <int64_t> & id_map;
const IDSelector & sel;
IDTranslatedSelector (const std::vector <int64_t> & id_map,
const IDSelector & sel):
id_map (id_map), sel (sel)
{}
bool is_member(idx_t id) const override {
return sel.is_member(id_map[id]);
}
};
}
template <typename IndexT>
size_t IndexIDMapTemplate<IndexT>::remove_ids (const IDSelector & sel)
{
// remove in sub-index first
IDTranslatedSelector sel2 (id_map, sel);
size_t nremove = index->remove_ids (sel2);
int64_t j = 0;
for (idx_t i = 0; i < this->ntotal; i++) {
if (sel.is_member (id_map[i])) {
// remove
} else {
id_map[j] = id_map[i];
j++;
}
}
FAISS_ASSERT (j == index->ntotal);
this->ntotal = j;
id_map.resize(this->ntotal);
return nremove;
}
template <typename IndexT>
IndexIDMapTemplate<IndexT>::~IndexIDMapTemplate ()
{
if (own_fields) delete index;
}
/*****************************************************
* IndexIDMap2 implementation
*******************************************************/
template <typename IndexT>
IndexIDMap2Template<IndexT>::IndexIDMap2Template (IndexT *index):
IndexIDMapTemplate<IndexT> (index)
{}
template <typename IndexT>
void IndexIDMap2Template<IndexT>::add_with_ids
(idx_t n, const typename IndexT::component_t* x,
const typename IndexT::idx_t* xids)
{
size_t prev_ntotal = this->ntotal;
IndexIDMapTemplate<IndexT>::add_with_ids (n, x, xids);
for (size_t i = prev_ntotal; i < this->ntotal; i++) {
rev_map [this->id_map [i]] = i;
}
}
template <typename IndexT>
void IndexIDMap2Template<IndexT>::construct_rev_map ()
{
rev_map.clear ();
for (size_t i = 0; i < this->ntotal; i++) {
rev_map [this->id_map [i]] = i;
}
}
template <typename IndexT>
size_t IndexIDMap2Template<IndexT>::remove_ids(const IDSelector& sel)
{
// This is quite inefficient
size_t nremove = IndexIDMapTemplate<IndexT>::remove_ids (sel);
construct_rev_map ();
return nremove;
}
template <typename IndexT>
void IndexIDMap2Template<IndexT>::reconstruct
(idx_t key, typename IndexT::component_t * recons) const
{
try {
this->index->reconstruct (rev_map.at (key), recons);
} catch (const std::out_of_range& e) {
FAISS_THROW_FMT ("key %ld not found", key);
}
}
// explicit template instantiations
template struct IndexIDMapTemplate<Index>;
template struct IndexIDMapTemplate<IndexBinary>;
template struct IndexIDMap2Template<Index>;
template struct IndexIDMap2Template<IndexBinary>;
/*****************************************************
* IndexSplitVectors implementation
*******************************************************/
IndexSplitVectors::IndexSplitVectors (idx_t d, bool threaded):
Index (d), own_fields (false),
threaded (threaded), sum_d (0)
{
}
void IndexSplitVectors::add_sub_index (Index *index)
{
sub_indexes.push_back (index);
sync_with_sub_indexes ();
}
void IndexSplitVectors::sync_with_sub_indexes ()
{
if (sub_indexes.empty()) return;
Index * index0 = sub_indexes[0];
sum_d = index0->d;
metric_type = index0->metric_type;
is_trained = index0->is_trained;
ntotal = index0->ntotal;
for (int i = 1; i < sub_indexes.size(); i++) {
Index * index = sub_indexes[i];
FAISS_THROW_IF_NOT (metric_type == index->metric_type);
FAISS_THROW_IF_NOT (ntotal == index->ntotal);
sum_d += index->d;
}
}
void IndexSplitVectors::add(idx_t /*n*/, const float* /*x*/) {
FAISS_THROW_MSG("not implemented");
}
void IndexSplitVectors::search (
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
FAISS_THROW_IF_NOT_MSG (k == 1,
"search implemented only for k=1");
FAISS_THROW_IF_NOT_MSG (sum_d == d,
"not enough indexes compared to # dimensions");
int64_t nshard = sub_indexes.size();
float *all_distances = new float [nshard * k * n];
idx_t *all_labels = new idx_t [nshard * k * n];
ScopeDeleter<float> del (all_distances);
ScopeDeleter<idx_t> del2 (all_labels);
auto query_func = [n, x, k, distances, labels, all_distances, all_labels, this]
(int no) {
const IndexSplitVectors *index = this;
float *distances1 = no == 0 ? distances : all_distances + no * k * n;
idx_t *labels1 = no == 0 ? labels : all_labels + no * k * n;
if (index->verbose)
printf ("begin query shard %d on %ld points\n", no, n);
const Index * sub_index = index->sub_indexes[no];
int64_t sub_d = sub_index->d, d = index->d;
idx_t ofs = 0;
for (int i = 0; i < no; i++) ofs += index->sub_indexes[i]->d;
float *sub_x = new float [sub_d * n];
ScopeDeleter<float> del1 (sub_x);
for (idx_t i = 0; i < n; i++)
memcpy (sub_x + i * sub_d, x + ofs + i * d, sub_d * sizeof (sub_x[0]));
sub_index->search (n, sub_x, k, distances1, labels1);
if (index->verbose)
printf ("end query shard %d\n", no);
};
if (!threaded) {
for (int i = 0; i < nshard; i++) {
query_func(i);
}
} else {
std::vector<std::unique_ptr<WorkerThread> > threads;
std::vector<std::future<bool>> v;
for (int i = 0; i < nshard; i++) {
threads.emplace_back(new WorkerThread());
WorkerThread *wt = threads.back().get();
v.emplace_back(wt->add([i, query_func](){query_func(i); }));
}
// Blocking wait for completion
for (auto& func : v) {
func.get();
}
}
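// Merge step: each shard returns ids local to its sub-index; the ids are
// combined as a mixed-radix number,
//   label = l_0 + l_1 * ntotal_0 + l_2 * ntotal_0 * ntotal_1 + ...
// and the per-shard distances are summed. If any shard returns -1, the
// combined entry is invalidated (label -1, NaN distance).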
int64_t factor = 1;
for (int i = 0; i < nshard; i++) {
if (i > 0) { // results of 0 are already in the table
const float *distances_i = all_distances + i * k * n;
const idx_t *labels_i = all_labels + i * k * n;
for (int64_t j = 0; j < n; j++) {
if (labels[j] >= 0 && labels_i[j] >= 0) {
labels[j] += labels_i[j] * factor;
distances[j] += distances_i[j];
} else {
labels[j] = -1;
distances[j] = 0.0 / 0.0;
}
}
}
factor *= sub_indexes[i]->ntotal;
}
}
void IndexSplitVectors::train(idx_t /*n*/, const float* /*x*/) {
FAISS_THROW_MSG("not implemented");
}
void IndexSplitVectors::reset ()
{
FAISS_THROW_MSG ("not implemented");
}
IndexSplitVectors::~IndexSplitVectors ()
{
if (own_fields) {
for (int s = 0; s < sub_indexes.size(); s++)
delete sub_indexes [s];
}
}
} // namespace faiss

View File

@ -0,0 +1,126 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef META_INDEXES_H
#define META_INDEXES_H
#include <vector>
#include <unordered_map>
#include <faiss/Index.h>
#include <faiss/IndexShards.h>
#include <faiss/IndexReplicas.h>
namespace faiss {
/** Index that translates search results to ids */
template <typename IndexT>
struct IndexIDMapTemplate : IndexT {
using idx_t = typename IndexT::idx_t;
using component_t = typename IndexT::component_t;
using distance_t = typename IndexT::distance_t;
IndexT * index; ///! the sub-index
bool own_fields; ///! whether pointers are deleted in destructor
std::vector<idx_t> id_map;
explicit IndexIDMapTemplate (IndexT *index);
/// @param xids if non-null, ids to store for the vectors (size n)
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids) override;
/// this will fail. Use add_with_ids
void add(idx_t n, const component_t* x) override;
void search(
idx_t n, const component_t* x, idx_t k,
distance_t* distances,
idx_t* labels) const override;
void train(idx_t n, const component_t* x) override;
void reset() override;
/// remove ids adapted to IndexFlat
size_t remove_ids(const IDSelector& sel) override;
void range_search (idx_t n, const component_t *x, distance_t radius,
RangeSearchResult *result) const override;
~IndexIDMapTemplate () override;
IndexIDMapTemplate () {own_fields=false; index=nullptr; }
};
using IndexIDMap = IndexIDMapTemplate<Index>;
using IndexBinaryIDMap = IndexIDMapTemplate<IndexBinary>;
/** same as IndexIDMap but also provides an efficient reconstruction
* implementation via a 2-way index */
template <typename IndexT>
struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
using idx_t = typename IndexT::idx_t;
using component_t = typename IndexT::component_t;
using distance_t = typename IndexT::distance_t;
std::unordered_map<idx_t, idx_t> rev_map;
explicit IndexIDMap2Template (IndexT *index);
/// make the rev_map from scratch
void construct_rev_map ();
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids) override;
size_t remove_ids(const IDSelector& sel) override;
void reconstruct (idx_t key, component_t * recons) const override;
~IndexIDMap2Template() override {}
IndexIDMap2Template () {}
};
using IndexIDMap2 = IndexIDMap2Template<Index>;
using IndexBinaryIDMap2 = IndexIDMap2Template<IndexBinary>;
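/* Minimal usage sketch (hedged; assumes a flat base index and
 * caller-provided ids; all names below are illustrative):
 *
 *   faiss::IndexFlatL2 flat (d);
 *   faiss::IndexIDMap index (&flat);             // own_fields stays false
 *   index.add_with_ids (n, xb, custom_ids);      // plain add() would throw
 *   index.search (nq, xq, k, distances, labels); // labels are custom ids
 */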
/** splits input vectors in segments and assigns each segment to a sub-index
* used to distribute a MultiIndexQuantizer
*/
struct IndexSplitVectors: Index {
bool own_fields;
bool threaded;
std::vector<Index*> sub_indexes;
idx_t sum_d; /// sum of dimensions seen so far
explicit IndexSplitVectors (idx_t d, bool threaded = false);
void add_sub_index (Index *);
void sync_with_sub_indexes ();
void add(idx_t n, const float* x) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void train(idx_t n, const float* x) override;
void reset() override;
~IndexSplitVectors() override;
};
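/* Hedged usage sketch (assumes two sub-indexes, each trained on d/2
 * dimensions; variable names are illustrative):
 *
 *   faiss::IndexSplitVectors split (d, true);   // threaded queries
 *   split.add_sub_index (&sub0);                // covers dimensions [0, d/2)
 *   split.add_sub_index (&sub1);                // covers dimensions [d/2, d)
 *   split.search (n, x, 1, distances, labels);  // only k == 1 is supported
 */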
} // namespace faiss
#endif

View File

@ -0,0 +1,674 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/OnDiskInvertedLists.h>
#include <pthread.h>
#include <unordered_set>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/utils.h>
namespace faiss {
/**********************************************
* LockLevels
**********************************************/
struct LockLevels {
/* There are n lock1(n) locks (one per inverted list), one lock2 and one lock3
* Invariants:
* a single thread can hold one lock1(n) for some n
* a single thread can hold lock2, if it holds lock1(n) for some n
* a single thread can hold lock3, if it holds lock1(n) for some n
* AND lock2 AND no other thread holds lock1(m) for m != n
*/
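/* In this file: per-list readers and writers take lock_1(list_no);
 * resize_locked() takes lock_2 to modify the slot free-list; and
 * allocate_slot() additionally takes lock_3 (while holding lock_2)
 * when the whole mmapped region must be grown and remapped. */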
pthread_mutex_t mutex1;
pthread_cond_t level1_cv;
pthread_cond_t level2_cv;
pthread_cond_t level3_cv;
std::unordered_set<int> level1_holders; // which level1 locks are held
int n_level2; // nb threads that wait on level2
bool level3_in_use; // a thread waits on level3
bool level2_in_use;
LockLevels() {
pthread_mutex_init(&mutex1, nullptr);
pthread_cond_init(&level1_cv, nullptr);
pthread_cond_init(&level2_cv, nullptr);
pthread_cond_init(&level3_cv, nullptr);
n_level2 = 0;
level2_in_use = false;
level3_in_use = false;
}
~LockLevels() {
pthread_cond_destroy(&level1_cv);
pthread_cond_destroy(&level2_cv);
pthread_cond_destroy(&level3_cv);
pthread_mutex_destroy(&mutex1);
}
void lock_1(int no) {
pthread_mutex_lock(&mutex1);
while (level3_in_use || level1_holders.count(no) > 0) {
pthread_cond_wait(&level1_cv, &mutex1);
}
level1_holders.insert(no);
pthread_mutex_unlock(&mutex1);
}
void unlock_1(int no) {
pthread_mutex_lock(&mutex1);
assert(level1_holders.count(no) == 1);
level1_holders.erase(no);
if (level3_in_use) { // a writer is waiting
pthread_cond_signal(&level3_cv);
} else {
pthread_cond_broadcast(&level1_cv);
}
pthread_mutex_unlock(&mutex1);
}
void lock_2() {
pthread_mutex_lock(&mutex1);
n_level2 ++;
if (level3_in_use) { // tell waiting level3 that we are blocked
pthread_cond_signal(&level3_cv);
}
while (level2_in_use) {
pthread_cond_wait(&level2_cv, &mutex1);
}
level2_in_use = true;
pthread_mutex_unlock(&mutex1);
}
void unlock_2() {
pthread_mutex_lock(&mutex1);
level2_in_use = false;
n_level2 --;
pthread_cond_signal(&level2_cv);
pthread_mutex_unlock(&mutex1);
}
void lock_3() {
pthread_mutex_lock(&mutex1);
level3_in_use = true;
// wait until there are no level1 holders anymore except the
// ones that are waiting on level2 (we are holding lock2)
while (level1_holders.size() > n_level2) {
pthread_cond_wait(&level3_cv, &mutex1);
}
// don't release the lock!
}
void unlock_3() {
level3_in_use = false;
// wake up all level1_holders
pthread_cond_broadcast(&level1_cv);
pthread_mutex_unlock(&mutex1);
}
void print () {
pthread_mutex_lock(&mutex1);
printf("State: level3_in_use=%d n_level2=%d level1_holders: [", level3_in_use, n_level2);
for (int k : level1_holders) {
printf("%d ", k);
}
printf("]\n");
pthread_mutex_unlock(&mutex1);
}
};
/**********************************************
* OngoingPrefetch
**********************************************/
struct OnDiskInvertedLists::OngoingPrefetch {
struct Thread {
pthread_t pth;
OngoingPrefetch *pf;
bool one_list () {
idx_t list_no = pf->get_next_list();
if(list_no == -1) return false;
const OnDiskInvertedLists *od = pf->od;
od->locks->lock_1 (list_no);
size_t n = od->list_size (list_no);
const Index::idx_t *idx = od->get_ids (list_no);
const uint8_t *codes = od->get_codes (list_no);
int cs = 0;
for (size_t i = 0; i < n;i++) {
cs += idx[i];
}
const idx_t *codes8 = (const idx_t*)codes;
idx_t n8 = n * od->code_size / 8;
for (size_t i = 0; i < n8;i++) {
cs += codes8[i];
}
od->locks->unlock_1(list_no);
global_cs += cs & 1;
return true;
}
};
std::vector<Thread> threads;
pthread_mutex_t list_ids_mutex;
std::vector<idx_t> list_ids;
int cur_list;
// mutex for the list of tasks
pthread_mutex_t mutex;
// pretext to keep the code below from being optimized out
static int global_cs;
const OnDiskInvertedLists *od;
explicit OngoingPrefetch (const OnDiskInvertedLists *od): od (od)
{
pthread_mutex_init (&mutex, nullptr);
pthread_mutex_init (&list_ids_mutex, nullptr);
cur_list = 0;
}
static void* prefetch_list (void * arg) {
Thread *th = static_cast<Thread*>(arg);
while (th->one_list()) ;
return nullptr;
}
idx_t get_next_list () {
idx_t list_no = -1;
pthread_mutex_lock (&list_ids_mutex);
if (cur_list >= 0 && cur_list < list_ids.size()) {
list_no = list_ids[cur_list++];
}
pthread_mutex_unlock (&list_ids_mutex);
return list_no;
}
void prefetch_lists (const idx_t *list_nos, int n) {
pthread_mutex_lock (&mutex);
pthread_mutex_lock (&list_ids_mutex);
list_ids.clear ();
pthread_mutex_unlock (&list_ids_mutex);
for (auto &th: threads) {
pthread_join (th.pth, nullptr);
}
threads.resize (0);
cur_list = 0;
int nt = std::min (n, od->prefetch_nthread);
if (nt > 0) {
// prepare tasks
for (int i = 0; i < n; i++) {
idx_t list_no = list_nos[i];
if (list_no >= 0 && od->list_size(list_no) > 0) {
list_ids.push_back (list_no);
}
}
// prepare threads
threads.resize (nt);
for (Thread &th: threads) {
th.pf = this;
pthread_create (&th.pth, nullptr, prefetch_list, &th);
}
}
pthread_mutex_unlock (&mutex);
}
~OngoingPrefetch () {
pthread_mutex_lock (&mutex);
for (auto &th: threads) {
pthread_join (th.pth, nullptr);
}
pthread_mutex_unlock (&mutex);
pthread_mutex_destroy (&mutex);
pthread_mutex_destroy (&list_ids_mutex);
}
};
int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
void OnDiskInvertedLists::prefetch_lists (const idx_t *list_nos, int n) const
{
pf->prefetch_lists (list_nos, n);
}
/**********************************************
* OnDiskInvertedLists: mmapping
**********************************************/
void OnDiskInvertedLists::do_mmap ()
{
const char *rw_flags = read_only ? "r" : "r+";
int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
FILE *f = fopen (filename.c_str(), rw_flags);
FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode %s: %s",
filename.c_str(), rw_flags, strerror(errno));
uint8_t * ptro = (uint8_t*)mmap (nullptr, totsize,
prot, MAP_SHARED, fileno (f), 0);
FAISS_THROW_IF_NOT_FMT (ptro != MAP_FAILED,
"could not mmap %s: %s",
filename.c_str(),
strerror(errno));
ptr = ptro;
fclose (f);
}
void OnDiskInvertedLists::update_totsize (size_t new_size)
{
// unmap file
if (ptr != nullptr) {
int err = munmap (ptr, totsize);
FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s",
strerror(errno));
}
if (totsize == 0) {
// must create file before truncating it
FILE *f = fopen (filename.c_str(), "w");
FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode W: %s",
filename.c_str(), strerror(errno));
fclose (f);
}
if (new_size > totsize) {
if (!slots.empty() &&
slots.back().offset + slots.back().capacity == totsize) {
slots.back().capacity += new_size - totsize;
} else {
slots.push_back (Slot(totsize, new_size - totsize));
}
} else {
assert(!"not implemented");
}
totsize = new_size;
// create file
printf ("resizing %s to %ld bytes\n", filename.c_str(), totsize);
int err = truncate (filename.c_str(), totsize);
FAISS_THROW_IF_NOT_FMT (err == 0, "truncate %s to %ld: %s",
filename.c_str(), totsize,
strerror(errno));
do_mmap ();
}
/**********************************************
* OnDiskInvertedLists
**********************************************/
#define INVALID_OFFSET (size_t)(-1)
OnDiskInvertedLists::List::List ():
size (0), capacity (0), offset (INVALID_OFFSET)
{}
OnDiskInvertedLists::Slot::Slot (size_t offset, size_t capacity):
offset (offset), capacity (capacity)
{}
OnDiskInvertedLists::Slot::Slot ():
offset (0), capacity (0)
{}
OnDiskInvertedLists::OnDiskInvertedLists (
size_t nlist, size_t code_size,
const char *filename):
InvertedLists (nlist, code_size),
filename (filename),
totsize (0),
ptr (nullptr),
read_only (false),
locks (new LockLevels ()),
pf (new OngoingPrefetch (this)),
prefetch_nthread (32)
{
lists.resize (nlist);
// the slots list starts empty
}
OnDiskInvertedLists::OnDiskInvertedLists ():
OnDiskInvertedLists (0, 0, "")
{
}
OnDiskInvertedLists::~OnDiskInvertedLists ()
{
delete pf;
// unmap all lists
if (ptr != nullptr) {
int err = munmap (ptr, totsize);
if (err != 0) {
fprintf(stderr, "munmap error: %s\n",
strerror(errno));
}
}
delete locks;
}
size_t OnDiskInvertedLists::list_size(size_t list_no) const
{
return lists[list_no].size;
}
const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
{
if (lists[list_no].offset == INVALID_OFFSET) {
return nullptr;
}
return ptr + lists[list_no].offset;
}
const Index::idx_t * OnDiskInvertedLists::get_ids (size_t list_no) const
{
if (lists[list_no].offset == INVALID_OFFSET) {
return nullptr;
}
return (const idx_t*)(ptr + lists[list_no].offset +
code_size * lists[list_no].capacity);
}
void OnDiskInvertedLists::update_entries (
size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids_in, const uint8_t *codes_in)
{
FAISS_THROW_IF_NOT (!read_only);
if (n_entry == 0) return;
const List & l = lists[list_no];
assert (n_entry + offset <= l.size);
idx_t *ids = const_cast<idx_t*>(get_ids (list_no));
memcpy (ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
uint8_t *codes = const_cast<uint8_t*>(get_codes (list_no));
memcpy (codes + offset * code_size, codes_in, code_size * n_entry);
}
size_t OnDiskInvertedLists::add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code)
{
FAISS_THROW_IF_NOT (!read_only);
locks->lock_1 (list_no);
size_t o = list_size (list_no);
resize_locked (list_no, n_entry + o);
update_entries (list_no, o, n_entry, ids, code);
locks->unlock_1 (list_no);
return o;
}
void OnDiskInvertedLists::resize (size_t list_no, size_t new_size)
{
FAISS_THROW_IF_NOT (!read_only);
locks->lock_1 (list_no);
resize_locked (list_no, new_size);
locks->unlock_1 (list_no);
}
void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
{
List & l = lists[list_no];
if (new_size <= l.capacity &&
new_size > l.capacity / 2) {
l.size = new_size;
return;
}
// otherwise we release the current slot, and find a new one
locks->lock_2 ();
free_slot (l.offset, l.capacity);
List new_l;
if (new_size == 0) {
new_l = List();
} else {
new_l.size = new_size;
new_l.capacity = 1;
while (new_l.capacity < new_size) {
new_l.capacity *= 2;
}
new_l.offset = allocate_slot (
new_l.capacity * (sizeof(idx_t) + code_size));
}
// copy common data
if (l.offset != new_l.offset) {
size_t n = std::min (new_size, l.size);
if (n > 0) {
memcpy (ptr + new_l.offset, get_codes(list_no), n * code_size);
memcpy (ptr + new_l.offset + new_l.capacity * code_size,
get_ids (list_no), n * sizeof(idx_t));
}
}
lists[list_no] = new_l;
locks->unlock_2 ();
}
size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
// should hold lock2
auto it = slots.begin();
while (it != slots.end() && it->capacity < capacity) {
it++;
}
if (it == slots.end()) {
// not enough capacity
size_t new_size = totsize == 0 ? 32 : totsize * 2;
while (new_size - totsize < capacity)
new_size *= 2;
locks->lock_3 ();
update_totsize(new_size);
locks->unlock_3 ();
it = slots.begin();
while (it != slots.end() && it->capacity < capacity) {
it++;
}
assert (it != slots.end());
}
size_t o = it->offset;
if (it->capacity == capacity) {
slots.erase (it);
} else {
// take from beginning of slot
it->capacity -= capacity;
it->offset += capacity;
}
return o;
}
void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
// should hold lock2
if (capacity == 0) return;
auto it = slots.begin();
while (it != slots.end() && it->offset <= offset) {
it++;
}
size_t inf = 1UL << 60;
size_t end_prev = inf;
if (it != slots.begin()) {
auto prev = it;
prev--;
end_prev = prev->offset + prev->capacity;
}
size_t begin_next = inf;
if (it != slots.end()) {
begin_next = it->offset;
}
assert (end_prev == inf || offset >= end_prev);
assert (offset + capacity <= begin_next);
if (offset == end_prev) {
auto prev = it;
prev--;
if (offset + capacity == begin_next) {
prev->capacity += capacity + it->capacity;
slots.erase (it);
} else {
prev->capacity += capacity;
}
} else {
if (offset + capacity == begin_next) {
it->offset -= capacity;
it->capacity += capacity;
} else {
slots.insert (it, Slot (offset, capacity));
}
}
// TODO shrink global storage if needed
}
/*****************************************
* Compact form
*****************************************/
size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
bool verbose)
{
FAISS_THROW_IF_NOT_MSG (totsize == 0, "works only on an empty InvertedLists");
std::vector<size_t> sizes (nlist);
for (int i = 0; i < n_il; i++) {
const InvertedLists *il = ils[i];
FAISS_THROW_IF_NOT (il->nlist == nlist && il->code_size == code_size);
for (size_t j = 0; j < nlist; j++) {
sizes [j] += il->list_size(j);
}
}
size_t cums = 0;
size_t ntotal = 0;
for (size_t j = 0; j < nlist; j++) {
ntotal += sizes[j];
lists[j].size = 0;
lists[j].capacity = sizes[j];
lists[j].offset = cums;
cums += lists[j].capacity * (sizeof(idx_t) + code_size);
}
update_totsize (cums);
size_t nmerged = 0;
double t0 = getmillisecs(), last_t = t0;
#pragma omp parallel for
for (size_t j = 0; j < nlist; j++) {
List & l = lists[j];
for (int i = 0; i < n_il; i++) {
const InvertedLists *il = ils[i];
size_t n_entry = il->list_size(j);
l.size += n_entry;
update_entries (j, l.size - n_entry, n_entry,
ScopedIds(il, j).get(),
ScopedCodes(il, j).get());
}
assert (l.size == l.capacity);
if (verbose) {
#pragma omp critical
{
nmerged++;
double t1 = getmillisecs();
if (t1 - last_t > 500) {
printf("merged %ld lists in %.3f s\r",
nmerged, (t1 - t0) / 1000.0);
fflush(stdout);
last_t = t1;
}
}
}
}
if(verbose) {
printf("\n");
}
return ntotal;
}
void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
{
FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);
std::vector<List> new_lists (l1 - l0);
memcpy (new_lists.data(), &lists[l0], (l1 - l0) * sizeof(List));
lists.swap(new_lists);
nlist = l1 - l0;
}
} // namespace faiss

View File

@ -0,0 +1,127 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_ON_DISK_INVERTED_LISTS_H
#define FAISS_ON_DISK_INVERTED_LISTS_H
#include <vector>
#include <list>
#include <faiss/IndexIVF.h>
namespace faiss {
struct LockLevels;
/** On-disk storage of inverted lists.
*
* The data is stored in a mmapped chunk of memory (base pointer ptr,
* size totsize). Each list is a range of that memory, described by a
* List object, that contains:
*
* - uint8_t codes[capacity * code_size]
* - followed by idx_t ids[capacity]
*
* in each of the arrays, only the first size <= capacity elements are
* used; the rest is not initialized.
*
* Addition and resize are supported by:
* - rounding up the capacity of the lists to a power of two
* - maintaining a list of empty slots, sorted by offset.
* - resizing the mmapped block as needed.
*
* An OnDiskInvertedLists is compact if size == capacity for all
* lists and there are no available slots.
*
* Addition to the invlists is slow. For incremental add it is better
* to use a default ArrayInvertedLists object and convert it to an
* OnDisk with merge_from.
*
* When it is known that a set of lists will be accessed, it is useful
* to call prefetch_lists, which launches a set of threads to read the
* lists in parallel.
*/
struct OnDiskInvertedLists: InvertedLists {
struct List {
size_t size; // size of inverted list (entries)
size_t capacity; // allocated size (entries)
size_t offset; // offset in buffer (bytes)
List ();
};
// size nlist
std::vector<List> lists;
struct Slot {
size_t offset; // bytes
size_t capacity; // bytes
Slot (size_t offset, size_t capacity);
Slot ();
};
// size whatever space remains
std::list<Slot> slots;
std::string filename;
size_t totsize;
uint8_t *ptr; // mmap base pointer
bool read_only; /// are inverted lists mapped read-only
OnDiskInvertedLists (size_t nlist, size_t code_size,
const char *filename);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
// copy all inverted lists into *this, in compact form (without
// allocating slots)
size_t merge_from (const InvertedLists **ils, int n_il, bool verbose=false);
/// restrict the inverted lists to l0:l1 without touching the mmapped region
void crop_invlists(size_t l0, size_t l1);
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
virtual ~OnDiskInvertedLists ();
// private
LockLevels * locks;
// encapsulates the threads that are busy prefetching
struct OngoingPrefetch;
OngoingPrefetch *pf;
int prefetch_nthread;
void do_mmap ();
void update_totsize (size_t new_totsize);
void resize_locked (size_t list_no, size_t new_size);
size_t allocate_slot (size_t capacity);
void free_slot (size_t offset, size_t capacity);
// empty constructor for the I/O functions
OnDiskInvertedLists ();
};
} // namespace faiss
#endif

View File

@ -0,0 +1,87 @@
# Faiss
Faiss is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning. Faiss is written in C++ with complete wrappers for Python/numpy. Some of the most useful algorithms are implemented on the GPU. It is developed by [Facebook AI Research](https://research.fb.com/category/facebook-ai-research-fair/).
## NEWS
*NEW: version 1.5.3 (2019-06-24) fix performance regression in IndexIVF.*
*NEW: version 1.5.2 (2019-05-27) the license was relaxed to MIT from BSD+Patents. Read LICENSE for details.*
*NEW: version 1.5.0 (2018-12-19) GPU binary flat index and binary HNSW index*
*NEW: version 1.4.0 (2018-08-30) no more crashes in pure Python code*
*NEW: version 1.3.0 (2018-07-12) support for binary indexes*
*NEW: latest commit (2018-02-22) supports on-disk storage of inverted indexes, see demos/demo_ondisk_ivf.py*
*NEW: latest commit (2018-01-09) includes an implementation of the HNSW indexing method, see benchs/bench_hnsw.py*
*NEW: there is now a Facebook public discussion group for Faiss users at https://www.facebook.com/groups/faissusers/*
*NEW: on 2017-07-30, the license on Faiss was relaxed to BSD from CC-BY-NC. Read LICENSE for details.*
## Introduction
Faiss contains several methods for similarity search. It assumes that the instances are represented as vectors and are identified by an integer, and that the vectors can be compared with L2 distances or dot products. Vectors that are similar to a query vector are those that have the lowest L2 distance or the highest dot product with the query vector. It also supports cosine similarity, since this is a dot product on normalized vectors.
Most of the methods, like those based on binary vectors and compact quantization codes, solely use a compressed representation of the vectors and do not require keeping the original vectors. This generally comes at the cost of a less precise search, but these methods can scale to billions of vectors in main memory on a single server.
The GPU implementation can accept input from either CPU or GPU memory. On a server with GPUs, the GPU indexes can be used as drop-in replacements for the CPU indexes (e.g., replace `IndexFlatL2` with `GpuIndexFlatL2`), and copies to/from GPU memory are handled automatically. Results will be faster, however, if both input and output remain resident on the GPU. Both single- and multi-GPU usage are supported.
## Building
The library is mostly implemented in C++, with optional GPU support provided via CUDA, and an optional Python interface. The CPU version requires a BLAS library. It compiles with a Makefile and can be packaged in a docker image. See [INSTALL.md](INSTALL.md) for details.
## How Faiss works
Faiss is built around an index type that stores a set of vectors, and provides a function to search in them with L2 and/or dot product vector comparison. Some index types are simple baselines, such as exact search. Most of the available indexing structures correspond to various trade-offs with respect to
- search time
- search quality
- memory used per index vector
- training time
- need for external data for unsupervised training
The optional GPU implementation provides what is likely (as of March 2017) the fastest exact and approximate (compressed-domain) nearest neighbor search implementation for high-dimensional vectors, fastest Lloyd's k-means, and fastest small k-selection algorithm known. [The implementation is detailed here](https://arxiv.org/abs/1702.08734).
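As a concrete starting point, here is a minimal C++ sketch of the simplest baseline, exact L2 search with `IndexFlatL2` (the database contents below are placeholders):
```
#include <faiss/IndexFlat.h>
#include <vector>

int main() {
    int d = 64;                                   // vector dimensionality
    int nb = 1000;                                // number of database vectors
    std::vector<float> xb((size_t)d * nb, 0.0f);  // placeholder database

    faiss::IndexFlatL2 index(d);                  // exact L2 search, no training step
    index.add(nb, xb.data());

    int k = 4;                                    // neighbors to retrieve
    std::vector<float> distances(k);
    std::vector<faiss::Index::idx_t> labels(k);
    index.search(1, xb.data(), k, distances.data(), labels.data());
    return 0;
}
```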
## Full documentation of Faiss
The following are entry points for documentation:
- the full documentation, including a [tutorial](https://github.com/facebookresearch/faiss/wiki/Getting-started), a [FAQ](https://github.com/facebookresearch/faiss/wiki/FAQ) and a [troubleshooting section](https://github.com/facebookresearch/faiss/wiki/Troubleshooting) can be found on the [wiki page](http://github.com/facebookresearch/faiss/wiki)
- the [doxygen documentation](http://rawgithub.com/facebookresearch/faiss/master/docs/html/annotated.html) gives per-class information
- to reproduce results from our research papers, [Polysemous codes](https://arxiv.org/abs/1609.01882) and [Billion-scale similarity search with GPUs](https://arxiv.org/abs/1702.08734), refer to the [benchmarks README](benchs/README.md). For [Link and code: Fast indexing with graphs and compact regression codes](https://arxiv.org/abs/1804.09996), see the [link_and_code README](benchs/link_and_code)
## Authors
The main authors of Faiss are:
- [Hervé Jégou](https://github.com/jegou) initiated the Faiss project and wrote its first implementation
- [Matthijs Douze](https://github.com/mdouze) implemented most of the CPU Faiss
- [Jeff Johnson](https://github.com/wickedfoo) implemented all of the GPU Faiss
- [Lucas Hosseini](https://github.com/beauby) implemented the binary indexes
## Reference
Reference to cite when you use Faiss in a research paper:
```
@article{JDH17,
title={Billion-scale similarity search with GPUs},
author={Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
journal={arXiv preprint arXiv:1702.08734},
year={2017}
}
```
## Join the Faiss community
For public discussion of Faiss or for questions, there is a Facebook public discussion group at https://www.facebook.com/groups/faissusers/
We monitor the [issues page](http://github.com/facebookresearch/faiss/issues) of the repository. You can report bugs, ask questions, etc.
## License
Faiss is MIT-licensed.

File diff suppressed because it is too large

View File

@ -0,0 +1,322 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_VECTOR_TRANSFORM_H
#define FAISS_VECTOR_TRANSFORM_H
/** Defines a few objects that apply transformations to a set of
* vectors. Often these are pre-processing steps.
*/
#include <vector>
#include <stdint.h>
#include <faiss/Index.h>
namespace faiss {
/** Any transformation applied on a set of vectors */
struct VectorTransform {
typedef Index::idx_t idx_t;
int d_in; ///! input dimension
int d_out; ///! output dimension
explicit VectorTransform (int d_in = 0, int d_out = 0):
d_in(d_in), d_out(d_out), is_trained(true)
{}
/// set if the VectorTransform does not require training, or if
/// training is done already
bool is_trained;
/** Perform training on a representative set of vectors. Does
* nothing by default.
*
* @param n nb of training vectors
* @param x training vectors, size n * d
*/
virtual void train (idx_t n, const float *x);
/** apply the transformation, return the result as a newly allocated matrix
* @param x size n * d_in
* @return size n * d_out
*/
float *apply (idx_t n, const float * x) const;
/// same as apply, but result is pre-allocated
virtual void apply_noalloc (idx_t n, const float * x,
float *xt) const = 0;
/// reverse transformation. May not be implemented or may return
/// approximate result
virtual void reverse_transform (idx_t n, const float * xt,
float *x) const;
virtual ~VectorTransform () {}
};
/** Generic linear transformation, with bias term applied on output
* y = A * x + b
*/
struct LinearTransform: VectorTransform {
bool have_bias; ///! whether to use the bias term
/// check if matrix A is orthonormal (enables reverse_transform)
bool is_orthonormal;
/// Transformation matrix, size d_out * d_in
std::vector<float> A;
/// bias vector, size d_out
std::vector<float> b;
/// both d_in > d_out and d_out > d_in are supported
explicit LinearTransform (int d_in = 0, int d_out = 0,
bool have_bias = false);
/// same as apply, but result is pre-allocated
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// compute x = A^T * (x - b)
/// is reverse transform if A has orthonormal lines
void transform_transpose (idx_t n, const float * y,
float *x) const;
/// works only if is_orthonormal
void reverse_transform (idx_t n, const float * xt,
float *x) const override;
/// compute A^T * A to set the is_orthonormal flag
void set_is_orthonormal ();
bool verbose;
void print_if_verbose (const char*name, const std::vector<double> &mat,
int n, int d) const;
~LinearTransform() override {}
};
/// Randomly rotate a set of vectors
struct RandomRotationMatrix: LinearTransform {
/// both d_in > d_out and d_out > d_in are supported
RandomRotationMatrix (int d_in, int d_out):
LinearTransform(d_in, d_out, false) {}
/// must be called before the transform is used
void init(int seed);
// initializes with an arbitrary seed
void train(idx_t n, const float* x) override;
RandomRotationMatrix () {}
};
/** Applies a principal component analysis on a set of vectors,
* with optionally whitening and random rotation. */
struct PCAMatrix: LinearTransform {
/** after transformation the components are multiplied by
* eigenvalues^eigen_power
*
* =0: no whitening
* =-0.5: full whitening
*/
float eigen_power;
/// random rotation after PCA
bool random_rotation;
/// ratio between # training vectors and dimension
size_t max_points_per_d;
/// try to distribute output eigenvectors in this many bins
int balanced_bins;
/// Mean, size d_in
std::vector<float> mean;
/// eigenvalues of covariance matrix (= squared singular values)
std::vector<float> eigenvalues;
/// PCA matrix, size d_in * d_in
std::vector<float> PCAMat;
// the final matrix is computed after random rotation and/or whitening
explicit PCAMatrix (int d_in = 0, int d_out = 0,
float eigen_power = 0, bool random_rotation = false);
/// train on n vectors. If n < d_in then the eigenvector matrix
/// will be completed with 0s
void train(idx_t n, const float* x) override;
/// copy pre-trained PCA matrix
void copy_from (const PCAMatrix & other);
/// called after mean, PCAMat and eigenvalues are computed
void prepare_Ab();
};
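// Hedged sketch: reduce 128-d vectors to 64 dimensions with PCA (n and x
// stand for a representative training sample and are illustrative):
//
//   faiss::PCAMatrix pca (128, 64);
//   pca.train (n, x);               // x: n * 128 floats
//   float *xt = pca.apply (n, x);   // xt: n * 64 floats, free with delete [] xt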
/** ITQ implementation from
*
* Iterative quantization: A procrustean approach to learning binary codes
* for large-scale image retrieval,
*
* Yunchao Gong, Svetlana Lazebnik, Albert Gordo, Florent Perronnin,
* PAMI'12.
*/
struct ITQMatrix: LinearTransform {
int max_iter;
int seed;
// force initialization of the rotation (for debugging)
std::vector<double> init_rotation;
explicit ITQMatrix (int d = 0);
void train (idx_t n, const float* x) override;
};
/** The full ITQ transform, including normalizations and PCA transformation
*/
struct ITQTransform: VectorTransform {
std::vector<float> mean;
bool do_pca;
ITQMatrix itq;
/// max training points per dimension
int max_train_per_dim;
// concatenation of PCA + ITQ transformation
LinearTransform pca_then_itq;
explicit ITQTransform (int d_in = 0, int d_out = 0, bool do_pca = false);
void train (idx_t n, const float *x) override;
void apply_noalloc (idx_t n, const float* x, float* xt) const override;
};
struct ProductQuantizer;
/** Applies a rotation to align the dimensions with a PQ to minimize
* the reconstruction error. Can be used before an IndexPQ or an
* IndexIVFPQ. The method is the non-parametric version described in:
*
* "Optimized Product Quantization for Approximate Nearest Neighbor Search"
* Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
*
*/
struct OPQMatrix: LinearTransform {
int M; ///< nb of subquantizers
int niter; ///< Number of outer training iterations
int niter_pq; ///< Number of training iterations for the PQ
int niter_pq_0; ///< same, for the first outer iteration
/// if there are too many training points, resample
size_t max_train_points;
bool verbose;
/// if non-NULL, use this product quantizer for training
/// should be constructed with (d_out, M, _)
ProductQuantizer * pq;
/// if d2 != -1, output vectors of this dimension
explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
void train(idx_t n, const float* x) override;
};
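// Hedged sketch: OPQ rotation in front of a PQ index via IndexPreTransform
// (assumes faiss/IndexPQ.h and faiss/IndexPreTransform.h are available):
//
//   faiss::OPQMatrix opq (d, M);
//   faiss::IndexPQ pq (d, M, 8);
//   faiss::IndexPreTransform index (&opq, &pq);
//   index.train (n, x);             // trains the rotation, then the PQ
//   index.add (n, x);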
/** remap dimensions for input vectors, possibly inserting 0s
* strictly speaking this is also a linear transform but we don't want
* to compute it with matrix multiplies */
struct RemapDimensionsTransform: VectorTransform {
/// map from output dimension to input, size d_out
/// -1 -> set output to 0
std::vector<int> map;
RemapDimensionsTransform (int d_in, int d_out, const int *map);
/// remap input to output, skipping or inserting dimensions as needed
/// if uniform: distribute dimensions uniformly
/// otherwise just take the d_out first ones.
RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// reverse transform correct only when the mapping is a permutation
void reverse_transform(idx_t n, const float* xt, float* x) const override;
RemapDimensionsTransform () {}
};
/** per-vector normalization */
struct NormalizationTransform: VectorTransform {
float norm;
explicit NormalizationTransform (int d, float norm = 2.0);
NormalizationTransform ();
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// Identity transform since the norm is not invertible
void reverse_transform(idx_t n, const float* xt, float* x) const override;
};
/** Subtract the mean of each component from the vectors. */
struct CenteringTransform: VectorTransform {
/// Mean, size d_in = d_out
std::vector<float> mean;
explicit CenteringTransform (int d = 0);
/// train on n vectors.
void train(idx_t n, const float* x) override;
/// subtract the mean
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// add the mean
void reverse_transform (idx_t n, const float * xt,
float *x) const override;
};
} // namespace faiss
#endif

View File

@ -0,0 +1,234 @@
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_blas.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_BLAS([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
#
# DESCRIPTION
#
# This macro looks for a library that implements the BLAS linear-algebra
# interface (see http://www.netlib.org/blas/). On success, it sets the
# BLAS_LIBS output variable to hold the requisite library linkages.
#
# To link with BLAS, you should link with:
#
# $BLAS_LIBS $LIBS $FLIBS
#
# in that order. FLIBS is the output variable of the
# AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is
# sometimes necessary in order to link with F77 libraries. Users will also
# need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same
# reason.
#
# Many libraries are searched for, from ATLAS to CXML to ESSL. The user
# may also use --with-blas=<lib> in order to use some specific BLAS
# library <lib>. In order to link successfully, however, be aware that you
# will probably need to use the same Fortran compiler (which can be set
# via the F77 env. var.) as was used to compile the BLAS library.
#
# ACTION-IF-FOUND is a list of shell commands to run if a BLAS library is
# found, and ACTION-IF-NOT-FOUND is a list of commands to run if it is
# not found. If ACTION-IF-FOUND is not specified, the default action will
# define HAVE_BLAS.
#
# LICENSE
#
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 15
AU_ALIAS([ACX_BLAS], [AX_BLAS])
AC_DEFUN([AX_BLAS], [
AC_PREREQ(2.50)
# AC_REQUIRE([AC_F77_LIBRARY_LDFLAGS])
AC_REQUIRE([AC_CANONICAL_HOST])
ax_blas_ok=no
AC_ARG_WITH(blas,
[AS_HELP_STRING([--with-blas=<lib>], [use BLAS library <lib>])])
case $with_blas in
yes | "") ;;
no) ax_blas_ok=disable ;;
-* | */* | *.a | *.so | *.so.* | *.o) BLAS_LIBS="$with_blas" ;;
*) BLAS_LIBS="-l$with_blas" ;;
esac
OPENMP_LDFLAGS="$OPENMP_CXXFLAGS"
# Get fortran linker names of BLAS functions to check for.
# AC_F77_FUNC(sgemm)
# AC_F77_FUNC(dgemm)
sgemm=sgemm_
dgemm=dgemm_
ax_blas_save_LIBS="$LIBS"
LIBS="$LIBS $FLIBS"
# First, check BLAS_LIBS environment variable
if test $ax_blas_ok = no; then
if test "x$BLAS_LIBS" != x; then
save_LIBS="$LIBS"; LIBS="$BLAS_LIBS $LIBS"
AC_MSG_CHECKING([for $sgemm in $BLAS_LIBS])
AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes], [BLAS_LIBS=""])
AC_MSG_RESULT($ax_blas_ok)
LIBS="$save_LIBS"
fi
fi
# BLAS linked to by default? (happens on some supercomputers)
if test $ax_blas_ok = no; then
save_LIBS="$LIBS"; LIBS="$LIBS"
AC_MSG_CHECKING([if $sgemm is being linked in already])
AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes])
AC_MSG_RESULT($ax_blas_ok)
LIBS="$save_LIBS"
fi
# BLAS in Intel MKL library?
if test $ax_blas_ok = no; then
case $host_os in
darwin*)
AC_CHECK_LIB(mkl_intel_lp64, $sgemm,
[ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread"; OPENMP_LDFLAGS=""],,
[-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread])
;;
*)
if test $host_cpu = x86_64; then
AC_CHECK_LIB(mkl_intel_lp64, $sgemm,
[ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl"],,
[-lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl])
elif test $host_cpu = i686; then
AC_CHECK_LIB(mkl_intel, $sgemm,
[ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl"],,
[-lmkl_intel -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl])
fi
;;
esac
fi
# Old versions of MKL
if test $ax_blas_ok = no; then
AC_CHECK_LIB(mkl, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lmkl -lguide -lpthread"],,[-lguide -lpthread])
fi
# BLAS in OpenBLAS library? (http://xianyi.github.com/OpenBLAS/)
if test $ax_blas_ok = no; then
AC_CHECK_LIB(openblas, $sgemm, [ax_blas_ok=yes
BLAS_LIBS="-lopenblas"])
fi
# BLAS in ATLAS library? (http://math-atlas.sourceforge.net/)
if test $ax_blas_ok = no; then
AC_CHECK_LIB(atlas, ATL_xerbla,
[AC_CHECK_LIB(f77blas, $sgemm,
[AC_CHECK_LIB(cblas, cblas_dgemm,
[ax_blas_ok=yes
BLAS_LIBS="-lcblas -lf77blas -latlas"],
[], [-lf77blas -latlas])],
[], [-latlas])])
fi
# BLAS in PhiPACK libraries? (requires generic BLAS lib, too)
if test $ax_blas_ok = no; then
AC_CHECK_LIB(blas, $sgemm,
[AC_CHECK_LIB(dgemm, $dgemm,
[AC_CHECK_LIB(sgemm, $sgemm,
[ax_blas_ok=yes; BLAS_LIBS="-lsgemm -ldgemm -lblas"],
[], [-lblas])],
[], [-lblas])])
fi
# BLAS in Apple vecLib library?
if test $ax_blas_ok = no; then
save_LIBS="$LIBS"; LIBS="-framework vecLib $LIBS"
AC_MSG_CHECKING([for $sgemm in -framework vecLib])
AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes;BLAS_LIBS="-framework vecLib"])
AC_MSG_RESULT($ax_blas_ok)
LIBS="$save_LIBS"
fi
# BLAS in Alpha CXML library?
if test $ax_blas_ok = no; then
AC_CHECK_LIB(cxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lcxml"])
fi
# BLAS in Alpha DXML library? (now called CXML, see above)
if test $ax_blas_ok = no; then
AC_CHECK_LIB(dxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-ldxml"])
fi
# BLAS in Sun Performance library?
if test $ax_blas_ok = no; then
if test "x$GCC" != xyes; then # only works with Sun CC
AC_CHECK_LIB(sunmath, acosp,
[AC_CHECK_LIB(sunperf, $sgemm,
[BLAS_LIBS="-xlic_lib=sunperf -lsunmath"
ax_blas_ok=yes],[],[-lsunmath])])
fi
fi
# BLAS in SCSL library? (SGI/Cray Scientific Library)
if test $ax_blas_ok = no; then
AC_CHECK_LIB(scs, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lscs"])
fi
# BLAS in SGIMATH library?
if test $ax_blas_ok = no; then
AC_CHECK_LIB(complib.sgimath, $sgemm,
[ax_blas_ok=yes; BLAS_LIBS="-lcomplib.sgimath"])
fi
# BLAS in IBM ESSL library? (requires generic BLAS lib, too)
if test $ax_blas_ok = no; then
AC_CHECK_LIB(blas, $sgemm,
[AC_CHECK_LIB(essl, $sgemm,
[ax_blas_ok=yes; BLAS_LIBS="-lessl -lblas"],
[], [-lblas $FLIBS])])
fi
# Generic BLAS library?
if test $ax_blas_ok = no; then
AC_CHECK_LIB(blas, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lblas"])
fi
AC_SUBST(BLAS_LIBS)
AC_SUBST(OPENMP_LDFLAGS)
LIBS="$ax_blas_save_LIBS"
# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
if test x"$ax_blas_ok" = xyes; then
ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1])
:
else
ax_blas_ok=no
$2
fi
])dnl AX_BLAS

View File

@ -0,0 +1,26 @@
# serial 1
AC_DEFUN([AX_CPU_ARCH], [
AC_MSG_CHECKING([for cpu arch])
AC_CANONICAL_TARGET
case $target in
amd64-* | x86_64-*)
ARCH_CPUFLAGS="-mavx2 -mf16c -msse4 -mpopcnt"
ARCH_CXXFLAGS="-m64"
;;
aarch64*-*)
dnl This is the arch for the Nvidia Xavier; a proper detection would be nice.
ARCH_CPUFLAGS="-march=armv8.2-a"
;;
*) ;;
esac
AC_MSG_RESULT([$target CPUFLAGS+="$ARCH_CPUFLAGS" CXXFLAGS+="$ARCH_CXXFLAGS"])
AC_SUBST(ARCH_CPUFLAGS)
AC_SUBST(ARCH_CXXFLAGS)
])dnl

View File

@ -0,0 +1,972 @@
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional])
#
# DESCRIPTION
#
# Check for baseline language coverage in the compiler for the specified
# version of the C++ standard. If necessary, add switches to CXX and
# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard)
# or '14' (for the C++14 standard).
#
# The second argument, if specified, indicates whether you insist on an
# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
# -std=c++11). If neither is specified, you get whatever works, with
# preference for an extended mode.
#
# The third argument, if specified 'mandatory' or if left unspecified,
# indicates that baseline support for the specified C++ standard is
# required and that the macro should error out if no mode with that
# support is found. If specified 'optional', then configuration proceeds
# regardless, after defining HAVE_CXX${VERSION} if and only if a
# supporting mode is found.
#
# LICENSE
#
# Copyright (c) 2008 Benjamin Kosnik <bkoz@redhat.com>
# Copyright (c) 2012 Zack Weinberg <zackw@panix.com>
# Copyright (c) 2013 Roy Stogner <roystgnr@ices.utexas.edu>
# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov <sokolov@google.com>
# Copyright (c) 2015 Paul Norman <penorman@mac.com>
# Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu>
# Copyright (c) 2016, 2018 Krzesimir Nowak <qdlacz@gmail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 9
dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro
dnl (serial version number 13).
AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"],
[$1], [14], [ax_cxx_compile_alternatives="14 1y"],
[$1], [17], [ax_cxx_compile_alternatives="17 1z"],
[m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl
m4_if([$2], [], [],
[$2], [ext], [],
[$2], [noext], [],
[m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl
m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true],
[$3], [mandatory], [ax_cxx_compile_cxx$1_required=true],
[$3], [optional], [ax_cxx_compile_cxx$1_required=false],
[m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])])
AC_LANG_PUSH([C++])dnl
ac_success=no
m4_if([$2], [noext], [], [dnl
if test x$ac_success = xno; then
for alternative in ${ax_cxx_compile_alternatives}; do
switch="-std=gnu++${alternative}"
cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
$cachevar,
[ac_save_CXX="$CXX"
CXX="$CXX $switch"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
[eval $cachevar=yes],
[eval $cachevar=no])
CXX="$ac_save_CXX"])
if eval test x\$$cachevar = xyes; then
CXX="$CXX $switch"
if test -n "$CXXCPP" ; then
CXXCPP="$CXXCPP $switch"
fi
ac_success=yes
break
fi
done
fi])
m4_if([$2], [ext], [], [dnl
if test x$ac_success = xno; then
dnl HP's aCC needs +std=c++11 according to:
dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf
dnl Cray's crayCC needs "-h std=c++11"
for alternative in ${ax_cxx_compile_alternatives}; do
for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do
cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
$cachevar,
[ac_save_CXX="$CXX"
CXX="$CXX $switch"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
[eval $cachevar=yes],
[eval $cachevar=no])
CXX="$ac_save_CXX"])
if eval test x\$$cachevar = xyes; then
CXX="$CXX $switch"
if test -n "$CXXCPP" ; then
CXXCPP="$CXXCPP $switch"
fi
ac_success=yes
break
fi
done
if test x$ac_success = xyes; then
break
fi
done
fi])
AC_LANG_POP([C++])
if test x$ax_cxx_compile_cxx$1_required = xtrue; then
if test x$ac_success = xno; then
AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.])
fi
fi
if test x$ac_success = xno; then
HAVE_CXX$1=0
AC_MSG_NOTICE([No compiler with C++$1 support was found])
else
HAVE_CXX$1=1
AC_DEFINE(HAVE_CXX$1,1,
[define if the compiler supports basic C++$1 syntax])
fi
AC_SUBST(HAVE_CXX$1)
])
dnl Test body for checking C++11 support
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11],
_AX_CXX_COMPILE_STDCXX_testbody_new_in_11
)
dnl Test body for checking C++14 support
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14],
_AX_CXX_COMPILE_STDCXX_testbody_new_in_11
_AX_CXX_COMPILE_STDCXX_testbody_new_in_14
)
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17],
_AX_CXX_COMPILE_STDCXX_testbody_new_in_11
_AX_CXX_COMPILE_STDCXX_testbody_new_in_14
_AX_CXX_COMPILE_STDCXX_testbody_new_in_17
)
dnl Tests for new features in C++11
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[
// If the compiler admits that it is not ready for C++11, why torture it?
// Hopefully, this will speed up the test.
#ifndef __cplusplus
#error "This is not a C++ compiler"
#elif __cplusplus < 201103L
#error "This is not a C++11 compiler"
#else
namespace cxx11
{
namespace test_static_assert
{
template <typename T>
struct check
{
static_assert(sizeof(int) <= sizeof(T), "not big enough");
};
}
namespace test_final_override
{
struct Base
{
virtual void f() {}
};
struct Derived : public Base
{
virtual void f() override {}
};
}
namespace test_double_right_angle_brackets
{
template < typename T >
struct check {};
typedef check<void> single_type;
typedef check<check<void>> double_type;
typedef check<check<check<void>>> triple_type;
typedef check<check<check<check<void>>>> quadruple_type;
}
namespace test_decltype
{
int
f()
{
int a = 1;
decltype(a) b = 2;
return a + b;
}
}
namespace test_type_deduction
{
template < typename T1, typename T2 >
struct is_same
{
static const bool value = false;
};
template < typename T >
struct is_same<T, T>
{
static const bool value = true;
};
template < typename T1, typename T2 >
auto
add(T1 a1, T2 a2) -> decltype(a1 + a2)
{
return a1 + a2;
}
int
test(const int c, volatile int v)
{
static_assert(is_same<int, decltype(0)>::value == true, "");
static_assert(is_same<int, decltype(c)>::value == false, "");
static_assert(is_same<int, decltype(v)>::value == false, "");
auto ac = c;
auto av = v;
auto sumi = ac + av + 'x';
auto sumf = ac + av + 1.0;
static_assert(is_same<int, decltype(ac)>::value == true, "");
static_assert(is_same<int, decltype(av)>::value == true, "");
static_assert(is_same<int, decltype(sumi)>::value == true, "");
static_assert(is_same<int, decltype(sumf)>::value == false, "");
static_assert(is_same<int, decltype(add(c, v))>::value == true, "");
return (sumf > 0.0) ? sumi : add(c, v);
}
}
namespace test_noexcept
{
int f() { return 0; }
int g() noexcept { return 0; }
static_assert(noexcept(f()) == false, "");
static_assert(noexcept(g()) == true, "");
}
namespace test_constexpr
{
template < typename CharT >
unsigned long constexpr
strlen_c_r(const CharT *const s, const unsigned long acc) noexcept
{
return *s ? strlen_c_r(s + 1, acc + 1) : acc;
}
template < typename CharT >
unsigned long constexpr
strlen_c(const CharT *const s) noexcept
{
return strlen_c_r(s, 0UL);
}
static_assert(strlen_c("") == 0UL, "");
static_assert(strlen_c("1") == 1UL, "");
static_assert(strlen_c("example") == 7UL, "");
static_assert(strlen_c("another\0example") == 7UL, "");
}
namespace test_rvalue_references
{
template < int N >
struct answer
{
static constexpr int value = N;
};
answer<1> f(int&) { return answer<1>(); }
answer<2> f(const int&) { return answer<2>(); }
answer<3> f(int&&) { return answer<3>(); }
void
test()
{
int i = 0;
const int c = 0;
static_assert(decltype(f(i))::value == 1, "");
static_assert(decltype(f(c))::value == 2, "");
static_assert(decltype(f(0))::value == 3, "");
}
}
namespace test_uniform_initialization
{
struct test
{
static const int zero {};
static const int one {1};
};
static_assert(test::zero == 0, "");
static_assert(test::one == 1, "");
}
namespace test_lambdas
{
void
test1()
{
auto lambda1 = [](){};
auto lambda2 = lambda1;
lambda1();
lambda2();
}
int
test2()
{
auto a = [](int i, int j){ return i + j; }(1, 2);
auto b = []() -> int { return '0'; }();
auto c = [=](){ return a + b; }();
auto d = [&](){ return c; }();
auto e = [a, &b](int x) mutable {
const auto identity = [](int y){ return y; };
for (auto i = 0; i < a; ++i)
a += b--;
return x + identity(a + b);
}(0);
return a + b + c + d + e;
}
int
test3()
{
const auto nullary = [](){ return 0; };
const auto unary = [](int x){ return x; };
using nullary_t = decltype(nullary);
using unary_t = decltype(unary);
const auto higher1st = [](nullary_t f){ return f(); };
const auto higher2nd = [unary](nullary_t f1){
return [unary, f1](unary_t f2){ return f2(unary(f1())); };
};
return higher1st(nullary) + higher2nd(nullary)(unary);
}
}
namespace test_variadic_templates
{
template <int...>
struct sum;
template <int N0, int... N1toN>
struct sum<N0, N1toN...>
{
static constexpr auto value = N0 + sum<N1toN...>::value;
};
template <>
struct sum<>
{
static constexpr auto value = 0;
};
static_assert(sum<>::value == 0, "");
static_assert(sum<1>::value == 1, "");
static_assert(sum<23>::value == 23, "");
static_assert(sum<1, 2>::value == 3, "");
static_assert(sum<5, 5, 11>::value == 21, "");
static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, "");
}
// http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
// Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function
// because of this.
namespace test_template_alias_sfinae
{
struct foo {};
template<typename T>
using member = typename T::member_type;
template<typename T>
void func(...) {}
template<typename T>
void func(member<T>*) {}
void test();
void test() { func<foo>(0); }
}
} // namespace cxx11
#endif // __cplusplus >= 201103L
]])
dnl Tests for new features in C++14
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[
// If the compiler admits that it is not ready for C++14, why torture it?
// Hopefully, this will speed up the test.
#ifndef __cplusplus
#error "This is not a C++ compiler"
#elif __cplusplus < 201402L
#error "This is not a C++14 compiler"
#else
namespace cxx14
{
namespace test_polymorphic_lambdas
{
int
test()
{
const auto lambda = [](auto&&... args){
const auto istiny = [](auto x){
return (sizeof(x) == 1UL) ? 1 : 0;
};
const int aretiny[] = { istiny(args)... };
return aretiny[0];
};
return lambda(1, 1L, 1.0f, '1');
}
}
namespace test_binary_literals
{
constexpr auto ivii = 0b0000000000101010;
static_assert(ivii == 42, "wrong value");
}
namespace test_generalized_constexpr
{
template < typename CharT >
constexpr unsigned long
strlen_c(const CharT *const s) noexcept
{
auto length = 0UL;
for (auto p = s; *p; ++p)
++length;
return length;
}
static_assert(strlen_c("") == 0UL, "");
static_assert(strlen_c("x") == 1UL, "");
static_assert(strlen_c("test") == 4UL, "");
static_assert(strlen_c("another\0test") == 7UL, "");
}
namespace test_lambda_init_capture
{
int
test()
{
auto x = 0;
const auto lambda1 = [a = x](int b){ return a + b; };
const auto lambda2 = [a = lambda1(x)](){ return a; };
return lambda2();
}
}
namespace test_digit_separators
{
constexpr auto ten_million = 10'000'000;
static_assert(ten_million == 10000000, "wrong value");
}
namespace test_return_type_deduction
{
auto f(int& x) { return x; }
decltype(auto) g(int& x) { return x; }
template < typename T1, typename T2 >
struct is_same
{
static constexpr auto value = false;
};
template < typename T >
struct is_same<T, T>
{
static constexpr auto value = true;
};
int
test()
{
auto x = 0;
static_assert(is_same<int, decltype(f(x))>::value, "");
static_assert(is_same<int&, decltype(g(x))>::value, "");
return x;
}
}
} // namespace cxx14
#endif // __cplusplus >= 201402L
]])
dnl Tests for new features in C++17
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[
// If the compiler admits that it is not ready for C++17, why torture it?
// Hopefully, this will speed up the test.
#ifndef __cplusplus
#error "This is not a C++ compiler"
#elif __cplusplus <= 201402L
#error "This is not a C++17 compiler"
#else
#if defined(__clang__)
#define REALLY_CLANG
#else
#if defined(__GNUC__)
#define REALLY_GCC
#endif
#endif
#include <initializer_list>
#include <utility>
#include <type_traits>
namespace cxx17
{
#if !defined(REALLY_CLANG)
namespace test_constexpr_lambdas
{
// TODO: test it with clang++ from git
constexpr int foo = [](){return 42;}();
}
#endif // !defined(REALLY_CLANG)
namespace test::nested_namespace::definitions
{
}
namespace test_fold_expression
{
template<typename... Args>
int multiply(Args... args)
{
return (args * ... * 1);
}
template<typename... Args>
bool all(Args... args)
{
return (args && ...);
}
}
namespace test_extended_static_assert
{
static_assert (true);
}
namespace test_auto_brace_init_list
{
auto foo = {5};
auto bar {5};
static_assert(std::is_same<std::initializer_list<int>, decltype(foo)>::value);
static_assert(std::is_same<int, decltype(bar)>::value);
}
namespace test_typename_in_template_template_parameter
{
template<template<typename> typename X> struct D;
}
namespace test_fallthrough_nodiscard_maybe_unused_attributes
{
int f1()
{
return 42;
}
[[nodiscard]] int f2()
{
[[maybe_unused]] auto unused = f1();
switch (f1())
{
case 17:
f1();
[[fallthrough]];
case 42:
f1();
}
return f1();
}
}
namespace test_extended_aggregate_initialization
{
struct base1
{
int b1, b2 = 42;
};
struct base2
{
base2() {
b3 = 42;
}
int b3;
};
struct derived : base1, base2
{
int d;
};
derived d1 {{1, 2}, {}, 4}; // full initialization
derived d2 {{}, {}, 4}; // value-initialized bases
}
namespace test_general_range_based_for_loop
{
struct iter
{
int i;
int& operator* ()
{
return i;
}
const int& operator* () const
{
return i;
}
iter& operator++()
{
++i;
return *this;
}
};
struct sentinel
{
int i;
};
bool operator== (const iter& i, const sentinel& s)
{
return i.i == s.i;
}
bool operator!= (const iter& i, const sentinel& s)
{
return !(i == s);
}
struct range
{
iter begin() const
{
return {0};
}
sentinel end() const
{
return {5};
}
};
void f()
{
range r {};
for (auto i : r)
{
[[maybe_unused]] auto v = i;
}
}
}
namespace test_lambda_capture_asterisk_this_by_value
{
struct t
{
int i;
int foo()
{
return [*this]()
{
return i;
}();
}
};
}
namespace test_enum_class_construction
{
enum class byte : unsigned char
{};
byte foo {42};
}
namespace test_constexpr_if
{
template <bool cond>
int f ()
{
if constexpr(cond)
{
return 13;
}
else
{
return 42;
}
}
}
namespace test_selection_statement_with_initializer
{
int f()
{
return 13;
}
int f2()
{
if (auto i = f(); i > 0)
{
return 3;
}
switch (auto i = f(); i + 4)
{
case 17:
return 2;
default:
return 1;
}
}
}
#if !defined(REALLY_CLANG)
namespace test_template_argument_deduction_for_class_templates
{
// TODO: test it with clang++ from git
template <typename T1, typename T2>
struct pair
{
pair (T1 p1, T2 p2)
: m1 {p1},
m2 {p2}
{}
T1 m1;
T2 m2;
};
void f()
{
[[maybe_unused]] auto p = pair{13, 42u};
}
}
#endif // !defined(REALLY_CLANG)
namespace test_non_type_auto_template_parameters
{
template <auto n>
struct B
{};
B<5> b1;
B<'a'> b2;
}
#if !defined(REALLY_CLANG)
namespace test_structured_bindings
{
// TODO: test it with clang++ from git
int arr[2] = { 1, 2 };
std::pair<int, int> pr = { 1, 2 };
auto f1() -> int(&)[2]
{
return arr;
}
auto f2() -> std::pair<int, int>&
{
return pr;
}
struct S
{
int x1 : 2;
volatile double y1;
};
S f3()
{
return {};
}
auto [ x1, y1 ] = f1();
auto& [ xr1, yr1 ] = f1();
auto [ x2, y2 ] = f2();
auto& [ xr2, yr2 ] = f2();
const auto [ x3, y3 ] = f3();
}
#endif // !defined(REALLY_CLANG)
#if !defined(REALLY_CLANG)
namespace test_exception_spec_type_system
{
// TODO: test it with clang++ from git
struct Good {};
struct Bad {};
void g1() noexcept;
void g2();
template<typename T>
Bad
f(T*, T*);
template<typename T1, typename T2>
Good
f(T1*, T2*);
static_assert (std::is_same_v<Good, decltype(f(g1, g2))>);
}
#endif // !defined(REALLY_CLANG)
namespace test_inline_variables
{
template<class T> void f(T)
{}
template<class T> inline T g(T)
{
return T{};
}
template<> inline void f<>(int)
{}
template<> int g<>(int)
{
return 5;
}
}
} // namespace cxx17
#endif // __cplusplus <= 201402L
]])


@ -0,0 +1,132 @@
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_lapack.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_LAPACK([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
#
# DESCRIPTION
#
# This macro looks for a library that implements the LAPACK linear-algebra
# interface (see http://www.netlib.org/lapack/). On success, it sets the
# LAPACK_LIBS output variable to hold the requisite library linkages.
#
# To link with LAPACK, you should link with:
#
# $LAPACK_LIBS $BLAS_LIBS $LIBS $FLIBS
#
# in that order. BLAS_LIBS is the output variable of the AX_BLAS macro,
# called automatically. FLIBS is the output variable of the
# AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is
# sometimes necessary in order to link with F77 libraries. Users will also
# need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same
# reason.
#
# The user may also use --with-lapack=<lib> in order to use some specific
# LAPACK library <lib>. In order to link successfully, however, be aware
# that you will probably need to use the same Fortran compiler (which can
# be set via the F77 env. var.) as was used to compile the LAPACK and BLAS
# libraries.
#
# ACTION-IF-FOUND is a list of shell commands to run if a LAPACK library
# is found, and ACTION-IF-NOT-FOUND is a list of commands to run if it
# is not found. If ACTION-IF-FOUND is not specified, the default action
# will define HAVE_LAPACK.
#
# LICENSE
#
# Copyright (c) 2009 Steven G. Johnson <stevenj@alum.mit.edu>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 8
AU_ALIAS([ACX_LAPACK], [AX_LAPACK])
AC_DEFUN([AX_LAPACK], [
AC_REQUIRE([AX_BLAS])
ax_lapack_ok=no
AC_ARG_WITH(lapack,
[AS_HELP_STRING([--with-lapack=<lib>], [use LAPACK library <lib>])])
case $with_lapack in
yes | "") ;;
no) ax_lapack_ok=disable ;;
-* | */* | *.a | *.so | *.so.* | *.o) LAPACK_LIBS="$with_lapack" ;;
*) LAPACK_LIBS="-l$with_lapack" ;;
esac
# Get fortran linker name of LAPACK function to check for.
# AC_F77_FUNC(cheev)
cheev=cheev_
# We cannot use LAPACK if BLAS is not found
if test "x$ax_blas_ok" != xyes; then
ax_lapack_ok=noblas
LAPACK_LIBS=""
fi
# First, check LAPACK_LIBS environment variable
if test "x$LAPACK_LIBS" != x; then
save_LIBS="$LIBS"; LIBS="$LAPACK_LIBS $BLAS_LIBS $LIBS $FLIBS"
AC_MSG_CHECKING([for $cheev in $LAPACK_LIBS])
AC_TRY_LINK_FUNC($cheev, [ax_lapack_ok=yes], [LAPACK_LIBS=""])
AC_MSG_RESULT($ax_lapack_ok)
LIBS="$save_LIBS"
if test $ax_lapack_ok = no; then
LAPACK_LIBS=""
fi
fi
# LAPACK linked to by default? (is sometimes included in BLAS lib)
if test $ax_lapack_ok = no; then
save_LIBS="$LIBS"; LIBS="$LIBS $BLAS_LIBS $FLIBS"
AC_CHECK_FUNC($cheev, [ax_lapack_ok=yes])
LIBS="$save_LIBS"
fi
# Generic LAPACK library?
for lapack in lapack lapack_rs6k; do
if test $ax_lapack_ok = no; then
save_LIBS="$LIBS"; LIBS="$BLAS_LIBS $LIBS"
AC_CHECK_LIB($lapack, $cheev,
[ax_lapack_ok=yes; LAPACK_LIBS="-l$lapack"], [], [$FLIBS])
LIBS="$save_LIBS"
fi
done
AC_SUBST(LAPACK_LIBS)
# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
if test x"$ax_lapack_ok" = xyes; then
ifelse([$1],,AC_DEFINE(HAVE_LAPACK,1,[Define if you have LAPACK library.]),[$1])
:
else
ax_lapack_ok=no
$2
fi
])dnl AX_LAPACK


@ -0,0 +1,67 @@
AC_DEFUN([FA_CHECK_CUDA], [
AC_ARG_WITH(cuda,
[AS_HELP_STRING([--with-cuda=<prefix>], [prefix of the CUDA installation])])
AC_ARG_WITH(cuda-arch,
[AS_HELP_STRING([--with-cuda-arch=<gencodes>], [device specific -gencode flags])],
[],
[with_cuda_arch=default])
if test x$with_cuda != xno; then
if test x$with_cuda != x; then
cuda_prefix=$with_cuda
AC_CHECK_PROG(NVCC, [nvcc], [$cuda_prefix/bin/nvcc], [], [$cuda_prefix/bin])
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
else
AC_CHECK_PROGS(NVCC, [nvcc /usr/local/cuda/bin/nvcc], [])
if test "x$NVCC" == "x/usr/local/cuda/bin/nvcc"; then
cuda_prefix="/usr/local/cuda"
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
else
cuda_prefix=""
NVCC_CPPFLAGS=""
NVCC_LDFLAGS=""
fi
fi
if test "x$NVCC" == x; then
AC_MSG_ERROR([Couldn't find nvcc])
fi
if test "x$with_cuda_arch" == xdefault; then
with_cuda_arch="-gencode=arch=compute_35,code=compute_35 \\
-gencode=arch=compute_52,code=compute_52 \\
-gencode=arch=compute_60,code=compute_60 \\
-gencode=arch=compute_61,code=compute_61 \\
-gencode=arch=compute_70,code=compute_70 \\
-gencode=arch=compute_75,code=compute_75"
fi
fa_save_CPPFLAGS="$CPPFLAGS"
fa_save_LDFLAGS="$LDFLAGS"
fa_save_LIBS="$LIBS"
CPPFLAGS="$NVCC_CPPFLAGS $CPPFLAGS"
LDFLAGS="$NVCC_LDFLAGS $LDFLAGS"
AC_CHECK_HEADER([cuda.h], [], AC_MSG_FAILURE([Couldn't find cuda.h]))
AC_CHECK_LIB([cublas], [cublasAlloc], [], AC_MSG_FAILURE([Couldn't find libcublas]))
AC_CHECK_LIB([cudart], [cudaSetDevice], [], AC_MSG_FAILURE([Couldn't find libcudart]))
NVCC_LIBS="$LIBS"
NVCC_CPPFLAGS="$CPPFLAGS"
NVCC_LDFLAGS="$LDFLAGS"
CPPFLAGS="$fa_save_CPPFLAGS"
LDFLAGS="$fa_save_LDFLAGS"
LIBS="$fa_save_LIBS"
fi
AC_SUBST(NVCC)
AC_SUBST(NVCC_CPPFLAGS)
AC_SUBST(NVCC_LDFLAGS)
AC_SUBST(NVCC_LIBS)
AC_SUBST(CUDA_PREFIX, $cuda_prefix)
AC_SUBST(CUDA_ARCH, $with_cuda_arch)
])


@ -0,0 +1,20 @@
AC_DEFUN([FA_NUMPY], [
AC_REQUIRE([FA_PYTHON])
AC_MSG_CHECKING([for numpy headers path])
fa_numpy_headers=`$PYTHON -c "import numpy; print(numpy.get_include())"`
if test $? == 0; then
if test x$fa_numpy_headers != x; then
AC_MSG_RESULT($fa_numpy_headers)
AC_SUBST(NUMPY_INCLUDE, $fa_numpy_headers)
else
AC_MSG_RESULT([not found])
AC_MSG_WARN([You won't be able to build the python interface.])
fi
else
AC_MSG_RESULT([not found])
AC_MSG_WARN([You won't be able to build the python interface.])
fi
])dnl


@ -0,0 +1,16 @@
dnl
dnl Check for an nm(1) utility.
dnl
AC_DEFUN([FA_PROG_NM],
[
case "${NM-unset}" in
unset) AC_CHECK_PROGS(NM, nm, nm) ;;
*) AC_CHECK_PROGS(NM, $NM nm, nm) ;;
esac
AC_MSG_CHECKING(nm flags)
case "${NMFLAGS-unset}" in
unset) NMFLAGS= ;;
esac
AC_MSG_RESULT($NMFLAGS)
AC_SUBST(NMFLAGS)
])


@ -0,0 +1,11 @@
AC_DEFUN([FA_PROG_SWIG], [
AC_ARG_WITH(swig,
[AS_HELP_STRING([--with-swig=<bin>], [use SWIG binary <bin>])])
case $with_swig in
"") AC_CHECK_PROG(SWIG, swig, swig);;
*) SWIG="$with_swig"
esac
AC_SUBST(SWIG)
])


@ -0,0 +1,21 @@
AC_DEFUN([FA_PYTHON], [
AC_ARG_WITH(python,
[AS_HELP_STRING([--with-python=<bin>], [use Python binary <bin>])])
case $with_python in
"") PYTHON_BIN=python ;;
*) PYTHON_BIN="$with_python"
esac
AC_CHECK_PROG(PYTHON, $PYTHON_BIN, $PYTHON_BIN)
fa_python_bin=$PYTHON
AC_MSG_CHECKING([for Python C flags])
fa_python_cflags=`$PYTHON -c "
import sysconfig
paths = [['-I' + sysconfig.get_path(p) for p in ['include', 'platinclude']]]
print(' '.join(paths))"`
AC_MSG_RESULT($fa_python_cflags)
AC_SUBST(PYTHON_CFLAGS, "$PYTHON_CFLAGS $fa_python_cflags")
])dnl FA_PYTHON


@ -0,0 +1,338 @@
# Benchmarking scripts
This directory contains benchmarking scripts that can reproduce the
numbers reported in the two papers
```
@inproceedings{DJP16,
Author = {Douze, Matthijs and J{\'e}gou, Herv{\'e} and Perronnin, Florent},
Booktitle = "ECCV",
Organization = {Springer},
Title = {Polysemous codes},
Year = {2016}
}
```
and
```
@article{JDJ17,
Author = {Jeff Johnson and Matthijs Douze and Herv{\'e} J{\'e}gou},
Journal = {arXiv:1702.08734},
Title = {Billion-scale similarity search with GPUs},
Year = {2017},
}
```
Note that the numbers (especially timings) change slightly due to changes in the implementation, different machines, etc.
The scripts are self-contained. They depend only on Faiss and external training data that should be stored in sub-directories.
## SIFT1M experiments
The script [`bench_polysemous_sift1m.py`](bench_polysemous_sift1m.py) reproduces the numbers in
Figure 3 from the "Polysemous" paper.
### Getting SIFT1M
To run it, please download the ANN_SIFT1M dataset from
http://corpus-texmex.irisa.fr/
and unzip it to the subdirectory sift1M.
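The files use the TexMex `.fvecs`/`.ivecs` layout, where every vector is stored as an int32 dimension followed by its components. A minimal sketch of a loader (it mirrors the `fvecs_read` helper that appears in `datasets.py` later in this diff; the path is an assumption about where the archive was unzipped):
```
import numpy as np

def fvecs_read(fname):
    # .fvecs layout: each row is [int32 d | d float32 components]
    a = np.fromfile(fname, dtype='int32')
    d = a[0]
    return a.reshape(-1, d + 1)[:, 1:].copy().view('float32')

xb = fvecs_read("sift1M/sift_base.fvecs")  # assumed unzip location
```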
### Result
The output looks like:
```
PQ training on 100000 points, remains 0 points: training polysemous on centroids
add vectors to index
PQ baseline 7.517 ms per query, R@1 0.4474
Polysemous 64 9.875 ms per query, R@1 0.4474
Polysemous 62 8.358 ms per query, R@1 0.4474
Polysemous 58 5.531 ms per query, R@1 0.4474
Polysemous 54 3.420 ms per query, R@1 0.4478
Polysemous 50 2.182 ms per query, R@1 0.4475
Polysemous 46 1.621 ms per query, R@1 0.4408
Polysemous 42 1.448 ms per query, R@1 0.4174
Polysemous 38 1.331 ms per query, R@1 0.3563
Polysemous 34 1.334 ms per query, R@1 0.2661
Polysemous 30 1.272 ms per query, R@1 0.1794
```
## Experiments on 1B elements dataset
The script [`bench_polysemous_1bn.py`](bench_polysemous_1bn.py) reproduces a few experiments on
two datasets of size 1B from the "Polysemous codes" paper.
### Getting BIGANN
Download the four files of ANN_SIFT1B from
http://corpus-texmex.irisa.fr/ to subdirectory bigann/
### Getting Deep1B
The ground-truth and queries are available here
https://yadi.sk/d/11eDCm7Dsn9GA
For the learning and database vectors, use the script
https://github.com/arbabenko/GNOIMI/blob/master/downloadDeep1B.py
to download the data to subdirectory deep1b/, then concatenate the
database files to base.fvecs and the training files to learn.fvecs
### Running the experiments
These experiments are quite long. To support resuming, the script
stores the result of training to a temporary directory, `/tmp/bench_polysemous`.
The script `bench_polysemous_1bn.py` takes at least two arguments:
- the dataset name: SIFT1000M (aka SIFT1B, aka BIGANN) or Deep1B. SIFT1M, SIFT2M,... are also supported to make subsets for small experiments (note that SIFT1M as a subset of SIFT1B is not the same as the SIFT1M above)
- the type of index to build, which should be a valid [index_factory key](https://github.com/facebookresearch/faiss/wiki/High-level-interface-and-auto-tuning#index-factory) (see the sketch after this list, and the examples below)
- the remaining arguments are parsed as search-time parameters.
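As a minimal sketch of what such a factory key means in code (the dimensionality here is an assumption for SIFT data):
```
import faiss

d = 128  # SIFT vectors are 128-dimensional
# "IMI2x12,PQ16": inverted multi-index coarse quantizer (2x12 bits)
# with 16-byte PQ codes -- the same key used in the command below
index = faiss.index_factory(d, "IMI2x12,PQ16")
```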
### Experiments of Table 2
The `IMI*+PolyD+ADC` results in Table 2 can be reproduced with (for 16 bytes):
```
python bench_polysemous_1bn.py SIFT1000M IMI2x12,PQ16 nprobe=16,max_codes={10000,30000},ht={44..54}
```
Training takes about 2 minutes and adding vectors to the dataset
takes 3.1 h. These operations are multithreaded. Note that in the command
above, we use bash's [brace expansion](https://www.gnu.org/software/bash/manual/html_node/Brace-Expansion.html) to set a grid of parameters.
The search is *not* multithreaded, and the output looks like:
```
R@1 R@10 R@100 time %pass
nprobe=16,max_codes=10000,ht=44 0.1779 0.2994 0.3139 0.194 12.45
nprobe=16,max_codes=10000,ht=45 0.1859 0.3183 0.3339 0.197 14.24
nprobe=16,max_codes=10000,ht=46 0.1930 0.3366 0.3543 0.202 16.22
nprobe=16,max_codes=10000,ht=47 0.1993 0.3550 0.3745 0.209 18.39
nprobe=16,max_codes=10000,ht=48 0.2033 0.3694 0.3917 0.640 20.77
nprobe=16,max_codes=10000,ht=49 0.2070 0.3839 0.4077 0.229 23.36
nprobe=16,max_codes=10000,ht=50 0.2101 0.3949 0.4205 0.232 26.17
nprobe=16,max_codes=10000,ht=51 0.2120 0.4042 0.4310 0.239 29.21
nprobe=16,max_codes=10000,ht=52 0.2134 0.4113 0.4402 0.245 32.47
nprobe=16,max_codes=10000,ht=53 0.2157 0.4184 0.4482 0.250 35.96
nprobe=16,max_codes=10000,ht=54 0.2170 0.4240 0.4546 0.256 39.66
nprobe=16,max_codes=30000,ht=44 0.1882 0.3327 0.3555 0.226 11.29
nprobe=16,max_codes=30000,ht=45 0.1964 0.3525 0.3771 0.231 13.05
nprobe=16,max_codes=30000,ht=46 0.2039 0.3713 0.3987 0.236 15.01
nprobe=16,max_codes=30000,ht=47 0.2103 0.3907 0.4202 0.245 17.19
nprobe=16,max_codes=30000,ht=48 0.2145 0.4055 0.4384 0.251 19.60
nprobe=16,max_codes=30000,ht=49 0.2179 0.4198 0.4550 0.257 22.25
nprobe=16,max_codes=30000,ht=50 0.2208 0.4305 0.4681 0.268 25.15
nprobe=16,max_codes=30000,ht=51 0.2227 0.4402 0.4791 0.275 28.30
nprobe=16,max_codes=30000,ht=52 0.2241 0.4473 0.4884 0.284 31.70
nprobe=16,max_codes=30000,ht=53 0.2265 0.4544 0.4965 0.294 35.34
nprobe=16,max_codes=30000,ht=54 0.2278 0.4601 0.5031 0.303 39.20
```
The result reported in table 2 is the one for which the %pass (percentage of code comparisons that pass the Hamming check) is around 20%, which occurs for Hamming threshold `ht=48`.
The 8-byte results can be reproduced with the factory key `IMI2x12,PQ8`.
### Experiments of the appendix
The experiments in the appendix are only in the ArXiv version of the paper (table 3).
```
python bench_polysemous_1bn.py SIFT1000M OPQ8_64,IMI2x13,PQ8 nprobe={1,2,4,8,16,32,64,128},ht={20,24,26,28,30}
R@1 R@10 R@100 time %pass
nprobe=1,ht=20 0.0351 0.0616 0.0751 0.158 19.01
...
nprobe=32,ht=28 0.1256 0.3563 0.5026 0.561 52.61
...
```
Here again the runs are not exactly the same, but the original result was obtained with `nprobe=32,ht=28`.
For Deep1B, we used a simple version of [auto-tuning](https://github.com/facebookresearch/faiss/wiki/High-level-interface-and-auto-tuning/_edit#auto-tuning-the-runtime-parameters) to sweep through the set of operating points:
```
python bench_polysemous_1bn.py Deep1B OPQ20_80,IMI2x14,PQ20 autotune
...
Done in 4067.555 s, available OPs:
Parameters 1-R@1 time
0.0000 0.000
nprobe=1,ht=22,max_codes=256 0.0215 3.115
nprobe=1,ht=30,max_codes=256 0.0381 3.120
...
nprobe=512,ht=68,max_codes=524288 0.4478 36.903
nprobe=1024,ht=80,max_codes=131072 0.4557 46.363
nprobe=1024,ht=78,max_codes=262144 0.4616 61.939
...
```
The original results were obtained with `nprobe=1024,ht=66,max_codes=262144`.
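The auto-tuning loop is driven by Faiss's `ParameterSpace`; a minimal sketch of the calls involved (the same API is used by `bench_all_ivf.py` later in this diff; `index`, `xq` and `gt` are assumed to be defined):
```
import faiss

# `index` is a built index, `xq` the query matrix, `gt` the ground truth
ps = faiss.ParameterSpace()
ps.initialize(index)
crit = faiss.OneRecallAtRCriterion(xq.shape[0], 1)  # optimize 1-recall@1
crit.set_groundtruth(None, gt.astype('int64'))
op = ps.explore(index, xq, crit)   # sweep the operating points
op.display()
```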
## GPU experiments
The benchmarks below run on 1 or 4 Titan X GPUs and reproduce the results of the "GPU paper". They are also a good starting point on how to use GPU Faiss.
### Search on SIFT1M
See above on how to get SIFT1M into subdirectory sift1M/. The script [`bench_gpu_sift1m.py`](bench_gpu_sift1m.py) reproduces the "exact k-NN time" plot in the ArXiv paper, and the SIFT1M numbers.
The output is:
```
============ Exact search
add vectors to index
warmup
benchmark
k=1 0.715 s, R@1 0.9914
k=2 0.729 s, R@1 0.9935
k=4 0.731 s, R@1 0.9935
k=8 0.732 s, R@1 0.9935
k=16 0.742 s, R@1 0.9935
k=32 0.737 s, R@1 0.9935
k=64 0.753 s, R@1 0.9935
k=128 0.761 s, R@1 0.9935
k=256 0.799 s, R@1 0.9935
k=512 0.975 s, R@1 0.9935
k=1024 1.424 s, R@1 0.9935
============ Approximate search
train
WARNING clustering 100000 points to 4096 centroids: please provide at least 159744 training points
add vectors to index
WARN: increase temp memory to avoid cudaMalloc, or decrease query/add size (alloc 256000000 B, highwater 256000000 B)
warmup
benchmark
nprobe= 1 0.043 s recalls= 0.3909 0.4312 0.4312
nprobe= 2 0.040 s recalls= 0.5041 0.5636 0.5636
nprobe= 4 0.048 s recalls= 0.6048 0.6897 0.6897
nprobe= 8 0.064 s recalls= 0.6879 0.8028 0.8028
nprobe= 16 0.088 s recalls= 0.7534 0.8940 0.8940
nprobe= 32 0.134 s recalls= 0.7957 0.9549 0.9550
nprobe= 64 0.224 s recalls= 0.8125 0.9833 0.9834
nprobe= 128 0.395 s recalls= 0.8205 0.9953 0.9954
nprobe= 256 0.717 s recalls= 0.8227 0.9993 0.9994
nprobe= 512 1.348 s recalls= 0.8228 0.9999 1.0000
```
The run produces two warnings:
- the clustering complains that it does not have enough training data; there is not much we can do about this.
- the add() function complains about an inefficient memory allocation, but this is a concern only when it happens often, and we are not benchmarking the add time anyway.
To index small datasets, it is more efficient to use a `GpuIVFFlat`, which just stores the full vectors in the inverted lists. We did not mention this in the paper because it is not as scalable. To experiment with this setting, change the `index_factory` string from "IVF4096,PQ64" to "IVF16384,Flat". This gives:
```
nprobe= 1 0.025 s recalls= 0.4084 0.4105 0.4105
nprobe= 2 0.033 s recalls= 0.5235 0.5264 0.5264
nprobe= 4 0.033 s recalls= 0.6332 0.6367 0.6367
nprobe= 8 0.040 s recalls= 0.7358 0.7403 0.7403
nprobe= 16 0.049 s recalls= 0.8273 0.8324 0.8324
nprobe= 32 0.068 s recalls= 0.8957 0.9024 0.9024
nprobe= 64 0.104 s recalls= 0.9477 0.9549 0.9549
nprobe= 128 0.174 s recalls= 0.9760 0.9837 0.9837
nprobe= 256 0.299 s recalls= 0.9866 0.9944 0.9944
nprobe= 512 0.527 s recalls= 0.9907 0.9987 0.9987
```
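A minimal sketch of building that flat-IVF index on GPU (assuming a GPU build of Faiss; training and adding data are elided):
```
import faiss

d = 128  # SIFT1M dimensionality
cpu_index = faiss.index_factory(d, "IVF16384,Flat")
# clone to all visible GPUs; train() and add() then run on the GPU(s)
gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
```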
### Clustering on MNIST8m
To get the "infinite MNIST dataset", follow the instructions on [Léon Bottou's website](http://leon.bottou.org/projects/infimnist). The script assumes the file `mnist8m-patterns-idx3-ubyte` is in subdirectory `mnist8m`.
The script [`kmeans_mnist.py`](kmeans_mnist.py) produces the following output:
```
python kmeans_mnist.py 1 256
...
Clustering 8100000 points in 784D to 256 clusters, redo 1 times, 20 iterations
Preprocessing in 7.94526 s
Iteration 19 (131.697 s, search 114.78 s): objective=1.44881e+13 imbalance=1.05963 nsplit=0
final objective: 1.449e+13
total runtime: 140.615 s
```
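The clustering itself can also be run through the `faiss.Kmeans` convenience wrapper; a toy sketch with random data standing in for MNIST8m:
```
import faiss
import numpy as np

x = np.random.rand(10000, 784).astype('float32')  # stand-in for mnist8m
kmeans = faiss.Kmeans(784, 256, niter=20, verbose=True)
kmeans.train(x)
print(kmeans.obj[-1])  # final clustering objective
```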
### Search on SIFT1B
The script [`bench_gpu_1bn.py`](bench_gpu_1bn.py) runs multi-gpu searches on the two 1-billion vector datasets we considered. It is more complex than the previous scripts, because it supports many search options and decomposes the dataset build process in Python to exploit the best possible CPU/GPU parallelism and GPU distribution.
Even on multiple GPUs, building the 1B datasets can last several hours. It is often a good idea to validate that everything is working fine on smaller datasets like SIFT1M, SIFT2M, etc.
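The core of the GPU distribution is sharding the database across devices; a minimal sketch (assuming a GPU build of Faiss and an already-trained `cpu_index`):
```
import faiss

co = faiss.GpuMultipleClonerOptions()
co.shard = True  # split the database across GPUs instead of replicating it
gpu_index = faiss.index_cpu_to_all_gpus(cpu_index, co)
```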
The search results on SIFT1B in the "GPU paper" can be obtained with
<!-- see P57124181 -->
```
python bench_gpu_1bn.py SIFT1000M OPQ8_32,IVF262144,PQ8 -nnn 10 -ngpu 1 -tempmem $[1536*1024*1024]
...
0/10000 (0.024 s) probe=1 : 0.161 s 1-R@1: 0.0752 1-R@10: 0.1924
0/10000 (0.005 s) probe=2 : 0.150 s 1-R@1: 0.0964 1-R@10: 0.2693
0/10000 (0.005 s) probe=4 : 0.153 s 1-R@1: 0.1102 1-R@10: 0.3328
0/10000 (0.005 s) probe=8 : 0.170 s 1-R@1: 0.1220 1-R@10: 0.3827
0/10000 (0.005 s) probe=16 : 0.196 s 1-R@1: 0.1290 1-R@10: 0.4151
0/10000 (0.006 s) probe=32 : 0.244 s 1-R@1: 0.1314 1-R@10: 0.4345
0/10000 (0.006 s) probe=64 : 0.353 s 1-R@1: 0.1332 1-R@10: 0.4461
0/10000 (0.005 s) probe=128: 0.587 s 1-R@1: 0.1341 1-R@10: 0.4502
0/10000 (0.006 s) probe=256: 1.160 s 1-R@1: 0.1342 1-R@10: 0.4511
```
We use the `-tempmem` option to reduce the temporary memory allocation to 1.5G; otherwise the dataset does not fit in GPU memory.
### Search on Deep1B
The same script generates the GPU search results on Deep1B.
```
python bench_gpu_1bn.py Deep1B OPQ20_80,IVF262144,PQ20 -nnn 10 -R 2 -ngpu 4 -altadd -noptables -tempmem $[1024*1024*1024]
...
0/10000 (0.115 s) probe=1 : 0.239 s 1-R@1: 0.2387 1-R@10: 0.3420
0/10000 (0.006 s) probe=2 : 0.103 s 1-R@1: 0.3110 1-R@10: 0.4623
0/10000 (0.005 s) probe=4 : 0.105 s 1-R@1: 0.3772 1-R@10: 0.5862
0/10000 (0.005 s) probe=8 : 0.116 s 1-R@1: 0.4235 1-R@10: 0.6889
0/10000 (0.005 s) probe=16 : 0.133 s 1-R@1: 0.4517 1-R@10: 0.7693
0/10000 (0.005 s) probe=32 : 0.168 s 1-R@1: 0.4713 1-R@10: 0.8281
0/10000 (0.005 s) probe=64 : 0.238 s 1-R@1: 0.4841 1-R@10: 0.8649
0/10000 (0.007 s) probe=128: 0.384 s 1-R@1: 0.4900 1-R@10: 0.8816
0/10000 (0.005 s) probe=256: 0.736 s 1-R@1: 0.4933 1-R@10: 0.8912
```
Here we are a bit tight on memory, so we disable precomputed tables (`-noptables`) and restrict the amount of temporary memory. The `-altadd` option avoids GPU memory overflows during add.
### kNN-graph on Deep1B
The same script generates the kNN-graph on Deep1B. Note that the inverted file from above will not be re-used because the training sets are different. For the kNN-graph, the script first does a pass over the whole dataset to compute the ground-truth kNN for a subset of 10k nodes, for evaluation.
```
python bench_gpu_1bn.py Deep1B OPQ20_80,IVF262144,PQ20 -nnn 10 -altadd -knngraph -R 2 -noptables -tempmem $[1<<30] -ngpu 4
...
CPU index contains 1000000000 vectors, move to GPU
Copy CPU index to 2 sharded GPU indexes
dispatch to GPUs 0:2
IndexShards shard 0 indices 0:500000000
IndexIVFPQ size 500000000 -> GpuIndexIVFPQ indicesOptions=0 usePrecomputed=0 useFloat16=0 reserveVecs=0
IndexShards shard 1 indices 500000000:1000000000
IndexIVFPQ size 500000000 -> GpuIndexIVFPQ indicesOptions=0 usePrecomputed=0 useFloat16=0 reserveVecs=0
dispatch to GPUs 2:4
IndexShards shard 0 indices 0:500000000
IndexIVFPQ size 500000000 -> GpuIndexIVFPQ indicesOptions=0 usePrecomputed=0 useFloat16=0 reserveVecs=0
IndexShards shard 1 indices 500000000:1000000000
IndexIVFPQ size 500000000 -> GpuIndexIVFPQ indicesOptions=0 usePrecomputed=0 useFloat16=0 reserveVecs=0
move to GPU done in 151.535 s
search...
999997440/1000000000 (8389.961 s, 0.3379) probe=1 : 8389.990 s rank-10 intersection results: 0.3379
999997440/1000000000 (9205.934 s, 0.4079) probe=2 : 9205.966 s rank-10 intersection results: 0.4079
999997440/1000000000 (9741.095 s, 0.4722) probe=4 : 9741.128 s rank-10 intersection results: 0.4722
999997440/1000000000 (10830.420 s, 0.5256) probe=8 : 10830.455 s rank-10 intersection results: 0.5256
999997440/1000000000 (12531.716 s, 0.5603) probe=16 : 12531.758 s rank-10 intersection results: 0.5603
999997440/1000000000 (15922.519 s, 0.5825) probe=32 : 15922.571 s rank-10 intersection results: 0.5825
999997440/1000000000 (22774.153 s, 0.5950) probe=64 : 22774.220 s rank-10 intersection results: 0.5950
999997440/1000000000 (36717.207 s, 0.6015) probe=128: 36717.309 s rank-10 intersection results: 0.6015
999997440/1000000000 (70616.392 s, 0.6047) probe=256: 70616.581 s rank-10 intersection results: 0.6047
```
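Conceptually, a kNN-graph is just a search of the database against itself; a toy brute-force sketch (the benchmark uses an IVFPQ index instead of `IndexFlatL2` to make this tractable at 1B scale):
```
import faiss
import numpy as np

xb = np.random.rand(1000, 96).astype('float32')  # Deep1B vectors are 96-d
index = faiss.IndexFlatL2(xb.shape[1])
index.add(xb)
D, I = index.search(xb, 11)  # 11 = self-match + 10 neighbours per point
knn_graph = I[:, 1:]         # drop the self-match in column 0
```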


@ -0,0 +1,20 @@
# Benchmark of IVF variants
This is a benchmark of IVF index variants, looking at compression vs. speed vs. accuracy.
The results are in [this wiki chapter](https://github.com/facebookresearch/faiss/wiki/Indexing-1G-vectors)
The code is organized as:
- `datasets.py`: code to access the datafiles, compute the ground-truth and report accuracies
- `bench_all_ivf.py`: evaluate one type of inverted file
- `run_on_cluster_generic.bash`: call `bench_all_ivf.py` for all tested types of indices.
Since the number of experiments is quite large, the script is structured so that the benchmark can be run on a cluster.
- `parse_bench_all_ivf.py`: make nice tradeoff plots from all the results.
The code depends on Faiss and can use 1 to 8 GPUs to do the k-means clustering for large vocabularies.
It was run in October 2018 for the results in the wiki.


@ -0,0 +1,308 @@
#!/usr/bin/env python2
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import sys
import time
import numpy as np
import faiss
import argparse
import datasets
from datasets import sanitize
######################################################
# Command-line parsing
######################################################
parser = argparse.ArgumentParser()
def aa(*args, **kwargs):
group.add_argument(*args, **kwargs)
group = parser.add_argument_group('dataset options')
aa('--db', default='deep1M', help='dataset')
aa('--compute_gt', default=False, action='store_true',
help='compute and store the groundtruth')
group = parser.add_argument_group('index construction')
aa('--indexkey', default='HNSW32', help='index_factory type')
aa('--efConstruction', default=200, type=int,
help='HNSW construction factor')
aa('--M0', default=-1, type=int, help='size of base level')
aa('--maxtrain', default=256 * 256, type=int,
help='maximum number of training points (0 to set automatically)')
aa('--indexfile', default='', help='file to read or write index from')
aa('--add_bs', default=-1, type=int,
help='add elements index by batches of this size')
aa('--no_precomputed_tables', action='store_true', default=False,
help='disable precomputed tables (uses less memory)')
aa('--clustering_niter', default=-1, type=int,
help='number of clustering iterations (-1 = leave default)')
aa('--train_on_gpu', default=False, action='store_true',
help='do training on GPU')
aa('--get_centroids_from', default='',
help='get the centroids from this index (to speed up training)')
group = parser.add_argument_group('searching')
aa('--k', default=100, type=int, help='nb of nearest neighbors')
aa('--searchthreads', default=-1, type=int,
help='nb of threads to use at search time')
aa('--searchparams', nargs='+', default=['autotune'],
help="search parameters to use (can be autotune or a list of params)")
aa('--n_autotune', default=500, type=int,
help="max nb of autotune experiments")
aa('--autotune_max', default=[], nargs='*',
help='set max value for autotune variables format "var:val" (exclusive)')
aa('--autotune_range', default=[], nargs='*',
help='set complete autotune range, format "var:val1,val2,..."')
aa('--min_test_duration', default=0, type=float,
help='run test at least for so long to avoid jitter')
args = parser.parse_args()
print("args:", args)
os.system('echo -n "nb processors "; '
'cat /proc/cpuinfo | grep ^processor | wc -l; '
'cat /proc/cpuinfo | grep ^"model name" | tail -1')
######################################################
# Load dataset
######################################################
xt, xb, xq, gt = datasets.load_data(
dataset=args.db, compute_gt=args.compute_gt)
print("dataset sizes: train %s base %s query %s GT %s" % (
xt.shape, xb.shape, xq.shape, gt.shape))
nq, d = xq.shape
nb, d = xb.shape
######################################################
# Make index
######################################################
if args.indexfile and os.path.exists(args.indexfile):
print("reading", args.indexfile)
index = faiss.read_index(args.indexfile)
    if isinstance(index, faiss.IndexPreTransform):
        index_ivf = faiss.downcast_index(index.index)
        vec_transform = index.chain.at(0).apply_py
    else:
        index_ivf = index
        vec_transform = lambda x: x
    assert isinstance(index_ivf, faiss.IndexIVF)
else:
print("build index, key=", args.indexkey)
index = faiss.index_factory(d, args.indexkey)
if isinstance(index, faiss.IndexPreTransform):
index_ivf = faiss.downcast_index(index.index)
vec_transform = index.chain.at(0).apply_py
else:
index_ivf = index
        vec_transform = lambda x: x
assert isinstance(index_ivf, faiss.IndexIVF)
index_ivf.verbose = True
index_ivf.quantizer.verbose = True
index_ivf.cp.verbose = True
maxtrain = args.maxtrain
if maxtrain == 0:
if 'IMI' in args.indexkey:
maxtrain = int(256 * 2 ** (np.log2(index_ivf.nlist) / 2))
else:
maxtrain = 50 * index_ivf.nlist
print("setting maxtrain to %d" % maxtrain)
args.maxtrain = maxtrain
xt2 = sanitize(xt[:args.maxtrain])
assert np.all(np.isfinite(xt2))
print("train, size", xt2.shape)
if args.get_centroids_from == '':
if args.clustering_niter >= 0:
print(("setting nb of clustering iterations to %d" %
args.clustering_niter))
index_ivf.cp.niter = args.clustering_niter
if args.train_on_gpu:
print("add a training index on GPU")
train_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
index_ivf.clustering_index = train_index
else:
print("Getting centroids from", args.get_centroids_from)
src_index = faiss.read_index(args.get_centroids_from)
src_quant = faiss.downcast_index(src_index.quantizer)
centroids = faiss.vector_to_array(src_quant.xb)
centroids = centroids.reshape(-1, d)
print(" centroid table shape", centroids.shape)
if isinstance(index, faiss.IndexPreTransform):
print(" training vector transform")
assert index.chain.size() == 1
vt = index.chain.at(0)
vt.train(xt2)
print(" transform centroids")
centroids = vt.apply_py(centroids)
print(" add centroids to quantizer")
index_ivf.quantizer.add(centroids)
del src_index
t0 = time.time()
index.train(xt2)
print(" train in %.3f s" % (time.time() - t0))
print("adding")
t0 = time.time()
if args.add_bs == -1:
index.add(sanitize(xb))
else:
for i0 in range(0, nb, args.add_bs):
i1 = min(nb, i0 + args.add_bs)
print(" adding %d:%d / %d" % (i0, i1, nb))
index.add(sanitize(xb[i0:i1]))
print(" add in %.3f s" % (time.time() - t0))
if args.indexfile:
print("storing", args.indexfile)
faiss.write_index(index, args.indexfile)
if args.no_precomputed_tables:
if isinstance(index_ivf, faiss.IndexIVFPQ):
print("disabling precomputed table")
index_ivf.use_precomputed_table = -1
index_ivf.precomputed_table.clear()
if args.indexfile:
print("index size on disk: ", os.stat(args.indexfile).st_size)
print("current RSS:", faiss.get_mem_usage_kb() * 1024)
precomputed_table_size = 0
if hasattr(index_ivf, 'precomputed_table'):
precomputed_table_size = index_ivf.precomputed_table.size() * 4
print("precomputed tables size:", precomputed_table_size)
#############################################################
# Index is ready
#############################################################
xq = sanitize(xq)
if args.searchthreads != -1:
print("Setting nb of threads to", args.searchthreads)
faiss.omp_set_num_threads(args.searchthreads)
ps = faiss.ParameterSpace()
ps.initialize(index)
parametersets = args.searchparams
header = '%-40s R@1 R@10 R@100 time(ms/q) nb distances #runs' % "parameters"
def eval_setting(index, xq, gt, min_time):
nq = xq.shape[0]
ivf_stats = faiss.cvar.indexIVF_stats
ivf_stats.reset()
nrun = 0
t0 = time.time()
while True:
D, I = index.search(xq, 100)
nrun += 1
t1 = time.time()
if t1 - t0 > min_time:
break
ms_per_query = ((t1 - t0) * 1000.0 / nq / nrun)
for rank in 1, 10, 100:
n_ok = (I[:, :rank] == gt[:, :1]).sum()
print("%.4f" % (n_ok / float(nq)), end=' ')
print(" %8.3f " % ms_per_query, end=' ')
print("%12d " % (ivf_stats.ndis / nrun), end=' ')
print(nrun)
if parametersets == ['autotune']:
ps.n_experiments = args.n_autotune
ps.min_test_duration = args.min_test_duration
for kv in args.autotune_max:
k, vmax = kv.split(':')
vmax = float(vmax)
print("limiting %s to %g" % (k, vmax))
pr = ps.add_range(k)
values = faiss.vector_to_array(pr.values)
values = np.array([v for v in values if v < vmax])
faiss.copy_array_to_vector(values, pr.values)
for kv in args.autotune_range:
k, vals = kv.split(':')
vals = np.fromstring(vals, sep=',')
print("setting %s to %s" % (k, vals))
pr = ps.add_range(k)
faiss.copy_array_to_vector(vals, pr.values)
# setup the Criterion object: optimize for 1-R@1
crit = faiss.OneRecallAtRCriterion(nq, 1)
# by default, the criterion will request only 1 NN
crit.nnn = 100
crit.set_groundtruth(None, gt.astype('int64'))
# then we let Faiss find the optimal parameters by itself
print("exploring operating points")
ps.display()
t0 = time.time()
op = ps.explore(index, xq, crit)
print("Done in %.3f s, available OPs:" % (time.time() - t0))
op.display()
print(header)
opv = op.optimal_pts
for i in range(opv.size()):
opt = opv.at(i)
ps.set_index_parameters(index, opt.key)
print("%-40s " % opt.key, end=' ')
sys.stdout.flush()
eval_setting(index, xq, gt, args.min_test_duration)
else:
print(header)
for param in parametersets:
print("%-40s " % param, end=' ')
sys.stdout.flush()
ps.set_index_parameters(index, param)
eval_setting(index, xq, gt, args.min_test_duration)


@ -0,0 +1,118 @@
#!/usr/bin/env python2
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import print_function
import os
import numpy as np
import faiss
import argparse
import datasets
from datasets import sanitize
######################################################
# Command-line parsing
######################################################
parser = argparse.ArgumentParser()
def aa(*args, **kwargs):
group.add_argument(*args, **kwargs)
group = parser.add_argument_group('dataset options')
aa('--db', default='deep1M', help='dataset')
aa('--nt', default=65536, type=int)
aa('--nb', default=100000, type=int)
aa('--nt_sample', default=0, type=int)
group = parser.add_argument_group('kmeans options')
aa('--k', default=256, type=int)
aa('--seed', default=12345, type=int)
aa('--pcadim', default=-1, type=int, help='PCA to this dimension')
aa('--niter', default=25, type=int)
aa('--eval_freq', default=100, type=int)
args = parser.parse_args()
print("args:", args)
os.system('echo -n "nb processors "; '
'cat /proc/cpuinfo | grep ^processor | wc -l; '
'cat /proc/cpuinfo | grep ^"model name" | tail -1')
ngpu = faiss.get_num_gpus()
print("nb GPUs:", ngpu)
######################################################
# Load dataset
######################################################
xt, xb, xq, gt = datasets.load_data(dataset=args.db)
if args.nt_sample == 0:
xt_pca = xt[args.nt:args.nt + 10000]
xt = xt[:args.nt]
else:
xt_pca = xt[args.nt_sample:args.nt_sample + 10000]
rs = np.random.RandomState(args.seed)
idx = rs.choice(args.nt_sample, size=args.nt, replace=False)
xt = xt[idx]
xb = xb[:args.nb]
d = xb.shape[1]
if args.pcadim != -1:
print("training PCA: %d -> %d" % (d, args.pcadim))
pca = faiss.PCAMatrix(d, args.pcadim)
pca.train(sanitize(xt_pca))
xt = pca.apply_py(sanitize(xt))
xb = pca.apply_py(sanitize(xb))
d = xb.shape[1]
######################################################
# Run clustering
######################################################
index = faiss.IndexFlatL2(d)
if ngpu > 0:
print("moving index to GPU")
index = faiss.index_cpu_to_all_gpus(index)
clustering = faiss.Clustering(d, args.k)
clustering.verbose = True
clustering.seed = args.seed
clustering.max_points_per_centroid = 10**6
clustering.min_points_per_centroid = 1
for iter0 in range(0, args.niter, args.eval_freq):
iter1 = min(args.niter, iter0 + args.eval_freq)
clustering.niter = iter1 - iter0
if iter0 > 0:
faiss.copy_array_to_vector(centroids.ravel(), clustering.centroids)
clustering.train(sanitize(xt), index)
index.reset()
centroids = faiss.vector_to_array(clustering.centroids).reshape(args.k, d)
index.add(centroids)
_, I = index.search(sanitize(xb), 1)
error = ((xb - centroids[I.ravel()]) ** 2).sum()
print("iter1=%d quantization error on test: %.4f" % (iter1, error))


@ -0,0 +1,234 @@
#!/usr/bin/env python2
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Common functions to load datasets and compute their ground-truth
"""
from __future__ import print_function
import time
import numpy as np
import faiss
import sys
# set this to the directory that contains the datafiles.
# deep1b data should be at simdir + 'deep1b'
# bigann data should be at simdir + 'bigann'
simdir = '/mnt/vol/gfsai-east/ai-group/datasets/simsearch/'
#################################################################
# Small I/O functions
#################################################################
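# The TexMex .ivecs/.fvecs formats store each vector as an int32
# dimension d followed by d int32/float32 components (hence the d + 1
# stride below); .bvecs stores an int32 d followed by d uint8 values
# (hence the d + 4 byte stride in bvecs_mmap).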
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
def ivecs_mmap(fname):
a = np.memmap(fname, dtype='int32', mode='r')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:]
def fvecs_mmap(fname):
return ivecs_mmap(fname).view('float32')
def bvecs_mmap(fname):
x = np.memmap(fname, dtype='uint8', mode='r')
d = x[:4].view('int32')[0]
return x.reshape(-1, d + 4)[:, 4:]
def ivecs_write(fname, m):
n, d = m.shape
m1 = np.empty((n, d + 1), dtype='int32')
m1[:, 0] = d
m1[:, 1:] = m
m1.tofile(fname)
def fvecs_write(fname, m):
m = m.astype('float32')
ivecs_write(fname, m.view('int32'))
#################################################################
# Dataset
#################################################################
def sanitize(x):
return np.ascontiguousarray(x, dtype='float32')
class ResultHeap:
""" Combine query results from a sliced dataset """
def __init__(self, nq, k):
" nq: number of query vectors, k: number of results per query "
self.I = np.zeros((nq, k), dtype='int64')
self.D = np.zeros((nq, k), dtype='float32')
self.nq, self.k = nq, k
heaps = faiss.float_maxheap_array_t()
heaps.k = k
heaps.nh = nq
heaps.val = faiss.swig_ptr(self.D)
heaps.ids = faiss.swig_ptr(self.I)
heaps.heapify()
self.heaps = heaps
def add_batch_result(self, D, I, i0):
assert D.shape == (self.nq, self.k)
assert I.shape == (self.nq, self.k)
I += i0
self.heaps.addn_with_ids(
self.k, faiss.swig_ptr(D),
faiss.swig_ptr(I), self.k)
def finalize(self):
self.heaps.reorder()
def compute_GT_sliced(xb, xq, k):
print("compute GT")
t0 = time.time()
nb, d = xb.shape
nq, d = xq.shape
rh = ResultHeap(nq, k)
bs = 10 ** 5
xqs = sanitize(xq)
db_gt = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
# compute ground-truth by blocks of bs, and add to heaps
for i0 in range(0, nb, bs):
i1 = min(nb, i0 + bs)
xsl = sanitize(xb[i0:i1])
db_gt.add(xsl)
D, I = db_gt.search(xqs, k)
rh.add_batch_result(D, I, i0)
db_gt.reset()
print("\r %d/%d, %.3f s" % (i0, nb, time.time() - t0), end=' ')
sys.stdout.flush()
print()
rh.finalize()
gt_I = rh.I
print("GT time: %.3f s" % (time.time() - t0))
return gt_I
def do_compute_gt(xb, xq, k):
print("computing GT")
nb, d = xb.shape
index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
if nb < 100 * 1000:
print(" add")
index.add(np.ascontiguousarray(xb, dtype='float32'))
print(" search")
D, I = index.search(np.ascontiguousarray(xq, dtype='float32'), k)
else:
I = compute_GT_sliced(xb, xq, k)
return I.astype('int32')
def load_data(dataset='deep1M', compute_gt=False):
print("load data", dataset)
if dataset == 'sift1M':
basedir = simdir + 'sift1M/'
xt = fvecs_read(basedir + "sift_learn.fvecs")
xb = fvecs_read(basedir + "sift_base.fvecs")
xq = fvecs_read(basedir + "sift_query.fvecs")
gt = ivecs_read(basedir + "sift_groundtruth.ivecs")
elif dataset.startswith('bigann'):
basedir = simdir + 'bigann/'
dbsize = 1000 if dataset == "bigann1B" else int(dataset[6:-1])
xb = bvecs_mmap(basedir + 'bigann_base.bvecs')
xq = bvecs_mmap(basedir + 'bigann_query.bvecs')
xt = bvecs_mmap(basedir + 'bigann_learn.bvecs')
# trim xb to correct size
xb = xb[:dbsize * 1000 * 1000]
gt = ivecs_read(basedir + 'gnd/idx_%dM.ivecs' % dbsize)
elif dataset.startswith("deep"):
basedir = simdir + 'deep1b/'
szsuf = dataset[4:]
if szsuf[-1] == 'M':
dbsize = 10 ** 6 * int(szsuf[:-1])
elif szsuf == '1B':
dbsize = 10 ** 9
elif szsuf[-1] == 'k':
dbsize = 1000 * int(szsuf[:-1])
else:
assert False, "did not recognize suffix " + szsuf
xt = fvecs_mmap(basedir + "learn.fvecs")
xb = fvecs_mmap(basedir + "base.fvecs")
xq = fvecs_read(basedir + "deep1B_queries.fvecs")
xb = xb[:dbsize]
gt_fname = basedir + "%s_groundtruth.ivecs" % dataset
if compute_gt:
gt = do_compute_gt(xb, xq, 100)
print("store", gt_fname)
ivecs_write(gt_fname, gt)
gt = ivecs_read(gt_fname)
else:
assert False
print("dataset %s sizes: B %s Q %s T %s" % (
dataset, xb.shape, xq.shape, xt.shape))
return xt, xb, xq, gt
#################################################################
# Evaluation
#################################################################
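# R@rank below is the fraction of queries whose true nearest neighbor
# (gt[:, 0]) appears among the first `rank` results returned.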
def evaluate_DI(D, I, gt):
nq = gt.shape[0]
k = I.shape[1]
rank = 1
while rank <= k:
recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
print("R@%d: %.4f" % (rank, recall), end=' ')
rank *= 10
def evaluate(xq, gt, index, k=100, endl=True):
t0 = time.time()
D, I = index.search(xq, k)
t1 = time.time()
nq = xq.shape[0]
print("\t %8.4f ms per query, " % (
(t1 - t0) * 1000.0 / nq), end=' ')
rank = 1
while rank <= k:
recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
print("R@%d: %.4f" % (rank, recall), end=' ')
rank *= 10
if endl:
print()
return D, I


@ -0,0 +1,268 @@
#!/usr/bin/env python2
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import numpy as np
from matplotlib import pyplot
import re
from argparse import Namespace
# the directory used in run_on_cluster.bash
basedir = '/mnt/vol/gfsai-east/ai-group/users/matthijs/bench_all_ivf/'
logdir = basedir + 'logs/'
# which plot to output
db = 'bigann1B'
code_size = 8
def unitsize(indexkey):
""" size of one vector in the index """
mo = re.match('.*,PQ(\\d+)', indexkey)
if mo:
return int(mo.group(1))
if indexkey.endswith('SQ8'):
bits_per_d = 8
elif indexkey.endswith('SQ4'):
bits_per_d = 4
elif indexkey.endswith('SQfp16'):
bits_per_d = 16
else:
assert False
mo = re.match('PCAR(\\d+),.*', indexkey)
if mo:
return bits_per_d * int(mo.group(1)) / 8
mo = re.match('OPQ\\d+_(\\d+),.*', indexkey)
if mo:
return bits_per_d * int(mo.group(1)) / 8
mo = re.match('RR(\\d+),.*', indexkey)
if mo:
return bits_per_d * int(mo.group(1)) / 8
assert False
def dbsize_from_name(dbname):
sufs = {
'1B': 10**9,
'100M': 10**8,
'10M': 10**7,
'1M': 10**6,
}
for s in sufs:
if dbname.endswith(s):
return sufs[s]
else:
assert False
def keep_latest_stdout(fnames):
fnames = [fname for fname in fnames if fname.endswith('.stdout')]
fnames.sort()
n = len(fnames)
fnames2 = []
for i, fname in enumerate(fnames):
if i + 1 < n and fnames[i + 1][:-8] == fname[:-8]:
continue
fnames2.append(fname)
return fnames2
def parse_result_file(fname):
# print fname
st = 0
res = []
keys = []
stats = {}
stats['run_version'] = fname[-8]
for l in open(fname):
if st == 0:
if l.startswith('CHRONOS_JOB_INSTANCE_ID'):
stats['CHRONOS_JOB_INSTANCE_ID'] = l.split()[-1]
if l.startswith('index size on disk:'):
stats['index_size'] = int(l.split()[-1])
if l.startswith('current RSS:'):
stats['RSS'] = int(l.split()[-1])
if l.startswith('precomputed tables size:'):
stats['tables_size'] = int(l.split()[-1])
if l.startswith('Setting nb of threads to'):
stats['n_threads'] = int(l.split()[-1])
if l.startswith(' add in'):
stats['add_time'] = float(l.split()[-2])
if l.startswith('args:'):
args = eval(l[l.find(' '):])
indexkey = args.indexkey
elif 'R@1 R@10 R@100' in l:
st = 1
elif 'index size on disk:' in l:
index_size = int(l.split()[-1])
elif st == 1:
st = 2
elif st == 2:
fi = l.split()
keys.append(fi[0])
res.append([float(x) for x in fi[1:]])
return indexkey, np.array(res), keys, stats
# run parsing
allres = {}
allstats = {}
nts = []
missing = []
versions = {}
fnames = keep_latest_stdout(os.listdir(logdir))
# print fnames
# filenames are in the form <key>.x.stdout
# where x is a version number (from a to z)
# keep only latest version of each name
for fname in fnames:
if not ('db' + db in fname and fname.endswith('.stdout')):
continue
indexkey, res, _, stats = parse_result_file(logdir + fname)
if res.size == 0:
missing.append(fname)
errorline = open(
logdir + fname.replace('.stdout', '.stderr')).readlines()
if len(errorline) > 0:
errorline = errorline[-1]
else:
errorline = 'NO STDERR'
print fname, stats['CHRONOS_JOB_INSTANCE_ID'], errorline
else:
if indexkey in allres:
if allstats[indexkey]['run_version'] > stats['run_version']:
# don't use this run
continue
n_threads = stats.get('n_threads', 1)
nts.append(n_threads)
allres[indexkey] = res
allstats[indexkey] = stats
assert len(set(nts)) == 1
n_threads = nts[0]
def plot_tradeoffs(allres, code_size, recall_rank):
dbsize = dbsize_from_name(db)
recall_idx = int(np.log10(recall_rank))
bigtab = []
names = []
for k,v in sorted(allres.items()):
if v.ndim != 2: continue
us = unitsize(k)
if us != code_size: continue
perf = v[:, recall_idx]
times = v[:, 3]
bigtab.append(
np.vstack((
np.ones(times.size, dtype=int) * len(names),
perf, times
))
)
names.append(k)
bigtab = np.hstack(bigtab)
perm = np.argsort(bigtab[1, :])
bigtab = bigtab[:, perm]
times = np.minimum.accumulate(bigtab[2, ::-1])[::-1]
selection = np.where(bigtab[2, :] == times)
selected_methods = [names[i] for i in
np.unique(bigtab[0, selection].astype(int))]
not_selected = list(set(names) - set(selected_methods))
print "methods without an optimal OP: ", not_selected
nq = 10000
pyplot.title('database ' + db + ' code_size=%d' % code_size)
# grayed out lines
for k in not_selected:
v = allres[k]
if v.ndim != 2: continue
us = unitsize(k)
if us != code_size: continue
linestyle = (':' if 'PQ' in k else
'-.' if 'SQ4' in k else
'--' if 'SQ8' in k else '-')
pyplot.semilogy(v[:, recall_idx], v[:, 3], label=None,
linestyle=linestyle,
marker='o' if 'HNSW' in k else '+',
color='#cccccc', linewidth=0.2)
# important methods
for k in selected_methods:
v = allres[k]
if v.ndim != 2: continue
us = unitsize(k)
if us != code_size: continue
stats = allstats[k]
tot_size = stats['index_size'] + stats['tables_size']
id_size = 8 # 64 bit
addt = ''
if 'add_time' in stats:
add_time = stats['add_time']
if add_time > 7200:
add_min = add_time / 60
addt = ', %dh%02d' % (add_min / 60, add_min % 60)
else:
add_sec = int(add_time)
addt = ', %dm%02d' % (add_sec / 60, add_sec % 60)
label = k + ' (size+%.1f%%%s)' % (
tot_size / float((code_size + id_size) * dbsize) * 100 - 100,
addt)
linestyle = (':' if 'PQ' in k else
'-.' if 'SQ4' in k else
'--' if 'SQ8' in k else '-')
pyplot.semilogy(v[:, recall_idx], v[:, 3], label=label,
linestyle=linestyle,
marker='o' if 'HNSW' in k else '+')
if len(not_selected) == 0:
om = ''
else:
om = '\nomitted:'
nc = len(om)
for m in not_selected:
if nc > 80:
om += '\n'
nc = 0
om += ' ' + m
nc += len(m) + 1
pyplot.xlabel('1-recall at %d %s' % (recall_rank, om) )
pyplot.ylabel('search time per query (ms, %d threads)' % n_threads)
pyplot.legend()
pyplot.grid()
pyplot.savefig('figs/tradeoffs_%s_cs%d_r%d.png' % (
db, code_size, recall_rank))
return selected_methods, not_selected
pyplot.gcf().set_size_inches(15, 10)
plot_tradeoffs(allres, code_size=code_size, recall_rank=1)
