mirror of https://github.com/milvus-io/milvus.git
* This is my commit message Signed-off-by jack hou Developer <364427912@qq.com> Signed-off-by: root <364427912@qq.com> * fix build with -u -a Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * fix run on GPU Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * fix ThirdPartyPackages.cmake Signed-off-by: shengjun.li <shengjun.li@zilliz.com> Co-authored-by: shengjun.li <shengjun.li@zilliz.com>pull/4083/head
parent
0b020b8255
commit
8d5c1a87d5
|
@ -10,6 +10,7 @@ Please mark all change in change log and use the issue from GitHub
|
|||
- \#4012 Milvus hangs when continually creating and dropping partitions.
|
||||
|
||||
## Feature
|
||||
- \#3773 Support IVF_PQ to run on FPGA.
|
||||
|
||||
## Improvement
|
||||
- \#3775 Improve search performance in the case that no item deleted.
|
||||
|
|
|
@ -152,7 +152,12 @@ if (MILVUS_USE_CCACHE)
|
|||
set(ENV{CCACHE_COMMENTS} "1")
|
||||
endif (CCACHE_FOUND)
|
||||
endif ()
|
||||
|
||||
if (MILVUS_FPGA_VERSION)
|
||||
message(STATUS "Building Milvus FPGA version")
|
||||
add_compile_definitions("MILVUS_FPGA_VERSION")
|
||||
else ()
|
||||
message(STATUS "Building Milvus CPU version")
|
||||
endif ()
|
||||
if (MILVUS_GPU_VERSION)
|
||||
message(STATUS "Building Milvus GPU version")
|
||||
add_compile_definitions("MILVUS_GPU_VERSION")
|
||||
|
@ -204,7 +209,11 @@ add_custom_target(Clean-All COMMAND ${CMAKE_BUILD_TOOL} clean)
|
|||
if ("${MILVUS_DB_PATH}" STREQUAL "")
|
||||
set(MILVUS_DB_PATH "${CMAKE_INSTALL_PREFIX}")
|
||||
endif ()
|
||||
|
||||
if (MILVUS_FPGA_VERSION)
|
||||
set(FPGA_ENABLE "true")
|
||||
else ()
|
||||
set(FPGA_ENABLE "false")
|
||||
endif ()
|
||||
if (MILVUS_GPU_VERSION)
|
||||
set(GPU_ENABLE "true")
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/server_config.template
|
||||
|
|
|
@ -11,12 +11,13 @@ PROFILING="OFF"
|
|||
RUN_CPPLINT="OFF"
|
||||
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
|
||||
GPU_VERSION="OFF" #defaults to CPU version
|
||||
FPGA_VERSION="OFF"
|
||||
WITH_MKL="OFF"
|
||||
WITH_PROMETHEUS="ON"
|
||||
FIU_ENABLE="OFF"
|
||||
BUILD_OPENBLAS="ON"
|
||||
|
||||
while getopts "p:d:t:f:ulrcghzmei" arg; do
|
||||
while getopts "p:d:t:f:ulrcgahzmei" arg; do
|
||||
case $arg in
|
||||
p)
|
||||
INSTALL_PREFIX=$OPTARG
|
||||
|
@ -58,6 +59,9 @@ while getopts "p:d:t:f:ulrcghzmei" arg; do
|
|||
i)
|
||||
FIU_ENABLE="ON"
|
||||
;;
|
||||
a)
|
||||
FPGA_VERSION="ON"
|
||||
;;
|
||||
h) # help
|
||||
echo "
|
||||
|
||||
|
@ -74,10 +78,11 @@ parameter:
|
|||
-m: build with MKL(default: OFF)
|
||||
-e: build without prometheus(default: OFF)
|
||||
-i: build FIU_ENABLE(default: OFF)
|
||||
-a: build FPGA(default: OFF)
|
||||
-h: help
|
||||
|
||||
usage:
|
||||
./build.sh -p \${INSTALL_PREFIX} -t \${BUILD_TYPE} [-u] [-l] [-r] [-c] [-z] [-g] [-m] [-e] [-h]
|
||||
./build.sh -p \${INSTALL_PREFIX} -t \${BUILD_TYPE} [-u] [-l] [-r] [-c] [-z] [-g] [-a] [-m] [-e] [-h]
|
||||
"
|
||||
exit 0
|
||||
;;
|
||||
|
@ -108,6 +113,7 @@ CMAKE_CMD="cmake \
|
|||
-DMILVUS_DB_PATH=${DB_PATH} \
|
||||
-DENABLE_CPU_PROFILING=${PROFILING} \
|
||||
-DMILVUS_GPU_VERSION=${GPU_VERSION} \
|
||||
-DMILVUS_FPGA_VERSION=${FPGA_VERSION} \
|
||||
-DFAISS_WITH_MKL=${WITH_MKL} \
|
||||
-DMILVUS_WITH_PROMETHEUS=${WITH_PROMETHEUS} \
|
||||
-DMILVUS_WITH_FIU=${FIU_ENABLE} \
|
||||
|
|
|
@ -45,6 +45,7 @@ set_option_category("Milvus Build Option")
|
|||
|
||||
define_option(MILVUS_GPU_VERSION "Build GPU version" OFF)
|
||||
|
||||
define_option(MILVUS_FPGA_VERSION "Build FPGA version" OFF)
|
||||
#----------------------------------------------------------------------
|
||||
set_option_category("Thirdparty")
|
||||
|
||||
|
|
|
@ -24,7 +24,8 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES
|
|||
Opentracing
|
||||
fiu
|
||||
AWS
|
||||
oatpp)
|
||||
oatpp
|
||||
armadillo)
|
||||
|
||||
message(STATUS "Using ${MILVUS_DEPENDENCY_SOURCE} approach to find dependencies")
|
||||
|
||||
|
@ -64,6 +65,8 @@ macro(build_dependency DEPENDENCY_NAME)
|
|||
build_oatpp()
|
||||
elseif("${DEPENDENCY_NAME}" STREQUAL "AWS")
|
||||
build_aws()
|
||||
elseif("${DEPENDENCY_NAME}" STREQUAL "armadillo")
|
||||
build_armadillo()
|
||||
else ()
|
||||
message(FATAL_ERROR "Unknown thirdparty dependency to build: ${DEPENDENCY_NAME}")
|
||||
endif ()
|
||||
|
@ -334,6 +337,12 @@ else ()
|
|||
set(AWS_SOURCE_URL "https://github.com/aws/aws-sdk-cpp/archive/${AWS_VERSION}.tar.gz")
|
||||
endif ()
|
||||
|
||||
if (DEFINED ENV{MILVUS_ARMADILLO_URL})
|
||||
set(ARMADILLO_SOURCE_URL "$ENV{MILVUS_ARMADILLO_URL}")
|
||||
else ()
|
||||
set(ARMADILLO_SOURCE_URL "https://gitlab.com/conradsnicta/armadillo-code/-/archive/9.900.x/armadillo-code-9.900.x.tar.gz")
|
||||
endif ()
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Google gtest
|
||||
|
||||
|
@ -1158,3 +1167,48 @@ if(MILVUS_WITH_AWS)
|
|||
include_directories(SYSTEM ${AWS_CPP_SDK_CORE_INCLUDE_DIR})
|
||||
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# armadillo
|
||||
|
||||
macro(build_armadillo)
|
||||
message(STATUS "Building armadillo 9.9.x from source")
|
||||
set(ARMADILLO_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/armadillo_ep-prefix/src/armadillo_ep")
|
||||
set(ARMADILLO_INCLUDE_DIR "${ARMADILLO_PREFIX}/include")
|
||||
set(ARMADILLO_CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${ARMADILLO_PREFIX}")
|
||||
|
||||
externalproject_add(armadillo_ep
|
||||
URL ${ARMADILLO_SOURCE_URL}
|
||||
${EP_LOG_OPTIONS}
|
||||
PREFIX ${ARMADILLO_PREFIX}
|
||||
INSTALL_DIR ${ARMADILLO_PREFIX}
|
||||
CMAKE_ARGS ${ARMADILLO_CMAKE_ARGS}
|
||||
BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS}
|
||||
INSTALL_COMMAND ${MAKE} install
|
||||
BUILD_BYPRODUCTS
|
||||
${ARMADILLO_SHARED_LIB}
|
||||
)
|
||||
|
||||
file(MAKE_DIRECTORY "${ARMADILLO_INCLUDE_DIR}")
|
||||
add_library(armadillo SHARED IMPORTED)
|
||||
ExTernalProject_Get_Property(armadillo_ep INSTALL_DIR)
|
||||
set_target_properties(armadillo
|
||||
PROPERTIES
|
||||
IMPORTED_GLOBAL TRUE
|
||||
IMPORTED_LOCATION "${INSTALL_DIR}/lib/libarmadillo.so"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${INSTALL_DIR}/include")
|
||||
|
||||
add_dependencies(armadillo armadillo_ep)
|
||||
endmacro()
|
||||
|
||||
if(MILVUS_FPGA_VERSION)
|
||||
resolve_dependency(armadillo)
|
||||
|
||||
get_target_property(ARMADILLO_INCLUDE_DIR armadillo INTERFACE_INCLUDE_DIRECTORIES)
|
||||
include_directories(SYSTEM ${ARMADILLO_INCLUDE_DIR})
|
||||
install(FILES
|
||||
${INSTALL_DIR}/lib/libarmadillo.so
|
||||
${INSTALL_DIR}/lib/libarmadillo.so.9
|
||||
${INSTALL_DIR}/lib/libarmadillo.so.9.900.4
|
||||
DESTINATION lib)
|
||||
endif()
|
||||
|
|
|
@ -152,6 +152,19 @@ gpu:
|
|||
- gpu0
|
||||
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# FPGA Config | Description | Type | Default |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# enable | Use FPGA devices or not. | Boolean | false |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# search_devices | The list of FPGA devices used for search computation. | DeviceList | gpu0 |
|
||||
# | Must be in format fpgax. | | |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
|
||||
fpga:
|
||||
enable: true
|
||||
search_devices:
|
||||
- fpga0
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# Logs Config | Description | Type | Default |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# level | Log level in Milvus. Must be one of debug, info, warning, | String | debug |
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "cache/FpgaCacheMgr.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include <fiu-local.h>
|
||||
|
||||
#include "config/Config.h"
|
||||
#include "utils/Log.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace cache {
|
||||
|
||||
namespace {
|
||||
// constexpr int64_t unit = 1024 * 1024 * 1024;
|
||||
constexpr int64_t unit = 1;
|
||||
} // namespace
|
||||
|
||||
FpgaCacheMgr::FpgaCacheMgr() {
|
||||
// All config values have been checked in Config::ValidateConfig()
|
||||
server::Config& config = server::Config::GetInstance();
|
||||
|
||||
int64_t cpu_cache_cap;
|
||||
config.GetCacheConfigCpuCacheCapacity(cpu_cache_cap);
|
||||
int64_t cap = cpu_cache_cap * unit;
|
||||
LOG_SERVER_DEBUG_ << "cpu cache.size: " << cap;
|
||||
LOG_SERVER_INFO_ << "cpu cache.size: " << cap;
|
||||
cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL << 32, "[CACHE CPU]");
|
||||
|
||||
float cpu_cache_threshold;
|
||||
config.GetCacheConfigCpuCacheThreshold(cpu_cache_threshold);
|
||||
cache_->set_freemem_percent(cpu_cache_threshold);
|
||||
|
||||
SetIdentity("CpuCacheMgr");
|
||||
AddCpuCacheCapacityListener();
|
||||
}
|
||||
|
||||
FpgaCacheMgr*
|
||||
FpgaCacheMgr::GetInstance() {
|
||||
static FpgaCacheMgr s_mgr;
|
||||
return &s_mgr;
|
||||
}
|
||||
|
||||
DataObjPtr
|
||||
FpgaCacheMgr::GetIndex(const std::string& key) {
|
||||
DataObjPtr obj = GetItem(key);
|
||||
return obj;
|
||||
}
|
||||
|
||||
void
|
||||
FpgaCacheMgr::OnCpuCacheCapacityChanged(int64_t value) {
|
||||
SetCapacity(value * unit);
|
||||
}
|
||||
|
||||
} // namespace cache
|
||||
} // namespace milvus
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "cache/CacheMgr.h"
|
||||
#include "cache/DataObj.h"
|
||||
#include "config/handler/CacheConfigHandler.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace cache {
|
||||
|
||||
class FpgaCacheMgr : public CacheMgr<DataObjPtr>, public server::CacheConfigHandler {
|
||||
private:
|
||||
FpgaCacheMgr();
|
||||
|
||||
public:
|
||||
// TODO(myh): use smart pointer instead
|
||||
static FpgaCacheMgr*
|
||||
GetInstance();
|
||||
|
||||
DataObjPtr
|
||||
GetIndex(const std::string& key);
|
||||
|
||||
protected:
|
||||
void
|
||||
OnCpuCacheCapacityChanged(int64_t value) override;
|
||||
};
|
||||
|
||||
} // namespace cache
|
||||
} // namespace milvus
|
|
@ -117,7 +117,21 @@ const char* CONFIG_ENGINE_SIMD_TYPE = "simd_type";
|
|||
const char* CONFIG_ENGINE_SIMD_TYPE_DEFAULT = "auto";
|
||||
const char* CONFIG_ENGINE_SEARCH_COMBINE_MAX_NQ = "search_combine_nq";
|
||||
const char* CONFIG_ENGINE_SEARCH_COMBINE_MAX_NQ_DEFAULT = "64";
|
||||
|
||||
/* fpga resource config */
|
||||
const char* CONFIG_FPGA_RESOURCE = "fpga";
|
||||
const char* CONFIG_FPGA_RESOURCE_ENABLE = "enable";
|
||||
const char* CONFIG_FPGA_RESOURCE_CACHE_CAPACITY = "cache_size";
|
||||
const char* CONFIG_FPGA_RESOURCE_CACHE_CAPACITY_DEFAULT = "1073741824"; /* 1 GB */
|
||||
const char* CONFIG_FPGA_RESOURCE_CACHE_THRESHOLD = "cache_threshold";
|
||||
const char* CONFIG_FPGA_RESOURCE_CACHE_THRESHOLD_DEFAULT = "0.7";
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
const char* CONFIG_FPGA_RESOURCE_ENABLE_DEFAULT = "true";
|
||||
#else
|
||||
const char* CONFIG_FPGA_RESOURCE_ENABLE_DEFAULT = "false";
|
||||
#endif
|
||||
const char* CONFIG_FPGA_RESOURCE_DELIMITER = ",";
|
||||
const char* CONFIG_FPGA_RESOURCE_SEARCH_RESOURCES = "search_devices";
|
||||
const char* CONFIG_FPGA_RESOURCE_SEARCH_RESOURCES_DEFAULT = "fpga0";
|
||||
/* gpu resource config */
|
||||
const char* CONFIG_GPU_RESOURCE = "gpu";
|
||||
const char* CONFIG_GPU_RESOURCE_ENABLE = "enable";
|
||||
|
@ -1325,7 +1339,19 @@ Config::CheckMetricConfigPort(const std::string& value) {
|
|||
}
|
||||
return Status::OK();
|
||||
}
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
Status
|
||||
Config::CheckFpgaResourceConfigEnable(const std::string& value) {
|
||||
fiu_return_on("check_config_fpga_resource_enable_fail", Status(SERVER_INVALID_ARGUMENT, ""));
|
||||
|
||||
/* if (!ValidateStringIsBool(value).ok()) {
|
||||
std::string msg = "Invalid fpga resource config: " + value + ". Possible reason: fpga.enable is not a
|
||||
boolean."; return Status(SERVER_INVALID_ARGUMENT, msg);
|
||||
}*/
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
#endif
|
||||
/* cache config */
|
||||
Status
|
||||
Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) {
|
||||
|
@ -2052,7 +2078,71 @@ Config::GetGeneralConfigMetaURI(std::string& value) {
|
|||
value = GetConfigStr(CONFIG_GENERAL, CONFIG_GENERAL_METAURI, CONFIG_GENERAL_METAURI_DEFAULT);
|
||||
return CheckGeneralConfigMetaURI(value);
|
||||
}
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
Status
|
||||
Config::GetFpgaResourceConfigEnable(bool& value) {
|
||||
std::string str =
|
||||
GetConfigStr(CONFIG_FPGA_RESOURCE, CONFIG_FPGA_RESOURCE_ENABLE, CONFIG_FPGA_RESOURCE_ENABLE_DEFAULT);
|
||||
|
||||
STATUS_CHECK(CheckFpgaResourceConfigEnable(str));
|
||||
STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value));
|
||||
return Status::OK();
|
||||
}
|
||||
Status
|
||||
Config::GetFpgaResourceConfigSearchResources(std::vector<int64_t>& value) {
|
||||
value.push_back(0);
|
||||
return Status::OK();
|
||||
}
|
||||
Status
|
||||
Config::GetFpgaResourceConfigCacheCapacity(int64_t& value) {
|
||||
bool fpga_resource_enable = false;
|
||||
STATUS_CHECK(GetFpgaResourceConfigEnable(fpga_resource_enable));
|
||||
fiu_do_on("Config.GetFpgaResourceConfigCacheCapacity.diable_fpga_resource", fpga_resource_enable = false);
|
||||
if (!fpga_resource_enable) {
|
||||
std::string msg = "FPGA not supported. Possible reason: fpga.enable is set to false.";
|
||||
return Status(SERVER_UNSUPPORTED_ERROR, msg);
|
||||
}
|
||||
std::string str = GetConfigStr(CONFIG_FPGA_RESOURCE, CONFIG_FPGA_RESOURCE_CACHE_CAPACITY,
|
||||
CONFIG_FPGA_RESOURCE_CACHE_CAPACITY_DEFAULT);
|
||||
std::string err;
|
||||
value = parse_bytes(str, err);
|
||||
// value = std::stoll(str);
|
||||
return Status::OK();
|
||||
}
|
||||
Status
|
||||
Config::GetFpgaResourceConfigCacheThreshold(float& value) {
|
||||
bool fpga_resource_enable = false;
|
||||
STATUS_CHECK(GetFpgaResourceConfigEnable(fpga_resource_enable));
|
||||
fiu_do_on("Config.GetFpgaResourceConfigCacheThreshold.diable_fpga_resource", fpga_resource_enable = false);
|
||||
if (!fpga_resource_enable) {
|
||||
std::string msg = "FPGA not supported. Possible reason: fpga.enable is set to false.";
|
||||
return Status(SERVER_UNSUPPORTED_ERROR, msg);
|
||||
}
|
||||
std::string str = GetConfigStr(CONFIG_FPGA_RESOURCE, CONFIG_FPGA_RESOURCE_CACHE_THRESHOLD,
|
||||
CONFIG_FPGA_RESOURCE_CACHE_THRESHOLD_DEFAULT);
|
||||
STATUS_CHECK(CheckFpgaResourceConfigCacheThreshold(str));
|
||||
value = std::stof(str);
|
||||
return Status::OK();
|
||||
}
|
||||
Status
|
||||
Config::CheckFpgaResourceConfigCacheThreshold(const std::string& value) {
|
||||
fiu_return_on("check_config_fpga_resource_cache_threshold_fail", Status(SERVER_INVALID_ARGUMENT, ""));
|
||||
|
||||
/* if (!ValidateStringIsFloat(value).ok()) {
|
||||
std::string msg = "Invalid fpga cache threshold: " + value +
|
||||
". Possible reason: fpga.cache_threshold is not in range (0.0, 1.0].";
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg);
|
||||
} else {
|
||||
float fpga_cache_threshold = std::stof(value);
|
||||
if (fpga_cache_threshold <= 0.0 || fpga_cache_threshold >= 1.0) {
|
||||
std::string msg = "Invalid fpga cache threshold: " + value +
|
||||
". Possible reason: fpga.cache_threshold is not in range (0.0, 1.0].";
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg);
|
||||
}
|
||||
}*/
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
/* network config */
|
||||
Status
|
||||
Config::GetNetworkConfigBindAddress(std::string& value) {
|
||||
|
|
|
@ -104,7 +104,21 @@ extern const char* CONFIG_ENGINE_SIMD_TYPE;
|
|||
extern const char* CONFIG_ENGINE_SIMD_TYPE_DEFAULT;
|
||||
extern const char* CONFIG_ENGINE_SEARCH_COMBINE_MAX_NQ;
|
||||
extern const char* CONFIG_ENGINE_SEARCH_COMBINE_MAX_NQ_DEFAULT;
|
||||
|
||||
/* fpga resource config*/
|
||||
extern const char* CONFIG_FPGA_RESOURCE;
|
||||
extern const char* CONFIG_FPGA_RESOURCE_ENABLE;
|
||||
extern const char* CONFIG_FPGA_RESOURCE_CACHE_CAPACITY;
|
||||
extern const char* CONFIG_FPGA_RESOURCE_CACHE_CAPACITY_DEFAULT; /* 1 GB */
|
||||
extern const char* CONFIG_FPGA_RESOURCE_CACHE_THRESHOLD;
|
||||
extern const char* CONFIG_FPGA_RESOURCE_CACHE_THRESHOLD_DEFAULT;
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
extern const char* CONFIG_FPGA_RESOURCE_ENABLE_DEFAULT;
|
||||
#else
|
||||
extern const char* CONFIG_FPGA_RESOURCE_ENABLE_DEFAULT;
|
||||
#endif
|
||||
extern const char* CONFIG_FPGA_RESOURCE_DELIMITER;
|
||||
extern const char* CONFIG_FPGA_RESOURCE_SEARCH_RESOURCES;
|
||||
extern const char* CONFIG_FPGA_RESOURCE_SEARCH_RESOURCES_DEFAULT;
|
||||
/* gpu resource config */
|
||||
extern const char* CONFIG_GPU_RESOURCE;
|
||||
extern const char* CONFIG_GPU_RESOURCE_ENABLE;
|
||||
|
@ -272,7 +286,16 @@ class Config {
|
|||
CheckEngineConfigSimdType(const std::string& value);
|
||||
Status
|
||||
CheckEngineSearchCombineMaxNq(const std::string& value);
|
||||
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
Status
|
||||
GetFpgaResourceConfigCacheThreshold(float& value);
|
||||
Status
|
||||
CheckFpgaResourceConfigEnable(const std::string& value);
|
||||
Status
|
||||
GetFpgaResourceConfigCacheCapacity(int64_t& value);
|
||||
Status
|
||||
CheckFpgaResourceConfigCacheThreshold(const std::string& value);
|
||||
#endif
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
/* gpu resource config */
|
||||
Status
|
||||
|
@ -396,7 +419,13 @@ class Config {
|
|||
GetEngineConfigSimdType(std::string& value);
|
||||
Status
|
||||
GetEngineSearchCombineMaxNq(int64_t& value);
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
|
||||
Status
|
||||
GetFpgaResourceConfigEnable(bool& value);
|
||||
Status
|
||||
GetFpgaResourceConfigSearchResources(std::vector<int64_t>& value);
|
||||
#endif
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
/* gpu resource config */
|
||||
Status
|
||||
|
|
|
@ -94,6 +94,9 @@ class ExecutionEngine {
|
|||
virtual Status
|
||||
Load(bool to_cache = true) = 0;
|
||||
|
||||
virtual Status
|
||||
CopyToFpga() = 0;
|
||||
|
||||
virtual Status
|
||||
CopyToGpu(uint64_t device_id, bool hybrid) = 0;
|
||||
|
||||
|
@ -136,6 +139,9 @@ class ExecutionEngine {
|
|||
virtual Status
|
||||
Cache() = 0;
|
||||
|
||||
virtual Status
|
||||
FpgaCache() = 0;
|
||||
|
||||
virtual Status
|
||||
Init() = 0;
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "cache/CpuCacheMgr.h"
|
||||
#include "cache/FpgaCacheMgr.h"
|
||||
#include "cache/GpuCacheMgr.h"
|
||||
#include "config/Config.h"
|
||||
#include "db/Utils.h"
|
||||
|
@ -31,6 +32,12 @@
|
|||
#include "knowhere/index/vector_index/VecIndex.h"
|
||||
#include "knowhere/index/vector_index/VecIndexFactory.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
#include <faiss/index_io.h>
|
||||
#include "knowhere/index/vector_index/IndexIVFPQ.h"
|
||||
#include "knowhere/index/vector_index/fpga/IndexFPGAIVFPQ.h"
|
||||
#include "knowhere/index/vector_index/fpga/utils.h"
|
||||
#endif
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include "knowhere/index/vector_index/gpu/GPUIndex.h"
|
||||
#include "knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h"
|
||||
|
@ -168,7 +175,6 @@ ExecutionEngineImpl::CreatetVecIndex(EngineType type) {
|
|||
mode = knowhere::IndexMode::MODE_GPU;
|
||||
}
|
||||
#endif
|
||||
|
||||
fiu_do_on("ExecutionEngineImpl.CreateVecIndex.invalid_type", type = EngineType::INVALID);
|
||||
knowhere::VecIndexPtr index = nullptr;
|
||||
switch (type) {
|
||||
|
@ -490,7 +496,6 @@ ExecutionEngineImpl::Load(bool to_cache) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
index_->SetBlacklist(concurrent_bitset_ptr);
|
||||
|
||||
std::vector<segment::doc_id_t> uids;
|
||||
|
@ -663,6 +668,33 @@ ExecutionEngineImpl::CopyToCpu() {
|
|||
#endif
|
||||
}
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::CopyToFpga() {
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
auto cache_index_ =
|
||||
std::static_pointer_cast<knowhere::VecIndex>(cache::FpgaCacheMgr::GetInstance()->GetIndex(location_));
|
||||
bool already_in_cache = (cache_index_ != nullptr);
|
||||
if (!already_in_cache) {
|
||||
int64_t indexsize = index_->IndexSize();
|
||||
std::shared_ptr<knowhere::IVFPQ> ivfpq = std::static_pointer_cast<knowhere::IVFPQ>(index_);
|
||||
std::shared_ptr<knowhere::FPGAIVFPQ> indexFpga = std::make_shared<knowhere::FPGAIVFPQ>(ivfpq->index_);
|
||||
indexFpga->SetIndexSize(indexsize);
|
||||
indexFpga->CopyIndexToFpga();
|
||||
indexFpga->SetBlacklist(index_->GetBlacklist());
|
||||
|
||||
// do real copy now, may optimizer later
|
||||
auto uids = index_->GetUids();
|
||||
indexFpga->SetUids(uids);
|
||||
|
||||
index_ = indexFpga;
|
||||
FpgaCache();
|
||||
} else {
|
||||
index_ = cache_index_;
|
||||
}
|
||||
LOG_ENGINE_DEBUG_ << "copy to fpga time ";
|
||||
#endif
|
||||
return Status::OK();
|
||||
}
|
||||
ExecutionEnginePtr
|
||||
ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_type) {
|
||||
LOG_ENGINE_DEBUG_ << "Build index file: " << location << " from: " << location_;
|
||||
|
@ -714,14 +746,12 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
|
|||
to_index = device_index->CopyGpuToCpu(conf);
|
||||
}
|
||||
#endif
|
||||
|
||||
to_index->SetUids(uids);
|
||||
LOG_ENGINE_DEBUG_ << "Set " << to_index->GetUids().size() << "uids for " << location;
|
||||
if (blacklist != nullptr) {
|
||||
to_index->SetBlacklist(blacklist);
|
||||
LOG_ENGINE_DEBUG_ << "Set blacklist for index " << location;
|
||||
}
|
||||
|
||||
LOG_ENGINE_DEBUG_ << "Finish build index: " << location;
|
||||
return std::make_shared<ExecutionEngineImpl>(to_index, location, engine_type, metric_type_, index_params_);
|
||||
}
|
||||
|
@ -1248,7 +1278,15 @@ ExecutionEngineImpl::Cache() {
|
|||
cpu_cache_mgr->InsertItem(location_, obj);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::FpgaCache() {
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
auto fpga_cache_mgr = milvus::cache::FpgaCacheMgr::GetInstance();
|
||||
cache::DataObjPtr obj = std::static_pointer_cast<cache::DataObj>(index_);
|
||||
fpga_cache_mgr->InsertItem(location_, obj);
|
||||
#endif
|
||||
return Status::OK();
|
||||
}
|
||||
// TODO(linxj): remove.
|
||||
Status
|
||||
ExecutionEngineImpl::Init() {
|
||||
|
|
|
@ -63,6 +63,9 @@ class ExecutionEngineImpl : public ExecutionEngine {
|
|||
Status
|
||||
CopyToCpu() override;
|
||||
|
||||
Status
|
||||
CopyToFpga() override;
|
||||
|
||||
#if 0
|
||||
Status
|
||||
GetVectorByID(const int64_t id, float* vector, bool hybrid) override;
|
||||
|
@ -90,6 +93,9 @@ class ExecutionEngineImpl : public ExecutionEngine {
|
|||
Status
|
||||
Cache() override;
|
||||
|
||||
Status
|
||||
FpgaCache() override;
|
||||
|
||||
Status
|
||||
Init() override;
|
||||
|
||||
|
|
|
@ -65,7 +65,11 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${INDEX_SOURCE_DIR}/cmake")
|
|||
include(ExternalProject)
|
||||
include(DefineOptionsCore)
|
||||
include(BuildUtilsCore)
|
||||
|
||||
if (MILVUS_FPGA_VERSION OR KNOWHERE_FPGA_VERSION)
|
||||
add_compile_definitions("MILVUS_FPGA_VERSION")
|
||||
else ()
|
||||
message(STATUS "Building Knowhere CPU version")
|
||||
endif ()
|
||||
if (MILVUS_GPU_VERSION OR KNOWHERE_GPU_VERSION)
|
||||
message(STATUS "Building Knowhere GPU version")
|
||||
add_compile_definitions("MILVUS_GPU_VERSION")
|
||||
|
|
|
@ -39,7 +39,14 @@ macro(define_option_string name description default)
|
|||
set_property(CACHE ${name} PROPERTY STRINGS ${ARGN})
|
||||
endif ()
|
||||
endmacro()
|
||||
#-------------------------------------------------
|
||||
set_option_category("FPGA version")
|
||||
|
||||
if (MILVUS_FPGA_VERSION)
|
||||
define_option(KNOWHERE_FPGA_VERSION "Build FPGA version" ON)
|
||||
else ()
|
||||
define_option(KNOWHERE_FPGA_VERSION "Build FPGA version" OFF)
|
||||
endif ()
|
||||
#----------------------------------------------------------------------
|
||||
set_option_category("GPU version")
|
||||
|
||||
|
|
|
@ -75,6 +75,20 @@ else ()
|
|||
${LAPACK_LIBRARIES}
|
||||
)
|
||||
endif ()
|
||||
if (MILVUS_FPGA_VERSION)
|
||||
set(depend_libs
|
||||
armadillo
|
||||
${depend_libs}
|
||||
)
|
||||
set(index_srcs ${index_srcs}
|
||||
knowhere/index/vector_index/fpga/Fpga.cpp
|
||||
knowhere/index/vector_index/fpga/IndexFPGAIVFPQ.cpp
|
||||
knowhere/index/vector_index/fpga/xilinx_c.cpp
|
||||
knowhere/index/vector_index/fpga/utils.cpp
|
||||
knowhere/index/vector_index/fpga/FpgaInst.cpp
|
||||
)
|
||||
|
||||
endif ()
|
||||
|
||||
if (KNOWHERE_GPU_VERSION)
|
||||
include_directories(${CUDA_INCLUDE_DIRS})
|
||||
|
|
|
@ -169,7 +169,7 @@ IVFPQConfAdapter::GetValidM(int64_t dimension, int64_t m, IndexMode& mode) {
|
|||
mode = knowhere::IndexMode::MODE_CPU;
|
||||
}
|
||||
#endif
|
||||
if (mode == knowhere::IndexMode::MODE_CPU && !IVFPQConfAdapter::GetValidCPUM(dimension, m)) {
|
||||
if (mode != knowhere::IndexMode::MODE_GPU && !IVFPQConfAdapter::GetValidCPUM(dimension, m)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ extern const char* INDEX_HNSW;
|
|||
extern const char* INDEX_ANNOY;
|
||||
} // namespace IndexEnum
|
||||
|
||||
enum class IndexMode { MODE_CPU = 0, MODE_GPU = 1 };
|
||||
enum class IndexMode { MODE_CPU = 0, MODE_GPU = 1, MODE_FPGA = 2 };
|
||||
|
||||
extern std::string
|
||||
OldIndexTypeToStr(const int32_t type);
|
||||
|
|
|
@ -24,6 +24,9 @@
|
|||
#include "knowhere/index/vector_index/IndexNSG.h"
|
||||
#include "knowhere/index/vector_index/IndexSPTAG.h"
|
||||
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
#include "knowhere/index/vector_index/fpga/IndexFPGAIVFPQ.h"
|
||||
#endif
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include <cuda.h>
|
||||
#include "knowhere/index/vector_index/gpu/IndexGPUIDMAP.h"
|
||||
|
@ -54,6 +57,12 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
|
|||
if (mode == IndexMode::MODE_GPU) {
|
||||
return std::make_shared<knowhere::GPUIVFPQ>(gpu_device);
|
||||
}
|
||||
#endif
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
if (mode == IndexMode::MODE_FPGA) {
|
||||
// LOG_ENGINE_DEBUG_ << " fpga enable indexmode::mode_fpga ";
|
||||
return std::make_shared<knowhere::FPGAIVFPQ>();
|
||||
}
|
||||
#endif
|
||||
return std::make_shared<knowhere::IVFPQ>();
|
||||
} else if (type == IndexEnum::INDEX_FAISS_IVFSQ8) {
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "knowhere/index/vector_index/VecIndex.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
class FPGAIndex {
|
||||
public:
|
||||
explicit FPGAIndex(const int& device_id) : fpga_id_(device_id) {
|
||||
}
|
||||
|
||||
virtual VecIndexPtr
|
||||
CopyFpgaToCpu(const Config&) = 0;
|
||||
|
||||
void
|
||||
SetFpgaDevice(const int& fpga_id) {
|
||||
fpga_id_ = fpga_id;
|
||||
}
|
||||
|
||||
const int64_t
|
||||
GetFpgaDevice() {
|
||||
return fpga_id_;
|
||||
}
|
||||
|
||||
protected:
|
||||
int64_t fpga_id_;
|
||||
};
|
||||
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
|
@ -0,0 +1,572 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include <armadillo>
|
||||
#include "knowhere/index/vector_index/fpga/Fpga.h"
|
||||
#include "knowhere/index/vector_index/fpga/utils.h"
|
||||
#include "knowhere/index/vector_index/fpga/xilinx_c.h"
|
||||
|
||||
namespace Fpga {
|
||||
FpgaInterface::~FpgaInterface() {
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::GetDevAddr() {
|
||||
ci_s = 0;
|
||||
tm2_s = ci_s + nlist + 10;
|
||||
db_s = tm2_s + (nb / 64 + 10 + nlist);
|
||||
pq_s = db_s + (nb / (256 / m) + 4 * nlist);
|
||||
que_s = pq_s + 256 + 10;
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::GetConfigTable(idx_t nprobe) {
|
||||
step1_s = que_s + 1020;
|
||||
if ((nq * nlist * 32) % 2048 == 0) {
|
||||
step2_s = step1_s + ((nq * nlist * 32) / 2048);
|
||||
} else {
|
||||
step2_s = step1_s + ((nq * 8 * m) / 2048);
|
||||
}
|
||||
|
||||
step3_s = step2_s + (nq * 256 / (64 / m));
|
||||
final_s = step2_s;
|
||||
|
||||
cfg_table = {nprobe, nq, nlist, m, ci_s, tm2_s, db_s, pq_s, que_s, step1_s, step2_s, step3_s};
|
||||
|
||||
/*if (verbose) {
|
||||
auto iter = cfg_table.begin();
|
||||
while (iter != cfg_table.end()) {
|
||||
LOG_KNOWHERE_DEBUG_ << *iter;
|
||||
iter++;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::TransFile2FPGA(const char* filename, loff_t base, size_t offset) {
|
||||
uint32_t file_size;
|
||||
loff_t dst_addr = base + offset;
|
||||
|
||||
std::ifstream fin(filename, std::ifstream::in | std::ifstream::binary);
|
||||
if (fin.is_open()) {
|
||||
fin.seekg(0, std::ios::end);
|
||||
file_size = fin.tellg();
|
||||
fin.close();
|
||||
}
|
||||
|
||||
DMAFile2FPGA((char*)DEVICE_H2C0, dst_addr, file_size, 0, 1, filename);
|
||||
}
|
||||
|
||||
char*
|
||||
FpgaInterface::LoadFile2Mem(const char* filename, size_t& file_size, char* buffer) {
|
||||
// size_t file_size;
|
||||
loff_t base_addr;
|
||||
|
||||
std::ifstream fin(filename, std::ifstream::in | std::ifstream::binary);
|
||||
if (fin.is_open()) {
|
||||
fin.seekg(0, std::ios::end);
|
||||
file_size = fin.tellg();
|
||||
fin.close();
|
||||
}
|
||||
|
||||
// posix_memalign((void **)&buffer, 4096/*alignment*/, file_size + 4096);
|
||||
// assert(buffer);
|
||||
|
||||
return ReadFile2Mem(filename, buffer, file_size);
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::TransMem2FPGA(char* buffer, size_t size, loff_t offset) {
|
||||
char* buffer0 = NULL;
|
||||
char* buffer1 = NULL;
|
||||
char* buffer2 = NULL;
|
||||
char* buffer3 = NULL;
|
||||
|
||||
posix_memalign((void**)&buffer0, 4096 /*alignment*/, size / 4 + 4096);
|
||||
assert(buffer0);
|
||||
posix_memalign((void**)&buffer1, 4096 /*alignment*/, size / 4 + 4096);
|
||||
assert(buffer1);
|
||||
posix_memalign((void**)&buffer2, 4096 /*alignment*/, size / 4 + 4096);
|
||||
assert(buffer2);
|
||||
posix_memalign((void**)&buffer3, 4096 /*alignment*/, size / 4 + 4096);
|
||||
assert(buffer3);
|
||||
|
||||
SplitMemory(buffer, size, buffer0, buffer1, buffer2, buffer3);
|
||||
TransBuffer2FPGA(buffer0, size / 4, (char*)DEVICE_H2C0, FPGA_BASE_ADDR_0 + offset, 1);
|
||||
TransBuffer2FPGA(buffer1, size / 4, (char*)DEVICE_H2C0, FPGA_BASE_ADDR_1 + offset, 1);
|
||||
TransBuffer2FPGA(buffer2, size / 4, (char*)DEVICE_H2C0, FPGA_BASE_ADDR_2 + offset, 1);
|
||||
TransBuffer2FPGA(buffer3, size / 4, (char*)DEVICE_H2C0, FPGA_BASE_ADDR_3 + offset, 1);
|
||||
|
||||
free(buffer0);
|
||||
free(buffer1);
|
||||
free(buffer2);
|
||||
free(buffer3);
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::WriteFileFromFPGA(const char* filename, size_t size, loff_t base, size_t offset) {
|
||||
loff_t src_addr = base + offset;
|
||||
|
||||
DMAFPGA2File((char*)DEVICE_C2H0, src_addr, size, 0, 1, filename);
|
||||
}
|
||||
|
||||
char*
|
||||
FpgaInterface::LoadFPGA2Mem(loff_t fpga_addr, char* buffer, size_t size) {
|
||||
// char *buffer = NULL;
|
||||
|
||||
// posix_memalign((void **)&buffer, 4096/*alignment*/, size + 4096);
|
||||
// assert(buffer);
|
||||
|
||||
// memset(buffer, 0x00, size);
|
||||
|
||||
return TransFPGA2Mem((char*)DEVICE_C2H0, fpga_addr, size, buffer, 1);
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::SaveMem2File(char* buffer, size_t size, const char* filename) {
|
||||
WriteMem2File(buffer, size, 1, filename);
|
||||
|
||||
// free(buffer);
|
||||
}
|
||||
|
||||
int
|
||||
FpgaInterface::OpenCtrlDev() {
|
||||
return open_dev((char*)DEVICE_CTRL);
|
||||
}
|
||||
|
||||
int
|
||||
FpgaInterface::CloseCtrlDev(int fd) {
|
||||
return close_dev(fd);
|
||||
}
|
||||
|
||||
int
|
||||
FpgaInterface::WritePara(int fd, uint32_t addr, uint32_t val) {
|
||||
return reg_rw(fd, PARA_BASE_ADDR + addr, (char*)"write", 'w', val);
|
||||
}
|
||||
|
||||
int
|
||||
FpgaInterface::ReadPara(int fd, uint32_t addr) {
|
||||
return reg_rw(fd, PARA_BASE_ADDR + addr, (char*)"read", 'w', 0);
|
||||
}
|
||||
|
||||
int
|
||||
FpgaInterface::WriteReg(int fd, uint32_t addr, uint32_t val) {
|
||||
return reg_rw(fd, REG_BASE_ADDR + addr, (char*)"write", 'w', val);
|
||||
}
|
||||
|
||||
int
|
||||
FpgaInterface::ReadReg(int fd, uint32_t addr) {
|
||||
return reg_rw(fd, REG_BASE_ADDR + addr, (char*)"read", 'w', 0);
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::TriggerSearch(int fd) {
|
||||
WritePara(fd, TRIGGER_REG_ADDR, 0);
|
||||
WriteReg(fd, 0, (0x1 << 6));
|
||||
}
|
||||
|
||||
uint32_t
|
||||
FpgaInterface::GetSearchTime(int fd) {
|
||||
int index;
|
||||
std::vector<uint32_t> vec(8, 0);
|
||||
uint32_t search_time = 0;
|
||||
|
||||
for (index = 0; index < 8; index++) {
|
||||
vec[index] = ReadPara(fd, Time_REG_ADDR + index * 4) & 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
search_time = (vec[6] - vec[0]) * (1000 / FPGA_RATE) / 1000000;
|
||||
|
||||
return search_time;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static arma::Cube<T>
|
||||
CubeTranspose(arma::Cube<T>& cube) {
|
||||
arma::uword D1 = cube.n_rows;
|
||||
arma::uword D2 = cube.n_cols;
|
||||
arma::uword D3 = cube.n_slices;
|
||||
arma::Cube<T> output(D1, D3, D2);
|
||||
|
||||
for (arma::uword s = 0; s < D3; ++s) {
|
||||
for (arma::uword c = 0; c < D2; ++c) {
|
||||
for (arma::uword r = 0; r < D1; ++r) {
|
||||
output.at(r, s, c) = cube.at(r, c, s);
|
||||
// output[ D1*D3*c + D1*s+ r ] = cube[ D1*D2*s + D1*c + r ];
|
||||
}
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
static void
|
||||
cal_term2(faiss::IndexIVFPQ* index, std::vector<double>* dis_term2, std::vector<double>* all_codes,
|
||||
std::vector<int>* all_list_size, std::vector<idx_t>* all_ids) {
|
||||
int nb = index->ntotal;
|
||||
|
||||
int term2_count = 0;
|
||||
|
||||
for (size_t i = 0; i < index->nlist; i++) {
|
||||
all_list_size->at(i) = (index->invlists->list_size(i));
|
||||
const uint8_t* codes = index->invlists->get_codes(i);
|
||||
const idx_t* ids = index->invlists->get_ids(i);
|
||||
for (size_t j = 0; j < index->invlists->list_size(i); j++) {
|
||||
all_ids->at(term2_count) = ids[j];
|
||||
|
||||
dis_term2->at(term2_count) = 0;
|
||||
for (size_t m = 0; m < index->pq.M; m++) {
|
||||
all_codes->at(term2_count * index->pq.M + m) = codes[m];
|
||||
dis_term2->at(term2_count) +=
|
||||
index->precomputed_table[i * index->pq.M * index->pq.ksub + m * index->pq.ksub + codes[m]];
|
||||
}
|
||||
codes += index->pq.code_size;
|
||||
term2_count += 1;
|
||||
}
|
||||
}
|
||||
assert(term2_count == nb);
|
||||
}
|
||||
|
||||
static void
|
||||
cal_ci(faiss::IndexIVFPQ* index, std::vector<float>& ci_vec) {
|
||||
index->quantizer->reconstruct_n(0, index->nlist, ci_vec.data());
|
||||
|
||||
for (size_t i = 0; i < ci_vec.size(); i++) {
|
||||
ci_vec[i] = -ci_vec[i];
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::CopyIndexToFPGA() {
|
||||
{
|
||||
// Step1: Transfer coarse_centroids
|
||||
std::vector<float> ci(nlist * d, 0);
|
||||
cal_ci(index_ptr, ci);
|
||||
|
||||
TransMem2FPGA((char*)ci.data(), ci.size() * sizeof(float), ci_s * 64);
|
||||
|
||||
// WriteMem2File((char *)ci_negitve.data(), ci_negitve.size()*sizeof(float), 1, "coarse_centroids.bin");
|
||||
}
|
||||
|
||||
{
|
||||
// Step2: Transfer pq_quantizer
|
||||
float* centroids_ptr = &index_ptr->pq.centroids[0];
|
||||
arma::fcube PQ(centroids_ptr, index_ptr->pq.dsub, index_ptr->pq.ksub, index_ptr->pq.M);
|
||||
PQ = CubeTranspose(PQ);
|
||||
TransMem2FPGA((char*)PQ.memptr(), PQ.size() * sizeof(float), pq_s * 64);
|
||||
|
||||
// WriteMem2File((char *)PQ.memptr(), PQ.size()*sizeof(float), 1, "pq_quantizer.bin");
|
||||
}
|
||||
|
||||
{
|
||||
// Step3: all_list_size and all_index_id
|
||||
std::vector<double> dis_term2(nb, 0);
|
||||
std::vector<double> all_codes(nb * m, 0);
|
||||
cal_term2(index_ptr, &dis_term2, &all_codes, &all_list_size, &all_index_id);
|
||||
|
||||
// code database
|
||||
int database_width = m * 8;
|
||||
int alignment_remain = 8192 % database_width;
|
||||
assert(alignment_remain <= 0);
|
||||
int alignment_num = (int)(8192 / database_width);
|
||||
int index_base = 0;
|
||||
std::vector<double> database;
|
||||
for (size_t i = 0; i < all_list_size.size(); i++) {
|
||||
int current_size = all_list_size[i];
|
||||
auto iter = all_codes.begin();
|
||||
database.insert(database.end(), iter + index_base * m, iter + (index_base + current_size) * m);
|
||||
|
||||
int page_size = ceil((float)current_size / alignment_num) * alignment_num;
|
||||
|
||||
for (size_t j = 0; j < (page_size - current_size) * m; j++) {
|
||||
database.push_back(3.14);
|
||||
}
|
||||
|
||||
index_base += current_size;
|
||||
}
|
||||
std::vector<uint8_t> database_u8(database.size());
|
||||
for (size_t i = 0; i < database.size(); i++) {
|
||||
database_u8[i] = (uint8_t)database[i];
|
||||
}
|
||||
TransMem2FPGA((char*)database_u8.data(), database_u8.size(), db_s * 64);
|
||||
|
||||
// term2
|
||||
index_base = 0;
|
||||
std::vector<double> term2_temp;
|
||||
for (size_t i = 0; i < all_list_size.size(); i++) {
|
||||
int current_size = all_list_size[i];
|
||||
auto iter = dis_term2.begin();
|
||||
term2_temp.insert(term2_temp.end(), iter + index_base, iter + index_base + current_size);
|
||||
|
||||
int ceil_size = ceil((float)current_size / 64) * 64;
|
||||
for (size_t j = 0; j < ceil_size - current_size; j++) {
|
||||
term2_temp.push_back(3.14);
|
||||
}
|
||||
|
||||
index_base += current_size;
|
||||
}
|
||||
std::vector<float> term2_flt(term2_temp.size());
|
||||
for (size_t i = 0; i < term2_temp.size(); i++) {
|
||||
term2_flt[i] = (float)term2_temp[i];
|
||||
}
|
||||
TransMem2FPGA((char*)term2_flt.data(), term2_flt.size() * sizeof(float), tm2_s * 64);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
FpgaInterface::GetDatabaseListAlign(std::vector<int>& data_list, uint32_t align, std::vector<int>& align_list) {
|
||||
auto iter = data_list.begin();
|
||||
|
||||
while (iter != data_list.end()) {
|
||||
int temp = (int)(ceil((float)*iter / align) * align); // round up the value
|
||||
align_list.push_back(temp);
|
||||
iter++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::GetDatabaseListSum(std::vector<int>& data_vec, std::vector<int>& sum_vec) {
|
||||
int temp = 0;
|
||||
auto iter = data_vec.begin();
|
||||
|
||||
while (iter != data_vec.end()) {
|
||||
sum_vec.push_back(temp);
|
||||
temp += *iter;
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::GetProbeDatabaseAddressList(uint32_t base_addr, std::vector<int>& data_vec, int align,
|
||||
std::vector<uint32_t>& addr_vec, uint32_t m) {
|
||||
std::vector<int> offset_vec;
|
||||
std::vector<int> sum_vec;
|
||||
|
||||
GetDatabaseListAlign(data_vec, align, offset_vec);
|
||||
GetDatabaseListSum(offset_vec, sum_vec);
|
||||
|
||||
auto iter = sum_vec.begin();
|
||||
while (iter != sum_vec.end()) {
|
||||
uint32_t addr = base_addr + ((*iter) * m / 256);
|
||||
addr_vec.push_back(addr);
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::GetProbeTerm2AddressList(uint32_t base_addr, std::vector<int>& data_vec, int align,
|
||||
std::vector<uint32_t>& addr_vec) {
|
||||
std::vector<int> offset_vec;
|
||||
std::vector<int> sum_vec;
|
||||
uint32_t addr;
|
||||
|
||||
GetDatabaseListAlign(data_vec, align, offset_vec);
|
||||
GetDatabaseListSum(offset_vec, sum_vec);
|
||||
|
||||
auto iter = sum_vec.begin();
|
||||
while (iter != sum_vec.end()) {
|
||||
addr = base_addr + ((*iter) * 32 / 2048);
|
||||
addr_vec.push_back(addr);
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
FpgaInterface::CfgDataBaseNumReal(int fd) {
|
||||
int index = 0;
|
||||
std::vector<uint32_t> database_vec;
|
||||
std::vector<uint32_t> term2_vec;
|
||||
uint32_t addr = cfg_table.size() * 4;
|
||||
|
||||
uint32_t align = 8192 / m / 8;
|
||||
GetProbeDatabaseAddressList((uint32_t)db_s, all_list_size, align, database_vec, m);
|
||||
|
||||
GetProbeTerm2AddressList(tm2_s, all_list_size, 64, term2_vec);
|
||||
|
||||
for (index = 0; index < nlist; index++) {
|
||||
WritePara(fd, addr, all_list_size[index]);
|
||||
addr += 4;
|
||||
}
|
||||
|
||||
for (index = 0; index < nlist; index++) {
|
||||
WritePara(fd, addr, database_vec[index]);
|
||||
addr += 4;
|
||||
}
|
||||
|
||||
for (index = 0; index < nlist; index++) {
|
||||
WritePara(fd, addr, term2_vec[index]);
|
||||
addr += 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::TransferCfg() {
|
||||
int index = 0;
|
||||
int fd;
|
||||
|
||||
fd = OpenCtrlDev();
|
||||
|
||||
auto iter = cfg_table.begin();
|
||||
while (iter != cfg_table.end()) {
|
||||
WritePara(fd, index, *iter);
|
||||
iter++;
|
||||
index += 4;
|
||||
}
|
||||
|
||||
CfgDataBaseNumReal(fd);
|
||||
|
||||
CloseCtrlDev(fd);
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::FlushResultMem() {
|
||||
uint32_t buffer_size = topk * nq * 4 * 2 / 4;
|
||||
uint32_t offset = step3_s * 64;
|
||||
char* flush_buffer = new char[buffer_size];
|
||||
|
||||
memset(flush_buffer, 0x1, buffer_size);
|
||||
|
||||
TransBuffer2FPGA(flush_buffer, buffer_size, (char*)DEVICE_H2C0, FPGA_BASE_ADDR_0 + offset, 1);
|
||||
TransBuffer2FPGA(flush_buffer, buffer_size, (char*)DEVICE_H2C0, FPGA_BASE_ADDR_1 + offset, 1);
|
||||
TransBuffer2FPGA(flush_buffer, buffer_size, (char*)DEVICE_H2C0, FPGA_BASE_ADDR_2 + offset, 1);
|
||||
TransBuffer2FPGA(flush_buffer, buffer_size, (char*)DEVICE_H2C0, FPGA_BASE_ADDR_3 + offset, 1);
|
||||
|
||||
delete[] flush_buffer;
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::StartFPGASearch() {
|
||||
int index = 0;
|
||||
int fd = -1;
|
||||
|
||||
fd = OpenCtrlDev();
|
||||
TriggerSearch(fd);
|
||||
|
||||
while (index < 10000) {
|
||||
if (ReadPara(fd, TRIGGER_REG_ADDR)) {
|
||||
break;
|
||||
}
|
||||
index++;
|
||||
sleep(0.001);
|
||||
}
|
||||
// milvus::LOG_ENGINE_DEBUG_ << " Trigger FPGA search";
|
||||
|
||||
if (verbose) {
|
||||
GetSearchTime(fd);
|
||||
}
|
||||
|
||||
CloseCtrlDev(fd);
|
||||
}
|
||||
|
||||
void
|
||||
ReadSingleBlock(char* buffer, std::vector<uint32_t>& label_vec, std::vector<float>& dis_vec) {
|
||||
int index;
|
||||
uint32_t* label = NULL;
|
||||
float* distance = NULL;
|
||||
|
||||
for (index = 0; index < 8; index++) {
|
||||
label = (uint32_t*)(buffer + index * 8);
|
||||
distance = reinterpret_cast<float*>(buffer + index * 8 + 4);
|
||||
label_vec.push_back(*label);
|
||||
dis_vec.push_back(*distance);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FpgaInterface::ReadResultFile(std::vector<idx_t>& id_vec, std::vector<float>& dis_vec) {
|
||||
uint32_t index;
|
||||
uint32_t offset = step3_s * 64;
|
||||
uint32_t single_size = topk * nq * 4 * 2 / 4;
|
||||
|
||||
std::vector<uint32_t> id_vec_0;
|
||||
std::vector<uint32_t> id_vec_1;
|
||||
std::vector<float> dis_vec_0;
|
||||
std::vector<float> dis_vec_1;
|
||||
|
||||
char* buffer0 = NULL;
|
||||
char* buffer1 = NULL;
|
||||
char* buffer2 = NULL;
|
||||
char* buffer3 = NULL;
|
||||
|
||||
posix_memalign((void**)&buffer0, 4096 /*alignment*/, single_size + 4096);
|
||||
assert(buffer0);
|
||||
posix_memalign((void**)&buffer1, 4096 /*alignment*/, single_size + 4096);
|
||||
assert(buffer1);
|
||||
posix_memalign((void**)&buffer2, 4096 /*alignment*/, single_size + 4096);
|
||||
assert(buffer2);
|
||||
posix_memalign((void**)&buffer3, 4096 /*alignment*/, single_size + 4096);
|
||||
assert(buffer3);
|
||||
|
||||
LoadFPGA2Mem(FPGA_BASE_ADDR_0 + offset, buffer0, single_size);
|
||||
LoadFPGA2Mem(FPGA_BASE_ADDR_1 + offset, buffer1, single_size);
|
||||
LoadFPGA2Mem(FPGA_BASE_ADDR_2 + offset, buffer2, single_size);
|
||||
LoadFPGA2Mem(FPGA_BASE_ADDR_3 + offset, buffer3, single_size);
|
||||
|
||||
/* extract labels and distances from FPGA result file */
|
||||
uint32_t block_num = single_size / 64;
|
||||
for (index = 0; index < block_num; index++) {
|
||||
if (index < block_num / 2) {
|
||||
ReadSingleBlock(buffer0 + index * 64, id_vec_0, dis_vec_0);
|
||||
ReadSingleBlock(buffer1 + index * 64, id_vec_0, dis_vec_0);
|
||||
ReadSingleBlock(buffer2 + index * 64, id_vec_0, dis_vec_0);
|
||||
ReadSingleBlock(buffer3 + index * 64, id_vec_0, dis_vec_0);
|
||||
} else {
|
||||
ReadSingleBlock(buffer0 + index * 64, id_vec_1, dis_vec_1);
|
||||
ReadSingleBlock(buffer1 + index * 64, id_vec_1, dis_vec_1);
|
||||
ReadSingleBlock(buffer2 + index * 64, id_vec_1, dis_vec_1);
|
||||
ReadSingleBlock(buffer3 + index * 64, id_vec_1, dis_vec_1);
|
||||
}
|
||||
}
|
||||
|
||||
/* assemble labels and distances */
|
||||
for (index = 0; index < nq; index++) {
|
||||
for (int j = 0; j < 64; j++) {
|
||||
// cout << index << " " << j << " " << id_vec_0[index*64+j] << " " << id_vec_0.size()<<endl;
|
||||
id_vec[index * 128 + j] = all_index_id[id_vec_0[index * 64 + j]];
|
||||
dis_vec[index * 128 + j] = dis_vec_0[index * 64 + j];
|
||||
}
|
||||
for (int z = 0; z < 64; z++) {
|
||||
id_vec[index * 128 + z + 64] = all_index_id[id_vec_1[index * 64 + z]];
|
||||
dis_vec[index * 128 + z + 64] = dis_vec_1[index * 64 + z];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
FpgaInterface::BatchAnnQuery(idx_t nprobe, idx_t batch_size, float* items, idx_t expect_k, std::vector<idx_t>& id_vec,
|
||||
std::vector<float>& dis_vec) {
|
||||
topk = expect_k;
|
||||
|
||||
if (first_query || nq != batch_size || pre_nprobe != nprobe) {
|
||||
pre_nprobe = nprobe;
|
||||
nq = batch_size;
|
||||
GetConfigTable(nprobe);
|
||||
TransferCfg();
|
||||
if (first_query || last_field < nq * topk) {
|
||||
FlushResultMem();
|
||||
first_query = false;
|
||||
last_field = nq * topk;
|
||||
}
|
||||
}
|
||||
size_t query_size = batch_size * d * sizeof(float);
|
||||
TransMem2FPGA((char*)items, query_size, que_s * 64);
|
||||
StartFPGASearch();
|
||||
ReadResultFile(id_vec, dis_vec);
|
||||
}
|
||||
} // namespace Fpga
|
|
@ -0,0 +1,163 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#ifndef FPGA_H
|
||||
#define FPGA_H
|
||||
|
||||
#include <faiss/IndexFlat.h>
|
||||
#include <faiss/IndexIVFPQ.h>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#define FPGA_BASE_ADDR_0 0x000000000
|
||||
#define FPGA_BASE_ADDR_1 0x400000000
|
||||
#define FPGA_BASE_ADDR_2 0x800000000
|
||||
#define FPGA_BASE_ADDR_3 0xc00000000
|
||||
|
||||
#define REG_BASE_ADDR 0x400000
|
||||
#define PARA_BASE_ADDR 0x500000
|
||||
#define DATA_BASE_BASE_ADDR 0x501000
|
||||
|
||||
#define Time_REG_ADDR 0xFFFE0
|
||||
#define TRIGGER_REG_ADDR 0xFFFD0
|
||||
|
||||
// device clock frequency :MHz
|
||||
#define FPGA_RATE 200
|
||||
|
||||
namespace Fpga {
|
||||
|
||||
using idx_t = int64_t;
|
||||
|
||||
class FpgaInterface {
|
||||
public:
|
||||
idx_t nb; // number of dataset
|
||||
idx_t nq;
|
||||
idx_t nlist;
|
||||
idx_t topk;
|
||||
idx_t d; // dimension
|
||||
idx_t m; // sub-quantizer
|
||||
idx_t nbits;
|
||||
|
||||
bool verbose; // flag to output debug msg
|
||||
bool first_query; // flag to firt query
|
||||
idx_t last_field; // record to last flush field
|
||||
|
||||
faiss::IndexIVFPQ* index_ptr;
|
||||
|
||||
// data in cal time
|
||||
idx_t ci_s;
|
||||
idx_t tm2_s;
|
||||
idx_t db_s;
|
||||
idx_t pq_s;
|
||||
idx_t que_s;
|
||||
idx_t step1_s;
|
||||
idx_t step2_s;
|
||||
idx_t step3_s;
|
||||
idx_t final_s;
|
||||
std::vector<idx_t> cfg_table;
|
||||
std::vector<idx_t> all_index_id; // id map between initial and added, (nb, )
|
||||
std::vector<int> all_list_size; // dataset number in each cluster, (nlist, )
|
||||
FpgaInterface() {
|
||||
pre_nprobe = 0;
|
||||
}
|
||||
void
|
||||
setIndex(faiss::IndexIVFPQ* index) {
|
||||
nb = index->ntotal;
|
||||
nlist = index->nlist;
|
||||
d = index->d;
|
||||
m = index->pq.M;
|
||||
nbits = index->pq.nbits;
|
||||
first_query = true;
|
||||
verbose = false;
|
||||
index_ptr = index;
|
||||
|
||||
all_index_id.resize(nb);
|
||||
all_list_size.resize(nlist);
|
||||
GetDevAddr();
|
||||
}
|
||||
|
||||
virtual ~FpgaInterface();
|
||||
|
||||
void
|
||||
GetDevAddr();
|
||||
void
|
||||
GetConfigTable(idx_t nprobe);
|
||||
|
||||
// h2c0 functions
|
||||
void
|
||||
TransFile2FPGA(const char* filename, loff_t base, size_t offset);
|
||||
char*
|
||||
LoadFile2Mem(const char* filename, size_t& file_size, char* buffer);
|
||||
void
|
||||
TransMem2FPGA(char* buffer, size_t size, loff_t base_addr);
|
||||
|
||||
// c2ch functions
|
||||
void
|
||||
WriteFileFromFPGA(const char* filename, size_t size, loff_t base, size_t offset);
|
||||
char*
|
||||
LoadFPGA2Mem(loff_t fpga_addr, char* buffer, size_t size);
|
||||
void
|
||||
SaveMem2File(char* buffer, size_t size, const char* filename);
|
||||
|
||||
// control functions
|
||||
int
|
||||
OpenCtrlDev();
|
||||
int
|
||||
CloseCtrlDev(int fd);
|
||||
int
|
||||
WritePara(int fd, uint32_t addr, uint32_t val);
|
||||
int
|
||||
ReadPara(int fd, uint32_t addr);
|
||||
int
|
||||
WriteReg(int fd, uint32_t addr, uint32_t val);
|
||||
int
|
||||
ReadReg(int fd, uint32_t addr);
|
||||
|
||||
void
|
||||
CopyIndexToFPGA();
|
||||
bool
|
||||
BatchAnnQuery(idx_t nprobe, idx_t batch_size, float* items, idx_t expect_k, std::vector<idx_t>& id_vec,
|
||||
std::vector<float>& dis_vec);
|
||||
void
|
||||
TriggerSearch(int fd);
|
||||
uint32_t
|
||||
GetSearchTime(int fd);
|
||||
void
|
||||
StartFPGASearch();
|
||||
|
||||
int
|
||||
GetDatabaseListAlign(std::vector<int>& data_list, uint32_t align, std::vector<int>& align_list);
|
||||
void
|
||||
GetDatabaseListSum(std::vector<int>& data_vec, std::vector<int>& sum_vec);
|
||||
void
|
||||
GetProbeDatabaseAddressList(uint32_t base_addr, std::vector<int>& data_vec, int align,
|
||||
std::vector<uint32_t>& addr_vec, uint32_t m);
|
||||
void
|
||||
GetProbeTerm2AddressList(uint32_t base_addr, std::vector<int>& data_vec, int align,
|
||||
std::vector<uint32_t>& addr_vec);
|
||||
int
|
||||
CfgDataBaseNumReal(int fd);
|
||||
void
|
||||
TransferCfg();
|
||||
void
|
||||
FlushResultMem();
|
||||
void
|
||||
ReadResultFile(std::vector<idx_t>& id_vec, std::vector<float>& dis_vec);
|
||||
|
||||
private:
|
||||
int pre_nprobe;
|
||||
};
|
||||
using FpgaInterfacePtr = std::shared_ptr<FpgaInterface>;
|
||||
} // namespace Fpga
|
||||
#endif
|
|
@ -0,0 +1,18 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "knowhere/index/vector_index/fpga/FpgaInst.h"
|
||||
|
||||
namespace Fpga {
|
||||
|
||||
FpgaInterfacePtr FpgaInst::instance = nullptr;
|
||||
std::mutex FpgaInst::mutex_;
|
||||
} // namespace Fpga
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#ifndef FPGA_INST_H
|
||||
#define FPGA_INST_H
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include "Fpga.h"
|
||||
namespace Fpga {
|
||||
class FpgaInst {
|
||||
public:
|
||||
static FpgaInterfacePtr
|
||||
GetInstance() {
|
||||
if (instance == nullptr) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (instance == nullptr) {
|
||||
instance = std::make_shared<FpgaInterface>();
|
||||
}
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
private:
|
||||
static FpgaInterfacePtr instance;
|
||||
static std::mutex mutex_;
|
||||
};
|
||||
} // namespace Fpga
|
||||
#endif
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "knowhere/index/vector_index/fpga/IndexFPGAIVFPQ.h"
|
||||
#include <faiss/IndexFlat.h>
|
||||
#include <faiss/IndexIVF.h>
|
||||
#include <faiss/IndexIVFPQ.h>
|
||||
#include <faiss/index_io.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "knowhere/common/Exception.h"
|
||||
#include "knowhere/index/vector_index/IndexIVFPQ.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#include "knowhere/index/vector_index/fpga/utils.h"
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
#include "utils/Log.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
void
|
||||
FPGAIVFPQ::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
GETTENSOR(dataset_ptr)
|
||||
LOG_ENGINE_DEBUG_ << " fpga ivpq train. dim:" << dim;
|
||||
// faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
|
||||
faiss::Index* coarse_quantizer = new faiss::IndexFlatL2(dim);
|
||||
index_ = std::shared_ptr<faiss::Index>(
|
||||
new faiss::IndexIVFPQ(coarse_quantizer, dim, config[IndexParams::nlist].get<int64_t>(),
|
||||
config[IndexParams::m].get<int64_t>(), config[IndexParams::nbits].get<int64_t>()));
|
||||
|
||||
index_->train(rows, (float*)p_data);
|
||||
}
|
||||
void
|
||||
FPGAIVFPQ::Add(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_ || !index_->is_trained) {
|
||||
KNOWHERE_THROW_MSG("index not initialize or trained");
|
||||
}
|
||||
LOG_ENGINE_DEBUG_ << " fpga ivpq add. ";
|
||||
std::lock_guard<std::mutex> lk(mutex_);
|
||||
// GET_TENSOR_DATA_ID(dataset_ptr)
|
||||
GETTENSOR(dataset_ptr)
|
||||
index_->add(rows, (float*)p_data); // we not support add_with_id ,maybe support latter
|
||||
}
|
||||
|
||||
void
|
||||
FPGAIVFPQ::CopyIndexToFpga() {
|
||||
std::lock_guard<std::mutex> lk(mutex_);
|
||||
auto Fpga = Fpga::FpgaInst::GetInstance();
|
||||
auto ivf_index = static_cast<faiss::IndexIVFPQ*>(index_.get());
|
||||
ivf_index->make_direct_map();
|
||||
Fpga->setIndex(ivf_index);
|
||||
Fpga->CopyIndexToFPGA();
|
||||
|
||||
LOG_ENGINE_DEBUG_ << " copy index to fpga end";
|
||||
}
|
||||
void
|
||||
FPGAIVFPQ::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& config) {
|
||||
std::lock_guard<std::mutex> lk(mutex_);
|
||||
|
||||
try {
|
||||
LOG_ENGINE_DEBUG_ << " run fpga search QueryImpl";
|
||||
auto params = GenParams(config);
|
||||
int nprobe = params->nprobe;
|
||||
LOG_ENGINE_DEBUG_ << "nprobe:" << nprobe << "n:" << n << "k:" << k;
|
||||
auto Fpga = Fpga::FpgaInst::GetInstance();
|
||||
std::vector<faiss::Index::idx_t> vlabels(n * k);
|
||||
|
||||
std::vector<float> vdistances(n * k);
|
||||
auto t0 = Elapsed();
|
||||
// do query
|
||||
Fpga->BatchAnnQuery(nprobe, n, (float*)data, k, vlabels, vdistances);
|
||||
auto t1 = Elapsed();
|
||||
LOG_ENGINE_DEBUG_ << " vlabels size:" << vlabels.size() << "search time:" << t1 - t0;
|
||||
auto elems = n * k;
|
||||
//
|
||||
size_t p_id_size = sizeof(int64_t) * elems;
|
||||
size_t p_dist_size = sizeof(float) * elems;
|
||||
memcpy(distances, vdistances.data(), p_dist_size);
|
||||
memcpy(labels, vlabels.data(), p_id_size);
|
||||
LOG_ENGINE_DEBUG_ << " copy end" << vlabels.size();
|
||||
} catch (...) {
|
||||
}
|
||||
}
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
|
@ -0,0 +1,50 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#ifndef INDEX_FPGA_IVFPQ_H
|
||||
#define INDEX_FPGA_IVFPQ_H
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "knowhere/index/vector_index/IndexIVFPQ.h"
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
#include "knowhere/index/vector_index/fpga/FpgaInst.h"
|
||||
#endif
|
||||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
class FPGAIVFPQ : public IVFPQ {
|
||||
public:
|
||||
FPGAIVFPQ() : IVFPQ() {
|
||||
index_type_ = IndexEnum::INDEX_FAISS_IVFPQ;
|
||||
}
|
||||
explicit FPGAIVFPQ(std::shared_ptr<faiss::Index> index) : IVFPQ(std::move(index)) {
|
||||
index_type_ = IndexEnum::INDEX_FAISS_IVFPQ;
|
||||
}
|
||||
|
||||
void
|
||||
Train(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
CopyIndexToFpga();
|
||||
|
||||
void
|
||||
QueryImpl(int64_t, const float*, int64_t, float*, int64_t*, const Config&) override;
|
||||
};
|
||||
|
||||
using FPGAIVFPQPtr = std::shared_ptr<FPGAIVFPQ>;
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
||||
#endif
|
|
@ -0,0 +1,86 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "knowhere/index/vector_index/fpga/utils.h"
|
||||
|
||||
size_t
|
||||
GetFileSize(const char* filename) {
|
||||
size_t file_size = 0;
|
||||
|
||||
std::ifstream fin(filename, std::ifstream::in | std::ifstream::binary);
|
||||
if (fin.is_open()) {
|
||||
fin.seekg(0, std::ios::end);
|
||||
file_size = fin.tellg();
|
||||
fin.close();
|
||||
}
|
||||
return file_size;
|
||||
}
|
||||
|
||||
void
|
||||
SplitFile(const char* filename, size_t file_size, char* buffer0, char* buffer1, char* buffer2, char* buffer3) {
|
||||
FILE* fp;
|
||||
char temp[64];
|
||||
|
||||
fp = fopen(filename, "rb");
|
||||
assert(fp);
|
||||
|
||||
int flag = 0;
|
||||
for (size_t i = 0; i < file_size / 64; i++) {
|
||||
memset(temp, 0x0, 64);
|
||||
fread(temp, 64, 1, fp);
|
||||
if (flag % 4 == 0) {
|
||||
memcpy(buffer0 + i / 4 * 64, temp, 64);
|
||||
} else if (flag % 4 == 1) {
|
||||
memcpy(buffer1 + i / 4 * 64, temp, 64);
|
||||
} else if (flag % 4 == 2) {
|
||||
memcpy(buffer2 + i / 4 * 64, temp, 64);
|
||||
} else if (flag % 4 == 3) {
|
||||
memcpy(buffer3 + i / 4 * 64, temp, 64);
|
||||
}
|
||||
|
||||
flag++;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
void
|
||||
SplitMemory(char* buffer, size_t size, char* buffer0, char* buffer1, char* buffer2, char* buffer3) {
|
||||
int flag = 0;
|
||||
for (size_t i = 0; i < size / 64; i++) {
|
||||
if (flag % 4 == 0) {
|
||||
memcpy(buffer0 + i / 4 * 64, buffer + i * 64, 64);
|
||||
} else if (flag % 4 == 1) {
|
||||
memcpy(buffer1 + i / 4 * 64, buffer + i * 64, 64);
|
||||
} else if (flag % 4 == 2) {
|
||||
memcpy(buffer2 + i / 4 * 64, buffer + i * 64, 64);
|
||||
} else if (flag % 4 == 3) {
|
||||
memcpy(buffer3 + i / 4 * 64, buffer + i * 64, 64);
|
||||
}
|
||||
|
||||
flag++;
|
||||
}
|
||||
}
|
||||
|
||||
double
|
||||
Elapsed() {
|
||||
struct timeval tv;
|
||||
|
||||
gettimeofday(&tv, NULL);
|
||||
|
||||
return tv.tv_sec + tv.tv_usec * 1e-6;
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#ifndef UTILIS_H
|
||||
#define UTILIS_H
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
size_t
|
||||
GetFileSize(const char* filename);
|
||||
void
|
||||
SplitFile(const char* filename, size_t file_size, char* buffer0, char* buffer1, char* buffer2, char* buffer3);
|
||||
void
|
||||
SplitMemory(char* buffer, size_t size, char* buffer0, char* buffer1, char* buffer2, char* buffer3);
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
ShowVecValues(std::vector<T>& vec, int col) {
|
||||
auto iter = vec.begin();
|
||||
int index = 0;
|
||||
while (iter != vec.end()) {
|
||||
std::cout << *iter << " ";
|
||||
iter++;
|
||||
index++;
|
||||
|
||||
if (index == col) {
|
||||
std::cout << "\n";
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
double
|
||||
Elapsed();
|
||||
#endif
|
|
@ -0,0 +1,370 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "knowhere/index/vector_index/fpga/xilinx_c.h"
|
||||
|
||||
/* ltoh: little to host */
|
||||
/* htol: little to host */
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
#define ltohl(x) (x)
|
||||
#define ltohs(x) (x)
|
||||
#define htoll(x) (x)
|
||||
#define htols(x) (x)
|
||||
#elif __BYTE_ORDER == __BIG_ENDIAN
|
||||
#define ltohl(x) __bswap_32(x)
|
||||
#define ltohs(x) __bswap_16(x)
|
||||
#define htoll(x) __bswap_32(x)
|
||||
#define htols(x) __bswap_16(x)
|
||||
#endif
|
||||
|
||||
#define FATAL \
|
||||
do { \
|
||||
fprintf(stderr, "Error at line %d, file %s (%d) [%s]\n", __LINE__, __FILE__, errno, strerror(errno)); \
|
||||
exit(1); \
|
||||
} while (0)
|
||||
|
||||
#define MAP_SIZE (4 * 1024UL)
|
||||
#define MAP_MASK (MAP_SIZE - 1)
|
||||
|
||||
static int no_write = 0;
|
||||
|
||||
/* Subtract timespec t2 from t1
|
||||
*
|
||||
* Both t1 and t2 must already be normalized
|
||||
* i.e. 0 <= nsec < 1000000000 */
|
||||
static void
|
||||
timespec_sub(struct timespec* t1, const struct timespec* t2) {
|
||||
assert(t1->tv_nsec >= 0);
|
||||
assert(t1->tv_nsec < 1000000000);
|
||||
assert(t2->tv_nsec >= 0);
|
||||
assert(t2->tv_nsec < 1000000000);
|
||||
t1->tv_sec -= t2->tv_sec;
|
||||
t1->tv_nsec -= t2->tv_nsec;
|
||||
if (t1->tv_nsec >= 1000000000) {
|
||||
t1->tv_sec++;
|
||||
t1->tv_nsec -= 1000000000;
|
||||
} else if (t1->tv_nsec < 0) {
|
||||
t1->tv_sec--;
|
||||
t1->tv_nsec += 1000000000;
|
||||
}
|
||||
}
|
||||
|
||||
char*
|
||||
TransFPGA2Mem(char* devicename, loff_t addr, size_t size, char* buffer, int count) {
|
||||
// char *buffer = NULL;
|
||||
// char *allocated = NULL;
|
||||
// struct timespec ts_start, ts_end;
|
||||
|
||||
// posix_memalign((void **)&allocated, 4096/*alignment*/, size + 4096);
|
||||
// assert(allocated);
|
||||
// buffer = allocated + offset;
|
||||
// printf("host memory buffer = %p\n", buffer);
|
||||
|
||||
int fpga_fd = open(devicename, O_RDWR | O_NONBLOCK);
|
||||
assert(fpga_fd >= 0);
|
||||
|
||||
while (count--) {
|
||||
memset(buffer, 0x00, size);
|
||||
|
||||
// rc = clock_gettime(CLOCK_MONOTONIC, &ts_start);
|
||||
// select AXI MM address
|
||||
off_t off = lseek(fpga_fd, addr, SEEK_SET);
|
||||
int rc = read(fpga_fd, buffer, size);
|
||||
|
||||
// rc = clock_gettime(CLOCK_MONOTONIC, &ts_end);
|
||||
}
|
||||
|
||||
// subtract the start time from the end time
|
||||
// timespec_sub(&ts_end, &ts_start);
|
||||
// display passed time, a bit less accurate but side-effects are accounted for
|
||||
// printf("CLOCK_MONOTONIC reports %ld.%09ld seconds (total) for last transfer of %d bytes\n", ts_end.tv_sec,
|
||||
// ts_end.tv_nsec, size);
|
||||
|
||||
close(fpga_fd);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
int
|
||||
WriteMem2File(char* buffer, size_t size, int count, const char* filename) {
|
||||
int rc;
|
||||
int file_fd = -1;
|
||||
// char *buffer = (char *)addr;
|
||||
// char *allocated = buffer - offset;
|
||||
|
||||
if (filename) {
|
||||
file_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0666);
|
||||
assert(file_fd >= 0);
|
||||
}
|
||||
|
||||
while (count--) {
|
||||
if ((file_fd >= 0) & (no_write == 0)) {
|
||||
// write buffer to file
|
||||
rc = write(file_fd, buffer, size);
|
||||
assert(rc == size);
|
||||
}
|
||||
}
|
||||
|
||||
if (file_fd >= 0) {
|
||||
close(file_fd);
|
||||
}
|
||||
// free(allocated);
|
||||
}
|
||||
|
||||
int
|
||||
DMAFPGA2File(char* devicename, loff_t addr, size_t size, size_t offset, int count, const char* filename) {
|
||||
int rc;
|
||||
char* buffer = NULL;
|
||||
char* allocated = NULL;
|
||||
struct timespec ts_start, ts_end;
|
||||
|
||||
posix_memalign((void**)&allocated, 4096 /*alignment*/, size + 4096);
|
||||
assert(allocated);
|
||||
buffer = allocated + offset;
|
||||
|
||||
int file_fd = -1;
|
||||
int fpga_fd = open(devicename, O_RDWR | O_NONBLOCK);
|
||||
assert(fpga_fd >= 0);
|
||||
|
||||
/* create file to write data to */
|
||||
if (filename) {
|
||||
file_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0666);
|
||||
assert(file_fd >= 0);
|
||||
}
|
||||
|
||||
while (count--) {
|
||||
memset(buffer, 0x00, size);
|
||||
/* select AXI MM address */
|
||||
off_t off = lseek(fpga_fd, addr, SEEK_SET);
|
||||
// rc = clock_gettime(CLOCK_MONOTONIC, &ts_start);
|
||||
/* read data from AXI MM into buffer using SGDMA */
|
||||
rc = read(fpga_fd, buffer, size);
|
||||
// rc = clock_gettime(CLOCK_MONOTONIC, &ts_end);
|
||||
/* file argument given? */
|
||||
if ((file_fd >= 0) & (no_write == 0)) {
|
||||
/* write buffer to file */
|
||||
rc = write(file_fd, buffer, size);
|
||||
assert(rc == size);
|
||||
}
|
||||
}
|
||||
/* subtract the start time from the end time */
|
||||
// timespec_sub(&ts_end, &ts_start);
|
||||
/* display passed time, a bit less accurate but side-effects are accounted for */
|
||||
// printf("CLOCK_MONOTONIC reports %ld.%09ld seconds (total) for last transfer of %ld bytes\n", ts_end.tv_sec,
|
||||
// ts_end.tv_nsec, size);
|
||||
|
||||
close(fpga_fd);
|
||||
if (file_fd >= 0) {
|
||||
close(file_fd);
|
||||
}
|
||||
free(allocated);
|
||||
}
|
||||
|
||||
char*
|
||||
ReadFile2Mem(const char* filename, char* buffer, size_t size) {
|
||||
int rc;
|
||||
// char *buffer = NULL;
|
||||
// char *allocated = NULL;
|
||||
int file_fd = -1;
|
||||
|
||||
// allocate dst buffer in memory
|
||||
// posix_memalign((void **)&allocated, 4096/*alignment*/, size + 4096);
|
||||
// assert(allocated);
|
||||
// buffer = allocated + offset;
|
||||
// printf("host memory buffer = %p\n", buffer);
|
||||
|
||||
if (filename) {
|
||||
file_fd = open(filename, O_RDONLY);
|
||||
assert(file_fd >= 0);
|
||||
}
|
||||
|
||||
if (file_fd >= 0) {
|
||||
rc = read(file_fd, buffer, size);
|
||||
assert(rc == size);
|
||||
}
|
||||
|
||||
if (file_fd >= 0) {
|
||||
close(file_fd);
|
||||
}
|
||||
// free(allocated);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
int
|
||||
TransBuffer2FPGA(char* buffer, size_t size, char* devicename, loff_t addr, int count) {
|
||||
int rc;
|
||||
// struct timespec ts_start, ts_end;
|
||||
|
||||
int fpga_fd = open(devicename, O_RDWR);
|
||||
assert(fpga_fd >= 0);
|
||||
|
||||
// select AXI MM address
|
||||
lseek(fpga_fd, addr, SEEK_SET);
|
||||
while (count--) {
|
||||
// rc = clock_gettime(CLOCK_MONOTONIC, &ts_start);
|
||||
/* write buffer to AXI MM address using SGDMA */
|
||||
rc = write(fpga_fd, buffer, size);
|
||||
assert(rc == size);
|
||||
// rc = clock_gettime(CLOCK_MONOTONIC, &ts_end);
|
||||
}
|
||||
|
||||
/* subtract the start time from the end time */
|
||||
// timespec_sub(&ts_end, &ts_start);
|
||||
/* display passed time, a bit less accurate but side-effects are accounted for */
|
||||
// printf("CLOCK_MONOTONIC reports %ld.%09ld seconds (total) for last transfer of %lu bytes\n", ts_end.tv_sec,
|
||||
// ts_end.tv_nsec, size);
|
||||
|
||||
close(fpga_fd);
|
||||
}
|
||||
|
||||
int
|
||||
DMAFile2FPGA(char* devicename, loff_t addr, size_t size, size_t offset, int count, const char* filename) {
|
||||
int rc;
|
||||
char* buffer = NULL;
|
||||
char* allocated = NULL;
|
||||
struct timespec ts_start, ts_end;
|
||||
|
||||
posix_memalign((void**)&allocated, 4096 /*alignment*/, size + 4096);
|
||||
assert(allocated);
|
||||
buffer = allocated + offset;
|
||||
int file_fd = -1;
|
||||
int fpga_fd = open(devicename, O_RDWR);
|
||||
assert(fpga_fd >= 0);
|
||||
|
||||
if (filename) {
|
||||
file_fd = open(filename, O_RDONLY);
|
||||
assert(file_fd >= 0);
|
||||
}
|
||||
/* fill the buffer with data from file? */
|
||||
if (file_fd >= 0) {
|
||||
/* read data from file into memory buffer */
|
||||
rc = read(file_fd, buffer, size);
|
||||
assert(rc == size);
|
||||
}
|
||||
/* select AXI MM address */
|
||||
off_t off = lseek(fpga_fd, addr, SEEK_SET);
|
||||
while (count--) {
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts_start);
|
||||
/* write buffer to AXI MM address using SGDMA */
|
||||
rc = write(fpga_fd, buffer, size);
|
||||
assert(rc == size);
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts_end);
|
||||
}
|
||||
/* subtract the start time from the end time */
|
||||
timespec_sub(&ts_end, &ts_start);
|
||||
/* display passed time, a bit less accurate but side-effects are accounted for */
|
||||
close(fpga_fd);
|
||||
if (file_fd >= 0) {
|
||||
close(file_fd);
|
||||
}
|
||||
free(allocated);
|
||||
}
|
||||
|
||||
int
|
||||
open_dev(char* devicename) {
|
||||
int fd;
|
||||
char* device;
|
||||
device = devicename;
|
||||
if ((fd = open(device, O_RDWR | O_SYNC)) == -1)
|
||||
FATAL;
|
||||
else
|
||||
return fd;
|
||||
}
|
||||
|
||||
int
|
||||
close_dev(int fd) {
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
reg_rw(int fd, loff_t addr, char* rw_mode, char access_mode, uint32_t data) {
|
||||
// int fd;
|
||||
void *map_base, *virt_addr;
|
||||
uint32_t read_result = 0, writeval = 0;
|
||||
off_t target;
|
||||
int access_width = 'w';
|
||||
char* access_type;
|
||||
|
||||
target = addr;
|
||||
|
||||
access_type = rw_mode;
|
||||
access_width = access_mode;
|
||||
|
||||
/* map one page */
|
||||
map_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, target / 4096 * 4096);
|
||||
if (map_base == (void*)-1)
|
||||
FATAL;
|
||||
|
||||
/* calculate the virtual address to be accessed */
|
||||
virt_addr = (void*)((uint8_t*)map_base + target % 4096);
|
||||
/* read only */
|
||||
|
||||
if (strcmp(access_type, "read") == 0) {
|
||||
// printf("Read from address %p.\n", virt_addr);
|
||||
switch (access_width) {
|
||||
case 'b':
|
||||
read_result = *((uint8_t*)virt_addr);
|
||||
// printf("Read 8-bits value at address 0x%08x (%p): 0x%02x\n", (unsigned int)target, virt_addr,
|
||||
// (unsigned int)read_result);
|
||||
break;
|
||||
case 'h':
|
||||
read_result = *((uint16_t*)virt_addr);
|
||||
/* swap 16-bit endianess if host is not little-endian */
|
||||
read_result = ltohs(read_result);
|
||||
// printf("Read 16-bit value at address 0x%08x (%p): 0x%04x\n", (unsigned int)target, virt_addr,
|
||||
// (unsigned int)read_result);
|
||||
break;
|
||||
case 'w':
|
||||
read_result = *((uint32_t*)virt_addr);
|
||||
/* swap 32-bit endianess if host is not little-endian */
|
||||
read_result = ltohl(read_result);
|
||||
// printf("Read 32-bit value at address 0x%08x (%p): 0x%08x\n", (unsigned int)target, virt_addr,
|
||||
// (unsigned int)read_result);
|
||||
return (int)read_result;
|
||||
break;
|
||||
default:
|
||||
exit(2);
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
/* data value given, i.e. writing? */
|
||||
if (strcmp(access_type, "write") == 0) {
|
||||
writeval = data;
|
||||
switch (access_width) {
|
||||
case 'b':
|
||||
// printf("Write 8-bits value 0x%02x to 0x%08x (0x%p)\n", (unsigned int)writeval, (unsigned int)target,
|
||||
// virt_addr);
|
||||
*((uint8_t*)virt_addr) = writeval;
|
||||
break;
|
||||
case 'h':
|
||||
// printf("Write 16-bits value 0x%04x to 0x%08x (0x%p)\n", (unsigned int)writeval, (unsigned int)target,
|
||||
// virt_addr);
|
||||
/* swap 16-bit endianess if host is not little-endian */
|
||||
writeval = htols(writeval);
|
||||
*((uint16_t*)virt_addr) = writeval;
|
||||
break;
|
||||
case 'w':
|
||||
// printf("Write 32-bits value 0x%08x to 0x%08x (0x%p)\n", (unsigned int)writeval, (unsigned int)target,
|
||||
// virt_addr);
|
||||
/* swap 32-bit endianess if host is not little-endian */
|
||||
writeval = htoll(writeval);
|
||||
*((uint32_t*)virt_addr) = writeval;
|
||||
break;
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
if (munmap(map_base, MAP_SIZE) == -1)
|
||||
FATAL;
|
||||
// close(fd);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#ifndef XILINX_C_H
|
||||
#define XILINX_C_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <byteswap.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <getopt.h>
|
||||
#include <signal.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <termios.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define DEVICE_C2H0 "/dev/xdma/card0/c2h0"
|
||||
#define DEVICE_H2C0 "/dev/xdma/card0/h2c0"
|
||||
#define DEVICE_CTRL "/dev/xdma0_bypass"
|
||||
|
||||
// extern "C"{
|
||||
// c2h0 functions
|
||||
char*
|
||||
TransFPGA2Mem(char* devicename, loff_t addr, size_t size, char* buffer, int count);
|
||||
int
|
||||
WriteMem2File(char* buffer, size_t size, int count, const char* filename);
|
||||
int
|
||||
DMAFPGA2File(char* devicename, loff_t addr, size_t size, size_t offset, int count, const char* filename);
|
||||
|
||||
// h2c0 functions
|
||||
char*
|
||||
ReadFile2Mem(const char* filename, char* buffer, size_t size);
|
||||
int
|
||||
TransBuffer2FPGA(char* buffer, size_t size, char* devicename, loff_t addr, int count);
|
||||
int
|
||||
DMAFile2FPGA(char* devicename, loff_t addr, size_t size, size_t offset, int count, const char* filename);
|
||||
|
||||
// register functions
|
||||
int
|
||||
open_dev(char* devicename);
|
||||
int
|
||||
close_dev(int fd);
|
||||
int
|
||||
reg_rw(int fd, loff_t addr, char* rw_mode, char access_mode, uint32_t data);
|
||||
//}
|
||||
|
||||
#endif
|
|
@ -22,6 +22,8 @@ ResourceFactory::Create(const std::string& name, const std::string& type, uint64
|
|||
return std::make_shared<CpuResource>(name, device_id, enable_executor);
|
||||
} else if (type == "GPU") {
|
||||
return std::make_shared<GpuResource>(name, device_id, enable_executor);
|
||||
} else if (type == "FPGA") {
|
||||
return std::make_shared<FpgaResource>(name, device_id, enable_executor);
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "resource/CpuResource.h"
|
||||
#include "resource/DiskResource.h"
|
||||
#include "resource/FpgaResource.h"
|
||||
#include "resource/GpuResource.h"
|
||||
#include "resource/Resource.h"
|
||||
|
||||
|
|
|
@ -73,6 +73,11 @@ ResourceMgr::Add(ResourcePtr&& resource) {
|
|||
gpu_resources_.emplace_back(ResourceWPtr(resource));
|
||||
break;
|
||||
}
|
||||
case ResourceType::FPGA: {
|
||||
LOG_ENGINE_DEBUG_ << "add fpga resource";
|
||||
fpga_resources_.emplace_back(ResourceWPtr(resource));
|
||||
break;
|
||||
}
|
||||
default: { break; }
|
||||
}
|
||||
resources_.emplace_back(resource);
|
||||
|
@ -103,6 +108,7 @@ ResourceMgr::Clear() {
|
|||
disk_resources_.clear();
|
||||
cpu_resources_.clear();
|
||||
gpu_resources_.clear();
|
||||
fpga_resources_.clear();
|
||||
resources_.clear();
|
||||
}
|
||||
|
||||
|
@ -171,11 +177,9 @@ ResourceMgr::Dump() const {
|
|||
resources.push_back(res->Dump());
|
||||
}
|
||||
json ret{
|
||||
{"number_of_resource", resources_.size()},
|
||||
{"number_of_disk_resource", disk_resources_.size()},
|
||||
{"number_of_cpu_resource", cpu_resources_.size()},
|
||||
{"number_of_gpu_resource", gpu_resources_.size()},
|
||||
{"resources", resources},
|
||||
{"number_of_resource", resources_.size()}, {"number_of_disk_resource", disk_resources_.size()},
|
||||
{"number_of_cpu_resource", cpu_resources_.size()}, {"number_of_gpu_resource", gpu_resources_.size()},
|
||||
{"number_of_fpga_resource", fpga_resources_.size()}, {"resources", resources},
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -119,6 +119,7 @@ class ResourceMgr : public interface::dumpable {
|
|||
std::vector<ResourceWPtr> disk_resources_;
|
||||
std::vector<ResourceWPtr> cpu_resources_;
|
||||
std::vector<ResourceWPtr> gpu_resources_;
|
||||
std::vector<ResourceWPtr> fpga_resources_;
|
||||
std::vector<ResourcePtr> resources_;
|
||||
mutable std::mutex resources_mutex_;
|
||||
|
||||
|
|
|
@ -50,10 +50,30 @@ load_simple_config() {
|
|||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create("cpu", "CPU", 0));
|
||||
ResMgrInst::GetInstance()->Connect("disk", "cpu", io);
|
||||
|
||||
#if defined(MILVUS_GPU_VERSION) || defined(MILVUS_FPGA_VERSION)
|
||||
server::Config& config = server::Config::GetInstance();
|
||||
#endif
|
||||
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
bool enable_fpga = false;
|
||||
config.GetFpgaResourceConfigEnable(enable_fpga);
|
||||
if (enable_fpga) {
|
||||
std::vector<int64_t> fpga_ids;
|
||||
config.GetFpgaResourceConfigSearchResources(fpga_ids);
|
||||
auto pcie = Connection("pcie", 12000);
|
||||
|
||||
for (auto& fpga_id : fpga_ids) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%ld]", fpga_id);
|
||||
std::string fpga_name = "fpga" + std::to_string(fpga_id);
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create(fpga_name, "FPGA", fpga_id));
|
||||
ResMgrInst::GetInstance()->Connect("cpu", fpga_name, pcie);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
// get resources
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
bool enable_gpu = false;
|
||||
server::Config& config = server::Config::GetInstance();
|
||||
config.GetGpuResourceConfigEnable(enable_gpu);
|
||||
if (enable_gpu) {
|
||||
std::vector<int64_t> gpu_ids;
|
||||
|
@ -78,13 +98,15 @@ load_simple_config() {
|
|||
}
|
||||
|
||||
for (auto& gpu_id : gpu_ids) {
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(gpu_id), "GPU", gpu_id));
|
||||
ResMgrInst::GetInstance()->Connect("cpu", std::to_string(gpu_id), pcie);
|
||||
std::string gpu_name = "gpu" + std::to_string(gpu_id);
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create(gpu_name, "GPU", gpu_id));
|
||||
ResMgrInst::GetInstance()->Connect("cpu", gpu_name, pcie);
|
||||
}
|
||||
|
||||
for (auto& not_find_id : not_find_build_ids) {
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(not_find_id), "GPU", not_find_id));
|
||||
ResMgrInst::GetInstance()->Connect("cpu", std::to_string(not_find_id), pcie);
|
||||
std::string gpu_name = "gpu" + std::to_string(not_find_id);
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create(gpu_name, "GPU", not_find_id));
|
||||
ResMgrInst::GetInstance()->Connect("cpu", gpu_name, pcie);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "ResourceMgr.h"
|
||||
#include "Scheduler.h"
|
||||
#include "Utils.h"
|
||||
#include "config/Config.h"
|
||||
#include "selector/BuildIndexPass.h"
|
||||
#include "selector/FaissFlatPass.h"
|
||||
#include "selector/FaissIVFPass.h"
|
||||
|
@ -123,6 +124,14 @@ class OptimizerInst {
|
|||
pass_list.push_back(std::make_shared<FaissIVFPass>());
|
||||
pass_list.push_back(std::make_shared<FaissIVFSQ8HPass>());
|
||||
}
|
||||
#elif defined MILVUS_FPGA_VERSION
|
||||
bool enable_fpga = false;
|
||||
server::Config& config = server::Config::GetInstance();
|
||||
config.GetFpgaResourceConfigEnable(enable_fpga);
|
||||
if (enable_fpga) {
|
||||
pass_list.push_back(std::make_shared<FaissIVFPass>());
|
||||
LOG_SERVER_DEBUG_ << LogOut("add fpga ");
|
||||
}
|
||||
#endif
|
||||
pass_list.push_back(std::make_shared<FallbackPass>());
|
||||
instance = std::make_shared<Optimizer>(pass_list);
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "scheduler/resource/FpgaResource.h"
|
||||
|
||||
#include <utility>
|
||||
#include "utils/Log.h"
|
||||
namespace milvus {
|
||||
namespace scheduler {
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& out, const FpgaResource& resource) {
|
||||
out << resource.Dump().dump();
|
||||
return out;
|
||||
}
|
||||
|
||||
FpgaResource::FpgaResource(std::string name, uint64_t device_id, bool enable_executor)
|
||||
: Resource(std::move(name), ResourceType::FPGA, device_id, enable_executor) {
|
||||
}
|
||||
|
||||
void
|
||||
FpgaResource::LoadFile(TaskPtr task) {
|
||||
LOG_ENGINE_DEBUG_ << "jack: LoadFile loadType cpu2fpga";
|
||||
task->Load(LoadType::CPU2FPGA, 0);
|
||||
}
|
||||
|
||||
void
|
||||
FpgaResource::Process(TaskPtr task) {
|
||||
LOG_ENGINE_DEBUG_ << "jack: process loadType cpu2fpga";
|
||||
task->Execute();
|
||||
}
|
||||
|
||||
} // namespace scheduler
|
||||
} // namespace milvus
|
|
@ -0,0 +1,37 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "Resource.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace scheduler {
|
||||
|
||||
class FpgaResource : public Resource {
|
||||
public:
|
||||
explicit FpgaResource(std::string name, uint64_t device_id, bool enable_executor);
|
||||
|
||||
friend std::ostream&
|
||||
operator<<(std::ostream& out, const FpgaResource& resource);
|
||||
|
||||
protected:
|
||||
void
|
||||
LoadFile(TaskPtr task) override;
|
||||
|
||||
void
|
||||
Process(TaskPtr task) override;
|
||||
};
|
||||
|
||||
} // namespace scheduler
|
||||
} // namespace milvus
|
|
@ -38,6 +38,7 @@ enum class ResourceType {
|
|||
CPU = 1,
|
||||
GPU = 2,
|
||||
TEST = 3,
|
||||
FPGA = 4,
|
||||
};
|
||||
|
||||
class Resource : public Node, public std::enable_shared_from_this<Resource> {
|
||||
|
|
|
@ -8,9 +8,10 @@
|
|||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include "scheduler/selector/FaissIVFPass.h"
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include "cache/GpuCacheMgr.h"
|
||||
#endif
|
||||
#include "config/Config.h"
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
#include "scheduler/SchedInst.h"
|
||||
|
@ -25,6 +26,7 @@ namespace scheduler {
|
|||
|
||||
void
|
||||
FaissIVFPass::Init() {
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
server::Config& config = server::Config::GetInstance();
|
||||
Status s = config.GetGpuResourceConfigGpuSearchThreshold(threshold_);
|
||||
if (!s.ok()) {
|
||||
|
@ -39,6 +41,7 @@ FaissIVFPass::Init() {
|
|||
AddGpuEnableListener();
|
||||
AddGpuSearchThresholdListener();
|
||||
AddGpuSearchResourcesListener();
|
||||
#endif
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -59,28 +62,52 @@ FaissIVFPass::Run(const TaskPtr& task) {
|
|||
|
||||
auto search_job = std::static_pointer_cast<SearchJob>(search_task->job_.lock());
|
||||
ResourcePtr res_ptr;
|
||||
if (!gpu_enable_) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: gpu disable, specify cpu to search!", "search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
} else if (search_job->nq() < (uint64_t)threshold_) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: nq < gpu_search_threshold, specify cpu to search!",
|
||||
"search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
} else if (search_job->topk() > milvus::server::GPU_QUERY_MAX_TOPK) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: topk > gpu_topk_threshold, specify cpu to search!",
|
||||
"search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
} else if (search_job->extra_params()[knowhere::IndexParams::nprobe].get<int64_t>() >
|
||||
milvus::server::GPU_QUERY_MAX_NPROBE) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: nprobe > gpu_nprobe_threshold, specify cpu to search!",
|
||||
"search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
} else {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: nq >= gpu_search_threshold, specify gpu %d to search!",
|
||||
"search", 0, search_gpus_[idx_]);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[idx_]);
|
||||
idx_ = (idx_ + 1) % search_gpus_.size();
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
auto is_fpga_support = [&]() {
|
||||
// now, only IVF_PQ is supported
|
||||
return static_cast<engine::EngineType>(search_task->file_->engine_type_) == engine::EngineType::FAISS_PQ;
|
||||
};
|
||||
|
||||
bool fpga_enable_ = is_fpga_support();
|
||||
if (fpga_enable_) {
|
||||
server::Config& config = server::Config::GetInstance();
|
||||
config.GetFpgaResourceConfigEnable(fpga_enable_);
|
||||
}
|
||||
if (fpga_enable_) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: fpga enable, specify fpga to search!", "search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::FPGA, 0);
|
||||
} else {
|
||||
#endif
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
if (!gpu_enable_) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: gpu disable, specify cpu to search!", "search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
} else if (search_job->nq() < (uint64_t)threshold_) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: nq < gpu_search_threshold, specify cpu to search!",
|
||||
"search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
} else if (search_job->topk() > milvus::server::GPU_QUERY_MAX_TOPK) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: topk > gpu_topk_threshold, specify cpu to search!",
|
||||
"search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
} else if (search_job->extra_params()[knowhere::IndexParams::nprobe].get<int64_t>() >
|
||||
milvus::server::GPU_QUERY_MAX_NPROBE) {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: nprobe > gpu_nprobe_threshold, specify cpu to search!",
|
||||
"search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
} else {
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: nq >= gpu_search_threshold, specify gpu %d to search!",
|
||||
"search", 0, search_gpus_[idx_]);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[idx_]);
|
||||
idx_ = (idx_ + 1) % search_gpus_.size();
|
||||
}
|
||||
#else
|
||||
LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPass: fpga disable, specify cpu to search!", "search", 0);
|
||||
res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
|
||||
#endif
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
}
|
||||
#endif
|
||||
auto label = std::make_shared<SpecResLabel>(res_ptr);
|
||||
task->label() = label;
|
||||
return true;
|
||||
|
@ -88,4 +115,3 @@ FaissIVFPass::Run(const TaskPtr& task) {
|
|||
|
||||
} // namespace scheduler
|
||||
} // namespace milvus
|
||||
#endif
|
||||
|
|
|
@ -8,9 +8,7 @@
|
|||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <deque>
|
||||
#include <limits>
|
||||
|
@ -22,14 +20,20 @@
|
|||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include "config/handler/GpuResourceConfigHandler.h"
|
||||
#endif
|
||||
#include "scheduler/selector/Pass.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace scheduler {
|
||||
|
||||
class FaissIVFPass : public Pass, public server::GpuResourceConfigHandler {
|
||||
class FaissIVFPass : public Pass
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
,
|
||||
public server::GpuResourceConfigHandler
|
||||
#endif
|
||||
{
|
||||
public:
|
||||
FaissIVFPass() = default;
|
||||
|
||||
|
@ -49,4 +53,3 @@ using FaissIVFPassPtr = std::shared_ptr<FaissIVFPass>;
|
|||
|
||||
} // namespace scheduler
|
||||
} // namespace milvus
|
||||
#endif
|
||||
|
|
|
@ -155,6 +155,9 @@ XSearchTask::Load(LoadType type, uint8_t device_id) {
|
|||
} else if (type == LoadType::GPU2CPU) {
|
||||
stat = index_engine_->CopyToCpu();
|
||||
type_str = "GPU2CPU";
|
||||
} else if (type == LoadType::CPU2FPGA) {
|
||||
stat = index_engine_->CopyToFpga();
|
||||
type_str = "CPU2FPGA";
|
||||
} else {
|
||||
error_msg = "Wrong load type";
|
||||
stat = Status(SERVER_UNEXPECTED_ERROR, error_msg);
|
||||
|
|
|
@ -23,12 +23,7 @@
|
|||
namespace milvus {
|
||||
namespace scheduler {
|
||||
|
||||
enum class LoadType {
|
||||
DISK2CPU,
|
||||
CPU2GPU,
|
||||
GPU2CPU,
|
||||
TEST,
|
||||
};
|
||||
enum class LoadType { DISK2CPU, CPU2GPU, GPU2CPU, TEST, CPU2FPGA };
|
||||
|
||||
enum class TaskType {
|
||||
SearchTask,
|
||||
|
|
Loading…
Reference in New Issue