mirror of https://github.com/milvus-io/milvus.git
MS-515 & MS-516 Fix index building gpu config issue
Former-commit-id: 14d56eef1f177ebcb24eb6b3186cdc2878dd80b4
pull/191/head
parent
7b2a668b3c
commit
c4058c3877
|
@ -61,9 +61,9 @@ ServerConfig::LoadConfigFile(const std::string& config_filename) {
|
|||
ErrorCode ServerConfig::ValidateConfig() const {
|
||||
//server config validation
|
||||
ConfigNode server_config = GetConfig(CONFIG_SERVER);
|
||||
uint32_t gpu_index = (uint32_t)server_config.GetInt32Value(CONFIG_GPU_INDEX, 0);
|
||||
if(ValidationUtil::ValidateGpuIndex(gpu_index) != SERVER_SUCCESS) {
|
||||
std::cout << "Error: invalid gpu_index " << std::to_string(gpu_index) << std::endl;
|
||||
uint32_t build_index_gpu_index = (uint32_t)server_config.GetInt32Value(CONFIG_GPU_INDEX, 0);
|
||||
if(ValidationUtil::ValidateGpuIndex(build_index_gpu_index) != SERVER_SUCCESS) {
|
||||
std::cerr << "Error: invalid gpu_index " << std::to_string(build_index_gpu_index) << std::endl;
|
||||
return SERVER_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
|
@ -75,7 +75,7 @@ ErrorCode ServerConfig::ValidateConfig() const {
|
|||
uint64_t insert_buffer_size = (uint64_t)db_config.GetInt32Value(CONFIG_DB_INSERT_BUFFER_SIZE, 4);
|
||||
insert_buffer_size *= GB;
|
||||
if(insert_buffer_size >= total_mem) {
|
||||
std::cout << "Error: insert_buffer_size execeed system memory" << std::endl;
|
||||
std::cerr << "Error: insert_buffer_size execeed system memory" << std::endl;
|
||||
return SERVER_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
|
@ -84,20 +84,50 @@ ErrorCode ServerConfig::ValidateConfig() const {
|
|||
uint64_t cache_cap = (uint64_t)cache_config.GetInt64Value(CONFIG_CPU_CACHE_CAPACITY, 16);
|
||||
cache_cap *= GB;
|
||||
if(cache_cap >= total_mem) {
|
||||
std::cout << "Error: cpu_cache_capacity execeed system memory" << std::endl;
|
||||
std::cerr << "Error: cpu_cache_capacity execeed system memory" << std::endl;
|
||||
return SERVER_INVALID_ARGUMENT;
|
||||
} if(cache_cap > (double)total_mem*0.9) {
|
||||
std::cout << "Warnning: cpu_cache_capacity value is too aggressive" << std::endl;
|
||||
std::cerr << "Warning: cpu_cache_capacity value is too aggressive" << std::endl;
|
||||
}
|
||||
|
||||
if(insert_buffer_size + cache_cap >= total_mem) {
|
||||
std::cout << "Error: sum of cpu_cache_capacity and insert_buffer_size execeed system memory" << std::endl;
|
||||
std::cerr << "Error: sum of cpu_cache_capacity and insert_buffer_size execeed system memory" << std::endl;
|
||||
return SERVER_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
double free_percent = cache_config.GetDoubleValue(server::CACHE_FREE_PERCENT, 0.85);
|
||||
if(free_percent < std::numeric_limits<double>::epsilon() || free_percent > 1.0) {
|
||||
std::cout << "Error: invalid cache_free_percent " << std::to_string(free_percent) << std::endl;
|
||||
std::cerr << "Error: invalid cache_free_percent " << std::to_string(free_percent) << std::endl;
|
||||
return SERVER_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// Resource config validation
|
||||
server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);
|
||||
if (config.GetChildren().empty()) {
|
||||
std::cerr << "Error: no context under resource" << std::endl;
|
||||
return SERVER_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
auto resources = config.GetChild(server::CONFIG_RESOURCES).GetChildren();
|
||||
|
||||
if (resources.empty()) {
|
||||
std::cerr << "Children of resource_config null exception" << std::endl;
|
||||
return SERVER_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
bool resource_valid_flag = false;
|
||||
for (auto &resource : resources) {
|
||||
auto &resconf = resource.second;
|
||||
auto type = resconf.GetValue(server::CONFIG_RESOURCE_TYPE);
|
||||
if(type == "GPU") {
|
||||
auto device_id = resconf.GetInt64Value(server::CONFIG_RESOURCE_DEVICE_ID, 0);
|
||||
if(device_id == build_index_gpu_index) {
|
||||
resource_valid_flag = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!resource_valid_flag) {
|
||||
return SERVER_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
|
|
|
@ -78,6 +78,7 @@ constexpr ErrorCode DB_INVALID_PATH = ToDbErrorCode(5);
|
|||
constexpr ErrorCode KNOWHERE_ERROR = ToKnowhereErrorCode(1);
|
||||
constexpr ErrorCode KNOWHERE_INVALID_ARGUMENT = ToKnowhereErrorCode(2);
|
||||
constexpr ErrorCode KNOWHERE_UNEXPECTED_ERROR = ToKnowhereErrorCode(3);
|
||||
constexpr ErrorCode KNOWHERE_NO_SPACE = ToKnowhereErrorCode(4);
|
||||
|
||||
namespace server {
|
||||
class ServerException : public std::exception {
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "db/meta/MetaTypes.h"
|
||||
#include "Error.h"
|
||||
#include "
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
|
@ -38,6 +39,9 @@ public:
|
|||
|
||||
static ErrorCode
|
||||
GetGpuMemory(uint32_t gpu_index, size_t &memory);
|
||||
|
||||
static ErrorCode
|
||||
ValidateConfig();
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -140,7 +140,7 @@ VecIndexPtr read_index(const std::string &location) {
|
|||
FileIOReader reader(location);
|
||||
reader.fs.seekg(0, reader.fs.end);
|
||||
int64_t length = reader.fs.tellg();
|
||||
if(length <= 0) {
|
||||
if (length <= 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -201,7 +201,13 @@ ErrorCode write_index(VecIndexPtr index, const std::string &location) {
|
|||
return KNOWHERE_UNEXPECTED_ERROR;
|
||||
} catch (std::exception &e) {
|
||||
WRAPPER_LOG_ERROR << e.what();
|
||||
return KNOWHERE_ERROR;
|
||||
std::string estring(e.what());
|
||||
if (estring.find("No space left on device") != estring.npos) {
|
||||
WRAPPER_LOG_ERROR << "No space left on the device";
|
||||
return KNOWHERE_NO_SPACE;
|
||||
} else {
|
||||
return KNOWHERE_ERROR;
|
||||
}
|
||||
}
|
||||
return KNOWHERE_SUCCESS;
|
||||
}
|
||||
|
@ -213,7 +219,7 @@ void AutoGenParams(const IndexType &type, const long &size, zilliz::knowhere::Co
|
|||
if (size <= TYPICAL_COUNT / 16384 + 1) {
|
||||
//handle less row count, avoid nlist set to 0
|
||||
cfg["nlist"] = 1;
|
||||
} else if (int(size / TYPICAL_COUNT) * nlist == 0) {
|
||||
} else if (int(size / TYPICAL_COUNT) *nlist == 0) {
|
||||
//calculate a proper nlist if nlist not specified or size less than TYPICAL_COUNT
|
||||
cfg["nlist"] = int(size / TYPICAL_COUNT * 16384);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue