MS-515 & MS-516: Fix index-building GPU config issue

Former-commit-id: 14d56eef1f177ebcb24eb6b3186cdc2878dd80b4
pull/191/head
jinhai 2019-09-08 10:41:25 +08:00
parent 7b2a668b3c
commit c4058c3877
4 changed files with 52 additions and 11 deletions

View File

@ -61,9 +61,9 @@ ServerConfig::LoadConfigFile(const std::string& config_filename) {
ErrorCode ServerConfig::ValidateConfig() const { ErrorCode ServerConfig::ValidateConfig() const {
//server config validation //server config validation
ConfigNode server_config = GetConfig(CONFIG_SERVER); ConfigNode server_config = GetConfig(CONFIG_SERVER);
uint32_t gpu_index = (uint32_t)server_config.GetInt32Value(CONFIG_GPU_INDEX, 0); uint32_t build_index_gpu_index = (uint32_t)server_config.GetInt32Value(CONFIG_GPU_INDEX, 0);
if(ValidationUtil::ValidateGpuIndex(gpu_index) != SERVER_SUCCESS) { if(ValidationUtil::ValidateGpuIndex(build_index_gpu_index) != SERVER_SUCCESS) {
std::cout << "Error: invalid gpu_index " << std::to_string(gpu_index) << std::endl; std::cerr << "Error: invalid gpu_index " << std::to_string(build_index_gpu_index) << std::endl;
return SERVER_INVALID_ARGUMENT; return SERVER_INVALID_ARGUMENT;
} }
@ -75,7 +75,7 @@ ErrorCode ServerConfig::ValidateConfig() const {
uint64_t insert_buffer_size = (uint64_t)db_config.GetInt32Value(CONFIG_DB_INSERT_BUFFER_SIZE, 4); uint64_t insert_buffer_size = (uint64_t)db_config.GetInt32Value(CONFIG_DB_INSERT_BUFFER_SIZE, 4);
insert_buffer_size *= GB; insert_buffer_size *= GB;
if(insert_buffer_size >= total_mem) { if(insert_buffer_size >= total_mem) {
std::cout << "Error: insert_buffer_size execeed system memory" << std::endl; std::cerr << "Error: insert_buffer_size execeed system memory" << std::endl;
return SERVER_INVALID_ARGUMENT; return SERVER_INVALID_ARGUMENT;
} }
@ -84,20 +84,50 @@ ErrorCode ServerConfig::ValidateConfig() const {
uint64_t cache_cap = (uint64_t)cache_config.GetInt64Value(CONFIG_CPU_CACHE_CAPACITY, 16); uint64_t cache_cap = (uint64_t)cache_config.GetInt64Value(CONFIG_CPU_CACHE_CAPACITY, 16);
cache_cap *= GB; cache_cap *= GB;
if(cache_cap >= total_mem) { if(cache_cap >= total_mem) {
std::cout << "Error: cpu_cache_capacity execeed system memory" << std::endl; std::cerr << "Error: cpu_cache_capacity execeed system memory" << std::endl;
return SERVER_INVALID_ARGUMENT; return SERVER_INVALID_ARGUMENT;
} if(cache_cap > (double)total_mem*0.9) { } if(cache_cap > (double)total_mem*0.9) {
std::cout << "Warnning: cpu_cache_capacity value is too aggressive" << std::endl; std::cerr << "Warning: cpu_cache_capacity value is too aggressive" << std::endl;
} }
if(insert_buffer_size + cache_cap >= total_mem) { if(insert_buffer_size + cache_cap >= total_mem) {
std::cout << "Error: sum of cpu_cache_capacity and insert_buffer_size execeed system memory" << std::endl; std::cerr << "Error: sum of cpu_cache_capacity and insert_buffer_size execeed system memory" << std::endl;
return SERVER_INVALID_ARGUMENT; return SERVER_INVALID_ARGUMENT;
} }
double free_percent = cache_config.GetDoubleValue(server::CACHE_FREE_PERCENT, 0.85); double free_percent = cache_config.GetDoubleValue(server::CACHE_FREE_PERCENT, 0.85);
if(free_percent < std::numeric_limits<double>::epsilon() || free_percent > 1.0) { if(free_percent < std::numeric_limits<double>::epsilon() || free_percent > 1.0) {
std::cout << "Error: invalid cache_free_percent " << std::to_string(free_percent) << std::endl; std::cerr << "Error: invalid cache_free_percent " << std::to_string(free_percent) << std::endl;
return SERVER_INVALID_ARGUMENT;
}
// Resource config validation
server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);
if (config.GetChildren().empty()) {
std::cerr << "Error: no context under resource" << std::endl;
return SERVER_INVALID_ARGUMENT;
}
auto resources = config.GetChild(server::CONFIG_RESOURCES).GetChildren();
if (resources.empty()) {
std::cerr << "Children of resource_config null exception" << std::endl;
return SERVER_INVALID_ARGUMENT;
}
bool resource_valid_flag = false;
for (auto &resource : resources) {
auto &resconf = resource.second;
auto type = resconf.GetValue(server::CONFIG_RESOURCE_TYPE);
if(type == "GPU") {
auto device_id = resconf.GetInt64Value(server::CONFIG_RESOURCE_DEVICE_ID, 0);
if(device_id == build_index_gpu_index) {
resource_valid_flag = true;
}
}
}
if(!resource_valid_flag) {
return SERVER_INVALID_ARGUMENT; return SERVER_INVALID_ARGUMENT;
} }

View File

@ -78,6 +78,7 @@ constexpr ErrorCode DB_INVALID_PATH = ToDbErrorCode(5);
constexpr ErrorCode KNOWHERE_ERROR = ToKnowhereErrorCode(1); constexpr ErrorCode KNOWHERE_ERROR = ToKnowhereErrorCode(1);
constexpr ErrorCode KNOWHERE_INVALID_ARGUMENT = ToKnowhereErrorCode(2); constexpr ErrorCode KNOWHERE_INVALID_ARGUMENT = ToKnowhereErrorCode(2);
constexpr ErrorCode KNOWHERE_UNEXPECTED_ERROR = ToKnowhereErrorCode(3); constexpr ErrorCode KNOWHERE_UNEXPECTED_ERROR = ToKnowhereErrorCode(3);
constexpr ErrorCode KNOWHERE_NO_SPACE = ToKnowhereErrorCode(4);
namespace server { namespace server {
class ServerException : public std::exception { class ServerException : public std::exception {

View File

@ -2,6 +2,7 @@
#include "db/meta/MetaTypes.h" #include "db/meta/MetaTypes.h"
#include "Error.h" #include "Error.h"
#include "
namespace zilliz { namespace zilliz {
namespace milvus { namespace milvus {
@ -38,6 +39,9 @@ public:
static ErrorCode static ErrorCode
GetGpuMemory(uint32_t gpu_index, size_t &memory); GetGpuMemory(uint32_t gpu_index, size_t &memory);
static ErrorCode
ValidateConfig();
}; };
} }

View File

@ -140,7 +140,7 @@ VecIndexPtr read_index(const std::string &location) {
FileIOReader reader(location); FileIOReader reader(location);
reader.fs.seekg(0, reader.fs.end); reader.fs.seekg(0, reader.fs.end);
int64_t length = reader.fs.tellg(); int64_t length = reader.fs.tellg();
if(length <= 0) { if (length <= 0) {
return nullptr; return nullptr;
} }
@ -201,7 +201,13 @@ ErrorCode write_index(VecIndexPtr index, const std::string &location) {
return KNOWHERE_UNEXPECTED_ERROR; return KNOWHERE_UNEXPECTED_ERROR;
} catch (std::exception &e) { } catch (std::exception &e) {
WRAPPER_LOG_ERROR << e.what(); WRAPPER_LOG_ERROR << e.what();
return KNOWHERE_ERROR; std::string estring(e.what());
if (estring.find("No space left on device") != estring.npos) {
WRAPPER_LOG_ERROR << "No space left on the device";
return KNOWHERE_NO_SPACE;
} else {
return KNOWHERE_ERROR;
}
} }
return KNOWHERE_SUCCESS; return KNOWHERE_SUCCESS;
} }
@ -213,7 +219,7 @@ void AutoGenParams(const IndexType &type, const long &size, zilliz::knowhere::Co
if (size <= TYPICAL_COUNT / 16384 + 1) { if (size <= TYPICAL_COUNT / 16384 + 1) {
//handle less row count, avoid nlist set to 0 //handle less row count, avoid nlist set to 0
cfg["nlist"] = 1; cfg["nlist"] = 1;
} else if (int(size / TYPICAL_COUNT) * nlist == 0) { } else if (int(size / TYPICAL_COUNT) *nlist == 0) {
//calculate a proper nlist if nlist not specified or size less than TYPICAL_COUNT //calculate a proper nlist if nlist not specified or size less than TYPICAL_COUNT
cfg["nlist"] = int(size / TYPICAL_COUNT * 16384); cfg["nlist"] = int(size / TYPICAL_COUNT * 16384);
} }