mirror of https://github.com/milvus-io/milvus.git
Server down during loading data (#2807)
* Server down during loading data Signed-off-by: fishpenguin <kun.yu@zilliz.com> * Fix test_config.cpp Signed-off-by: fishpenguin <kun.yu@zilliz.com> * ci retry Signed-off-by: fishpenguin <kun.yu@zilliz.com> * Change limit_in_bytes init value to max Signed-off-by: fishpenguin <kun.yu@zilliz.com> Co-authored-by: Jin Hai <hai.jin@zilliz.com>pull/2793/head^2
parent
585c8ea018
commit
a474ff269a
|
@ -20,6 +20,7 @@ Please mark all change in change log and use the issue from GitHub
|
|||
- \#2752 Milvus formats vectors data to double-precision and return to http client
|
||||
- \#2767 Fix a bug of getting wrong nprobe limitation in knowhere on GPU version
|
||||
- \#2768 After building the index, the number of vectors increases
|
||||
- \#2774 Server down during loading data
|
||||
- \#2776 Fix too many data copies during creating IVF index
|
||||
- \#2813 To implement RNSG IP
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
@ -1333,6 +1334,15 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) {
|
|||
|
||||
int64_t total_mem = 0, free_mem = 0;
|
||||
CommonUtil::GetSystemMemInfo(total_mem, free_mem);
|
||||
|
||||
int64_t cgroup_limit_mem = std::numeric_limits<int64_t>::max();
|
||||
CommonUtil::GetSysCgroupMemLimit(cgroup_limit_mem);
|
||||
if (cgroup_limit_mem < total_mem && cache_size >= cgroup_limit_mem) {
|
||||
std::string msg = "Invalid cpu cache size: " + value +
|
||||
". Possible reason: cache.cache_size exceeds system cgroup memory.";
|
||||
return Status{SERVER_INVALID_ARGUMENT, msg};
|
||||
}
|
||||
|
||||
if (cache_size >= total_mem) {
|
||||
std::string msg =
|
||||
"Invalid cpu cache size: " + value + ". Possible reason: cache.cache_size exceeds system memory.";
|
||||
|
|
|
@ -53,6 +53,19 @@ CommonUtil::GetSystemMemInfo(int64_t& total_mem, int64_t& free_mem) {
|
|||
return ret == 0; // succeed 0, failed -1
|
||||
}
|
||||
|
||||
bool
|
||||
CommonUtil::GetSysCgroupMemLimit(int64_t& limit_in_bytes) {
|
||||
try {
|
||||
std::ifstream file("/sys/fs/cgroup/memory/memory.limit_in_bytes");
|
||||
file >> limit_in_bytes;
|
||||
} catch (std::exception& ex) {
|
||||
std::string msg =
|
||||
"Failed to read /sys/fs/cgroup/memory/memory.limit_in_bytes, reason: " + std::string(ex.what());
|
||||
LOG_SERVER_ERROR_ << msg;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
CommonUtil::GetSystemAvailableThreads(int64_t& thread_count) {
|
||||
// threadCnt = std::thread::hardware_concurrency();
|
||||
|
|
|
@ -24,6 +24,8 @@ class CommonUtil {
|
|||
static bool
|
||||
GetSystemMemInfo(int64_t& total_mem, int64_t& free_mem);
|
||||
static bool
|
||||
GetSysCgroupMemLimit(int64_t& limit_in_bytes);
|
||||
static bool
|
||||
GetSystemAvailableThreads(int64_t& thread_count);
|
||||
|
||||
static bool
|
||||
|
|
|
@ -294,8 +294,15 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) {
|
|||
// #2564
|
||||
int64_t total_mem = 0, free_mem = 0;
|
||||
milvus::server::CommonUtil::GetSystemMemInfo(total_mem, free_mem);
|
||||
int64_t cgroup_limit_size = 0;
|
||||
milvus::server::CommonUtil::GetSysCgroupMemLimit(cgroup_limit_size);
|
||||
ASSERT_TRUE(config.SetCacheConfigInsertBufferSize("1GB").ok());
|
||||
int64_t cache_cpu_cache_size = total_mem / 2;
|
||||
int64_t cache_cpu_cache_size = 0;
|
||||
if (cgroup_limit_size < total_mem) {
|
||||
cache_cpu_cache_size = cgroup_limit_size / 2;
|
||||
} else {
|
||||
cache_cpu_cache_size = total_mem / 2;
|
||||
}
|
||||
float cache_cpu_cache_threshold = 0.7;
|
||||
ASSERT_TRUE(config.SetCacheConfigCpuCacheThreshold(std::to_string(cache_cpu_cache_threshold)).ok());
|
||||
ASSERT_TRUE(config.SetCacheConfigCpuCacheCapacity(std::to_string(cache_cpu_cache_size)).ok());
|
||||
|
@ -306,14 +313,20 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) {
|
|||
{
|
||||
int64_t total_mem = 0, free_mem = 0;
|
||||
milvus::server::CommonUtil::GetSystemMemInfo(total_mem, free_mem);
|
||||
int64_t cgroup_limit_size = 0;
|
||||
milvus::server::CommonUtil::GetSysCgroupMemLimit(cgroup_limit_size);
|
||||
ASSERT_TRUE(config.SetCacheConfigInsertBufferSize("1GB").ok());
|
||||
int64_t cache_cpu_cache_size = total_mem - 1073741824 - 1; // total_size - 1GB - 1
|
||||
int64_t cache_cpu_cache_size = 0;
|
||||
if (cgroup_limit_size < total_mem) {
|
||||
cache_cpu_cache_size = cgroup_limit_size - 1073741824 - 1;
|
||||
} else {
|
||||
cache_cpu_cache_size = total_mem - 1073741824 - 1; // total_size - 1GB - 1
|
||||
}
|
||||
ASSERT_TRUE(config.SetCacheConfigCpuCacheCapacity(std::to_string(cache_cpu_cache_size)).ok());
|
||||
ASSERT_TRUE(config.GetCacheConfigCpuCacheCapacity(int64_val).ok());
|
||||
ASSERT_TRUE(int64_val == cache_cpu_cache_size);
|
||||
}
|
||||
|
||||
|
||||
/* engine config */
|
||||
int64_t engine_use_blas_threshold = 50;
|
||||
ASSERT_TRUE(config.SetEngineConfigUseBlasThreshold(std::to_string(engine_use_blas_threshold)).ok());
|
||||
|
@ -498,8 +511,6 @@ TEST_F(ConfigTest, SERVER_CONFIG_CLI_TEST) {
|
|||
s = config.ProcessConfigCli(result, get_cmd);
|
||||
ASSERT_TRUE(s.ok());
|
||||
|
||||
|
||||
|
||||
/* cache config */
|
||||
std::string cache_cpu_cache_capacity = "1";
|
||||
get_cmd = gen_get_command(ms::CONFIG_CACHE, ms::CONFIG_CACHE_CPU_CACHE_CAPACITY);
|
||||
|
@ -681,7 +692,6 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) {
|
|||
|
||||
ASSERT_FALSE(config.SetDBConfigArchiveDaysThreshold("0x10").ok());
|
||||
|
||||
|
||||
/* storage config */
|
||||
ASSERT_FALSE(config.SetStorageConfigPath("").ok());
|
||||
ASSERT_FALSE(config.SetStorageConfigPath("./milvus").ok());
|
||||
|
@ -1288,8 +1298,8 @@ TEST_F(ConfigTest, SERVER_CONFIG_UPDATE_TEST) {
|
|||
std::string reply_set, reply_get;
|
||||
std::string cmd_set, cmd_get;
|
||||
|
||||
auto lambda = [&conf_file](const std::string& key, const std::string& child_key,
|
||||
const std::string& default_value, std::string& value) {
|
||||
auto lambda = [&conf_file](const std::string& key, const std::string& child_key, const std::string& default_value,
|
||||
std::string& value) {
|
||||
auto* ymgr = milvus::server::YamlConfigMgr::GetInstance();
|
||||
auto status = ymgr->LoadConfigFile(conf_file);
|
||||
|
||||
|
@ -1310,52 +1320,58 @@ TEST_F(ConfigTest, SERVER_CONFIG_UPDATE_TEST) {
|
|||
ASSERT_TRUE(config.ProcessConfigCli(reply_set, cmd_set).ok());
|
||||
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_CACHE, ms::CONFIG_CACHE_INSERT_BUFFER_SIZE,
|
||||
ms::CONFIG_CACHE_INSERT_BUFFER_SIZE_DEFAULT, yaml_value).ok());
|
||||
ms::CONFIG_CACHE_INSERT_BUFFER_SIZE_DEFAULT, yaml_value)
|
||||
.ok());
|
||||
ASSERT_EQ("2", yaml_value);
|
||||
|
||||
// test boolean config value
|
||||
cmd_set = gen_set_command(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR, "True");
|
||||
ASSERT_TRUE(config.ProcessConfigCli(reply_set, cmd_set).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR,
|
||||
ms::CONFIG_METRIC_ENABLE_MONITOR_DEFAULT, yaml_value).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR, ms::CONFIG_METRIC_ENABLE_MONITOR_DEFAULT,
|
||||
yaml_value)
|
||||
.ok());
|
||||
ASSERT_EQ("true", yaml_value);
|
||||
|
||||
cmd_set = gen_set_command(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR, "On");
|
||||
ASSERT_TRUE(config.ProcessConfigCli(reply_set, cmd_set).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR,
|
||||
ms::CONFIG_METRIC_ENABLE_MONITOR_DEFAULT, yaml_value).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR, ms::CONFIG_METRIC_ENABLE_MONITOR_DEFAULT,
|
||||
yaml_value)
|
||||
.ok());
|
||||
ASSERT_EQ("true", yaml_value);
|
||||
|
||||
cmd_set = gen_set_command(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR, "False");
|
||||
ASSERT_TRUE(config.ProcessConfigCli(reply_set, cmd_set).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR,
|
||||
ms::CONFIG_METRIC_ENABLE_MONITOR_DEFAULT, yaml_value).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR, ms::CONFIG_METRIC_ENABLE_MONITOR_DEFAULT,
|
||||
yaml_value)
|
||||
.ok());
|
||||
ASSERT_EQ("false", yaml_value);
|
||||
|
||||
cmd_set = gen_set_command(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR, "Off");
|
||||
ASSERT_TRUE(config.ProcessConfigCli(reply_set, cmd_set).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR,
|
||||
ms::CONFIG_METRIC_ENABLE_MONITOR_DEFAULT, yaml_value).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_METRIC, ms::CONFIG_METRIC_ENABLE_MONITOR, ms::CONFIG_METRIC_ENABLE_MONITOR_DEFAULT,
|
||||
yaml_value)
|
||||
.ok());
|
||||
ASSERT_EQ("false", yaml_value);
|
||||
|
||||
// test path
|
||||
cmd_set = gen_set_command(ms::CONFIG_STORAGE, ms::CONFIG_STORAGE_PATH, "/tmp/milvus_config_unittest");
|
||||
ASSERT_TRUE(config.ProcessConfigCli(reply_set, cmd_set).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_STORAGE, ms::CONFIG_STORAGE_PATH,
|
||||
ms::CONFIG_STORAGE_PATH_DEFAULT, yaml_value).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_STORAGE, ms::CONFIG_STORAGE_PATH, ms::CONFIG_STORAGE_PATH_DEFAULT, yaml_value).ok());
|
||||
ASSERT_EQ("/tmp/milvus_config_unittest", yaml_value);
|
||||
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
cmd_set = gen_set_command(ms::CONFIG_GPU_RESOURCE, ms::CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, "gpu0");
|
||||
ASSERT_TRUE(config.ProcessConfigCli(reply_set, cmd_set).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_GPU_RESOURCE, ms::CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES,
|
||||
ms::CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT, yaml_value).ok());
|
||||
ms::CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT, yaml_value)
|
||||
.ok());
|
||||
ASSERT_EQ("gpu0", yaml_value);
|
||||
|
||||
cmd_set = gen_set_command(ms::CONFIG_GPU_RESOURCE, ms::CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, "GPU0");
|
||||
ASSERT_TRUE(config.ProcessConfigCli(reply_set, cmd_set).ok());
|
||||
ASSERT_TRUE(lambda(ms::CONFIG_GPU_RESOURCE, ms::CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES,
|
||||
ms::CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT, yaml_value).ok());
|
||||
ms::CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT, yaml_value)
|
||||
.ok());
|
||||
ASSERT_EQ("gpu0", yaml_value);
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue