mirror of https://github.com/milvus-io/milvus.git

Merge branch 'branch-0.3.1' into 'branch-0.3.1'

Branch 0.3.1

See merge request megasearch/milvus!204

Former-commit-id: 776b4ad0582b1f585f1ce311b481426240b39505

commit 3c33d73cb1
@@ -17,3 +17,4 @@ Please mark all change in change log and use the ticket from JIRA.
 - MS-1 - Add CHANGELOG.md
 - MS-161 - Add CI / CD Module to Milvus Project
 - MS-202 - Add Milvus Jenkins project email notification
+- MS-215 - Add Milvus cluster CI/CD groovy file
@@ -12,6 +12,7 @@ Please mark all change in change log and use the ticket from JIRA.
 - MS-153 - fix c_str error when connecting to MySQL
 - MS-157 - fix changelog
 - MS-190 - use env variable to switch mem manager and fix cmake
+- MS-217 - Fix SQ8 row count bug
 - MS-224 - Return AlreadyExist status in MySQLMetaImpl::CreateTable if table already exists

 ## Improvement
@@ -14,8 +14,8 @@ db_config:
     db_backend_url: sqlite://:@:/

     index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB
-    archive_disk_threshold: 512 # trigger archive action if storage size exceeds this value, unit: GB
-    archive_days_threshold: 30 # files older than x days will be archived, unit: day
+    archive_disk_threshold: 0 # trigger archive action if storage size exceeds this value, 0 means no limit, unit: GB
+    archive_days_threshold: 0 # files older than x days will be archived, 0 means no limit, unit: day
     maximum_memory: 4 # maximum memory allowed, default: 4, unit: GB, should be at least 1 GB.
     # the sum of maximum_memory and cpu_cache_capacity should be less than total memory

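The out-of-the-box archive policy is disabled by this hunk: both thresholds now default to 0 ("no limit") instead of 512 GB / 30 days, making archiving opt-in. The matching code change further down sets ARCHIVE_CONF_DEFAULT to an empty string, and the unit tests are updated to expect zero criteria.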
@@ -39,4 +39,4 @@ engine_config:
     nprobe: 10
     nlist: 16384
     use_blas_threshold: 20
-    metric_type: L2 # L2 or Inner Product
+    metric_type: L2 # compare vectors by euclidean distance (L2) or inner product (IP), optional: L2 or IP
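For reference, the two metrics named in the new comment rank results in opposite directions: L2 treats a smaller distance as closer, while IP treats a larger inner product as closer. A minimal standalone sketch (not Milvus code) of the two scores:

// Minimal sketch, not Milvus code: the two metrics named in the config comment.
// L2 ranks by (squared) euclidean distance, where smaller means closer;
// IP ranks by inner product, where larger means closer.
#include <cstdio>
#include <vector>

float l2_distance(const std::vector<float>& a, const std::vector<float>& b) {
    float sum = 0.0f;
    for (std::size_t i = 0; i < a.size(); ++i) {
        float d = a[i] - b[i];
        sum += d * d;
    }
    return sum;
}

float inner_product(const std::vector<float>& a, const std::vector<float>& b) {
    float sum = 0.0f;
    for (std::size_t i = 0; i < a.size(); ++i) {
        sum += a[i] * b[i];
    }
    return sum;
}

int main() {
    std::vector<float> q = {1.0f, 0.0f};
    std::vector<float> v = {0.8f, 0.6f};
    std::printf("L2 = %.2f, IP = %.2f\n", l2_distance(q, v), inner_product(q, v));
    return 0;
}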
@@ -20,6 +20,11 @@ public:
         : index_(index)
     {}

+    DataObj(const engine::Index_ptr& index, int64_t size)
+        : index_(index),
+          size_(size)
+    {}
+
     engine::Index_ptr data() { return index_; }
     const engine::Index_ptr& data() const { return index_; }

@@ -28,11 +33,16 @@ public:
             return 0;
         }

+        if(size_ > 0) {
+            return size_;
+        }
+
         return index_->ntotal*(index_->dim*4);
     }

 private:
     engine::Index_ptr index_ = nullptr;
+    int64_t size_ = 0;
 };

 using DataObjPtr = std::shared_ptr<DataObj>;
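Taken together, these two hunks let a cache entry report a caller-supplied byte count instead of the ntotal*dim*4 float estimate. A self-contained sketch of the resulting accounting (engine::Index is mocked here; the real type lives in the Milvus engine):

// Standalone sketch of the size accounting added above. With the new size_
// member, a caller-supplied byte count wins; otherwise the estimate assumes
// 4-byte float components: ntotal * dim * 4.
#include <cstdint>
#include <cstdio>
#include <memory>

struct Index {              // stand-in for engine::Index
    int64_t ntotal = 0;     // number of vectors
    int64_t dim = 0;        // vector dimension
};
using Index_ptr = std::shared_ptr<Index>;

class DataObj {
public:
    explicit DataObj(const Index_ptr& index) : index_(index) {}
    DataObj(const Index_ptr& index, int64_t size) : index_(index), size_(size) {}

    int64_t size() const {
        if (!index_) return 0;
        if (size_ > 0) return size_;                // explicit size wins
        return index_->ntotal * (index_->dim * 4);  // 4 bytes per float
    }

private:
    Index_ptr index_ = nullptr;
    int64_t size_ = 0;
};

int main() {
    auto idx = std::make_shared<Index>();
    idx->ntotal = 100000;
    idx->dim = 512;
    DataObj estimated(idx);        // falls back to ntotal*dim*4
    DataObj exact(idx, 51200000);  // caller pins the byte count
    std::printf("%lld vs %lld bytes\n",
                (long long)estimated.size(), (long long)exact.size());
    return 0;
}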
@@ -169,7 +169,10 @@ Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq,
         }
     }

-    return QueryAsync(table_id, file_id_array, k, nq, vectors, dates, results);
+    cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info before query
+    status = QueryAsync(table_id, file_id_array, k, nq, vectors, dates, results);
+    cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info after query
+    return status;
 }

 Status DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_ids,
@@ -194,7 +197,10 @@ Status DBImpl::Query(const std::string& table_id, const std::vector<std::string>
         return Status::Error("Invalid file id");
     }

-    return QueryAsync(table_id, files_array, k, nq, vectors, dates, results);
+    cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info before query
+    status = QueryAsync(table_id, files_array, k, nq, vectors, dates, results);
+    cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info after query
+    return status;
 }

 Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files,
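Both Query overloads receive the same instrumentation: cache usage is printed immediately before and after QueryAsync, so the logs show how much each search loads into (or evicts from) the CPU cache.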
@@ -486,7 +492,7 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {

     //step 6: update meta
     table_file.file_type_ = meta::TableFileSchema::INDEX;
-    table_file.size_ = index->PhysicalSize();
+    table_file.size_ = index->Size();

     auto to_remove = file;
     to_remove.file_type_ = meta::TableFileSchema::TO_DELETE;
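A plausible reading of this change, which belongs to MS-217 ("Fix SQ8 row count bug"): row counts are derived from table_file.size_, and for an SQ8 index the on-disk PhysicalSize() is roughly a quarter of the logical float size, so recording it undercounted rows. Storing the logical Size() instead keeps the row arithmetic consistent, while disk usage is corrected separately in DBMetaImpl::Size below.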
@@ -674,16 +674,22 @@ Status DBMetaImpl::Archive() {
 Status DBMetaImpl::Size(uint64_t &result) {
     result = 0;
     try {
-        auto selected = ConnectorPtr->select(columns(sum(&TableFileSchema::size_)),
-                                             where(
-                                                 c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE
-                                             ));
+        auto files = ConnectorPtr->select(columns(&TableFileSchema::size_,
+                                                  &TableFileSchema::file_type_,
+                                                  &TableFileSchema::engine_type_),
+                                          where(
+                                              c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE
+                                          ));

-        for (auto &sub_query : selected) {
-            if (!std::get<0>(sub_query)) {
-                continue;
+        for (auto &file : files) {
+            auto file_size = std::get<0>(file);
+            auto file_type = std::get<1>(file);
+            auto engine_type = std::get<2>(file);
+            if(file_type == (int)TableFileSchema::INDEX && engine_type == (int)EngineType::FAISS_IVFSQ8) {
+                result += (uint64_t)file_size/4; //hardcode for sq8
+            } else {
+                result += (uint64_t)file_size;
             }
-            result += (uint64_t) (*std::get<0>(sub_query));
         }
     } catch (std::exception &e) {
         return HandleException("Encounter exception when calculate db size", e);
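The /4 heuristic follows from the storage format: SQ8 quantizes each 4-byte float component down to one byte, so an index built with FAISS_IVFSQ8 occupies roughly a quarter of the raw float size recorded in size_. A worked example with hypothetical numbers:

// Worked example of the SQ8 correction above, with hypothetical inputs.
// SQ8 stores one byte per vector component instead of a 4-byte float.
#include <cstdint>
#include <cstdio>

int main() {
    uint64_t raw_size = 1024ull * 1024 * 1024;  // 1 GB of raw float vectors
    uint64_t sq8_size = raw_size / 4;           // ~256 MB after 8-bit quantization
    std::printf("raw: %llu bytes, sq8 estimate: %llu bytes\n",
                (unsigned long long)raw_size, (unsigned long long)sq8_size);
    return 0;
}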
@@ -110,7 +110,7 @@ Status FaissExecutionEngine::Merge(const std::string& location) {
     if (location == location_) {
         return Status::Error("Cannot Merge Self");
     }
-    ENGINE_LOG_DEBUG << "Merge index file: " << location << " to: " << location_;
+    ENGINE_LOG_DEBUG << "Merge raw file: " << location << " to: " << location_;

     auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location);
     if (!to_merge) {
@@ -165,8 +165,9 @@ Status FaissExecutionEngine::Search(long n,
 }

 Status FaissExecutionEngine::Cache() {
-    zilliz::milvus::cache::CpuCacheMgr::GetInstance(
-    )->InsertItem(location_, std::make_shared<Index>(pIndex_));
+    auto index = std::make_shared<Index>(pIndex_);
+    cache::DataObjPtr data_obj = std::make_shared<cache::DataObj>(index, PhysicalSize());
+    zilliz::milvus::cache::CpuCacheMgr::GetInstance()->InsertItem(location_, data_obj);

     return Status::OK();
 }
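This is where the two-argument DataObj constructor from the cache hunk above is used: each cached index is now accounted for by its on-disk PhysicalSize() rather than the ntotal*dim*4 fallback, so cache capacity tracking stays accurate for compressed (e.g. SQ8) indexes.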
@@ -41,6 +41,10 @@ void ArchiveConf::ParseCritirias(const std::string& criterias) {
     }

     for (auto& token : tokens) {
+        if(token.empty()) {
+            continue;
+        }
+
         std::vector<std::string> kv;
         boost::algorithm::split(kv, token, boost::is_any_of(":"));
         if (kv.size() != 2) {
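With ARCHIVE_CONF_DEFAULT becoming empty (next hunk), splitting the criteria string can now produce an empty token, which the old loop would have rejected as malformed. A standalone sketch of the parse path (std:: only; the real code uses boost::algorithm::split):

// Standalone sketch of why the empty-token guard matters: splitting an empty
// criteria string yields one empty token, which would otherwise fail the
// kv.size() != 2 check for a perfectly valid "no archive criteria" config.
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> split(const std::string& s, char delim) {
    std::vector<std::string> out;
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) out.push_back(item);
    if (s.empty()) out.push_back("");  // mirror boost::split on empty input
    return out;
}

int main() {
    for (const std::string& criterias : {std::string(""), std::string("disk:512;days:30")}) {
        for (const auto& token : split(criterias, ';')) {
            if (token.empty()) continue;  // the new guard
            auto kv = split(token, ':');
            if (kv.size() != 2) { std::printf("bad token: %s\n", token.c_str()); continue; }
            std::printf("criteria %s = %s\n", kv[0].c_str(), kv[1].c_str());
        }
    }
    return 0;
}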
@@ -22,7 +22,7 @@ static constexpr uint64_t ONE_GB = ONE_KB*ONE_MB;

 static const std::string ARCHIVE_CONF_DISK = "disk";
 static const std::string ARCHIVE_CONF_DAYS = "days";
-static const std::string ARCHIVE_CONF_DEFAULT = ARCHIVE_CONF_DISK + ":512";
+static const std::string ARCHIVE_CONF_DEFAULT = "";

 struct ArchiveConf {
     using CriteriaT = std::map<std::string, int>;
@@ -107,7 +107,7 @@ Status SearchTask::ClusterResult(const std::vector<long> &output_ids,
                                  uint64_t nq,
                                  uint64_t topk,
                                  SearchContext::ResultSet &result_set) {
-    if(output_ids.size() != nq*topk || output_distence.size() != nq*topk) {
+    if(output_ids.size() < nq*topk || output_distence.size() < nq*topk) {
        std::string msg = "Invalid id array size: " + std::to_string(output_ids.size()) +
            " distance array size: " + std::to_string(output_distence.size());
        SERVER_LOG_ERROR << msg;
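The validation is relaxed from an exact-size match to a lower bound, presumably so result buffers that are merely large enough (e.g. over-allocated or reused across searches) are no longer rejected; only genuinely undersized arrays now raise the error.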
@@ -23,6 +23,7 @@ namespace {
     static constexpr int64_t TOP_K = 10;
     static constexpr int64_t SEARCH_TARGET = 5000; //change this value, result is different
     static constexpr int64_t ADD_VECTOR_LOOP = 10;
+    static constexpr int64_t SECONDS_EACH_HOUR = 3600;

 #define BLOCK_SPLITER std::cout << "===========================================" << std::endl;
@@ -59,7 +60,7 @@ namespace {
     std::string CurrentTime() {
         time_t tt;
         time( &tt );
-        tt = tt + 8*3600;
+        tt = tt + 8*SECONDS_EACH_HOUR;
         tm* t= gmtime( &tt );

         std::string str = std::to_string(t->tm_year + 1900) + "_" + std::to_string(t->tm_mon + 1)
@@ -69,10 +70,11 @@ namespace {
         return str;
     }

-    std::string CurrentTmDate() {
+    std::string CurrentTmDate(int64_t offset_day = 0) {
         time_t tt;
         time( &tt );
-        tt = tt + 8*3600;
+        tt = tt + 8*SECONDS_EACH_HOUR;
+        tt = tt + 24*SECONDS_EACH_HOUR*offset_day;
         tm* t= gmtime( &tt );

         std::string str = std::to_string(t->tm_year + 1900) + "-" + std::to_string(t->tm_mon + 1)
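The new offset_day parameter shifts the formatted date by whole days, which the next hunk uses to build a [today, tomorrow) query range instead of a degenerate one. A self-contained version of the helper for experimentation (the fixed +8 hours mirrors the test's hardcoded UTC+8 adjustment):

// Self-contained sketch of the offset logic added above: shift the epoch
// time by whole days before formatting.
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <string>

static constexpr int64_t SECONDS_EACH_HOUR = 3600;

std::string CurrentTmDate(int64_t offset_day = 0) {
    time_t tt;
    time(&tt);
    tt = tt + 8 * SECONDS_EACH_HOUR;                // fixed UTC+8 offset, as in the test
    tt = tt + 24 * SECONDS_EACH_HOUR * offset_day;  // shift by whole days
    tm* t = gmtime(&tt);
    return std::to_string(t->tm_year + 1900) + "-" +
           std::to_string(t->tm_mon + 1) + "-" + std::to_string(t->tm_mday);
}

int main() {
    std::printf("today: %s, tomorrow: %s\n",
                CurrentTmDate().c_str(), CurrentTmDate(1).c_str());
    return 0;
}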
@@ -160,7 +162,7 @@ namespace {
         std::vector<Range> query_range_array;
         Range rg;
         rg.start_value = CurrentTmDate();
-        rg.end_value = CurrentTmDate();
+        rg.end_value = CurrentTmDate(1);
         query_range_array.emplace_back(rg);

         std::vector<RowRecord> record_array;
@@ -28,6 +28,15 @@ IndexType resolveIndexType(const string &index_type) {
     return IndexType::Invalid_Option;
 }

+int CalcBacketCount(int nb, size_t nlist) {
+    int backet_count = int(nb / 1000000.0 * nlist);
+    if(backet_count == 0) {
+        backet_count = 1; //avoid faiss crash
+    }
+
+    return backet_count;
+}
+
 // nb at least 100
 string Operand::get_index_type(const int &nb) {
     if (!index_str.empty()) { return index_str; }
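The clamp matters for small collections: with the default nlist of 16384, int(nb / 1000000.0 * nlist) hits 0 for any nb below about 62 vectors, and an IVF index with zero buckets would crash faiss during training. A worked example:

// Worked example of the bucket-count clamp above.
#include <cstddef>
#include <cstdio>

int CalcBacketCount(int nb, std::size_t nlist) {
    int backet_count = int(nb / 1000000.0 * nlist);
    if (backet_count == 0) {
        backet_count = 1;  // avoid faiss crash on zero-bucket IVF training
    }
    return backet_count;
}

int main() {
    // nb=50 and nb=61 would compute 0 without the clamp; nb=62 is the
    // first value that yields 1 on its own (1000000 / 16384 = 61.04).
    for (int nb : {50, 61, 62, 1000, 1000000}) {
        std::printf("nb=%d -> buckets=%d\n", nb, CalcBacketCount(nb, 16384));
    }
    return 0;
}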
@@ -45,7 +54,7 @@ string Operand::get_index_type(const int &nb) {
             size_t nlist = engine_config.GetInt32Value(CONFIG_NLIST, 16384);

             index_str += (ncent != 0 ? index_type + std::to_string(ncent) :
-                          index_type + std::to_string(int(nb / 1000000.0 * nlist)));
+                          index_type + std::to_string(CalcBacketCount(nb, nlist)));
             // std::cout<<"nlist = "<<nlist<<std::endl;
             if (!postproc.empty()) { index_str += ("," + postproc); }
             break;
@@ -58,7 +67,7 @@ string Operand::get_index_type(const int &nb) {
             size_t nlist = engine_config.GetInt32Value(CONFIG_NLIST, 16384);

             index_str += (ncent != 0 ? "IVF" + std::to_string(ncent) :
-                          "IVF" + std::to_string(int(nb / 1000000.0 * nlist)));
+                          "IVF" + std::to_string(CalcBacketCount(nb, nlist)));
             index_str += ",SQ8";
             // std::cout<<"nlist = "<<nlist<<std::endl;
             break;
@@ -55,15 +55,13 @@ TEST_F(DBTest, CONFIG_TEST) {
         engine::ArchiveConf conf("delete");
         ASSERT_EQ(conf.GetType(), "delete");
         auto criterias = conf.GetCriterias();
-        ASSERT_TRUE(criterias.size() == 1);
-        ASSERT_TRUE(criterias["disk"] == 512);
+        ASSERT_TRUE(criterias.size() == 0);
     }
     {
         engine::ArchiveConf conf("swap");
         ASSERT_EQ(conf.GetType(), "swap");
         auto criterias = conf.GetCriterias();
-        ASSERT_TRUE(criterias.size() == 1);
-        ASSERT_TRUE(criterias["disk"] == 512);
+        ASSERT_TRUE(criterias.size() == 0);
     }
     {
         ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "disk:"));
@@ -209,11 +207,21 @@ TEST_F(DBTest, SEARCH_TEST) {
         ASSERT_STATS(stat);
     }

-    sleep(2); // wait until build index finish
+    db_->BuildIndex(TABLE_NAME); // wait until build index finish

-    engine::QueryResults results;
-    stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results);
-    ASSERT_STATS(stat);
+    {
+        engine::QueryResults results;
+        stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results);
+        ASSERT_STATS(stat);
+    }
+
+    {//search by specify index file
+        engine::meta::DatesT dates;
+        std::vector<std::string> file_ids = {"1", "2", "3", "4"};
+        engine::QueryResults results;
+        stat = db_->Query(TABLE_NAME, file_ids, k, nq, xq.data(), dates, results);
+        ASSERT_STATS(stat);
+    }

     // TODO(linxj): add groundTruth assert
 };
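Two behavioral changes here: the fixed sleep(2) is replaced by an explicit BuildIndex call, so the test waits for index construction instead of racing the background builder, and a second scoped block exercises the Query overload that searches within an explicit set of index files.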
@@ -14,6 +14,7 @@
 #include "db/Options.h"
 #include "db/DBMetaImpl.h"
 #include "db/EngineFactory.h"
+#include "db/Utils.h"

 #include <vector>
@@ -135,3 +136,31 @@ TEST(DBMiscTest, META_TEST) {
     engine::meta::DateT dt = impl.GetDate(tt, delta);
     ASSERT_GT(dt, 0);
 }
+
+TEST(DBMiscTest, UTILS_TEST) {
+    engine::DBMetaOptions options;
+    options.path = "/tmp/milvus_test/main";
+    options.slave_paths.push_back("/tmp/milvus_test/slave_1");
+    options.slave_paths.push_back("/tmp/milvus_test/slave_2");
+
+    const std::string TABLE_NAME = "test_tbl";
+    auto status = engine::utils::CreateTablePath(options, TABLE_NAME);
+    ASSERT_TRUE(status.ok());
+    ASSERT_TRUE(boost::filesystem::exists(options.path));
+    for(auto& path : options.slave_paths) {
+        ASSERT_TRUE(boost::filesystem::exists(path));
+    }
+
+    engine::meta::TableFileSchema file;
+    file.id_ = 50;
+    file.table_id_ = TABLE_NAME;
+    file.file_type_ = 3;
+    file.date_ = 155000;
+    status = engine::utils::GetTableFilePath(options, file);
+    ASSERT_FALSE(status.ok());
+    ASSERT_TRUE(file.location_.empty());
+
+    status = engine::utils::DeleteTablePath(options, TABLE_NAME);
+    ASSERT_TRUE(status.ok());
+}