#1580 Old segment folder not removed after merge / compact if create_index is called before adding data

Signed-off-by: groot <yihua.mo@zilliz.com>
pull/1592/head
groot 2020-03-09 20:52:10 +08:00
parent b20e16aba1
commit 13d3cff9b7
3 changed files with 95 additions and 34 deletions

View File

@ -50,6 +50,7 @@ Please mark all change in change log and use the issue from GitHub
- \#1571 Meta engine type become IDMAP after dropping index for BINARY table
- \#1574 Set all existing bitset in cache when applying deletes
- \#1577 Row count incorrect if delete vectors then create index
- \#1580 Old segment folder not removed after merge/compact if create_index is called before adding data
## Feature
- \#216 Add CLI to get server info

View File

@ -2026,6 +2026,7 @@ Status
MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/) {
auto now = utils::GetMicroSecTimeStamp();
std::set<std::string> table_ids;
std::map<std::string, TableFileSchema> segment_ids;
// remove to_delete files
try {
@ -2053,7 +2054,7 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/)
mysqlpp::StoreQueryResult res = query.store();
TableFileSchema table_file;
std::vector<std::string> idsToDelete;
std::vector<std::string> delete_ids;
int64_t clean_files = 0;
for (auto& resRow : res) {
@ -2078,30 +2079,22 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/)
server::CommonUtil::EraseFromCache(table_file.location_);
if (table_file.file_type_ == (int)TableFileSchema::TO_DELETE) {
// If we are deleting a raw table file, it means it's okay to delete the entire segment directory.
// Else, we can only delete the single file
// TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky
if (utils::IsRawIndexType(table_file.engine_type_)) {
utils::DeleteSegment(options_, table_file);
std::string segment_dir;
utils::GetParentPath(table_file.location_, segment_dir);
ENGINE_LOG_DEBUG << "Remove segment directory: " << segment_dir;
} else {
utils::DeleteTableFilePath(options_, table_file);
ENGINE_LOG_DEBUG << "Remove table file: " << table_file.location_;
}
// delete file from disk storage
utils::DeleteTableFilePath(options_, table_file);
ENGINE_LOG_DEBUG << "Remove file id:" << table_file.id_ << " location:" << table_file.location_;
idsToDelete.emplace_back(std::to_string(table_file.id_));
delete_ids.emplace_back(std::to_string(table_file.id_));
table_ids.insert(table_file.table_id_);
segment_ids.insert(std::make_pair(table_file.segment_id_, table_file));
++clean_files;
clean_files++;
}
}
// delete file from meta
if (!idsToDelete.empty()) {
if (!delete_ids.empty()) {
std::stringstream idsToDeleteSS;
for (auto& id : idsToDelete) {
for (auto& id : delete_ids) {
idsToDeleteSS << "id = " << id << " OR ";
}
@ -2217,6 +2210,51 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/)
return HandleException("GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL", e.what());
}
// remove deleted segment folder
// don't remove segment folder until all its tablefiles has been deleted
try {
server::MetricCollector metric;
{
mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);
bool is_null_connection = (connectionPtr == nullptr);
fiu_do_on("MySQLMetaImpl.CleanUpFilesWithTTL.RemoveDeletedSegmentFolder_NUllConnection",
is_null_connection = true);
fiu_do_on("MySQLMetaImpl.CleanUpFilesWithTTL.RemoveDeletedSegmentFolder_ThrowException",
throw std::exception(););
if (is_null_connection) {
return Status(DB_ERROR, "Failed to connect to meta server(mysql)");
}
int64_t remove_segments = 0;
for (auto& segment_id : segment_ids) {
mysqlpp::Query query = connectionPtr->query();
query << "SELECT id"
<< " FROM " << META_TABLEFILES << " WHERE segment_id = " << mysqlpp::quote << segment_id.first
<< ";";
ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUpFilesWithTTL: " << query.str();
mysqlpp::StoreQueryResult res = query.store();
if (res.empty()) {
utils::DeleteSegment(options_, segment_id.second);
std::string segment_dir;
utils::GetParentPath(segment_id.second.location_, segment_dir);
ENGINE_LOG_DEBUG << "Remove segment directory: " << segment_dir;
++remove_segments;
}
}
if (remove_segments > 0) {
ENGINE_LOG_DEBUG << "Remove " << remove_segments << " segments folder";
}
}
} catch (std::exception& e) {
return HandleException("GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL", e.what());
}
return Status::OK();
}

View File

@ -1231,11 +1231,13 @@ SqliteMetaImpl::FilesByType(const std::string& table_id, const std::vector<int>&
break;
case (int)TableFileSchema::NEW:msg = msg + " new files:" + std::to_string(new_count);
break;
case (int)TableFileSchema::NEW_MERGE:msg = msg + " new_merge files:"
+ std::to_string(new_merge_count);
case (int)TableFileSchema::NEW_MERGE:
msg = msg + " new_merge files:"
+ std::to_string(new_merge_count);
break;
case (int)TableFileSchema::NEW_INDEX:msg = msg + " new_index files:"
+ std::to_string(new_index_count);
case (int)TableFileSchema::NEW_INDEX:
msg = msg + " new_index files:"
+ std::to_string(new_index_count);
break;
case (int)TableFileSchema::TO_INDEX:msg = msg + " to_index files:" + std::to_string(to_index_count);
break;
@ -1360,6 +1362,7 @@ Status
SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/) {
auto now = utils::GetMicroSecTimeStamp();
std::set<std::string> table_ids;
std::map<std::string, TableFileSchema> segment_ids;
// remove to_delete files
try {
@ -1409,23 +1412,16 @@ SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/
server::CommonUtil::EraseFromCache(table_file.location_);
if (table_file.file_type_ == (int)TableFileSchema::TO_DELETE) {
// If we are deleting a raw table file, it means it's okay to delete the entire segment directory.
// Else, we can only delete the single file
// TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky
if (utils::IsRawIndexType(table_file.engine_type_)) {
utils::DeleteSegment(options_, table_file);
std::string segment_dir;
utils::GetParentPath(table_file.location_, segment_dir);
ENGINE_LOG_DEBUG << "Remove segment directory: " << segment_dir;
} else {
utils::DeleteTableFilePath(options_, table_file);
ENGINE_LOG_DEBUG << "Remove table file: " << table_file.location_;
}
// delete file from meta
ConnectorPtr->remove<TableFileSchema>(table_file.id_);
// delete file from disk storage
utils::DeleteTableFilePath(options_, table_file);
ENGINE_LOG_DEBUG << "Remove file id:" << table_file.file_id_ << " location:"
<< table_file.location_;
table_ids.insert(table_file.table_id_);
segment_ids.insert(std::make_pair(table_file.segment_id_, table_file));
++clean_files;
}
@ -1500,6 +1496,32 @@ SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/
return HandleException("Encounter exception when delete table folder", e.what());
}
// remove deleted segment folder
// don't remove segment folder until all its tablefiles has been deleted
try {
fiu_do_on("SqliteMetaImpl.CleanUpFilesWithTTL.RemoveSegmentFolder_ThrowException", throw std::exception());
server::MetricCollector metric;
int64_t remove_segments = 0;
for (auto& segment_id : segment_ids) {
auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_),
where(c(&TableFileSchema::segment_id_) == segment_id.first));
if (selected.size() == 0) {
utils::DeleteSegment(options_, segment_id.second);
std::string segment_dir;
utils::GetParentPath(segment_id.second.location_, segment_dir);
ENGINE_LOG_DEBUG << "Remove segment directory: " << segment_dir;
++remove_segments;
}
}
if (remove_segments > 0) {
ENGINE_LOG_DEBUG << "Remove " << remove_segments << " segments folder";
}
} catch (std::exception& e) {
return HandleException("Encounter exception when delete table folder", e.what());
}
return Status::OK();
}