enhance: optimize the index loading performance (#29894) (#30018)

this utilizes concurrent loading
pr: #29894

Signed-off-by: yah01 <yang.cen@zilliz.com>
pull/30184/head
yah01 2024-01-22 13:12:56 +08:00 committed by GitHub
parent bac1a1355b
commit a8d9b0ccba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 15 additions and 21 deletions

View File

@ -145,8 +145,6 @@ VectorMemIndex::Load(const Config& config) {
.empty()) { // load with the slice meta info, then we can load batch by batch
std::string index_file_prefix = slice_meta_filepath.substr(
0, slice_meta_filepath.find_last_of('/') + 1);
std::vector<std::string> batch{};
batch.reserve(parallel_degree);
auto result = file_manager_->LoadIndexToMemory({slice_meta_filepath});
auto raw_slice_meta = result[INDEX_FILE_SLICE_META];
@ -161,30 +159,26 @@ VectorMemIndex::Load(const Config& config) {
auto new_field_data =
milvus::storage::CreateFieldData(DataType::INT8, 1, total_len);
auto HandleBatch = [&](int index) {
auto batch_data = file_manager_->LoadIndexToMemory(batch);
for (int j = index - batch.size() + 1; j <= index; j++) {
std::string file_name = GenSlicedFileName(prefix, j);
AssertInfo(batch_data.find(file_name) != batch_data.end(),
"lost index slice data");
auto data = batch_data[file_name];
new_field_data->FillFieldData(data->Data(), data->Size());
}
for (auto& file : batch) {
pending_index_files.erase(file);
}
batch.clear();
};
std::vector<std::string> batch;
batch.reserve(slice_num);
for (auto i = 0; i < slice_num; ++i) {
std::string file_name = GenSlicedFileName(prefix, i);
batch.push_back(index_file_prefix + file_name);
if (batch.size() >= parallel_degree) {
HandleBatch(i);
}
}
if (batch.size() > 0) {
HandleBatch(slice_num - 1);
auto batch_data = file_manager_->LoadIndexToMemory(batch);
for (const auto& file_path : batch) {
const std::string file_name =
file_path.substr(file_path.find_last_of('/') + 1);
AssertInfo(batch_data.find(file_name) != batch_data.end(),
"lost index slice data: {}",
file_name);
auto data = batch_data[file_name];
new_field_data->FillFieldData(data->Data(), data->Size());
}
for (auto& file : batch) {
pending_index_files.erase(file);
}
AssertInfo(