fix: fix incorrect dir operations when creating or loading inverted index (#38359)

#37944

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
pull/38537/head
zhagnlu 2024-12-17 20:06:45 +08:00 committed by GitHub
parent 93fba1d5ab
commit 9afcc5bc5c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 75 additions and 28 deletions

View File

@ -70,12 +70,8 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
}
template <typename T>
InvertedIndexTantivy<T>::InvertedIndexTantivy(
const storage::FileManagerContext& ctx)
: ScalarIndex<T>(INVERTED_INDEX_TYPE),
schema_(ctx.fieldDataMeta.field_schema) {
mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
void
InvertedIndexTantivy<T>::InitForBuildIndex() {
auto field =
std::to_string(disk_file_manager_->GetFieldDataMeta().field_id);
auto prefix = disk_file_manager_->GetIndexIdentifier();
@ -83,13 +79,26 @@ InvertedIndexTantivy<T>::InvertedIndexTantivy(
boost::filesystem::create_directories(path_);
d_type_ = get_tantivy_data_type(schema_);
if (tantivy_index_exist(path_.c_str())) {
LOG_INFO(
"index {} already exists, which should happen in loading progress",
path_);
} else {
wrapper_ = std::make_shared<TantivyIndexWrapper>(
field.c_str(), d_type_, path_.c_str());
PanicInfo(IndexBuildError,
"build inverted index temp dir:{} not empty",
path_);
}
wrapper_ = std::make_shared<TantivyIndexWrapper>(
field.c_str(), d_type_, path_.c_str());
}
/**
 * Constructs an inverted index from a file manager context.
 *
 * The memory and disk file managers are always created from `ctx`.
 * Build-side initialization (InitForBuildIndex) runs only when the context
 * is not flagged for loading; when `ctx.for_loading_index` is true that step
 * is skipped here and setup is left to the load path.
 *
 * @param ctx file manager context carrying the field schema and the
 *            for_loading_index flag.
 */
template <typename T>
InvertedIndexTantivy<T>::InvertedIndexTantivy(
    const storage::FileManagerContext& ctx)
    : ScalarIndex<T>(INVERTED_INDEX_TYPE),
      schema_(ctx.fieldDataMeta.field_schema) {
    mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
    disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);

    // Only the build path initializes here; for loading, initialization
    // is deferred to the load process.
    if (!ctx.for_loading_index) {
        InitForBuildIndex();
    }
}
template <typename T>
@ -97,6 +106,7 @@ InvertedIndexTantivy<T>::~InvertedIndexTantivy() {
auto local_chunk_manager =
storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto prefix = path_;
LOG_INFO("inverted index remove path:{}", path_);
local_chunk_manager->RemoveDir(prefix);
}
@ -214,6 +224,7 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
(size_t)index_valid_data->size);
}
disk_file_manager_->CacheIndexToDisk(files_value);
path_ = prefix;
wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
}

View File

@ -42,6 +42,8 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
~InvertedIndexTantivy();
void
InitForBuildIndex();
/*
* deprecated.
* TODO: why not remove this?

View File

@ -157,6 +157,8 @@ appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
milvus::storage::FileManagerContext fileManagerContext(
field_meta, index_meta, remote_chunk_manager);
fileManagerContext.set_for_loading_index(true);
load_index_info->index =
milvus::index::IndexFactory::GetInstance().CreateIndex(
index_info, fileManagerContext);
@ -305,6 +307,8 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) {
milvus::storage::FileManagerContext fileManagerContext(
field_meta, index_meta, remote_chunk_manager);
fileManagerContext.set_for_loading_index(true);
load_index_info->index =
milvus::index::IndexFactory::GetInstance().CreateIndex(
index_info, fileManagerContext);

View File

@ -48,9 +48,15 @@ struct FileManagerContext {
return chunkManagerPtr != nullptr;
}
void
set_for_loading_index(bool value) {
for_loading_index = value;
}
FieldDataMeta fieldDataMeta;
IndexMeta indexMeta;
ChunkManagerPtr chunkManagerPtr;
bool for_loading_index{false};
};
#define FILEMANAGER_TRY try {

View File

@ -248,6 +248,7 @@ class ArrayBitmapIndexTest : public testing::Test {
config["index_files"] = index_files;
ctx.set_for_loading_index(true);
index_ =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index_->Load(milvus::tracer::TraceContext{}, config);
@ -258,6 +259,8 @@ class ArrayBitmapIndexTest : public testing::Test {
nb_ = 10000;
cardinality_ = 30;
nullable_ = false;
index_build_id_ = 2001;
index_version_ = 2001;
}
void
@ -278,8 +281,6 @@ class ArrayBitmapIndexTest : public testing::Test {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
std::string root_path = "/tmp/test-bitmap-index/";
storage::StorageConfig storage_config;
@ -291,8 +292,8 @@ class ArrayBitmapIndexTest : public testing::Test {
partition_id,
segment_id,
field_id,
index_build_id,
index_version);
index_build_id_,
index_version_);
}
virtual ~ArrayBitmapIndexTest() override {
@ -340,6 +341,8 @@ class ArrayBitmapIndexTest : public testing::Test {
bool nullable_;
std::vector<milvus::Array> data_;
FixedVector<bool> valid_data_;
int index_version_;
int index_build_id_;
};
TYPED_TEST_SUITE_P(ArrayBitmapIndexTest);
@ -377,6 +380,8 @@ class ArrayBitmapIndexTestV1 : public ArrayBitmapIndexTest<T> {
this->nb_ = 10000;
this->cardinality_ = 200;
this->nullable_ = false;
this->index_build_id_ = 2002;
this->index_version_ = 2002;
}
virtual ~ArrayBitmapIndexTestV1() {
@ -398,6 +403,8 @@ class ArrayBitmapIndexTestNullable : public ArrayBitmapIndexTest<T> {
this->nb_ = 10000;
this->cardinality_ = 30;
this->nullable_ = true;
this->index_version_ = 2003;
this->index_build_id_ = 2003;
}
virtual ~ArrayBitmapIndexTestNullable() {

View File

@ -176,6 +176,8 @@ class BitmapIndexTest : public testing::Test {
nb_ = 10000;
cardinality_ = 30;
nullable_ = false;
index_version_ = 3000;
index_build_id_ = 3000;
}
void
SetUp() override {
@ -196,8 +198,6 @@ class BitmapIndexTest : public testing::Test {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
std::string root_path = "/tmp/test-bitmap-index/";
storage::StorageConfig storage_config;
@ -209,8 +209,8 @@ class BitmapIndexTest : public testing::Test {
partition_id,
segment_id,
field_id,
index_build_id,
index_version);
index_build_id_,
index_version_);
}
virtual ~BitmapIndexTest() override {
@ -400,6 +400,8 @@ class BitmapIndexTest : public testing::Test {
bool nullable_;
FixedVector<bool> valid_data_;
std::shared_ptr<storage::ChunkManager> chunk_manager_;
int index_version_;
int index_build_id_;
};
TYPED_TEST_SUITE_P(BitmapIndexTest);
@ -450,6 +452,8 @@ class BitmapIndexTestV2 : public BitmapIndexTest<T> {
this->nb_ = 10000;
this->cardinality_ = 2000;
this->nullable_ = false;
this->index_version_ = 3001;
this->index_build_id_ = 3001;
}
virtual ~BitmapIndexTestV2() {
@ -512,6 +516,8 @@ class BitmapIndexTestV3 : public BitmapIndexTest<T> {
this->cardinality_ = 2000;
this->is_mmap_ = true;
this->nullable_ = false;
this->index_version_ = 3002;
this->index_build_id_ = 3002;
}
virtual ~BitmapIndexTestV3() {
@ -574,6 +580,8 @@ class BitmapIndexTestV4 : public BitmapIndexTest<T> {
this->cardinality_ = 2000;
this->is_mmap_ = true;
this->nullable_ = true;
this->index_version_ = 3003;
this->index_build_id_ = 3003;
}
virtual ~BitmapIndexTestV4() {

View File

@ -161,6 +161,7 @@ class HybridIndexTestV1 : public testing::Test {
config["index_files"] = index_files;
ctx.set_for_loading_index(true);
index_ =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index_->Load(milvus::tracer::TraceContext{}, config);
@ -171,6 +172,8 @@ class HybridIndexTestV1 : public testing::Test {
nb_ = 10000;
cardinality_ = 30;
nullable_ = false;
index_version_ = 1001;
index_build_id_ = 1001;
}
void
SetUp() override {
@ -191,8 +194,6 @@ class HybridIndexTestV1 : public testing::Test {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
std::string root_path = "/tmp/test-bitmap-index";
storage::StorageConfig storage_config;
@ -204,8 +205,8 @@ class HybridIndexTestV1 : public testing::Test {
partition_id,
segment_id,
field_id,
index_build_id,
index_version);
index_build_id_,
index_version_);
}
virtual ~HybridIndexTestV1() override {
@ -398,6 +399,8 @@ class HybridIndexTestV1 : public testing::Test {
std::shared_ptr<storage::ChunkManager> chunk_manager_;
bool nullable_;
FixedVector<bool> valid_data_;
int index_build_id_;
int index_version_;
};
TYPED_TEST_SUITE_P(HybridIndexTestV1);
@ -455,6 +458,8 @@ class HybridIndexTestV2 : public HybridIndexTestV1<T> {
this->nb_ = 10000;
this->cardinality_ = 2000;
this->nullable_ = false;
this->index_version_ = 1002;
this->index_build_id_ = 1002;
}
virtual ~HybridIndexTestV2() {
@ -500,6 +505,8 @@ class HybridIndexTestNullable : public HybridIndexTestV1<T> {
this->nb_ = 10000;
this->cardinality_ = 2000;
this->nullable_ = true;
this->index_version_ = 1003;
this->index_build_id_ = 1003;
}
virtual ~HybridIndexTestNullable() {

View File

@ -103,8 +103,8 @@ test_run() {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
int64_t index_build_id = 4000;
int64_t index_version = 4000;
auto field_meta = test::gen_field_meta(collection_id,
partition_id,
@ -207,6 +207,7 @@ test_run() {
Config config;
config["index_files"] = index_files;
ctx.set_for_loading_index(true);
auto index =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index->Load(milvus::tracer::TraceContext{}, config);
@ -384,8 +385,8 @@ test_string() {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
int64_t index_build_id = 4001;
int64_t index_version = 4001;
auto field_meta = test::gen_field_meta(collection_id,
partition_id,
@ -479,6 +480,7 @@ test_string() {
Config config;
config["index_files"] = index_files;
ctx.set_for_loading_index(true);
auto index =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index->Load(milvus::tracer::TraceContext{}, config);