fix: Prevent ChunkCache use absolute path in All-in-one mode (#30666)

See also #30651

Append operator of `std::filesystem::path` will replace whole path when
the param of "/" operation is an absolute path.

In "All-in-one" mode, this shall cause ChunkCache removing the original
vector data file when building chunk cache during/after load procedure.

This PR changes the ChunkCache path generation logic to a separate
function in which will check whether the file path is absolute or not.
If the file path is absolute, it removes the root path prefix and return
concatenated file path.

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
pull/30473/head
congqixia 2024-02-19 20:58:51 +08:00 committed by GitHub
parent b74264881c
commit 18c351efa6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 36 additions and 4 deletions

View File

@ -20,7 +20,7 @@ namespace milvus::storage {
std::shared_ptr<ColumnBase>
ChunkCache::Read(const std::string& filepath) {
auto path = std::filesystem::path(path_prefix_) / filepath;
auto path = CachePath(filepath);
{
std::shared_lock lck(mutex_);
@ -46,14 +46,14 @@ ChunkCache::Read(const std::string& filepath) {
void
ChunkCache::Remove(const std::string& filepath) {
auto path = std::filesystem::path(path_prefix_) / filepath;
auto path = CachePath(filepath);
std::unique_lock lck(mutex_);
columns_.erase(path);
}
void
ChunkCache::Prefetch(const std::string& filepath) {
auto path = std::filesystem::path(path_prefix_) / filepath;
auto path = CachePath(filepath);
std::shared_lock lck(mutex_);
auto it = columns_.find(path);
@ -68,7 +68,7 @@ ChunkCache::Prefetch(const std::string& filepath) {
read_ahead_policy_);
AssertInfo(ok == 0,
"failed to madvise to the data file {}, err: {}",
path.c_str(),
path,
strerror(errno));
}
@ -115,4 +115,20 @@ ChunkCache::Mmap(const std::filesystem::path& path,
return column;
}
std::string
ChunkCache::CachePath(const std::string& filepath) {
auto path = std::filesystem::path(filepath);
auto prefix = std::filesystem::path(path_prefix_);
// Cache path shall not use absolute filepath direct, it shall always under path_prefix_
if (path.is_absolute()) {
return (prefix /
filepath.substr(path.root_directory().string().length(),
filepath.length()))
.string();
}
return (prefix / filepath).string();
}
} // namespace milvus::storage

View File

@ -56,6 +56,9 @@ class ChunkCache {
std::shared_ptr<ColumnBase>
Mmap(const std::filesystem::path& path, const FieldDataPtr& field_data);
std::string
CachePath(const std::string& filepath);
private:
using ColumnTable =
std::unordered_map<std::string, std::shared_ptr<ColumnBase>>;

View File

@ -21,6 +21,9 @@
#include "storage/RemoteChunkManagerSingleton.h"
#include "storage/storage_c.h"
#define private public
#include "storage/ChunkCache.h"
using namespace std;
using namespace milvus;
using namespace milvus::storage;
@ -150,3 +153,13 @@ TEST_F(StorageTest, GetStorageMetrics) {
0, strncmp(currentLine, familyName.c_str(), familyName.length()));
}
}
TEST_F(StorageTest, CachePath) {
auto rcm =
RemoteChunkManagerSingleton::GetInstance().GetRemoteChunkManager();
auto cc_ = ChunkCache("tmp/mmap/chunk_cache", "willneed", rcm);
auto relative_result = cc_.CachePath("abc");
EXPECT_EQ("tmp/mmap/chunk_cache/abc", relative_result);
auto absolute_result = cc_.CachePath("/var/lib/milvus/abc");
EXPECT_EQ("tmp/mmap/chunk_cache/var/lib/milvus/abc", absolute_result);
}