Fix BucketExists bug in minio chunk manager (#19548) (#19854)

Signed-off-by: zhagnlu <lu.zhang@zilliz.com>

Signed-off-by: zhagnlu <lu.zhang@zilliz.com>
Co-authored-by: zhagnlu <lu.zhang@zilliz.com>
pull/19926/head
zhagnlu 2022-10-20 10:23:27 +08:00 committed by GitHub
parent c65bf46b61
commit 02a8a07322
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 106 additions and 91 deletions

View File

@ -6,7 +6,7 @@
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@ -14,47 +14,47 @@
# See the License for the specific language governing permissions and
# limitations under the License.
option( EMBEDDED_MILVUS "Enable embedded Milvus" OFF )
option(EMBEDDED_MILVUS "Enable embedded Milvus" OFF)
if ( EMBEDDED_MILVUS )
add_compile_definitions( EMBEDDED_MILVUS )
if(EMBEDDED_MILVUS)
add_compile_definitions(EMBEDDED_MILVUS)
endif()
milvus_add_pkg_config("milvus_storage")
set(STORAGE_FILES
parquet_c.cpp
PayloadStream.cpp
DataCodec.cpp
Util.cpp
PayloadReader.cpp
PayloadWriter.cpp
FieldData.cpp
IndexData.cpp
InsertData.cpp
Event.cpp
storage_c.cpp)
parquet_c.cpp
PayloadStream.cpp
DataCodec.cpp
Util.cpp
PayloadReader.cpp
PayloadWriter.cpp
FieldData.cpp
IndexData.cpp
InsertData.cpp
Event.cpp
storage_c.cpp)
if ( BUILD_DISK_ANN STREQUAL "ON" )
if(BUILD_DISK_ANN STREQUAL "ON")
set(STORAGE_FILES
${STORAGE_FILES}
LocalChunkManager.cpp
MinioChunkManager.cpp
DiskFileManagerImpl.cpp)
endif ()
${STORAGE_FILES}
LocalChunkManager.cpp
MinioChunkManager.cpp
DiskFileManagerImpl.cpp)
endif()
add_library(milvus_storage SHARED ${STORAGE_FILES})
find_package(Boost REQUIRED COMPONENTS filesystem)
if ( BUILD_DISK_ANN STREQUAL "ON" )
target_link_libraries( milvus_storage PUBLIC milvus_common Boost::filesystem aws-cpp-sdk-s3 pthread)
if(BUILD_DISK_ANN STREQUAL "ON")
target_link_libraries(milvus_storage PUBLIC milvus_common Boost::filesystem aws-cpp-sdk-s3 pthread)
else()
target_link_libraries( milvus_storage PUBLIC milvus_common Boost::filesystem pthread)
target_link_libraries(milvus_storage PUBLIC milvus_common Boost::filesystem pthread)
endif()
if(NOT CMAKE_INSTALL_PREFIX)
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR})
endif()
install(TARGETS milvus_storage DESTINATION "${CMAKE_INSTALL_LIBDIR}")
install(TARGETS milvus_storage DESTINATION "${CMAKE_INSTALL_LIBDIR}")

View File

@ -184,34 +184,8 @@ DiskFileManagerImpl::GetLocalRawDataObjectPrefix() {
bool
DiskFileManagerImpl::RemoveFile(const std::string& file) noexcept {
// remove local file
bool localExist = false;
auto& local_chunk_manager = LocalChunkManager::GetInstance();
FILEMANAGER_TRY
localExist = local_chunk_manager.Exist(file);
FILEMANAGER_CATCH
FILEMANAGER_END
if (localExist) {
FILEMANAGER_TRY
local_chunk_manager.Remove(file);
FILEMANAGER_CATCH
FILEMANAGER_END
}
// remove according remote file
std::string remoteFile = "";
bool remoteExist = false;
FILEMANAGER_TRY
remoteExist = rcm_->Exist(remoteFile);
FILEMANAGER_CATCH
FILEMANAGER_END
if (remoteExist) {
FILEMANAGER_TRY
rcm_->Remove(file);
FILEMANAGER_CATCH
FILEMANAGER_END
}
return true;
// TODO: implement this interface
return false;
}
std::optional<bool>

View File

@ -18,6 +18,7 @@
#include "Exception.h"
#include <sstream>
#include <fstream>
#include <boost/filesystem.hpp>
#include <boost/system/error_code.hpp>

View File

@ -20,6 +20,7 @@
#include <aws/core/auth/STSCredentialsProvider.h>
#include <aws/s3/model/CreateBucketRequest.h>
#include <aws/s3/model/DeleteBucketRequest.h>
#include <aws/s3/model/HeadBucketRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <aws/s3/model/HeadObjectRequest.h>
#include <aws/s3/model/ListObjectsRequest.h>
@ -41,6 +42,8 @@
#define S3NoSuchBucket "NoSuchBucket"
namespace milvus::storage {
std::atomic<size_t> MinioChunkManager::init_count_(0);
/**
* @brief convert std::string to Aws::String
* because Aws has String type internally
@ -66,7 +69,10 @@ ConvertFromAwsString(const Aws::String& aws_str) {
MinioChunkManager::MinioChunkManager(const StorageConfig& storage_config)
: default_bucket_name_(storage_config.bucket_name) {
Aws::InitAPI(sdk_options_);
const size_t initCount = init_count_++;
if (initCount == 0) {
Aws::InitAPI(sdk_options_);
}
Aws::Client::ClientConfiguration config;
config.endpointOverride = ConvertToAwsString(storage_config.address);
@ -105,7 +111,10 @@ MinioChunkManager::MinioChunkManager(const StorageConfig& storage_config)
}
MinioChunkManager::~MinioChunkManager() {
Aws::ShutdownAPI(sdk_options_);
const size_t initCount = --init_count_;
if (initCount == 0) {
Aws::ShutdownAPI(sdk_options_);
}
client_.reset();
}
@ -146,17 +155,31 @@ MinioChunkManager::Write(const std::string& filepath, void* buf, uint64_t size)
bool
MinioChunkManager::BucketExists(const std::string& bucket_name) {
auto outcome = client_->ListBuckets();
// auto outcome = client_->ListBuckets();
// if (!outcome.IsSuccess()) {
// THROWS3ERROR(BucketExists);
// }
// for (auto&& b : outcome.GetResult().GetBuckets()) {
// if (ConvertFromAwsString(b.GetName()) == bucket_name) {
// return true;
// }
// }
Aws::S3::Model::HeadBucketRequest request;
request.SetBucket(bucket_name.c_str());
auto outcome = client_->HeadBucket(request);
if (!outcome.IsSuccess()) {
THROWS3ERROR(BucketExists);
}
for (auto&& b : outcome.GetResult().GetBuckets()) {
if (ConvertFromAwsString(b.GetName()) == bucket_name) {
return true;
auto error = outcome.GetError();
if (!error.GetExceptionName().empty()) {
std::stringstream err_msg;
err_msg << "Error: BucketExists: " << error.GetExceptionName() + " - " + error.GetMessage();
throw S3ErrorException(err_msg.str());
}
return false;
}
return false;
return true;
}
std::vector<std::string>

View File

@ -114,7 +114,8 @@ class MinioChunkManager : public RemoteChunkManager {
ListObjects(const char* bucket_name, const char* prefix = NULL);
private:
Aws::SDKOptions sdk_options_;
const Aws::SDKOptions sdk_options_;
static std::atomic<size_t> init_count_;
std::shared_ptr<Aws::S3::S3Client> client_;
std::string default_bucket_name_;
};

View File

@ -1,4 +1,4 @@
#-------------------------------------------------------------------------------
# -------------------------------------------------------------------------------
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
@ -9,46 +9,63 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------
# -------------------------------------------------------------------------------
set ( AWS_SDK_VERSION "1.8.186" )
set ( AWS_SDK_SOURCE_URL
"https://github.com/aws/aws-sdk-cpp/archive/refs/tags/${AWS_SDK_VERSION}.tar.gz" )
set ( AWS_SDK_MD5 "ef4351b0969474cb85f39bc9f1975eb5" )
set(AWS_SDK_VERSION "1.8.186")
macro ( build_aws_sdk_s3 )
message( STATUS "Building AWS-SDK-${AWS_SDK_VERSION} from source" )
macro(build_aws_sdk_s3)
message(STATUS "Building AWS-SDK-${AWS_SDK_VERSION} from source")
set ( AWS_SDK_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX} )
set(AWS_SDK_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
set( AWS_SDK_BUILD_COMMAND make )
set( AWS_SDK_INSTALL_COMMAND make install )
set(AWS_SDK_BUILD_COMMAND make)
set(AWS_SDK_INSTALL_COMMAND make install)
set (AWS_SDK_S3_CMAKE_ARGS
set(AWS_SDK_S3_CMAKE_ARGS
"-DCMAKE_BUILD_TYPE=Release"
"-DBUILD_ONLY=s3"
"-DENABLE_TESTING=OFF"
"-DENABLE_TESTING=OFF"
"-DAUTORUN_UNIT_TESTS=OFF"
"-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}" )
"-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}")
ExternalProject_Add(aws_sdk_s3_ep
URL ${AWS_SDK_SOURCE_URL}
URL_MD5 ${AWS_SDK_MD5}
BINARY_DIR aws-s3-bin
PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/aws-sdk-subbuild
BUILD_COMMAND ${AWS_SDK_BUILD_COMMAND}
INSTALL_COMMAND ${AWS_SDK_INSTALL_COMMAND}
CMAKE_ARGS ${AWS_SDK_S3_CMAKE_ARGS}
GIT_REPOSITORY https://github.com/aws/aws-sdk-cpp.git
GIT_TAG ${AWS_SDK_VERSION}
# BINARY_DIR aws-s3-bin
PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/aws-sdk-subbuild
BUILD_IN_SOURCE 1
#PATCH_COMMAND sh prefetch_crt_dependency.sh
LIST_SEPARATOR "|"
BUILD_COMMAND ${AWS_SDK_BUILD_COMMAND}
INSTALL_COMMAND ${AWS_SDK_INSTALL_COMMAND}
CMAKE_ARGS ${AWS_SDK_S3_CMAKE_ARGS}
)
add_library(aws-cpp-sdk-core SHARED IMPORTED)
set_target_properties(aws-cpp-sdk-core
PROPERTIES
IMPORTED_GLOBAL TRUE
IMPORTED_LOCATION ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-core${CMAKE_SHARED_LIBRARY_SUFFIX}
INTERFACE_INCLUDE_DIRECTORIES ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
add_dependencies(aws-cpp-sdk-core aws_sdk_s3_ep)
add_library(aws-cpp-sdk-s3 SHARED IMPORTED)
set_target_properties(aws-cpp-sdk-s3
PROPERTIES
IMPORTED_GLOBAL TRUE
IMPORTED_LOCATION ${AWS_SDK_INSTALL_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-s3${CMAKE_SHARED_LIBRARY_SUFFIX}
INTERFACE_INCLUDE_DIRECTORIES ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
PROPERTIES
IMPORTED_GLOBAL TRUE
IMPORTED_LOCATION ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-s3${CMAKE_SHARED_LIBRARY_SUFFIX}
INTERFACE_INCLUDE_DIRECTORIES ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
add_dependencies(aws-cpp-sdk-s3 aws_sdk_s3_ep)
# add_library(aws-cpp-sdk-sts SHARED IMPORTED)
# set_target_properties(aws-cpp-sdk-sts
# PROPERTIES
# IMPORTED_GLOBAL TRUE
# IMPORTED_LOCATION ${AWS_SDK_INSTALL_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-sts${CMAKE_SHARED_LIBRARY_SUFFIX}
# INTERFACE_INCLUDE_DIRECTORIES ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
# add_dependencies(aws-cpp-sdk-sts aws_sdk_s3_ep)
get_target_property(S3_IMPORTED_LOCATION aws-cpp-sdk-s3 IMPORTED_LOCATION)
get_target_property(S3_INTERFACE_INCLUDE_DIRECTORIES aws-cpp-sdk-s3 INTERFACE_INCLUDE_DIRECTORIES)
message("AWS_SDK_INSTALL_PREFIX: ${AWS_SDK_INSTALL_PREFIX}")
@ -57,5 +74,4 @@ macro ( build_aws_sdk_s3 )
message("S3_INTERFACE_INCLUDE_DIRECTORIES: ${S3_INTERFACE_INCLUDE_DIRECTORIES}")
endmacro()
build_aws_sdk_s3()

View File

@ -52,8 +52,8 @@ set(MILVUS_TEST_FILES
if ( BUILD_DISK_ANN STREQUAL "ON" )
set(MILVUS_TEST_FILES
${MILVUS_TEST_FILES}
# test_minio_chunk_manager.cpp
# test_disk_file_manager_test.cpp
#test_minio_chunk_manager.cpp
#test_disk_file_manager_test.cpp
test_local_chunk_manager.cpp
)
endif()