diff --git a/Makefile b/Makefile index 48441d5dba..f3daaff0b6 100644 --- a/Makefile +++ b/Makefile @@ -335,6 +335,16 @@ test-querycoord: @echo "Running go unittests..." @(env bash $(PWD)/scripts/run_go_unittest.sh -t querycoord) +generate-mockery-flushcommon: getdeps + $(INSTALL_PATH)/mockery --name=MetaCache --dir=$(PWD)/internal/flushcommon/metacache --output=$(PWD)/internal/flushcommon/metacache --filename=mock_meta_cache.go --with-expecter --structname=MockMetaCache --outpkg=metacache --inpackage + $(INSTALL_PATH)/mockery --name=SyncManager --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_sync_manager.go --with-expecter --structname=MockSyncManager --outpkg=syncmgr --inpackage + $(INSTALL_PATH)/mockery --name=MetaWriter --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_meta_writer.go --with-expecter --structname=MockMetaWriter --outpkg=syncmgr --inpackage + $(INSTALL_PATH)/mockery --name=Serializer --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_serializer.go --with-expecter --structname=MockSerializer --outpkg=syncmgr --inpackage + $(INSTALL_PATH)/mockery --name=Task --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_task.go --with-expecter --structname=MockTask --outpkg=syncmgr --inpackage + $(INSTALL_PATH)/mockery --name=WriteBuffer --dir=$(PWD)/internal/flushcommon/writebuffer --output=$(PWD)/internal/flushcommon/writebuffer --filename=mock_write_buffer.go --with-expecter --structname=MockWriteBuffer --outpkg=writebuffer --inpackage + $(INSTALL_PATH)/mockery --name=BufferManager --dir=$(PWD)/internal/flushcommon/writebuffer --output=$(PWD)/internal/flushcommon/writebuffer --filename=mock_manager.go --with-expecter --structname=MockBufferManager --outpkg=writebuffer --inpackage + $(INSTALL_PATH)/mockery --name=FlowgraphManager --dir=$(PWD)/internal/flushcommon/pipeline --output=$(PWD)/internal/flushcommon/pipeline --filename=mock_fgmanager.go --with-expecter --structname=MockFlowgraphManager --outpkg=pipeline --inpackage + test-metastore: @echo "Running go unittests..." @(env bash $(PWD)/scripts/run_go_unittest.sh -t metastore) diff --git a/go.mod b/go.mod index 74c5738445..23ae54c6e1 100644 --- a/go.mod +++ b/go.mod @@ -55,8 +55,6 @@ require ( google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f ) -require github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70 - require ( github.com/bits-and-blooms/bitset v1.10.0 github.com/cenkalti/backoff/v4 v4.2.1 diff --git a/go.sum b/go.sum index 456ac745ed..079d045191 100644 --- a/go.sum +++ b/go.sum @@ -608,8 +608,6 @@ github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZz github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4= github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240717062137-3ffb1db01632 h1:CXig0DNtUsCLzchCFe3PR2KgOdobbz9gK2nSV7195PM= github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240717062137-3ffb1db01632/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= -github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70 h1:Z+sp64fmAOxAG7mU0dfVOXvAXlwRB0c8a96rIM5HevI= -github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70/go.mod h1:GPETMcTZq1gLY1WA6Na5kiNAKnq8SEMMiVKUZrM3sho= github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A= github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= diff --git a/internal/core/src/clustering/KmeansClustering.h b/internal/core/src/clustering/KmeansClustering.h index bfb7d0e4a1..500613ea0a 100644 --- a/internal/core/src/clustering/KmeansClustering.h +++ b/internal/core/src/clustering/KmeansClustering.h @@ -21,7 +21,6 @@ #include #include "storage/MemFileManagerImpl.h" -#include "storage/space.h" #include "pb/clustering.pb.h" #include "knowhere/cluster/cluster_factory.h" diff --git a/internal/core/src/clustering/file_utils.h b/internal/core/src/clustering/file_utils.h index 097d57e84b..f5e8b966c7 100644 --- a/internal/core/src/clustering/file_utils.h +++ b/internal/core/src/clustering/file_utils.h @@ -25,7 +25,6 @@ #include "storage/ChunkManager.h" #include "storage/DataCodec.h" #include "storage/Types.h" -#include "storage/space.h" namespace milvus::clustering { diff --git a/internal/core/src/exec/CMakeLists.txt b/internal/core/src/exec/CMakeLists.txt index 9b1ca330c7..8e134f5128 100644 --- a/internal/core/src/exec/CMakeLists.txt +++ b/internal/core/src/exec/CMakeLists.txt @@ -30,4 +30,4 @@ set(MILVUS_EXEC_SRCS add_library(milvus_exec STATIC ${MILVUS_EXEC_SRCS}) -target_link_libraries(milvus_exec milvus_common milvus-storage ${CONAN_LIBS}) +target_link_libraries(milvus_exec milvus_common ${CONAN_LIBS}) diff --git a/internal/core/src/index/BitmapIndex.cpp b/internal/core/src/index/BitmapIndex.cpp index 3052dce0cd..dd8b703f40 100644 --- a/internal/core/src/index/BitmapIndex.cpp +++ b/internal/core/src/index/BitmapIndex.cpp @@ -25,7 +25,6 @@ #include "index/ScalarIndex.h" #include "index/Utils.h" #include "storage/Util.h" -#include "storage/space.h" namespace milvus { namespace index { @@ -42,20 +41,6 @@ BitmapIndex::BitmapIndex( } } -template -BitmapIndex::BitmapIndex( - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) - : is_built_(false), - schema_(file_manager_context.fieldDataMeta.field_schema), - space_(space) { - if (file_manager_context.Valid()) { - file_manager_ = std::make_shared( - file_manager_context, space); - AssertInfo(file_manager_ != nullptr, "create file manager failed!"); - } -} - template void BitmapIndex::Build(const Config& config) { @@ -101,32 +86,6 @@ BitmapIndex::Build(size_t n, const T* data) { is_built_ = true; } -template -void -BitmapIndex::BuildV2(const Config& config) { - if (is_built_) { - return; - } - auto field_name = file_manager_->GetIndexMeta().field_name; - auto reader = space_->ScanData(); - std::vector field_datas; - for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { - if (!rec.ok()) { - PanicInfo(DataFormatBroken, "failed to read data"); - } - auto data = rec.ValueUnsafe(); - auto total_num_rows = data->num_rows(); - auto col_data = data->GetColumnByName(field_name); - // todo: support nullable index - auto field_data = storage::CreateFieldData( - DataType(GetDType()), false, 0, total_num_rows); - field_data->FillFieldData(col_data); - field_datas.push_back(field_data); - } - - BuildWithFieldData(field_datas); -} - template void BitmapIndex::BuildPrimitiveField( @@ -302,21 +261,6 @@ BitmapIndex::Upload(const Config& config) { return ret; } -template -BinarySet -BitmapIndex::UploadV2(const Config& config) { - auto binary_set = Serialize(config); - - file_manager_->AddFileV2(binary_set); - - auto remote_path_to_size = file_manager_->GetRemotePathsToFileSize(); - BinarySet ret; - for (auto& file : remote_path_to_size) { - ret.Append(file.first, nullptr, file.second); - } - return ret; -} - template void BitmapIndex::Load(const BinarySet& binary_set, const Config& config) { @@ -420,48 +364,6 @@ BitmapIndex::LoadWithoutAssemble(const BinarySet& binary_set, is_built_ = true; } -template -void -BitmapIndex::LoadV2(const Config& config) { - auto blobs = space_->StatisticsBlobs(); - std::vector index_files; - auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2(); - for (auto& b : blobs) { - if (b.name.rfind(prefix, 0) == 0) { - index_files.push_back(b.name); - } - } - std::map index_datas{}; - for (auto& file_name : index_files) { - auto res = space_->GetBlobByteSize(file_name); - if (!res.ok()) { - PanicInfo(S3Error, "unable to read index blob"); - } - auto index_blob_data = - std::shared_ptr(new uint8_t[res.value()]); - auto status = space_->ReadBlob(file_name, index_blob_data.get()); - if (!status.ok()) { - PanicInfo(S3Error, "unable to read index blob"); - } - auto raw_index_blob = - storage::DeserializeFileData(index_blob_data, res.value()); - auto key = file_name.substr(file_name.find_last_of('/') + 1); - index_datas[key] = raw_index_blob->GetFieldData(); - } - AssembleIndexDatas(index_datas); - - BinarySet binary_set; - for (auto& [key, data] : index_datas) { - auto size = data->Size(); - auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction - auto buf = std::shared_ptr( - (uint8_t*)const_cast(data->Data()), deleter); - binary_set.Append(key, buf, size); - } - - LoadWithoutAssemble(binary_set, config); -} - template void BitmapIndex::Load(milvus::tracer::TraceContext ctx, const Config& config) { diff --git a/internal/core/src/index/BitmapIndex.h b/internal/core/src/index/BitmapIndex.h index 227e6d1d43..9378ca85de 100644 --- a/internal/core/src/index/BitmapIndex.h +++ b/internal/core/src/index/BitmapIndex.h @@ -25,7 +25,6 @@ #include "storage/FileManager.h" #include "storage/DiskFileManagerImpl.h" #include "storage/MemFileManagerImpl.h" -#include "storage/space.h" namespace milvus { namespace index { @@ -46,10 +45,6 @@ class BitmapIndex : public ScalarIndex { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); - explicit BitmapIndex( - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); - ~BitmapIndex() override = default; BinarySet @@ -61,9 +56,6 @@ class BitmapIndex : public ScalarIndex { void Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; - void - LoadV2(const Config& config = {}) override; - int64_t Count() override { return total_num_rows_; @@ -83,9 +75,6 @@ class BitmapIndex : public ScalarIndex { void BuildWithFieldData(const std::vector& datas) override; - void - BuildV2(const Config& config = {}) override; - const TargetBitmap In(size_t n, const T* values) override; @@ -112,9 +101,6 @@ class BitmapIndex : public ScalarIndex { BinarySet Upload(const Config& config = {}) override; - BinarySet - UploadV2(const Config& config = {}) override; - const bool HasRawData() const override { if (schema_.data_type() == proto::schema::DataType::Array) { @@ -195,7 +181,6 @@ class BitmapIndex : public ScalarIndex { size_t total_num_rows_{0}; proto::schema::FieldSchema schema_; std::shared_ptr file_manager_; - std::shared_ptr space_; }; } // namespace index diff --git a/internal/core/src/index/CMakeLists.txt b/internal/core/src/index/CMakeLists.txt index 3256ab63a0..c4f1012e51 100644 --- a/internal/core/src/index/CMakeLists.txt +++ b/internal/core/src/index/CMakeLists.txt @@ -26,6 +26,6 @@ set(INDEX_FILES milvus_add_pkg_config("milvus_index") add_library(milvus_index SHARED ${INDEX_FILES}) -target_link_libraries(milvus_index milvus_storage milvus-storage tantivy_binding) +target_link_libraries(milvus_index milvus_storage tantivy_binding) install(TARGETS milvus_index DESTINATION "${CMAKE_INSTALL_LIBDIR}") diff --git a/internal/core/src/index/HybridScalarIndex.cpp b/internal/core/src/index/HybridScalarIndex.cpp index 628cde37aa..0118039781 100644 --- a/internal/core/src/index/HybridScalarIndex.cpp +++ b/internal/core/src/index/HybridScalarIndex.cpp @@ -23,7 +23,6 @@ #include "index/ScalarIndex.h" #include "index/Utils.h" #include "storage/Util.h" -#include "storage/space.h" namespace milvus { namespace index { @@ -43,23 +42,6 @@ HybridScalarIndex::HybridScalarIndex( internal_index_type_ = ScalarIndexType::NONE; } -template -HybridScalarIndex::HybridScalarIndex( - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) - : is_built_(false), - bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND), - file_manager_context_(file_manager_context), - space_(space) { - if (file_manager_context.Valid()) { - mem_file_manager_ = std::make_shared( - file_manager_context, space); - AssertInfo(mem_file_manager_ != nullptr, "create file manager failed!"); - } - field_type_ = file_manager_context.fieldDataMeta.field_schema.data_type(); - internal_index_type_ = ScalarIndexType::NONE; -} - template ScalarIndexType HybridScalarIndex::SelectIndexBuildType(size_t n, const T* values) { @@ -274,39 +256,6 @@ HybridScalarIndex::Build(const Config& config) { is_built_ = true; } -template -void -HybridScalarIndex::BuildV2(const Config& config) { - if (is_built_) { - return; - } - bitmap_index_cardinality_limit_ = - GetBitmapCardinalityLimitFromConfig(config); - LOG_INFO("config bitmap cardinality limit to {}", - bitmap_index_cardinality_limit_); - - auto field_name = mem_file_manager_->GetIndexMeta().field_name; - auto reader = space_->ScanData(); - std::vector field_datas; - for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { - if (!rec.ok()) { - PanicInfo(DataFormatBroken, "failed to read data"); - } - auto data = rec.ValueUnsafe(); - auto total_num_rows = data->num_rows(); - auto col_data = data->GetColumnByName(field_name); - // todo: support nullable index - auto field_data = storage::CreateFieldData( - DataType(GetDType()), false, 0, total_num_rows); - field_data->FillFieldData(col_data); - field_datas.push_back(field_data); - } - - SelectIndexBuildType(field_datas); - BuildInternal(field_datas); - is_built_ = true; -} - template BinarySet HybridScalarIndex::Serialize(const Config& config) { @@ -356,21 +305,6 @@ HybridScalarIndex::Upload(const Config& config) { return index_ret; } -template -BinarySet -HybridScalarIndex::UploadV2(const Config& config) { - auto internal_index = GetInternalIndex(); - auto index_ret = internal_index->Upload(config); - - auto index_type_ret = SerializeIndexType(); - - for (auto& [key, value] : index_type_ret.binary_map_) { - index_ret.Append(key, value); - } - - return index_ret; -} - template void HybridScalarIndex::DeserializeIndexType(const BinarySet& binary_set) { @@ -380,12 +314,6 @@ HybridScalarIndex::DeserializeIndexType(const BinarySet& binary_set) { internal_index_type_ = static_cast(index_type); } -template -void -HybridScalarIndex::LoadV2(const Config& config) { - PanicInfo(Unsupported, "HybridScalarIndex LoadV2 not implemented"); -} - template std::string HybridScalarIndex::GetRemoteIndexTypeFile( diff --git a/internal/core/src/index/HybridScalarIndex.h b/internal/core/src/index/HybridScalarIndex.h index bdd32da41a..22107f2b4b 100644 --- a/internal/core/src/index/HybridScalarIndex.h +++ b/internal/core/src/index/HybridScalarIndex.h @@ -28,7 +28,6 @@ #include "storage/FileManager.h" #include "storage/DiskFileManagerImpl.h" #include "storage/MemFileManagerImpl.h" -#include "storage/space.h" namespace milvus { namespace index { @@ -46,10 +45,6 @@ class HybridScalarIndex : public ScalarIndex { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); - explicit HybridScalarIndex( - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); - ~HybridScalarIndex() override = default; BinarySet @@ -61,9 +56,6 @@ class HybridScalarIndex : public ScalarIndex { void Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; - void - LoadV2(const Config& config = {}) override; - int64_t Count() override { return internal_index_->Count(); @@ -85,9 +77,6 @@ class HybridScalarIndex : public ScalarIndex { void Build(const Config& config = {}) override; - void - BuildV2(const Config& config = {}) override; - const TargetBitmap In(size_t n, const T* values) override { return internal_index_->In(n, values); @@ -133,9 +122,6 @@ class HybridScalarIndex : public ScalarIndex { BinarySet Upload(const Config& config = {}) override; - BinarySet - UploadV2(const Config& config = {}) override; - private: ScalarIndexType SelectBuildTypeForPrimitiveType( @@ -173,7 +159,6 @@ class HybridScalarIndex : public ScalarIndex { std::shared_ptr> internal_index_{nullptr}; storage::FileManagerContext file_manager_context_; std::shared_ptr mem_file_manager_{nullptr}; - std::shared_ptr space_{nullptr}; }; } // namespace index diff --git a/internal/core/src/index/Index.h b/internal/core/src/index/Index.h index 7567bf63e3..0061253de6 100644 --- a/internal/core/src/index/Index.h +++ b/internal/core/src/index/Index.h @@ -44,9 +44,6 @@ class IndexBase { virtual void Load(milvus::tracer::TraceContext ctx, const Config& config = {}) = 0; - virtual void - LoadV2(const Config& config = {}) = 0; - virtual void BuildWithRawData(size_t n, const void* values, @@ -58,18 +55,12 @@ class IndexBase { virtual void Build(const Config& config = {}) = 0; - virtual void - BuildV2(const Config& Config = {}) = 0; - virtual int64_t Count() = 0; virtual BinarySet Upload(const Config& config = {}) = 0; - virtual BinarySet - UploadV2(const Config& config = {}) = 0; - virtual const bool HasRawData() const = 0; diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index cb5656d9eb..a80a643ca8 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -78,51 +78,6 @@ IndexFactory::CreatePrimitiveScalarIndex( #endif } -template -ScalarIndexPtr -IndexFactory::CreatePrimitiveScalarIndex( - const IndexType& index_type, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { - if (index_type == INVERTED_INDEX_TYPE) { - return std::make_unique>(file_manager_context, - space); - } - if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context, space); - } - if (index_type == HYBRID_INDEX_TYPE) { - return std::make_unique>(file_manager_context, - space); - } - return CreateScalarIndexSort(file_manager_context, space); -} - -template <> -ScalarIndexPtr -IndexFactory::CreatePrimitiveScalarIndex( - const IndexType& index_type, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { -#if defined(__linux__) || defined(__APPLE__) - if (index_type == INVERTED_INDEX_TYPE) { - return std::make_unique>( - file_manager_context, space); - } - if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context, - space); - } - if (index_type == HYBRID_INDEX_TYPE) { - return std::make_unique>( - file_manager_context, space); - } - return CreateStringIndexMarisa(file_manager_context, space); -#else - PanicInfo(Unsupported, "unsupported platform"); -#endif -} - IndexBasePtr IndexFactory::CreateIndex( const CreateIndexInfo& create_index_info, @@ -134,19 +89,6 @@ IndexFactory::CreateIndex( return CreateScalarIndex(create_index_info, file_manager_context); } -IndexBasePtr -IndexFactory::CreateIndex( - const CreateIndexInfo& create_index_info, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { - if (IsVectorDataType(create_index_info.field_type)) { - return CreateVectorIndex( - create_index_info, file_manager_context, space); - } - - return CreateScalarIndex(create_index_info, file_manager_context, space); -} - IndexBasePtr IndexFactory::CreatePrimitiveScalarIndex( DataType data_type, @@ -307,90 +249,4 @@ IndexFactory::CreateVectorIndex( } } } - -IndexBasePtr -IndexFactory::CreateVectorIndex( - const CreateIndexInfo& create_index_info, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { - auto data_type = create_index_info.field_type; - auto index_type = create_index_info.index_type; - auto metric_type = create_index_info.metric_type; - auto version = create_index_info.index_engine_version; - - if (knowhere::UseDiskLoad(index_type, version)) { - switch (data_type) { - case DataType::VECTOR_FLOAT: { - return std::make_unique>( - index_type, - metric_type, - version, - space, - file_manager_context); - } - case DataType::VECTOR_FLOAT16: { - return std::make_unique>( - index_type, - metric_type, - version, - space, - file_manager_context); - } - case DataType::VECTOR_BFLOAT16: { - return std::make_unique>( - index_type, - metric_type, - version, - space, - file_manager_context); - } - case DataType::VECTOR_BINARY: { - return std::make_unique>( - index_type, - metric_type, - version, - space, - file_manager_context); - } - case DataType::VECTOR_SPARSE_FLOAT: { - return std::make_unique>( - index_type, - metric_type, - version, - space, - file_manager_context); - } - default: - PanicInfo( - DataTypeInvalid, - fmt::format("invalid data type to build disk index: {}", - data_type)); - } - } else { // create mem index - switch (data_type) { - case DataType::VECTOR_FLOAT: - case DataType::VECTOR_SPARSE_FLOAT: { - return std::make_unique>( - create_index_info, file_manager_context, space); - } - case DataType::VECTOR_BINARY: { - return std::make_unique>( - create_index_info, file_manager_context, space); - } - case DataType::VECTOR_FLOAT16: { - return std::make_unique>( - create_index_info, file_manager_context, space); - } - case DataType::VECTOR_BFLOAT16: { - return std::make_unique>( - create_index_info, file_manager_context, space); - } - default: - PanicInfo( - DataTypeInvalid, - fmt::format("invalid data type to build mem index: {}", - data_type)); - } - } -} } // namespace milvus::index diff --git a/internal/core/src/index/IndexFactory.h b/internal/core/src/index/IndexFactory.h index 61c5119d4c..db46330a17 100644 --- a/internal/core/src/index/IndexFactory.h +++ b/internal/core/src/index/IndexFactory.h @@ -32,7 +32,6 @@ #include "index/ScalarIndexSort.h" #include "index/StringIndexMarisa.h" #include "index/BoolIndex.h" -#include "storage/space.h" namespace milvus::index { @@ -56,11 +55,6 @@ class IndexFactory { CreateIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context); - IndexBasePtr - CreateIndex(const CreateIndexInfo& create_index_info, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); - IndexBasePtr CreateVectorIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context); @@ -92,19 +86,6 @@ class IndexFactory { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); - IndexBasePtr - CreateVectorIndex(const CreateIndexInfo& create_index_info, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); - - IndexBasePtr - CreateScalarIndex(const CreateIndexInfo& create_index_info, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { - PanicInfo(ErrorCode::Unsupported, - "CreateScalarIndexV2 not implemented"); - } - // IndexBasePtr // CreateIndex(DataType dtype, const IndexType& index_type); private: @@ -115,12 +96,6 @@ class IndexFactory { CreatePrimitiveScalarIndex(const IndexType& index_type, const storage::FileManagerContext& file_manager = storage::FileManagerContext()); - - template - ScalarIndexPtr - CreatePrimitiveScalarIndex(const IndexType& index_type, - const storage::FileManagerContext& file_manager, - std::shared_ptr space); }; } // namespace milvus::index diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 22a71ed637..e95d107617 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -65,11 +65,10 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) { template InvertedIndexTantivy::InvertedIndexTantivy( - const storage::FileManagerContext& ctx, - std::shared_ptr space) - : space_(space), schema_(ctx.fieldDataMeta.field_schema) { - mem_file_manager_ = std::make_shared(ctx, ctx.space_); - disk_file_manager_ = std::make_shared(ctx, ctx.space_); + const storage::FileManagerContext& ctx) + : schema_(ctx.fieldDataMeta.field_schema) { + mem_file_manager_ = std::make_shared(ctx); + disk_file_manager_ = std::make_shared(ctx); auto field = std::to_string(disk_file_manager_->GetFieldDataMeta().field_id); auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix(); @@ -139,12 +138,6 @@ InvertedIndexTantivy::Upload(const Config& config) { return ret; } -template -BinarySet -InvertedIndexTantivy::UploadV2(const Config& config) { - return Upload(config); -} - template void InvertedIndexTantivy::Build(const Config& config) { @@ -156,28 +149,6 @@ InvertedIndexTantivy::Build(const Config& config) { BuildWithFieldData(field_datas); } -template -void -InvertedIndexTantivy::BuildV2(const Config& config) { - auto field_name = mem_file_manager_->GetIndexMeta().field_name; - auto reader = space_->ScanData(); - std::vector field_datas; - for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { - if (!rec.ok()) { - PanicInfo(DataFormatBroken, "failed to read data"); - } - auto data = rec.ValueUnsafe(); - auto total_num_rows = data->num_rows(); - auto col_data = data->GetColumnByName(field_name); - // todo: support nullable index - auto field_data = storage::CreateFieldData( - DataType(GetDType()), false, 0, total_num_rows); - field_data->FillFieldData(col_data); - field_datas.push_back(field_data); - } - BuildWithFieldData(field_datas); -} - template void InvertedIndexTantivy::Load(milvus::tracer::TraceContext ctx, @@ -201,14 +172,6 @@ InvertedIndexTantivy::Load(milvus::tracer::TraceContext ctx, wrapper_ = std::make_shared(prefix.c_str()); } -template -void -InvertedIndexTantivy::LoadV2(const Config& config) { - disk_file_manager_->CacheIndexToDisk(); - auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix(); - wrapper_ = std::make_shared(prefix.c_str()); -} - inline void apply_hits(TargetBitmap& bitset, const RustArrayWrapper& w, bool v) { for (size_t j = 0; j < w.array_.len; j++) { diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index faac636df2..14a34ddb3c 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -18,7 +18,6 @@ #include "tantivy-binding.h" #include "tantivy-wrapper.h" #include "index/StringIndex.h" -#include "storage/space.h" namespace milvus::index { @@ -34,13 +33,7 @@ class InvertedIndexTantivy : public ScalarIndex { using DiskFileManagerPtr = std::shared_ptr; InvertedIndexTantivy() = default; - - explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx) - : InvertedIndexTantivy(ctx, nullptr) { - } - - explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx, - std::shared_ptr space); + explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx); ~InvertedIndexTantivy(); @@ -56,9 +49,6 @@ class InvertedIndexTantivy : public ScalarIndex { void Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; - void - LoadV2(const Config& config = {}) override; - /* * deprecated. * TODO: why not remove this? @@ -78,9 +68,6 @@ class InvertedIndexTantivy : public ScalarIndex { void Build(const Config& config = {}) override; - void - BuildV2(const Config& config = {}) override; - int64_t Count() override { return wrapper_->count(); @@ -102,9 +89,6 @@ class InvertedIndexTantivy : public ScalarIndex { BinarySet Upload(const Config& config = {}) override; - BinarySet - UploadV2(const Config& config = {}) override; - /* * deprecated, only used in small chunk index. */ @@ -196,6 +180,5 @@ class InvertedIndexTantivy : public ScalarIndex { */ MemFileManagerPtr mem_file_manager_; DiskFileManagerPtr disk_file_manager_; - std::shared_ptr space_; }; } // namespace milvus::index diff --git a/internal/core/src/index/ScalarIndexSort.cpp b/internal/core/src/index/ScalarIndexSort.cpp index 842cd13e8f..e2d1bdb17a 100644 --- a/internal/core/src/index/ScalarIndexSort.cpp +++ b/internal/core/src/index/ScalarIndexSort.cpp @@ -44,73 +44,6 @@ ScalarIndexSort::ScalarIndexSort( } } -template -inline ScalarIndexSort::ScalarIndexSort( - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) - : is_built_(false), data_(), space_(space) { - if (file_manager_context.Valid()) { - file_manager_ = std::make_shared( - file_manager_context, space); - AssertInfo(file_manager_ != nullptr, "create file manager failed!"); - } -} - -template -inline void -ScalarIndexSort::BuildV2(const Config& config) { - if (is_built_) { - return; - } - auto field_name = file_manager_->GetIndexMeta().field_name; - auto reader = space_->ScanData(); - std::vector field_datas; - for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { - if (!rec.ok()) { - PanicInfo(DataFormatBroken, "failed to read data"); - } - auto data = rec.ValueUnsafe(); - auto total_num_rows = data->num_rows(); - auto col_data = data->GetColumnByName(field_name); - auto nullable = - col_data->type()->id() == arrow::Type::NA ? true : false; - // will support build scalar index when nullable in the future just skip it - // now, not support to build index in nullable field_data - // todo: support nullable index - AssertInfo(!nullable, - "not support to build index in nullable field_data"); - auto field_data = storage::CreateFieldData( - DataType(GetDType()), nullable, 0, total_num_rows); - field_data->FillFieldData(col_data); - field_datas.push_back(field_data); - } - int64_t total_num_rows = 0; - for (const auto& data : field_datas) { - total_num_rows += data->get_num_rows(); - } - if (total_num_rows == 0) { - PanicInfo(DataIsEmpty, "ScalarIndexSort cannot build null values!"); - } - - data_.reserve(total_num_rows); - int64_t offset = 0; - for (const auto& data : field_datas) { - auto slice_num = data->get_num_rows(); - for (size_t i = 0; i < slice_num; ++i) { - auto value = reinterpret_cast(data->RawValue(i)); - data_.emplace_back(IndexStructure(*value, offset)); - offset++; - } - } - - std::sort(data_.begin(), data_.end()); - idx_to_offsets_.resize(total_num_rows); - for (size_t i = 0; i < total_num_rows; ++i) { - idx_to_offsets_[data_[i].idx_] = i; - } - is_built_ = true; -} - template void ScalarIndexSort::Build(const Config& config) { @@ -215,21 +148,6 @@ ScalarIndexSort::Upload(const Config& config) { return ret; } -template -BinarySet -ScalarIndexSort::UploadV2(const Config& config) { - auto binary_set = Serialize(config); - file_manager_->AddFileV2(binary_set); - - auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize(); - BinarySet ret; - for (auto& file : remote_paths_to_size) { - ret.Append(file.first, nullptr, file.second); - } - - return ret; -} - template void ScalarIndexSort::LoadWithoutAssemble(const BinarySet& index_binary, @@ -277,47 +195,6 @@ ScalarIndexSort::Load(milvus::tracer::TraceContext ctx, LoadWithoutAssemble(binary_set, config); } -template -void -ScalarIndexSort::LoadV2(const Config& config) { - auto blobs = space_->StatisticsBlobs(); - std::vector index_files; - auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2(); - for (auto& b : blobs) { - if (b.name.rfind(prefix, 0) == 0) { - index_files.push_back(b.name); - } - } - std::map index_datas{}; - for (auto& file_name : index_files) { - auto res = space_->GetBlobByteSize(file_name); - if (!res.ok()) { - PanicInfo(S3Error, "unable to read index blob"); - } - auto index_blob_data = - std::shared_ptr(new uint8_t[res.value()]); - auto status = space_->ReadBlob(file_name, index_blob_data.get()); - if (!status.ok()) { - PanicInfo(S3Error, "unable to read index blob"); - } - auto raw_index_blob = - storage::DeserializeFileData(index_blob_data, res.value()); - auto key = file_name.substr(file_name.find_last_of('/') + 1); - index_datas[key] = raw_index_blob->GetFieldData(); - } - AssembleIndexDatas(index_datas); - BinarySet binary_set; - for (auto& [key, data] : index_datas) { - auto size = data->Size(); - auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction - auto buf = std::shared_ptr( - (uint8_t*)const_cast(data->Data()), deleter); - binary_set.Append(key, buf, size); - } - - LoadWithoutAssemble(binary_set, config); -} - template const TargetBitmap ScalarIndexSort::In(const size_t n, const T* values) { diff --git a/internal/core/src/index/ScalarIndexSort.h b/internal/core/src/index/ScalarIndexSort.h index da24dc530b..cee30ca62e 100644 --- a/internal/core/src/index/ScalarIndexSort.h +++ b/internal/core/src/index/ScalarIndexSort.h @@ -26,7 +26,6 @@ #include "index/IndexStructure.h" #include "index/ScalarIndex.h" #include "storage/MemFileManagerImpl.h" -#include "storage/space.h" namespace milvus::index { @@ -37,10 +36,6 @@ class ScalarIndexSort : public ScalarIndex { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); - explicit ScalarIndexSort( - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); - BinarySet Serialize(const Config& config) override; @@ -50,9 +45,6 @@ class ScalarIndexSort : public ScalarIndex { void Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; - void - LoadV2(const Config& config = {}) override; - int64_t Count() override { return data_.size(); @@ -69,9 +61,6 @@ class ScalarIndexSort : public ScalarIndex { void Build(const Config& config = {}) override; - void - BuildV2(const Config& config = {}) override; - const TargetBitmap In(size_t n, const T* values) override; @@ -97,8 +86,6 @@ class ScalarIndexSort : public ScalarIndex { BinarySet Upload(const Config& config = {}) override; - BinarySet - UploadV2(const Config& config = {}) override; const bool HasRawData() const override { @@ -133,7 +120,6 @@ class ScalarIndexSort : public ScalarIndex { std::vector idx_to_offsets_; // used to retrieve. std::vector> data_; std::shared_ptr file_manager_; - std::shared_ptr space_; }; template @@ -148,11 +134,4 @@ CreateScalarIndexSort(const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()) { return std::make_unique>(file_manager_context); } - -template -inline ScalarIndexSortPtr -CreateScalarIndexSort(const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { - return std::make_unique>(file_manager_context, space); -} } // namespace milvus::index diff --git a/internal/core/src/index/StringIndexMarisa.cpp b/internal/core/src/index/StringIndexMarisa.cpp index 3d861793f0..9f3e455781 100644 --- a/internal/core/src/index/StringIndexMarisa.cpp +++ b/internal/core/src/index/StringIndexMarisa.cpp @@ -36,7 +36,6 @@ #include "index/Utils.h" #include "index/Index.h" #include "storage/Util.h" -#include "storage/space.h" namespace milvus::index { @@ -48,16 +47,6 @@ StringIndexMarisa::StringIndexMarisa( } } -StringIndexMarisa::StringIndexMarisa( - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) - : space_(space) { - if (file_manager_context.Valid()) { - file_manager_ = std::make_shared( - file_manager_context, space_); - } -} - int64_t StringIndexMarisa::Size() { return trie_.size(); @@ -68,65 +57,6 @@ valid_str_id(size_t str_id) { return str_id >= 0 && str_id != MARISA_INVALID_KEY_ID; } -void -StringIndexMarisa::BuildV2(const Config& config) { - if (built_) { - throw std::runtime_error("index has been built"); - } - auto field_name = file_manager_->GetIndexMeta().field_name; - auto reader = space_->ScanData(); - std::vector field_datas; - for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { - if (!rec.ok()) { - PanicInfo(DataFormatBroken, "failed to read data"); - } - auto data = rec.ValueUnsafe(); - auto total_num_rows = data->num_rows(); - auto col_data = data->GetColumnByName(field_name); - auto nullable = - col_data->type()->id() == arrow::Type::NA ? true : false; - // will support build scalar index when nullable in the future just skip it - // now, not support to build index in nullable field_data - // todo: support nullable index - AssertInfo(!nullable, - "not support to build index in nullable field_data"); - auto field_data = storage::CreateFieldData( - DataType::STRING, nullable, 0, total_num_rows); - field_data->FillFieldData(col_data); - field_datas.push_back(field_data); - } - int64_t total_num_rows = 0; - - // fill key set. - marisa::Keyset keyset; - for (auto data : field_datas) { - auto slice_num = data->get_num_rows(); - for (size_t i = 0; i < slice_num; ++i) { - keyset.push_back( - (*static_cast(data->RawValue(i))).c_str()); - } - total_num_rows += slice_num; - } - trie_.build(keyset); - - // fill str_ids_ - str_ids_.resize(total_num_rows); - int64_t offset = 0; - for (auto data : field_datas) { - auto slice_num = data->get_num_rows(); - for (size_t i = 0; i < slice_num; ++i) { - auto str_id = - lookup(*static_cast(data->RawValue(i))); - AssertInfo(valid_str_id(str_id), "invalid marisa key"); - str_ids_[offset++] = str_id; - } - } - - // fill str_ids_to_offsets_ - fill_offsets(); - - built_ = true; -} void StringIndexMarisa::Build(const Config& config) { if (built_) { @@ -245,20 +175,6 @@ StringIndexMarisa::Upload(const Config& config) { return ret; } -BinarySet -StringIndexMarisa::UploadV2(const Config& config) { - auto binary_set = Serialize(config); - file_manager_->AddFileV2(binary_set); - - auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize(); - BinarySet ret; - for (auto& file : remote_paths_to_size) { - ret.Append(file.first, nullptr, file.second); - } - - return ret; -} - void StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set, const Config& config) { @@ -322,46 +238,6 @@ StringIndexMarisa::Load(milvus::tracer::TraceContext ctx, LoadWithoutAssemble(binary_set, config); } -void -StringIndexMarisa::LoadV2(const Config& config) { - auto blobs = space_->StatisticsBlobs(); - std::vector index_files; - auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2(); - for (auto& b : blobs) { - if (b.name.rfind(prefix, 0) == 0) { - index_files.push_back(b.name); - } - } - std::map index_datas{}; - for (auto& file_name : index_files) { - auto res = space_->GetBlobByteSize(file_name); - if (!res.ok()) { - PanicInfo(DataFormatBroken, "unable to read index blob"); - } - auto index_blob_data = - std::shared_ptr(new uint8_t[res.value()]); - auto status = space_->ReadBlob(file_name, index_blob_data.get()); - if (!status.ok()) { - PanicInfo(DataFormatBroken, "unable to read index blob"); - } - auto raw_index_blob = - storage::DeserializeFileData(index_blob_data, res.value()); - index_datas[file_name] = raw_index_blob->GetFieldData(); - } - AssembleIndexDatas(index_datas); - BinarySet binary_set; - for (auto& [key, data] : index_datas) { - auto size = data->Size(); - auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction - auto buf = std::shared_ptr( - (uint8_t*)const_cast(data->Data()), deleter); - auto file_name = key.substr(key.find_last_of('/') + 1); - binary_set.Append(file_name, buf, size); - } - - LoadWithoutAssemble(binary_set, config); -} - const TargetBitmap StringIndexMarisa::In(size_t n, const std::string* values) { TargetBitmap bitset(str_ids_.size()); diff --git a/internal/core/src/index/StringIndexMarisa.h b/internal/core/src/index/StringIndexMarisa.h index 8b67549db9..c9e91e3217 100644 --- a/internal/core/src/index/StringIndexMarisa.h +++ b/internal/core/src/index/StringIndexMarisa.h @@ -23,7 +23,6 @@ #include #include #include "storage/MemFileManagerImpl.h" -#include "storage/space.h" namespace milvus::index { @@ -33,10 +32,6 @@ class StringIndexMarisa : public StringIndex { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); - explicit StringIndexMarisa( - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); - int64_t Size() override; @@ -49,9 +44,6 @@ class StringIndexMarisa : public StringIndex { void Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; - void - LoadV2(const Config& config = {}) override; - int64_t Count() override { return str_ids_.size(); @@ -71,9 +63,6 @@ class StringIndexMarisa : public StringIndex { void BuildWithFieldData(const std::vector& field_datas) override; - void - BuildV2(const Config& Config = {}) override; - const TargetBitmap In(size_t n, const std::string* values) override; @@ -98,9 +87,6 @@ class StringIndexMarisa : public StringIndex { BinarySet Upload(const Config& config = {}) override; - BinarySet - UploadV2(const Config& config = {}); - const bool HasRawData() const override { return true; @@ -131,7 +117,6 @@ class StringIndexMarisa : public StringIndex { std::map> str_ids_to_offsets_; bool built_ = false; std::shared_ptr file_manager_; - std::shared_ptr space_; }; using StringIndexMarisaPtr = std::unique_ptr; @@ -142,10 +127,4 @@ CreateStringIndexMarisa( storage::FileManagerContext()) { return std::make_unique(file_manager_context); } - -inline StringIndexPtr -CreateStringIndexMarisa(const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { - return std::make_unique(file_manager_context, space); -} } // namespace milvus::index diff --git a/internal/core/src/index/VectorDiskIndex.cpp b/internal/core/src/index/VectorDiskIndex.cpp index 73f8cb8b86..5bc7400ebe 100644 --- a/internal/core/src/index/VectorDiskIndex.cpp +++ b/internal/core/src/index/VectorDiskIndex.cpp @@ -73,45 +73,6 @@ VectorDiskAnnIndex::VectorDiskAnnIndex( } } -template -VectorDiskAnnIndex::VectorDiskAnnIndex( - const IndexType& index_type, - const MetricType& metric_type, - const IndexVersion& version, - std::shared_ptr space, - const storage::FileManagerContext& file_manager_context) - : space_(space), VectorIndex(index_type, metric_type) { - CheckMetricTypeSupport(metric_type); - file_manager_ = std::make_shared( - file_manager_context, file_manager_context.space_); - AssertInfo(file_manager_ != nullptr, "create file manager failed!"); - auto local_chunk_manager = - storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager(); - auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix(); - - // As we have guarded dup-load in QueryNode, - // this assertion failed only if the Milvus rebooted in the same pod, - // need to remove these files then re-load the segment - if (local_chunk_manager->Exist(local_index_path_prefix)) { - local_chunk_manager->RemoveDir(local_index_path_prefix); - } - CheckCompatible(version); - local_chunk_manager->CreateDir(local_index_path_prefix); - auto diskann_index_pack = - knowhere::Pack(std::shared_ptr(file_manager_)); - auto get_index_obj = knowhere::IndexFactory::Instance().Create( - GetIndexType(), version, diskann_index_pack); - if (get_index_obj.has_value()) { - index_ = get_index_obj.value(); - } else { - auto err = get_index_obj.error(); - if (err == knowhere::Status::invalid_index_error) { - PanicInfo(ErrorCode::Unsupported, get_index_obj.what()); - } - PanicInfo(ErrorCode::KnowhereError, get_index_obj.what()); - } -} - template void VectorDiskAnnIndex::Load(const BinarySet& binary_set /* not used */, @@ -153,21 +114,6 @@ VectorDiskAnnIndex::Load(milvus::tracer::TraceContext ctx, SetDim(index_.Dim()); } -template -void -VectorDiskAnnIndex::LoadV2(const Config& config) { - knowhere::Json load_config = update_load_json(config); - - file_manager_->CacheIndexToDisk(); - - auto stat = index_.Deserialize(knowhere::BinarySet(), load_config); - if (stat != knowhere::Status::success) - PanicInfo(ErrorCode::UnexpectedError, - "failed to Deserialize index, " + KnowhereStatusString(stat)); - - SetDim(index_.Dim()); -} - template BinarySet VectorDiskAnnIndex::Upload(const Config& config) { @@ -185,53 +131,6 @@ VectorDiskAnnIndex::Upload(const Config& config) { return ret; } -template -BinarySet -VectorDiskAnnIndex::UploadV2(const Config& config) { - return Upload(config); -} - -template -void -VectorDiskAnnIndex::BuildV2(const Config& config) { - knowhere::Json build_config; - build_config.update(config); - - auto local_data_path = file_manager_->CacheRawDataToDisk(space_); - build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path; - - auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix(); - build_config[DISK_ANN_PREFIX_PATH] = local_index_path_prefix; - - if (GetIndexType() == knowhere::IndexEnum::INDEX_DISKANN) { - auto num_threads = GetValueFromConfig( - build_config, DISK_ANN_BUILD_THREAD_NUM); - AssertInfo( - num_threads.has_value(), - "param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty"); - build_config[DISK_ANN_THREADS_NUM] = - std::atoi(num_threads.value().c_str()); - } - - auto opt_fields = GetValueFromConfig(config, VEC_OPT_FIELDS); - if (opt_fields.has_value() && index_.IsAdditionalScalarSupported()) { - build_config[VEC_OPT_FIELDS_PATH] = - file_manager_->CacheOptFieldToDisk(opt_fields.value()); - // `partition_key_isolation` is already in the config, so it falls through - // into the index Build call directly - } - - build_config.erase("insert_files"); - build_config.erase(VEC_OPT_FIELDS); - index_.Build({}, build_config); - - auto local_chunk_manager = - storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager(); - auto segment_id = file_manager_->GetFieldDataMeta().segment_id; - local_chunk_manager->RemoveDir( - storage::GetSegmentRawDataPathPrefix(local_chunk_manager, segment_id)); -} - template void VectorDiskAnnIndex::Build(const Config& config) { diff --git a/internal/core/src/index/VectorDiskIndex.h b/internal/core/src/index/VectorDiskIndex.h index 0fa4256801..d079bab4e5 100644 --- a/internal/core/src/index/VectorDiskIndex.h +++ b/internal/core/src/index/VectorDiskIndex.h @@ -21,7 +21,6 @@ #include "index/VectorIndex.h" #include "storage/DiskFileManagerImpl.h" -#include "storage/space.h" namespace milvus::index { @@ -35,14 +34,6 @@ class VectorDiskAnnIndex : public VectorIndex { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); - explicit VectorDiskAnnIndex( - const IndexType& index_type, - const MetricType& metric_type, - const IndexVersion& version, - std::shared_ptr space, - const storage::FileManagerContext& file_manager_context = - storage::FileManagerContext()); - BinarySet Serialize(const Config& config) override { // deprecated BinarySet binary_set; @@ -58,9 +49,6 @@ class VectorDiskAnnIndex : public VectorIndex { BinarySet Upload(const Config& config = {}) override; - BinarySet - UploadV2(const Config& config = {}) override; - int64_t Count() override { return index_.Count(); @@ -73,9 +61,6 @@ class VectorDiskAnnIndex : public VectorIndex { void Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; - void - LoadV2(const Config& config = {}) override; - void BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) override; @@ -83,9 +68,6 @@ class VectorDiskAnnIndex : public VectorIndex { void Build(const Config& config = {}) override; - void - BuildV2(const Config& config = {}) override; - void Query(const DatasetPtr dataset, const SearchInfo& search_info, @@ -119,7 +101,6 @@ class VectorDiskAnnIndex : public VectorIndex { knowhere::Index index_; std::shared_ptr file_manager_; uint32_t search_beamwidth_ = 8; - std::shared_ptr space_; }; template diff --git a/internal/core/src/index/VectorMemIndex.cpp b/internal/core/src/index/VectorMemIndex.cpp index 9861222548..0f515a442d 100644 --- a/internal/core/src/index/VectorMemIndex.cpp +++ b/internal/core/src/index/VectorMemIndex.cpp @@ -48,7 +48,6 @@ #include "storage/DataCodec.h" #include "storage/MemFileManagerImpl.h" #include "storage/ThreadPools.h" -#include "storage/space.h" #include "storage/Util.h" #include "monitor/prometheus_client.h" @@ -83,69 +82,6 @@ VectorMemIndex::VectorMemIndex( } } -template -VectorMemIndex::VectorMemIndex( - const CreateIndexInfo& create_index_info, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) - : VectorIndex(create_index_info.index_type, create_index_info.metric_type), - space_(space), - create_index_info_(create_index_info) { - CheckMetricTypeSupport(create_index_info.metric_type); - AssertInfo(!is_unsupported(create_index_info.index_type, - create_index_info.metric_type), - create_index_info.index_type + - " doesn't support metric: " + create_index_info.metric_type); - if (file_manager_context.Valid()) { - file_manager_ = std::make_shared( - file_manager_context, file_manager_context.space_); - AssertInfo(file_manager_ != nullptr, "create file manager failed!"); - } - auto version = create_index_info.index_engine_version; - CheckCompatible(version); - auto get_index_obj = - knowhere::IndexFactory::Instance().Create(GetIndexType(), version); - if (get_index_obj.has_value()) { - index_ = get_index_obj.value(); - } else { - auto err = get_index_obj.error(); - if (err == knowhere::Status::invalid_index_error) { - PanicInfo(ErrorCode::Unsupported, get_index_obj.what()); - } - PanicInfo(ErrorCode::KnowhereError, get_index_obj.what()); - } -} - -template -BinarySet -VectorMemIndex::UploadV2(const Config& config) { - auto binary_set = Serialize(config); - file_manager_->AddFileV2(binary_set); - - auto store_version = file_manager_->space()->GetCurrentVersion(); - std::shared_ptr store_version_data( - new uint8_t[sizeof(store_version)]); - store_version_data[0] = store_version & 0x00000000000000FF; - store_version = store_version >> 8; - store_version_data[1] = store_version & 0x00000000000000FF; - store_version = store_version >> 8; - store_version_data[2] = store_version & 0x00000000000000FF; - store_version = store_version >> 8; - store_version_data[3] = store_version & 0x00000000000000FF; - store_version = store_version >> 8; - store_version_data[4] = store_version & 0x00000000000000FF; - store_version = store_version >> 8; - store_version_data[5] = store_version & 0x00000000000000FF; - store_version = store_version >> 8; - store_version_data[6] = store_version & 0x00000000000000FF; - store_version = store_version >> 8; - store_version_data[7] = store_version & 0x00000000000000FF; - BinarySet ret; - ret.Append("index_store_version", store_version_data, 8); - - return ret; -} - template knowhere::expected> VectorMemIndex::VectorIterators(const milvus::DatasetPtr dataset, @@ -202,105 +138,6 @@ VectorMemIndex::Load(const BinarySet& binary_set, const Config& config) { LoadWithoutAssemble(binary_set, config); } -template -void -VectorMemIndex::LoadV2(const Config& config) { - if (config.contains(kMmapFilepath)) { - return LoadFromFileV2(config); - } - - auto blobs = space_->StatisticsBlobs(); - std::unordered_set pending_index_files; - auto index_prefix = file_manager_->GetRemoteIndexObjectPrefixV2(); - for (auto& blob : blobs) { - if (blob.name.rfind(index_prefix, 0) == 0) { - pending_index_files.insert(blob.name); - } - } - - auto slice_meta_file = index_prefix + "/" + INDEX_FILE_SLICE_META; - auto res = space_->GetBlobByteSize(std::string(slice_meta_file)); - std::map index_datas{}; - - if (!res.ok() && !res.status().IsFileNotFound()) { - PanicInfo(DataFormatBroken, "failed to read blob"); - } - bool slice_meta_exist = res.ok(); - - auto read_blob = [&](const std::string& file_name) - -> std::unique_ptr { - auto res = space_->GetBlobByteSize(file_name); - if (!res.ok()) { - PanicInfo(DataFormatBroken, "unable to read index blob"); - } - auto index_blob_data = - std::shared_ptr(new uint8_t[res.value()]); - auto status = space_->ReadBlob(file_name, index_blob_data.get()); - if (!status.ok()) { - PanicInfo(DataFormatBroken, "unable to read index blob"); - } - return storage::DeserializeFileData(index_blob_data, res.value()); - }; - if (slice_meta_exist) { - pending_index_files.erase(slice_meta_file); - auto slice_meta_sz = res.value(); - auto slice_meta_data = - std::shared_ptr(new uint8_t[slice_meta_sz]); - auto status = space_->ReadBlob(slice_meta_file, slice_meta_data.get()); - if (!status.ok()) { - PanicInfo(DataFormatBroken, "unable to read slice meta"); - } - auto raw_slice_meta = - storage::DeserializeFileData(slice_meta_data, slice_meta_sz); - Config meta_data = Config::parse(std::string( - static_cast(raw_slice_meta->GetFieldData()->Data()), - raw_slice_meta->GetFieldData()->Size())); - for (auto& item : meta_data[META]) { - std::string prefix = item[NAME]; - int slice_num = item[SLICE_NUM]; - auto total_len = static_cast(item[TOTAL_LEN]); - // todo: support nullable index - auto new_field_data = milvus::storage::CreateFieldData( - DataType::INT8, false, 1, total_len); - for (auto i = 0; i < slice_num; ++i) { - std::string file_name = - index_prefix + "/" + GenSlicedFileName(prefix, i); - auto raw_index_blob = read_blob(file_name); - new_field_data->FillFieldData( - raw_index_blob->GetFieldData()->Data(), - raw_index_blob->GetFieldData()->Size()); - pending_index_files.erase(file_name); - } - AssertInfo( - new_field_data->IsFull(), - "index len is inconsistent after disassemble and assemble"); - index_datas[prefix] = new_field_data; - } - } - - if (!pending_index_files.empty()) { - for (auto& file_name : pending_index_files) { - auto raw_index_blob = read_blob(file_name); - index_datas.insert({file_name, raw_index_blob->GetFieldData()}); - } - } - LOG_INFO("construct binary set..."); - BinarySet binary_set; - for (auto& [key, data] : index_datas) { - LOG_INFO("add index data to binary set: {}", key); - auto size = data->Size(); - auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction - auto buf = std::shared_ptr( - (uint8_t*)const_cast(data->Data()), deleter); - auto file_name = key.substr(key.find_last_of('/') + 1); - binary_set.Append(file_name, buf, size); - } - - LOG_INFO("load index into Knowhere..."); - LoadWithoutAssemble(binary_set, config); - LOG_INFO("load vector index done"); -} - template void VectorMemIndex::Load(milvus::tracer::TraceContext ctx, @@ -442,58 +279,6 @@ VectorMemIndex::BuildWithDataset(const DatasetPtr& dataset, SetDim(index_.Dim()); } -template -void -VectorMemIndex::BuildV2(const Config& config) { - auto field_name = create_index_info_.field_name; - auto field_type = create_index_info_.field_type; - auto dim = create_index_info_.dim; - auto reader = space_->ScanData(); - std::vector field_datas; - for (auto rec : *reader) { - if (!rec.ok()) { - PanicInfo(IndexBuildError, - "failed to read data: {}", - rec.status().ToString()); - } - auto data = rec.ValueUnsafe(); - if (data == nullptr) { - break; - } - auto total_num_rows = data->num_rows(); - auto col_data = data->GetColumnByName(field_name); - // todo: support nullable index - auto field_data = - storage::CreateFieldData(field_type, false, dim, total_num_rows); - field_data->FillFieldData(col_data); - field_datas.push_back(field_data); - } - int64_t total_size = 0; - int64_t total_num_rows = 0; - for (const auto& data : field_datas) { - total_size += data->Size(); - total_num_rows += data->get_num_rows(); - AssertInfo(dim == 0 || dim == data->get_dim(), - "inconsistent dim value between field datas!"); - } - - auto buf = std::shared_ptr(new uint8_t[total_size]); - int64_t offset = 0; - for (auto data : field_datas) { - std::memcpy(buf.get() + offset, data->Data(), data->Size()); - offset += data->Size(); - data.reset(); - } - field_datas.clear(); - - Config build_config; - build_config.update(config); - build_config.erase("insert_files"); - - auto dataset = GenDataset(total_num_rows, dim, buf.get()); - BuildWithDataset(dataset, build_config); -} - template void VectorMemIndex::Build(const Config& config) { @@ -852,109 +637,6 @@ void VectorMemIndex::LoadFromFile(const Config& config) { .count()); } -template -void -VectorMemIndex::LoadFromFileV2(const Config& config) { - auto filepath = GetValueFromConfig(config, kMmapFilepath); - AssertInfo(filepath.has_value(), "mmap filepath is empty when load index"); - - std::filesystem::create_directories( - std::filesystem::path(filepath.value()).parent_path()); - - auto file = File::Open(filepath.value(), O_CREAT | O_TRUNC | O_RDWR); - - auto blobs = space_->StatisticsBlobs(); - std::unordered_set pending_index_files; - auto index_prefix = file_manager_->GetRemoteIndexObjectPrefixV2(); - for (auto& blob : blobs) { - if (blob.name.rfind(index_prefix, 0) == 0) { - pending_index_files.insert(blob.name); - } - } - - auto slice_meta_file = index_prefix + "/" + INDEX_FILE_SLICE_META; - auto res = space_->GetBlobByteSize(std::string(slice_meta_file)); - - if (!res.ok() && !res.status().IsFileNotFound()) { - PanicInfo(DataFormatBroken, "failed to read blob"); - } - bool slice_meta_exist = res.ok(); - - auto read_blob = [&](const std::string& file_name) - -> std::unique_ptr { - auto res = space_->GetBlobByteSize(file_name); - if (!res.ok()) { - PanicInfo(DataFormatBroken, "unable to read index blob"); - } - auto index_blob_data = - std::shared_ptr(new uint8_t[res.value()]); - auto status = space_->ReadBlob(file_name, index_blob_data.get()); - if (!status.ok()) { - PanicInfo(DataFormatBroken, "unable to read index blob"); - } - return storage::DeserializeFileData(index_blob_data, res.value()); - }; - if (slice_meta_exist) { - pending_index_files.erase(slice_meta_file); - auto slice_meta_sz = res.value(); - auto slice_meta_data = - std::shared_ptr(new uint8_t[slice_meta_sz]); - auto status = space_->ReadBlob(slice_meta_file, slice_meta_data.get()); - if (!status.ok()) { - PanicInfo(DataFormatBroken, "unable to read slice meta"); - } - auto raw_slice_meta = - storage::DeserializeFileData(slice_meta_data, slice_meta_sz); - Config meta_data = Config::parse(std::string( - static_cast(raw_slice_meta->GetFieldData()->Data()), - raw_slice_meta->GetFieldData()->Size())); - for (auto& item : meta_data[META]) { - std::string prefix = item[NAME]; - int slice_num = item[SLICE_NUM]; - auto total_len = static_cast(item[TOTAL_LEN]); - - for (auto i = 0; i < slice_num; ++i) { - std::string file_name = - index_prefix + "/" + GenSlicedFileName(prefix, i); - auto raw_index_blob = read_blob(file_name); - auto written = - file.Write(raw_index_blob->GetFieldData()->Data(), - raw_index_blob->GetFieldData()->Size()); - pending_index_files.erase(file_name); - } - } - } - - if (!pending_index_files.empty()) { - for (auto& file_name : pending_index_files) { - auto raw_index_blob = read_blob(file_name); - file.Write(raw_index_blob->GetFieldData()->Data(), - raw_index_blob->GetFieldData()->Size()); - } - } - file.Close(); - - LOG_INFO("load index into Knowhere..."); - auto conf = config; - conf.erase(kMmapFilepath); - conf[kEnableMmap] = true; - auto stat = index_.DeserializeFromFile(filepath.value(), conf); - if (stat != knowhere::Status::success) { - PanicInfo(DataFormatBroken, - "failed to Deserialize index: {}", - KnowhereStatusString(stat)); - } - - auto dim = index_.Dim(); - this->SetDim(index_.Dim()); - - auto ok = unlink(filepath->data()); - AssertInfo(ok == 0, - "failed to unlink mmap index file {}: {}", - filepath.value(), - strerror(errno)); - LOG_INFO("load vector index done"); -} template class VectorMemIndex; template class VectorMemIndex; template class VectorMemIndex; diff --git a/internal/core/src/index/VectorMemIndex.h b/internal/core/src/index/VectorMemIndex.h index 6d04020f55..637c96879a 100644 --- a/internal/core/src/index/VectorMemIndex.h +++ b/internal/core/src/index/VectorMemIndex.h @@ -25,7 +25,6 @@ #include "knowhere/index/index_factory.h" #include "index/VectorIndex.h" #include "storage/MemFileManagerImpl.h" -#include "storage/space.h" #include "index/IndexInfo.h" namespace milvus::index { @@ -40,9 +39,6 @@ class VectorMemIndex : public VectorIndex { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); - explicit VectorMemIndex(const CreateIndexInfo& create_index_info, - const storage::FileManagerContext& file_manager, - std::shared_ptr space); BinarySet Serialize(const Config& config) override; @@ -52,9 +48,6 @@ class VectorMemIndex : public VectorIndex { void Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; - void - LoadV2(const Config& config = {}) override; - void BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) override; @@ -62,9 +55,6 @@ class VectorMemIndex : public VectorIndex { void Build(const Config& config = {}) override; - void - BuildV2(const Config& config = {}) override; - void AddWithDataset(const DatasetPtr& dataset, const Config& config) override; @@ -91,9 +81,6 @@ class VectorMemIndex : public VectorIndex { BinarySet Upload(const Config& config = {}) override; - BinarySet - UploadV2(const Config& config = {}) override; - knowhere::expected> VectorIterators(const DatasetPtr dataset, const knowhere::Json& json, @@ -107,14 +94,10 @@ class VectorMemIndex : public VectorIndex { void LoadFromFile(const Config& config); - void - LoadFromFileV2(const Config& config); - protected: Config config_; knowhere::Index index_; std::shared_ptr file_manager_; - std::shared_ptr space_; CreateIndexInfo create_index_info_; }; diff --git a/internal/core/src/indexbuilder/IndexCreatorBase.h b/internal/core/src/indexbuilder/IndexCreatorBase.h index 940b70e077..b6a2ac44b2 100644 --- a/internal/core/src/indexbuilder/IndexCreatorBase.h +++ b/internal/core/src/indexbuilder/IndexCreatorBase.h @@ -26,9 +26,6 @@ class IndexCreatorBase { virtual void Build() = 0; - virtual void - BuildV2() = 0; - virtual milvus::BinarySet Serialize() = 0; @@ -38,9 +35,6 @@ class IndexCreatorBase { virtual BinarySet Upload() = 0; - - virtual BinarySet - UploadV2() = 0; }; using IndexCreatorBasePtr = std::unique_ptr; diff --git a/internal/core/src/indexbuilder/IndexFactory.h b/internal/core/src/indexbuilder/IndexFactory.h index 1e2cc53f38..6aa0b48302 100644 --- a/internal/core/src/indexbuilder/IndexFactory.h +++ b/internal/core/src/indexbuilder/IndexFactory.h @@ -23,7 +23,6 @@ #include "indexbuilder/type_c.h" #include "storage/Types.h" #include "storage/FileManager.h" -#include "storage/space.h" namespace milvus::indexbuilder { @@ -74,41 +73,6 @@ class IndexFactory { fmt::format("invalid type is {}", invalid_dtype_msg)); } } - - IndexCreatorBasePtr - CreateIndex(DataType type, - const std::string& field_name, - const int64_t dim, - Config& config, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { - auto invalid_dtype_msg = - std::string("invalid data type: ") + std::to_string(int(type)); - - switch (type) { - case DataType::BOOL: - case DataType::INT8: - case DataType::INT16: - case DataType::INT32: - case DataType::INT64: - case DataType::FLOAT: - case DataType::DOUBLE: - case DataType::VARCHAR: - case DataType::STRING: - return CreateScalarIndex( - type, config, file_manager_context, space); - - case DataType::VECTOR_FLOAT: - case DataType::VECTOR_BINARY: - case DataType::VECTOR_FLOAT16: - case DataType::VECTOR_BFLOAT16: - case DataType::VECTOR_SPARSE_FLOAT: - return std::make_unique( - type, field_name, dim, config, file_manager_context, space); - default: - PanicInfo(ErrorCode::DataTypeInvalid, invalid_dtype_msg); - } - } }; } // namespace milvus::indexbuilder diff --git a/internal/core/src/indexbuilder/ScalarIndexCreator.cpp b/internal/core/src/indexbuilder/ScalarIndexCreator.cpp index 566e36c5c6..855be14760 100644 --- a/internal/core/src/indexbuilder/ScalarIndexCreator.cpp +++ b/internal/core/src/indexbuilder/ScalarIndexCreator.cpp @@ -36,18 +36,6 @@ ScalarIndexCreator::ScalarIndexCreator( index_info, file_manager_context); } -ScalarIndexCreator::ScalarIndexCreator( - DataType dtype, - Config& config, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) - : config_(config), dtype_(dtype) { - milvus::index::CreateIndexInfo index_info; - index_info.field_type = dtype_; - index_info.index_type = index_type(); - index_ = index::IndexFactory::GetInstance().CreateIndex( - index_info, file_manager_context, std::move(space)); -} void ScalarIndexCreator::Build(const milvus::DatasetPtr& dataset) { auto size = dataset->GetRows(); @@ -60,11 +48,6 @@ ScalarIndexCreator::Build() { index_->Build(config_); } -void -ScalarIndexCreator::BuildV2() { - index_->BuildV2(config_); -} - milvus::BinarySet ScalarIndexCreator::Serialize() { return index_->Serialize(config_); @@ -84,10 +67,4 @@ BinarySet ScalarIndexCreator::Upload() { return index_->Upload(); } - -BinarySet -ScalarIndexCreator::UploadV2() { - return index_->UploadV2(); -} - } // namespace milvus::indexbuilder diff --git a/internal/core/src/indexbuilder/ScalarIndexCreator.h b/internal/core/src/indexbuilder/ScalarIndexCreator.h index 8ca9071eff..2ac34050f0 100644 --- a/internal/core/src/indexbuilder/ScalarIndexCreator.h +++ b/internal/core/src/indexbuilder/ScalarIndexCreator.h @@ -17,7 +17,6 @@ #include #include "index/Index.h" #include "index/ScalarIndex.h" -#include "storage/space.h" namespace milvus::indexbuilder { @@ -27,19 +26,12 @@ class ScalarIndexCreator : public IndexCreatorBase { Config& config, const storage::FileManagerContext& file_manager_context); - ScalarIndexCreator(DataType data_type, - Config& config, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); void Build(const milvus::DatasetPtr& dataset) override; void Build() override; - void - BuildV2() override; - milvus::BinarySet Serialize() override; @@ -49,9 +41,6 @@ class ScalarIndexCreator : public IndexCreatorBase { BinarySet Upload() override; - BinarySet - UploadV2() override; - private: std::string index_type(); @@ -72,13 +61,4 @@ CreateScalarIndex(DataType dtype, return std::make_unique( dtype, config, file_manager_context); } - -inline ScalarIndexCreatorPtr -CreateScalarIndex(DataType dtype, - Config& config, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) { - return std::make_unique( - dtype, config, file_manager_context, space); -} } // namespace milvus::indexbuilder diff --git a/internal/core/src/indexbuilder/VecIndexCreator.cpp b/internal/core/src/indexbuilder/VecIndexCreator.cpp index 41caefd8af..789f10caa4 100644 --- a/internal/core/src/indexbuilder/VecIndexCreator.cpp +++ b/internal/core/src/indexbuilder/VecIndexCreator.cpp @@ -24,7 +24,7 @@ VecIndexCreator::VecIndexCreator( DataType data_type, Config& config, const storage::FileManagerContext& file_manager_context) - : VecIndexCreator(data_type, "", 0, config, file_manager_context, nullptr) { + : VecIndexCreator(data_type, "", 0, config, file_manager_context) { } VecIndexCreator::VecIndexCreator( @@ -32,9 +32,8 @@ VecIndexCreator::VecIndexCreator( const std::string& field_name, const int64_t dim, Config& config, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space) - : config_(config), data_type_(data_type), space_(std::move(space)) { + const storage::FileManagerContext& file_manager_context) + : config_(config), data_type_(data_type) { index::CreateIndexInfo index_info; index_info.field_type = data_type_; index_info.index_type = index::GetIndexTypeFromConfig(config_); @@ -45,7 +44,7 @@ VecIndexCreator::VecIndexCreator( index_info.dim = dim; index_ = index::IndexFactory::GetInstance().CreateIndex( - index_info, file_manager_context, space_); + index_info, file_manager_context); AssertInfo(index_ != nullptr, "[VecIndexCreator]Index is null after create index"); } @@ -65,11 +64,6 @@ VecIndexCreator::Build() { index_->Build(config_); } -void -VecIndexCreator::BuildV2() { - index_->BuildV2(config_); -} - milvus::BinarySet VecIndexCreator::Serialize() { return index_->Serialize(config_); @@ -95,11 +89,6 @@ VecIndexCreator::Upload() { return index_->Upload(); } -BinarySet -VecIndexCreator::UploadV2() { - return index_->UploadV2(); -} - void VecIndexCreator::CleanLocalData() { auto vector_index = dynamic_cast(index_.get()); diff --git a/internal/core/src/indexbuilder/VecIndexCreator.h b/internal/core/src/indexbuilder/VecIndexCreator.h index 2973f4f3b3..d8d605f321 100644 --- a/internal/core/src/indexbuilder/VecIndexCreator.h +++ b/internal/core/src/indexbuilder/VecIndexCreator.h @@ -20,7 +20,6 @@ #include "index/VectorIndex.h" #include "index/IndexInfo.h" #include "storage/Types.h" -#include "storage/space.h" namespace milvus::indexbuilder { @@ -37,17 +36,14 @@ class VecIndexCreator : public IndexCreatorBase { const std::string& field_name, const int64_t dim, Config& config, - const storage::FileManagerContext& file_manager_context, - std::shared_ptr space); + const storage::FileManagerContext& file_manager_context); + void Build(const milvus::DatasetPtr& dataset) override; void Build() override; - void - BuildV2() override; - milvus::BinarySet Serialize() override; @@ -65,9 +61,6 @@ class VecIndexCreator : public IndexCreatorBase { BinarySet Upload() override; - BinarySet - UploadV2() override; - public: void CleanLocalData(); @@ -76,8 +69,6 @@ class VecIndexCreator : public IndexCreatorBase { milvus::index::IndexBasePtr index_ = nullptr; Config config_; DataType data_type_; - - std::shared_ptr space_; }; } // namespace milvus::indexbuilder diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index 48e461fd01..fa045a527b 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -15,7 +15,6 @@ #include "fmt/core.h" #include "indexbuilder/type_c.h" #include "log/Log.h" -#include "storage/options.h" #ifdef __linux__ #include @@ -31,7 +30,6 @@ #include "index/Utils.h" #include "pb/index_cgo_msg.pb.h" #include "storage/Util.h" -#include "storage/space.h" #include "index/Meta.h" using namespace milvus; @@ -234,107 +232,6 @@ CreateIndex(CIndex* res_index, } } -CStatus -CreateIndexV2(CIndex* res_index, - const uint8_t* serialized_build_index_info, - const uint64_t len) { - try { - auto build_index_info = - std::make_unique(); - auto res = - build_index_info->ParseFromArray(serialized_build_index_info, len); - AssertInfo(res, "Unmarshall build index info failed"); - auto field_type = - static_cast(build_index_info->field_schema().data_type()); - - milvus::index::CreateIndexInfo index_info; - index_info.field_type = field_type; - index_info.dim = build_index_info->dim(); - - auto storage_config = - get_storage_config(build_index_info->storage_config()); - auto config = get_config(build_index_info); - // get index type - auto index_type = milvus::index::GetValueFromConfig( - config, "index_type"); - AssertInfo(index_type.has_value(), "index type is empty"); - index_info.index_type = index_type.value(); - - auto engine_version = build_index_info->current_index_version(); - index_info.index_engine_version = engine_version; - config[milvus::index::INDEX_ENGINE_VERSION] = - std::to_string(engine_version); - - // get metric type - if (milvus::IsVectorDataType(field_type)) { - auto metric_type = milvus::index::GetValueFromConfig( - config, "metric_type"); - AssertInfo(metric_type.has_value(), "metric type is empty"); - index_info.metric_type = metric_type.value(); - } - - milvus::storage::FieldDataMeta field_meta{ - build_index_info->collectionid(), - build_index_info->partitionid(), - build_index_info->segmentid(), - build_index_info->field_schema().fieldid(), - build_index_info->field_schema()}; - milvus::storage::IndexMeta index_meta{ - build_index_info->segmentid(), - build_index_info->field_schema().fieldid(), - build_index_info->buildid(), - build_index_info->index_version(), - "", - build_index_info->field_schema().name(), - field_type, - build_index_info->dim(), - }; - - auto store_space = milvus_storage::Space::Open( - build_index_info->store_path(), - milvus_storage::Options{nullptr, - build_index_info->store_version()}); - AssertInfo(store_space.ok() && store_space.has_value(), - "create space failed: {}", - store_space.status().ToString()); - - auto index_space = milvus_storage::Space::Open( - build_index_info->index_store_path(), - milvus_storage::Options{.schema = store_space.value()->schema()}); - AssertInfo(index_space.ok() && index_space.has_value(), - "create space failed: {}", - index_space.status().ToString()); - - LOG_INFO("init space success"); - auto chunk_manager = - milvus::storage::CreateChunkManager(storage_config); - milvus::storage::FileManagerContext fileManagerContext( - field_meta, - index_meta, - chunk_manager, - std::move(index_space.value())); - - auto index = - milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex( - field_type, - build_index_info->field_schema().name(), - build_index_info->dim(), - config, - fileManagerContext, - std::move(store_space.value())); - index->BuildV2(); - *res_index = index.release(); - return milvus::SuccessCStatus(); - } catch (SegcoreError& e) { - auto status = CStatus(); - status.error_code = e.get_error_code(); - status.error_msg = strdup(e.what()); - return status; - } catch (std::exception& e) { - return milvus::FailureCStatus(&e); - } -} - CStatus DeleteIndex(CIndex index) { auto status = CStatus(); @@ -823,29 +720,6 @@ SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set) { return status; } -CStatus -SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set) { - auto status = CStatus(); - try { - AssertInfo( - index, - "failed to serialize index to binary set, passed index was null"); - - auto real_index = - reinterpret_cast(index); - - auto binary = - std::make_unique(real_index->UploadV2()); - *c_binary_set = binary.release(); - status.error_code = Success; - status.error_msg = ""; - } catch (std::exception& e) { - status.error_code = UnexpectedError; - status.error_msg = strdup(e.what()); - } - return status; -} - CStatus AppendOptionalFieldDataPath(CBuildIndexInfo c_build_index_info, const int64_t field_id, diff --git a/internal/core/src/indexbuilder/index_c.h b/internal/core/src/indexbuilder/index_c.h index 53ce5552fe..ce94d68e8f 100644 --- a/internal/core/src/indexbuilder/index_c.h +++ b/internal/core/src/indexbuilder/index_c.h @@ -128,14 +128,6 @@ AppendOptionalFieldDataPath(CBuildIndexInfo c_build_index_info, CStatus SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set); -CStatus -SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set); - -CStatus -CreateIndexV2(CIndex* res_index, - const uint8_t* serialized_build_index_info, - const uint64_t len); - CStatus AppendIndexStorageInfo(CBuildIndexInfo c_build_index_info, const char* c_data_store_path, diff --git a/internal/core/src/segcore/CMakeLists.txt b/internal/core/src/segcore/CMakeLists.txt index b783afb361..378240aee9 100644 --- a/internal/core/src/segcore/CMakeLists.txt +++ b/internal/core/src/segcore/CMakeLists.txt @@ -43,6 +43,6 @@ set(SEGCORE_FILES reduce/GroupReduce.cpp) add_library(milvus_segcore SHARED ${SEGCORE_FILES}) -target_link_libraries(milvus_segcore milvus_query milvus_bitset milvus_exec ${OpenMP_CXX_FLAGS} milvus-storage milvus_futures) +target_link_libraries(milvus_segcore milvus_query milvus_bitset milvus_exec ${OpenMP_CXX_FLAGS} milvus_futures) install(TARGETS milvus_segcore DESTINATION "${CMAKE_INSTALL_LIBDIR}") diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 9e65293868..dabe2f5029 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -33,8 +33,6 @@ #include "storage/RemoteChunkManagerSingleton.h" #include "storage/Util.h" #include "storage/ThreadPools.h" -#include "storage/options.h" -#include "storage/space.h" namespace milvus::segcore { @@ -280,89 +278,6 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) { reserved_offset + num_rows); } -void -SegmentGrowingImpl::LoadFieldDataV2(const LoadFieldDataInfo& infos) { - // schema don't include system field - AssertInfo(infos.field_infos.size() == schema_->size() + 2, - "lost some field data when load for growing segment"); - AssertInfo(infos.field_infos.find(TimestampFieldID.get()) != - infos.field_infos.end(), - "timestamps field data should be included"); - AssertInfo( - infos.field_infos.find(RowFieldID.get()) != infos.field_infos.end(), - "rowID field data should be included"); - auto primary_field_id = - schema_->get_primary_field_id().value_or(FieldId(-1)); - AssertInfo(primary_field_id.get() != INVALID_FIELD_ID, "Primary key is -1"); - AssertInfo(infos.field_infos.find(primary_field_id.get()) != - infos.field_infos.end(), - "primary field data should be included"); - - size_t num_rows = storage::GetNumRowsForLoadInfo(infos); - auto reserved_offset = PreInsert(num_rows); - for (auto& [id, info] : infos.field_infos) { - auto field_id = FieldId(id); - auto field_data_info = FieldDataInfo(field_id.get(), num_rows); - auto& pool = - ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE); - auto res = milvus_storage::Space::Open( - infos.url, milvus_storage::Options{nullptr, infos.storage_version}); - AssertInfo(res.ok(), "init space failed"); - std::shared_ptr space = std::move(res.value()); - auto load_future = pool.Submit( - LoadFieldDatasFromRemote2, space, schema_, field_data_info); - auto field_data = - milvus::storage::CollectFieldDataChannel(field_data_info.channel); - if (field_id == TimestampFieldID) { - // step 2: sort timestamp - // query node already guarantees that the timestamp is ordered, avoid field data copy in c++ - - // step 3: fill into Segment.ConcurrentVector - insert_record_.timestamps_.set_data_raw(reserved_offset, - field_data); - continue; - } - - if (field_id == RowFieldID) { - continue; - } - - if (!indexing_record_.SyncDataWithIndex(field_id)) { - insert_record_.get_data_base(field_id)->set_data_raw( - reserved_offset, field_data); - } - if (segcore_config_.get_enable_interim_segment_index()) { - auto offset = reserved_offset; - for (auto& data : field_data) { - auto row_count = data->get_num_rows(); - indexing_record_.AppendingIndex( - offset, row_count, field_id, data, insert_record_); - offset += row_count; - } - } - try_remove_chunks(field_id); - - if (field_id == primary_field_id) { - insert_record_.insert_pks(field_data); - } - - // update average row data size - auto field_meta = (*schema_)[field_id]; - if (IsVariableDataType(field_meta.get_data_type())) { - SegmentInternalInterface::set_field_avg_size( - field_id, - num_rows, - storage::GetByteSizeOfFieldDatas(field_data)); - } - - // update the mem size - stats_.mem_size += storage::GetByteSizeOfFieldDatas(field_data); - } - - // step 5: update small indexes - insert_record_.ack_responder_.AddSegment(reserved_offset, - reserved_offset + num_rows); -} SegcoreError SegmentGrowingImpl::Delete(int64_t reserved_begin, int64_t size, diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index e5000b9c08..37eaff6cb4 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -64,8 +64,6 @@ class SegmentGrowingImpl : public SegmentGrowing { void LoadFieldData(const LoadFieldDataInfo& info) override; - void - LoadFieldDataV2(const LoadFieldDataInfo& info) override; void RemoveDuplicatePkRecords() override; diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 3b8ec4af0a..958fe73127 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -115,9 +115,6 @@ class SegmentInterface { virtual void LoadFieldData(const LoadFieldDataInfo& info) = 0; - virtual void - LoadFieldDataV2(const LoadFieldDataInfo& info) = 0; - virtual void RemoveDuplicatePkRecords() = 0; diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 56925b8e8d..3d1c215b75 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -281,59 +281,6 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& load_info) { } } -void -SegmentSealedImpl::LoadFieldDataV2(const LoadFieldDataInfo& load_info) { - // TODO(SPARSE): support storage v2 - // NOTE: lock only when data is ready to avoid starvation - // only one field for now, parallel load field data in golang - size_t num_rows = storage::GetNumRowsForLoadInfo(load_info); - - for (auto& [id, info] : load_info.field_infos) { - AssertInfo(info.row_count > 0, "The row count of field data is 0"); - - auto field_id = FieldId(id); - auto insert_files = info.insert_files; - auto field_data_info = - FieldDataInfo(field_id.get(), num_rows, load_info.mmap_dir_path); - - LOG_INFO("segment {} loads field {} with num_rows {}", - this->get_segment_id(), - field_id.get(), - num_rows); - - auto parallel_degree = static_cast( - DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE); - field_data_info.channel->set_capacity(parallel_degree * 2); - auto& pool = - ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE); - // auto load_future = pool.Submit( - // LoadFieldDatasFromRemote, insert_files, field_data_info.channel); - - auto res = milvus_storage::Space::Open( - load_info.url, - milvus_storage::Options{nullptr, load_info.storage_version}); - AssertInfo(res.ok(), - fmt::format("init space failed: {}, error: {}", - load_info.url, - res.status().ToString())); - std::shared_ptr space = std::move(res.value()); - auto load_future = pool.Submit( - LoadFieldDatasFromRemote2, space, schema_, field_data_info); - LOG_INFO("segment {} submits load field {} task to thread pool", - this->get_segment_id(), - field_id.get()); - if (load_info.mmap_dir_path.empty() || - SystemProperty::Instance().IsSystem(field_id)) { - LoadFieldData(field_id, field_data_info); - } else { - MapFieldData(field_id, field_data_info); - } - LOG_INFO("segment {} loads field {} done", - this->get_segment_id(), - field_id.get()); - } -} - void SegmentSealedImpl::RemoveDuplicatePkRecords() { std::unique_lock lck(mutex_); diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 79ed3f136e..2059ba1c6d 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -50,8 +50,6 @@ class SegmentSealedImpl : public SegmentSealed { LoadIndex(const LoadIndexInfo& info) override; void LoadFieldData(const LoadFieldDataInfo& info) override; - void - LoadFieldDataV2(const LoadFieldDataInfo& info) override; // erase duplicate records when sealed segment loaded done void RemoveDuplicatePkRecords() override; diff --git a/internal/core/src/segcore/Utils.cpp b/internal/core/src/segcore/Utils.cpp index c874de7a84..a9ff746c2a 100644 --- a/internal/core/src/segcore/Utils.cpp +++ b/internal/core/src/segcore/Utils.cpp @@ -780,35 +780,7 @@ ReverseDataFromIndex(const index::IndexBase* index, return data_array; } -void -LoadFieldDatasFromRemote2(std::shared_ptr space, - SchemaPtr schema, - FieldDataInfo& field_data_info) { - auto reader = space->ScanData(); - for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { - if (!rec.ok()) { - PanicInfo(DataFormatBroken, "failed to read data"); - } - auto data = rec.ValueUnsafe(); - auto total_num_rows = data->num_rows(); - for (auto& field : schema->get_fields()) { - if (field.second.get_id().get() != field_data_info.field_id) { - continue; - } - auto col_data = - data->GetColumnByName(field.second.get_name().get()); - auto field_data = storage::CreateFieldData( - field.second.get_data_type(), - field.second.is_nullable(), - field.second.is_vector() ? field.second.get_dim() : 0, - total_num_rows); - field_data->FillFieldData(col_data); - field_data_info.channel->push(field_data); - } - } - field_data_info.channel->close(); -} // init segcore storage config first, and create default remote chunk manager // segcore use default remote chunk manager to load data from minio/s3 void diff --git a/internal/core/src/segcore/Utils.h b/internal/core/src/segcore/Utils.h index 51e9cf0d1b..5f308074a4 100644 --- a/internal/core/src/segcore/Utils.h +++ b/internal/core/src/segcore/Utils.h @@ -28,7 +28,6 @@ #include "log/Log.h" #include "segcore/DeletedRecord.h" #include "segcore/InsertRecord.h" -#include "storage/space.h" namespace milvus::segcore { @@ -119,10 +118,6 @@ void LoadFieldDatasFromRemote(const std::vector& remote_files, FieldDataChannelPtr channel); -void -LoadFieldDatasFromRemote2(std::shared_ptr space, - SchemaPtr schema, - FieldDataInfo& field_data_info); /** * Returns an index pointing to the first element in the range [first, last) such that `value < element` is true * (i.e. that is strictly greater than value), or last if no such element is found. diff --git a/internal/core/src/segcore/load_index_c.cpp b/internal/core/src/segcore/load_index_c.cpp index 3df3a92879..0db7f7e3ec 100644 --- a/internal/core/src/segcore/load_index_c.cpp +++ b/internal/core/src/segcore/load_index_c.cpp @@ -318,77 +318,6 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) { } } -CStatus -AppendIndexV3(CLoadIndexInfo c_load_index_info) { - try { - auto load_index_info = - (milvus::segcore::LoadIndexInfo*)c_load_index_info; - auto& index_params = load_index_info->index_params; - auto field_type = load_index_info->field_type; - - milvus::index::CreateIndexInfo index_info; - index_info.field_type = load_index_info->field_type; - - // get index type - AssertInfo(index_params.find("index_type") != index_params.end(), - "index type is empty"); - index_info.index_type = index_params.at("index_type"); - - // get metric type - if (milvus::IsVectorDataType(field_type)) { - AssertInfo(index_params.find("metric_type") != index_params.end(), - "metric type is empty for vector index"); - index_info.metric_type = index_params.at("metric_type"); - } - - milvus::storage::FieldDataMeta field_meta{ - load_index_info->collection_id, - load_index_info->partition_id, - load_index_info->segment_id, - load_index_info->field_id}; - milvus::storage::IndexMeta index_meta{load_index_info->segment_id, - load_index_info->field_id, - load_index_info->index_build_id, - load_index_info->index_version}; - auto config = milvus::index::ParseConfigFromIndexParams( - load_index_info->index_params); - - auto res = milvus_storage::Space::Open( - load_index_info->uri, - milvus_storage::Options{nullptr, - load_index_info->index_store_version}); - AssertInfo(res.ok(), "init space failed"); - std::shared_ptr space = std::move(res.value()); - - milvus::storage::FileManagerContext fileManagerContext( - field_meta, index_meta, nullptr, space); - load_index_info->index = - milvus::index::IndexFactory::GetInstance().CreateIndex( - index_info, fileManagerContext, space); - - if (!load_index_info->mmap_dir_path.empty() && - load_index_info->index->IsMmapSupported()) { - auto filepath = - std::filesystem::path(load_index_info->mmap_dir_path) / - std::to_string(load_index_info->segment_id) / - std::to_string(load_index_info->field_id) / - std::to_string(load_index_info->index_id); - - config[kMmapFilepath] = filepath.string(); - } - - load_index_info->index->LoadV2(config); - auto status = CStatus(); - status.error_code = milvus::Success; - status.error_msg = ""; - return status; - } catch (std::exception& e) { - auto status = CStatus(); - status.error_code = milvus::UnexpectedError; - status.error_msg = strdup(e.what()); - return status; - } -} CStatus AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* c_file_path) { try { diff --git a/internal/core/src/segcore/load_index_c.h b/internal/core/src/segcore/load_index_c.h index 8755aa7396..db0108dcd7 100644 --- a/internal/core/src/segcore/load_index_c.h +++ b/internal/core/src/segcore/load_index_c.h @@ -62,9 +62,6 @@ AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* file_path); CStatus AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info); -CStatus -AppendIndexV3(CLoadIndexInfo c_load_index_info); - CStatus AppendIndexEngineVersionToLoadInfo(CLoadIndexInfo c_load_index_info, int32_t index_engine_version); diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index d8e8421f46..f45d58ad2a 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -29,7 +29,6 @@ #include "storage/Util.h" #include "futures/Future.h" #include "futures/Executor.h" -#include "storage/space.h" ////////////////////////////// common interfaces ////////////////////////////// CStatus @@ -339,20 +338,6 @@ RemoveDuplicatePkRecords(CSegmentInterface c_segment) { } } -CStatus -LoadFieldDataV2(CSegmentInterface c_segment, - CLoadFieldDataInfo c_load_field_data_info) { - try { - auto segment = - reinterpret_cast(c_segment); - AssertInfo(segment != nullptr, "segment conversion failed"); - auto load_info = (LoadFieldDataInfo*)c_load_field_data_info; - segment->LoadFieldDataV2(*load_info); - return milvus::SuccessCStatus(); - } catch (std::exception& e) { - return milvus::FailureCStatus(&e); - } -} // just for test CStatus LoadFieldRawData(CSegmentInterface c_segment, diff --git a/internal/core/src/segcore/segment_c.h b/internal/core/src/segcore/segment_c.h index 2e579961d7..827ff5a831 100644 --- a/internal/core/src/segcore/segment_c.h +++ b/internal/core/src/segcore/segment_c.h @@ -102,10 +102,6 @@ CStatus LoadFieldData(CSegmentInterface c_segment, CLoadFieldDataInfo load_field_data_info); -CStatus -LoadFieldDataV2(CSegmentInterface c_segment, - CLoadFieldDataInfo load_field_data_info); - CStatus RemoveDuplicatePkRecords(CSegmentInterface c_segment); diff --git a/internal/core/src/storage/CMakeLists.txt b/internal/core/src/storage/CMakeLists.txt index aa4b02f640..11cd21c5d9 100644 --- a/internal/core/src/storage/CMakeLists.txt +++ b/internal/core/src/storage/CMakeLists.txt @@ -69,7 +69,6 @@ if (DEFINED AZURE_BUILD_DIR) "-L${AZURE_BUILD_DIR} -lblob-chunk-manager" blob-chunk-manager milvus_common - milvus-storage milvus_monitor pthread ${CONAN_LIBS} @@ -77,7 +76,6 @@ if (DEFINED AZURE_BUILD_DIR) else () target_link_libraries(milvus_storage PUBLIC milvus_common - milvus-storage milvus_monitor pthread ${CONAN_LIBS} diff --git a/internal/core/src/storage/DiskFileManagerImpl.cpp b/internal/core/src/storage/DiskFileManagerImpl.cpp index 34332f5240..919c218b1f 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.cpp +++ b/internal/core/src/storage/DiskFileManagerImpl.cpp @@ -45,16 +45,6 @@ #include "storage/Util.h" namespace milvus::storage { - -DiskFileManagerImpl::DiskFileManagerImpl( - const FileManagerContext& fileManagerContext, - std::shared_ptr space) - : FileManagerImpl(fileManagerContext.fieldDataMeta, - fileManagerContext.indexMeta), - space_(space) { - rcm_ = fileManagerContext.chunkManagerPtr; -} - DiskFileManagerImpl::DiskFileManagerImpl( const FileManagerContext& fileManagerContext) : FileManagerImpl(fileManagerContext.fieldDataMeta, @@ -78,39 +68,10 @@ std::string DiskFileManagerImpl::GetRemoteIndexPath(const std::string& file_name, int64_t slice_num) const { std::string remote_prefix; - if (space_ != nullptr) { - remote_prefix = GetRemoteIndexObjectPrefixV2(); - } else { - remote_prefix = GetRemoteIndexObjectPrefix(); - } + remote_prefix = GetRemoteIndexObjectPrefix(); return remote_prefix + "/" + file_name + "_" + std::to_string(slice_num); } -bool -DiskFileManagerImpl::AddFileUsingSpace( - const std::string& local_file_name, - const std::vector& local_file_offsets, - const std::vector& remote_files, - const std::vector& remote_file_sizes) { - auto local_chunk_manager = - LocalChunkManagerSingleton::GetInstance().GetChunkManager(); - for (int64_t i = 0; i < remote_files.size(); ++i) { - auto buf = - std::shared_ptr(new uint8_t[remote_file_sizes[i]]); - local_chunk_manager->Read(local_file_name, - local_file_offsets[i], - buf.get(), - remote_file_sizes[i]); - - auto status = - space_->WriteBlob(remote_files[i], buf.get(), remote_file_sizes[i]); - if (!status.ok()) { - return false; - } - } - return true; -} - bool DiskFileManagerImpl::AddFile(const std::string& file) noexcept { auto local_chunk_manager = @@ -204,85 +165,17 @@ DiskFileManagerImpl::AddBatchIndexFiles( } std::map res; - if (space_ != nullptr) { - res = PutIndexData(space_, - data_slices, - remote_file_sizes, - remote_files, - field_meta_, - index_meta_); - } else { - res = PutIndexData(rcm_.get(), - data_slices, - remote_file_sizes, - remote_files, - field_meta_, - index_meta_); - } + res = PutIndexData(rcm_.get(), + data_slices, + remote_file_sizes, + remote_files, + field_meta_, + index_meta_); for (auto& re : res) { remote_paths_to_size_[re.first] = re.second; } } -void -DiskFileManagerImpl::CacheIndexToDisk() { - auto blobs = space_->StatisticsBlobs(); - std::vector remote_files; - for (auto& blob : blobs) { - remote_files.push_back(blob.name); - } - auto local_chunk_manager = - LocalChunkManagerSingleton::GetInstance().GetChunkManager(); - - std::map> index_slices; - for (auto& file_path : remote_files) { - auto pos = file_path.find_last_of("_"); - index_slices[file_path.substr(0, pos)].emplace_back( - std::stoi(file_path.substr(pos + 1))); - } - - for (auto& slices : index_slices) { - std::sort(slices.second.begin(), slices.second.end()); - } - - auto EstimateParallelDegree = [&](const std::string& file) -> uint64_t { - auto fileSize = space_->GetBlobByteSize(file); - return uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / fileSize.value()); - }; - - for (auto& slices : index_slices) { - auto prefix = slices.first; - auto local_index_file_name = - GetLocalIndexObjectPrefix() + - prefix.substr(prefix.find_last_of('/') + 1); - local_chunk_manager->CreateFile(local_index_file_name); - int64_t offset = 0; - std::vector batch_remote_files; - uint64_t max_parallel_degree = INT_MAX; - for (int& iter : slices.second) { - if (batch_remote_files.size() == max_parallel_degree) { - auto next_offset = CacheBatchIndexFilesToDiskV2( - batch_remote_files, local_index_file_name, offset); - offset = next_offset; - batch_remote_files.clear(); - } - auto origin_file = prefix + "_" + std::to_string(iter); - if (batch_remote_files.size() == 0) { - // Use first file size as average size to estimate - max_parallel_degree = EstimateParallelDegree(origin_file); - } - batch_remote_files.push_back(origin_file); - } - if (batch_remote_files.size() > 0) { - auto next_offset = CacheBatchIndexFilesToDiskV2( - batch_remote_files, local_index_file_name, offset); - offset = next_offset; - batch_remote_files.clear(); - } - local_paths_.emplace_back(local_index_file_name); - } -} - void DiskFileManagerImpl::CacheIndexToDisk( const std::vector& remote_files) { @@ -329,111 +222,6 @@ DiskFileManagerImpl::CacheIndexToDisk( } } -uint64_t -DiskFileManagerImpl::CacheBatchIndexFilesToDisk( - const std::vector& remote_files, - const std::string& local_file_name, - uint64_t local_file_init_offfset) { - auto local_chunk_manager = - LocalChunkManagerSingleton::GetInstance().GetChunkManager(); - auto index_datas = GetObjectData(rcm_.get(), remote_files); - int batch_size = remote_files.size(); - AssertInfo(index_datas.size() == batch_size, - "inconsistent file num and index data num!"); - - uint64_t offset = local_file_init_offfset; - for (int i = 0; i < batch_size; ++i) { - auto index_data = index_datas[i].get()->GetFieldData(); - auto index_size = index_data->Size(); - auto uint8_data = - reinterpret_cast(const_cast(index_data->Data())); - local_chunk_manager->Write( - local_file_name, offset, uint8_data, index_size); - offset += index_size; - } - return offset; -} - -uint64_t -DiskFileManagerImpl::CacheBatchIndexFilesToDiskV2( - const std::vector& remote_files, - const std::string& local_file_name, - uint64_t local_file_init_offfset) { - auto local_chunk_manager = - LocalChunkManagerSingleton::GetInstance().GetChunkManager(); - auto index_datas = GetObjectData(space_, remote_files); - int batch_size = remote_files.size(); - AssertInfo(index_datas.size() == batch_size, - "inconsistent file num and index data num!"); - - uint64_t offset = local_file_init_offfset; - for (int i = 0; i < batch_size; ++i) { - auto index_data = index_datas[i]; - auto index_size = index_data->Size(); - auto uint8_data = - reinterpret_cast(const_cast(index_data->Data())); - local_chunk_manager->Write( - local_file_name, offset, uint8_data, index_size); - offset += index_size; - } - return offset; -} -template -std::string -DiskFileManagerImpl::CacheRawDataToDisk( - std::shared_ptr space) { - auto segment_id = GetFieldDataMeta().segment_id; - auto field_id = GetFieldDataMeta().field_id; - - auto local_chunk_manager = - LocalChunkManagerSingleton::GetInstance().GetChunkManager(); - auto local_data_path = storage::GenFieldRawDataPathPrefix( - local_chunk_manager, segment_id, field_id) + - "raw_data"; - local_chunk_manager->CreateFile(local_data_path); - // file format - // num_rows(uint32) | dim(uint32) | index_data ([]uint8_t) - uint32_t num_rows = 0; - uint32_t dim = 0; - int64_t write_offset = sizeof(num_rows) + sizeof(dim); - auto reader = space->ScanData(); - for (auto rec : *reader) { - if (!rec.ok()) { - PanicInfo(IndexBuildError, - fmt::format("failed to read data: {}", - rec.status().ToString())); - } - auto data = rec.ValueUnsafe(); - if (data == nullptr) { - break; - } - auto total_num_rows = data->num_rows(); - num_rows += total_num_rows; - auto col_data = data->GetColumnByName(index_meta_.field_name); - auto field_data = storage::CreateFieldData( - index_meta_.field_type, false, index_meta_.dim, total_num_rows); - field_data->FillFieldData(col_data); - dim = field_data->get_dim(); - auto data_size = - field_data->get_num_rows() * milvus::GetVecRowSize(dim); - local_chunk_manager->Write(local_data_path, - write_offset, - const_cast(field_data->Data()), - data_size); - write_offset += data_size; - } - - // write num_rows and dim value to file header - write_offset = 0; - local_chunk_manager->Write( - local_data_path, write_offset, &num_rows, sizeof(num_rows)); - write_offset += sizeof(num_rows); - local_chunk_manager->Write( - local_data_path, write_offset, &dim, sizeof(dim)); - - return local_data_path; -} - void SortByPath(std::vector& paths) { std::sort(paths.begin(), @@ -682,92 +470,6 @@ WriteOptFieldsIvfMeta( write_offset += sizeof(num_of_fields); } -// write optional scalar fields ivf info in the following format without space among them -// | (meta) -// | version (uint8_t) | num_of_fields (uint32_t) | -// | (field_0) -// | field_id (int64_t) | num_of_unique_field_data (uint32_t) -// | size_0 (uint32_t) | offset_0 (uint32_t)... -// | size_1 | offset_0, offset_1, ... -std::string -DiskFileManagerImpl::CacheOptFieldToDisk( - std::shared_ptr space, OptFieldT& fields_map) { - const uint32_t num_of_fields = fields_map.size(); - if (0 == num_of_fields) { - return ""; - } else if (num_of_fields > 1) { - PanicInfo( - ErrorCode::NotImplemented, - "vector index build with multiple fields is not supported yet"); - } - if (nullptr == space) { - LOG_ERROR("Failed to cache optional field. Space is null"); - return ""; - } - - auto segment_id = GetFieldDataMeta().segment_id; - auto vec_field_id = GetFieldDataMeta().field_id; - auto local_chunk_manager = - LocalChunkManagerSingleton::GetInstance().GetChunkManager(); - auto local_data_path = storage::GenFieldRawDataPathPrefix( - local_chunk_manager, segment_id, vec_field_id) + - std::string(VEC_OPT_FIELDS); - local_chunk_manager->CreateFile(local_data_path); - - uint64_t write_offset = 0; - WriteOptFieldsIvfMeta( - local_chunk_manager, local_data_path, num_of_fields, write_offset); - - std::unordered_set actual_field_ids; - auto reader = space->ScanData(); - for (auto& [field_id, tup] : fields_map) { - const auto& field_name = std::get<0>(tup); - const auto& field_type = std::get<1>(tup); - std::vector field_datas; - for (auto rec : *reader) { - if (!rec.ok()) { - PanicInfo(IndexBuildError, - fmt::format("failed to read optional field data: {}", - rec.status().ToString())); - } - auto data = rec.ValueUnsafe(); - if (data == nullptr) { - break; - } - auto total_num_rows = data->num_rows(); - if (0 == total_num_rows) { - LOG_WARN("optional field {} has no data", field_name); - return ""; - } - auto col_data = data->GetColumnByName(field_name); - auto field_data = - storage::CreateFieldData(field_type, false, 1, total_num_rows); - field_data->FillFieldData(col_data); - field_datas.emplace_back(field_data); - } - if (WriteOptFieldIvfData(field_type, - field_id, - local_chunk_manager, - local_data_path, - field_datas, - write_offset)) { - actual_field_ids.insert(field_id); - } - } - - if (actual_field_ids.size() != num_of_fields) { - write_offset = 0; - WriteOptFieldsIvfMeta(local_chunk_manager, - local_data_path, - actual_field_ids.size(), - write_offset); - if (actual_field_ids.empty()) { - return ""; - } - } - return local_data_path; -} - std::string DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) { const uint32_t num_of_fields = fields_map.size(); @@ -904,17 +606,4 @@ DiskFileManagerImpl::CacheRawDataToDisk( template std::string DiskFileManagerImpl::CacheRawDataToDisk( std::vector remote_files); -template std::string -DiskFileManagerImpl::CacheRawDataToDisk( - std::shared_ptr space); -template std::string -DiskFileManagerImpl::CacheRawDataToDisk( - std::shared_ptr space); -template std::string -DiskFileManagerImpl::CacheRawDataToDisk( - std::shared_ptr space); -template std::string -DiskFileManagerImpl::CacheRawDataToDisk( - std::shared_ptr space); - } // namespace milvus::storage diff --git a/internal/core/src/storage/DiskFileManagerImpl.h b/internal/core/src/storage/DiskFileManagerImpl.h index b059f8399d..fe212f6c7f 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.h +++ b/internal/core/src/storage/DiskFileManagerImpl.h @@ -25,8 +25,6 @@ #include "storage/IndexData.h" #include "storage/FileManager.h" #include "storage/ChunkManager.h" -#include "storage/space.h" - #include "common/Consts.h" namespace milvus::storage { @@ -35,9 +33,6 @@ class DiskFileManagerImpl : public FileManagerImpl { public: explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext); - explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext, - std::shared_ptr space); - virtual ~DiskFileManagerImpl(); virtual bool @@ -77,19 +72,6 @@ class DiskFileManagerImpl : public FileManagerImpl { void CacheIndexToDisk(const std::vector& remote_files); - void - CacheIndexToDisk(); - - uint64_t - CacheBatchIndexFilesToDisk(const std::vector& remote_files, - const std::string& local_file_name, - uint64_t local_file_init_offfset); - - uint64_t - CacheBatchIndexFilesToDiskV2(const std::vector& remote_files, - const std::string& local_file_name, - uint64_t local_file_init_offfset); - void AddBatchIndexFiles(const std::string& local_file_name, const std::vector& local_file_offsets, @@ -100,27 +82,12 @@ class DiskFileManagerImpl : public FileManagerImpl { std::string CacheRawDataToDisk(std::vector remote_files); - template - std::string - CacheRawDataToDisk(std::shared_ptr space); - std::string CacheOptFieldToDisk(OptFieldT& fields_map); - std::string - CacheOptFieldToDisk(std::shared_ptr space, - OptFieldT& fields_map); - - virtual bool - AddFileUsingSpace(const std::string& local_file_name, - const std::vector& local_file_offsets, - const std::vector& remote_files, - const std::vector& remote_file_sizes); - std::string GetRemoteIndexPrefix() const { - return space_ != nullptr ? GetRemoteIndexObjectPrefixV2() - : GetRemoteIndexObjectPrefix(); + return GetRemoteIndexObjectPrefix(); } private: @@ -141,8 +108,6 @@ class DiskFileManagerImpl : public FileManagerImpl { // remote file path std::map remote_paths_to_size_; - - std::shared_ptr space_; }; using DiskANNFileManagerImplPtr = std::shared_ptr; diff --git a/internal/core/src/storage/FileManager.h b/internal/core/src/storage/FileManager.h index 816beb2e8a..87f94a1b40 100644 --- a/internal/core/src/storage/FileManager.h +++ b/internal/core/src/storage/FileManager.h @@ -25,7 +25,6 @@ #include "log/Log.h" #include "storage/ChunkManager.h" #include "storage/Types.h" -#include "storage/space.h" namespace milvus::storage { @@ -40,15 +39,6 @@ struct FileManagerContext { chunkManagerPtr(chunkManagerPtr) { } - FileManagerContext(const FieldDataMeta& fieldDataMeta, - const IndexMeta& indexMeta, - const ChunkManagerPtr& chunkManagerPtr, - std::shared_ptr space) - : fieldDataMeta(fieldDataMeta), - indexMeta(indexMeta), - chunkManagerPtr(chunkManagerPtr), - space_(space) { - } bool Valid() const { return chunkManagerPtr != nullptr; @@ -57,7 +47,6 @@ struct FileManagerContext { FieldDataMeta fieldDataMeta; IndexMeta indexMeta; ChunkManagerPtr chunkManagerPtr; - std::shared_ptr space_; }; #define FILEMANAGER_TRY try { diff --git a/internal/core/src/storage/MemFileManagerImpl.cpp b/internal/core/src/storage/MemFileManagerImpl.cpp index 80bc90bb2e..a920708bf5 100644 --- a/internal/core/src/storage/MemFileManagerImpl.cpp +++ b/internal/core/src/storage/MemFileManagerImpl.cpp @@ -26,15 +26,6 @@ namespace milvus::storage { -MemFileManagerImpl::MemFileManagerImpl( - const FileManagerContext& fileManagerContext, - std::shared_ptr space) - : FileManagerImpl(fileManagerContext.fieldDataMeta, - fileManagerContext.indexMeta), - space_(space) { - rcm_ = fileManagerContext.chunkManagerPtr; -} - MemFileManagerImpl::MemFileManagerImpl( const FileManagerContext& fileManagerContext) : FileManagerImpl(fileManagerContext.fieldDataMeta, @@ -91,50 +82,6 @@ MemFileManagerImpl::AddFile(const BinarySet& binary_set) { return true; } -bool -MemFileManagerImpl::AddFileV2(const BinarySet& binary_set) { - std::vector data_slices; - std::vector slice_sizes; - std::vector slice_names; - - auto AddBatchIndexFiles = [&]() { - auto res = PutIndexData(space_, - data_slices, - slice_sizes, - slice_names, - field_meta_, - index_meta_); - for (auto& [file, size] : res) { - remote_paths_to_size_[file] = size; - } - }; - - auto remotePrefix = GetRemoteIndexObjectPrefixV2(); - int64_t batch_size = 0; - for (auto iter = binary_set.binary_map_.begin(); - iter != binary_set.binary_map_.end(); - iter++) { - if (batch_size >= DEFAULT_FIELD_MAX_MEMORY_LIMIT) { - AddBatchIndexFiles(); - data_slices.clear(); - slice_sizes.clear(); - slice_names.clear(); - batch_size = 0; - } - - data_slices.emplace_back(iter->second->data.get()); - slice_sizes.emplace_back(iter->second->size); - slice_names.emplace_back(remotePrefix + "/" + iter->first); - batch_size += iter->second->size; - } - - if (data_slices.size() > 0) { - AddBatchIndexFiles(); - } - - return true; -} - bool MemFileManagerImpl::LoadFile(const std::string& filename) noexcept { return true; diff --git a/internal/core/src/storage/MemFileManagerImpl.h b/internal/core/src/storage/MemFileManagerImpl.h index 1349cbeb41..8fffc1b387 100644 --- a/internal/core/src/storage/MemFileManagerImpl.h +++ b/internal/core/src/storage/MemFileManagerImpl.h @@ -25,7 +25,6 @@ #include "storage/IndexData.h" #include "storage/FileManager.h" #include "storage/ChunkManager.h" -#include "storage/space.h" namespace milvus::storage { @@ -33,9 +32,6 @@ class MemFileManagerImpl : public FileManagerImpl { public: explicit MemFileManagerImpl(const FileManagerContext& fileManagerContext); - MemFileManagerImpl(const FileManagerContext& fileManagerContext, - std::shared_ptr space); - virtual bool LoadFile(const std::string& filename) noexcept; @@ -63,14 +59,6 @@ class MemFileManagerImpl : public FileManagerImpl { bool AddFile(const BinarySet& binary_set); - bool - AddFileV2(const BinarySet& binary_set); - - std::shared_ptr - space() const { - return space_; - } - std::map GetRemotePathsToFileSize() const { return remote_paths_to_size_; @@ -79,7 +67,6 @@ class MemFileManagerImpl : public FileManagerImpl { private: // remote file path std::map remote_paths_to_size_; - std::shared_ptr space_; }; using MemFileManagerImplPtr = std::shared_ptr; diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index badfa00719..d8710d1126 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -516,22 +516,6 @@ DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager, return DeserializeFileData(buf, fileSize); } -std::unique_ptr -DownloadAndDecodeRemoteFileV2(std::shared_ptr space, - const std::string& file) { - auto fileSize = space->GetBlobByteSize(file); - if (!fileSize.ok()) { - PanicInfo(FileReadFailed, fileSize.status().ToString()); - } - auto buf = std::shared_ptr(new uint8_t[fileSize.value()]); - auto status = space->ReadBlob(file, buf.get()); - if (!status.ok()) { - PanicInfo(FileReadFailed, status.ToString()); - } - - return DeserializeFileData(buf, fileSize.value()); -} - std::pair EncodeAndUploadIndexSlice(ChunkManager* chunk_manager, uint8_t* buf, @@ -551,27 +535,6 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager, return std::make_pair(std::move(object_key), serialized_index_size); } -std::pair -EncodeAndUploadIndexSlice2(std::shared_ptr space, - uint8_t* buf, - int64_t batch_size, - IndexMeta index_meta, - FieldDataMeta field_meta, - std::string object_key) { - // todo: support nullable index - auto field_data = CreateFieldData(DataType::INT8, false); - field_data->FillFieldData(buf, batch_size); - auto indexData = std::make_shared(field_data); - indexData->set_index_meta(index_meta); - indexData->SetFieldDataMeta(field_meta); - auto serialized_index_data = indexData->serialize_to_remote_file(); - auto serialized_index_size = serialized_index_data.size(); - auto status = space->WriteBlob( - object_key, serialized_index_data.data(), serialized_index_size); - AssertInfo(status.ok(), "write to space error: {}", status.ToString()); - return std::make_pair(std::move(object_key), serialized_index_size); -} - std::pair EncodeAndUploadFieldSlice(ChunkManager* chunk_manager, void* buf, @@ -609,36 +572,6 @@ GetObjectData(ChunkManager* remote_chunk_manager, return futures; } -std::vector -GetObjectData(std::shared_ptr space, - const std::vector& remote_files) { - auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::HIGH); - std::vector>> futures; - for (auto& file : remote_files) { - futures.emplace_back( - pool.Submit(DownloadAndDecodeRemoteFileV2, space, file)); - } - - std::vector datas; - std::exception_ptr first_exception = nullptr; - for (auto& future : futures) { - try { - auto res = future.get(); - datas.emplace_back(res->GetFieldData()); - } catch (...) { - if (!first_exception) { - first_exception = std::current_exception(); - } - } - } - ReleaseArrowUnused(); - if (first_exception) { - std::rethrow_exception(first_exception); - } - - return datas; -} - std::map PutIndexData(ChunkManager* remote_chunk_manager, const std::vector& data_slices, @@ -687,54 +620,6 @@ PutIndexData(ChunkManager* remote_chunk_manager, return remote_paths_to_size; } -std::map -PutIndexData(std::shared_ptr space, - const std::vector& data_slices, - const std::vector& slice_sizes, - const std::vector& slice_names, - FieldDataMeta& field_meta, - IndexMeta& index_meta) { - auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE); - std::vector>> futures; - AssertInfo(data_slices.size() == slice_sizes.size(), - "inconsistent data slices size {} with slice sizes {}", - data_slices.size(), - slice_sizes.size()); - AssertInfo(data_slices.size() == slice_names.size(), - "inconsistent data slices size {} with slice names size {}", - data_slices.size(), - slice_names.size()); - - for (int64_t i = 0; i < data_slices.size(); ++i) { - futures.push_back(pool.Submit(EncodeAndUploadIndexSlice2, - space, - const_cast(data_slices[i]), - slice_sizes[i], - index_meta, - field_meta, - slice_names[i])); - } - - std::map remote_paths_to_size; - std::exception_ptr first_exception = nullptr; - for (auto& future : futures) { - try { - auto res = future.get(); - remote_paths_to_size[res.first] = res.second; - } catch (...) { - if (!first_exception) { - first_exception = std::current_exception(); - } - } - } - ReleaseArrowUnused(); - if (first_exception) { - std::rethrow_exception(first_exception); - } - - return remote_paths_to_size; -} - int64_t GetTotalNumRowsForFieldDatas(const std::vector& field_datas) { int64_t count = 0; diff --git a/internal/core/src/storage/Util.h b/internal/core/src/storage/Util.h index d92bb7d577..7d18c72220 100644 --- a/internal/core/src/storage/Util.h +++ b/internal/core/src/storage/Util.h @@ -31,7 +31,6 @@ #include "storage/ChunkManager.h" #include "storage/DataCodec.h" #include "storage/Types.h" -#include "storage/space.h" namespace milvus::storage { @@ -89,10 +88,6 @@ std::unique_ptr DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager, const std::string& file); -std::unique_ptr -DownloadAndDecodeRemoteFileV2(std::shared_ptr space, - const std::string& file); - std::pair EncodeAndUploadIndexSlice(ChunkManager* chunk_manager, uint8_t* buf, @@ -102,13 +97,6 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager, std::string object_key); std::pair -EncodeAndUploadIndexSlice2(std::shared_ptr space, - uint8_t* buf, - int64_t batch_size, - IndexMeta index_meta, - FieldDataMeta field_meta, - std::string object_key); -std::pair EncodeAndUploadFieldSlice(ChunkManager* chunk_manager, void* buf, int64_t element_count, @@ -120,10 +108,6 @@ std::vector>> GetObjectData(ChunkManager* remote_chunk_manager, const std::vector& remote_files); -std::vector -GetObjectData(std::shared_ptr space, - const std::vector& remote_files); - std::map PutIndexData(ChunkManager* remote_chunk_manager, const std::vector& data_slices, @@ -132,13 +116,6 @@ PutIndexData(ChunkManager* remote_chunk_manager, FieldDataMeta& field_meta, IndexMeta& index_meta); -std::map -PutIndexData(std::shared_ptr space, - const std::vector& data_slices, - const std::vector& slice_sizes, - const std::vector& slice_names, - FieldDataMeta& field_meta, - IndexMeta& index_meta); int64_t GetTotalNumRowsForFieldDatas(const std::vector& field_datas); diff --git a/internal/core/thirdparty/CMakeLists.txt b/internal/core/thirdparty/CMakeLists.txt index eb1806ac50..5fe44881ad 100644 --- a/internal/core/thirdparty/CMakeLists.txt +++ b/internal/core/thirdparty/CMakeLists.txt @@ -41,8 +41,6 @@ if (USE_OPENDAL) endif() add_subdirectory(tantivy) -add_subdirectory(milvus-storage) - if (LINUX) add_subdirectory(jemalloc) endif() diff --git a/internal/core/thirdparty/milvus-storage/CMakeLists.txt b/internal/core/thirdparty/milvus-storage/CMakeLists.txt deleted file mode 100644 index a67a7cae82..0000000000 --- a/internal/core/thirdparty/milvus-storage/CMakeLists.txt +++ /dev/null @@ -1,48 +0,0 @@ -#------------------------------------------------------------------------------- -# Copyright (C) 2019-2020 Zilliz. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under the License. -#------------------------------------------------------------------------------- - -set( MILVUS_STORAGE_VERSION 9d1ad9c) - -message(STATUS "Building milvus-storage-${MILVUS_STORAGE_VERSION} from source") -message(STATUS ${CMAKE_BUILD_TYPE}) - -# message(FATAL_ERROR ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage.patch) -# set(milvus-storage-patch git apply --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage.patch) -set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} ) -FetchContent_Declare( - milvus-storage - GIT_REPOSITORY "https://github.com/milvus-io/milvus-storage.git" - GIT_TAG ${MILVUS_STORAGE_VERSION} - SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-src - BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-build - SOURCE_SUBDIR cpp - PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage_CMakeLists.txt /cpp/CMakeLists.txt - DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} ) - -FetchContent_MakeAvailable(milvus-storage) -# target_compile_features(milvus-storage PUBLIC cxx_std_20) - -# FetchContent_GetProperties( milvus-storage ) -# if ( NOT milvus-storage_POPULATED ) -# FetchContent_Populate( milvus-storage) - -# # Adding the following target: -# add_subdirectory( ${milvus-storage_SOURCE_DIR}/cpp -# ${milvus-storage_BINARY_DIR} ) -# endif() - -# message(FATAL_ERROR ${milvus-storage_SOURCE_DIR} ${milvus-storage_BINARY_DIR}) -# get prometheus COMPILE_OPTIONS -# get_property( var DIRECTORY "${milvus-storage_SOURCE_DIR}" PROPERTY COMPILE_OPTIONS ) -message( STATUS "milvus-storage src compile options: ${var}" ) -# unset(CMAKE_CXX_STANDARD) diff --git a/internal/core/thirdparty/milvus-storage/milvus-storage_CMakeLists.txt b/internal/core/thirdparty/milvus-storage/milvus-storage_CMakeLists.txt deleted file mode 100644 index 135765c99e..0000000000 --- a/internal/core/thirdparty/milvus-storage/milvus-storage_CMakeLists.txt +++ /dev/null @@ -1,34 +0,0 @@ -cmake_minimum_required(VERSION 3.20.0) - -project(milvus-storage VERSION 0.1.0) - -option(WITH_UT "Build the testing tree." ON) -option(WITH_ASAN "Build with address sanitizer." OFF) -option(USE_OPENDAL "Build with opendal." OFF) - -if (USE_OPENDAL) - add_compile_definitions(MILVUS_OPENDAL) -endif() - -set(CMAKE_CXX_STANDARD 20) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - -find_package(Boost REQUIRED) -find_package(Arrow REQUIRED) -find_package(Protobuf REQUIRED) -find_package(glog REQUIRED) -find_package(AWSSDK REQUIRED) - -file(GLOB_RECURSE SRC_FILES src/*.cpp src/*.cc) -message(STATUS "SRC_FILES: ${SRC_FILES}") -add_library(milvus-storage ${SRC_FILES}) -target_include_directories(milvus-storage PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/milvus-storage ${CMAKE_CURRENT_SOURCE_DIR}/src) -target_link_libraries(milvus-storage PUBLIC arrow::arrow Boost::boost protobuf::protobuf AWS::aws-sdk-cpp-core glog::glog) -if (USE_OPENDAL) - target_link_libraries(milvus-storage PUBLIC opendal) -endif() - -if (WITH_UT) - enable_testing() - add_subdirectory(test) -endif() diff --git a/internal/core/unittest/test_disk_file_manager_test.cpp b/internal/core/unittest/test_disk_file_manager_test.cpp index 9f2251baa4..565e063b6d 100644 --- a/internal/core/unittest/test_disk_file_manager_test.cpp +++ b/internal/core/unittest/test_disk_file_manager_test.cpp @@ -36,9 +36,6 @@ #include "storage/InsertData.h" #include "storage/ThreadPool.h" #include "storage/Types.h" -#include "storage/options.h" -#include "storage/schema.h" -#include "storage/space.h" #include "storage/Util.h" #include "storage/DiskFileManagerImpl.h" #include "storage/LocalChunkManagerSingleton.h" @@ -285,62 +282,6 @@ PrepareInsertData(const int64_t opt_field_data_range) -> std::string { return path; } -auto -PrepareInsertDataSpace(const int64_t opt_field_data_range) - -> std::pair> { - std::string path = kOptFieldPath + "space/" + std::to_string(kOptFieldId); - arrow::FieldVector arrow_fields{ - arrow::field("pk", arrow::int64()), - arrow::field("ts", arrow::int64()), - arrow::field(kOptFieldName, arrow::int64()), - arrow::field("vec", arrow::fixed_size_binary(1))}; - auto arrow_schema = std::make_shared(arrow_fields); - milvus_storage::SchemaOptions schema_options = { - .primary_column = "pk", .version_column = "ts", .vector_column = "vec"}; - auto schema = - std::make_shared(arrow_schema, schema_options); - boost::filesystem::remove_all(path); - boost::filesystem::create_directories(path); - EXPECT_TRUE(schema->Validate().ok()); - auto opt_space = milvus_storage::Space::Open( - "file://" + boost::filesystem::canonical(path).string(), - milvus_storage::Options{schema}); - EXPECT_TRUE(opt_space.has_value()); - auto space = std::move(opt_space.value()); - const auto data = PrepareRawFieldData(opt_field_data_range); - arrow::Int64Builder pk_builder; - arrow::Int64Builder ts_builder; - arrow::NumericBuilder scalar_builder; - arrow::FixedSizeBinaryBuilder vec_builder(arrow::fixed_size_binary(1)); - const uint8_t kByteZero = 0; - for (size_t i = 0; i < kEntityCnt; ++i) { - EXPECT_TRUE(pk_builder.Append(i).ok()); - EXPECT_TRUE(ts_builder.Append(i).ok()); - EXPECT_TRUE(vec_builder.Append(&kByteZero).ok()); - } - for (size_t i = 0; i < kEntityCnt; ++i) { - EXPECT_TRUE(scalar_builder.Append(data[i]).ok()); - } - std::shared_ptr pk_array; - EXPECT_TRUE(pk_builder.Finish(&pk_array).ok()); - std::shared_ptr ts_array; - EXPECT_TRUE(ts_builder.Finish(&ts_array).ok()); - std::shared_ptr scalar_array; - EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok()); - std::shared_ptr vec_array; - EXPECT_TRUE(vec_builder.Finish(&vec_array).ok()); - auto batch = - arrow::RecordBatch::Make(arrow_schema, - kEntityCnt, - {pk_array, ts_array, scalar_array, vec_array}); - milvus_storage::WriteOption write_opt = {kEntityCnt}; - space->Write(*arrow::RecordBatchReader::Make({batch}, arrow_schema) - .ValueOrDie() - .get(), - write_opt); - return {path, std::move(space)}; -} - template auto PrepareOptionalField(const std::shared_ptr& file_manager, @@ -400,47 +341,24 @@ CheckOptFieldCorrectness( } } // namespace -TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskFieldEmpty) { - auto file_manager = CreateFileManager(cm_); - { - const auto& [insert_file_space_path, space] = - PrepareInsertDataSpace(kOptFieldDataRange); - OptFieldT opt_fields; - EXPECT_TRUE(file_manager->CacheOptFieldToDisk(opt_fields).empty()); - EXPECT_TRUE( - file_manager->CacheOptFieldToDisk(space, opt_fields).empty()); - } - - { - auto opt_fileds = - PrepareOptionalField(file_manager, ""); - auto res = file_manager->CacheOptFieldToDisk(nullptr, opt_fileds); - EXPECT_TRUE(res.empty()); - } -} - TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOptFieldMoreThanOne) { auto file_manager = CreateFileManager(cm_); const auto insert_file_path = PrepareInsertData(kOptFieldDataRange); - const auto& [insert_file_space_path, space] = - PrepareInsertDataSpace(kOptFieldDataRange); OptFieldT opt_fields = PrepareOptionalField(file_manager, insert_file_path); opt_fields[kOptFieldId + 1] = { - kOptFieldName + "second", DataType::INT64, {insert_file_space_path}}; + kOptFieldName + "second", DataType::INT64, {insert_file_path}}; EXPECT_THROW(file_manager->CacheOptFieldToDisk(opt_fields), SegcoreError); - EXPECT_THROW(file_manager->CacheOptFieldToDisk(space, opt_fields), - SegcoreError); } TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) { auto file_manager = CreateFileManager(cm_); - const auto& [insert_file_path, space] = - PrepareInsertDataSpace(kOptFieldDataRange); + const auto insert_file_path = + PrepareInsertData(kOptFieldDataRange); auto opt_fileds = PrepareOptionalField(file_manager, insert_file_path); - auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds); + auto res = file_manager->CacheOptFieldToDisk(opt_fileds); ASSERT_FALSE(res.empty()); CheckOptFieldCorrectness(res); } @@ -477,12 +395,4 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) { auto res = file_manager->CacheOptFieldToDisk(opt_fileds); ASSERT_TRUE(res.empty()); } - - { - const auto& [insert_file_path, space] = PrepareInsertDataSpace(1); - auto opt_fileds = PrepareOptionalField( - file_manager, insert_file_path); - auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds); - ASSERT_TRUE(res.empty()); - } -} \ No newline at end of file +} diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 9d4afc53ae..8631c67778 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -32,7 +32,6 @@ #include "index/IndexFactory.h" #include "common/QueryResult.h" #include "segcore/Types.h" -#include "storage/options.h" #include "test_utils/indexbuilder_test_utils.h" #include "test_utils/storage_test_utils.h" #include "test_utils/DataGen.h" @@ -916,261 +915,4 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) { SearchResult result; EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result)); } -#endif - -//class IndexTestV2 -// : public ::testing::TestWithParam> { -// protected: -// std::shared_ptr -// TestSchema(int vec_size) { -// arrow::FieldVector fields; -// fields.push_back(arrow::field("pk", arrow::int64())); -// fields.push_back(arrow::field("ts", arrow::int64())); -// fields.push_back( -// arrow::field("vec", arrow::fixed_size_binary(vec_size))); -// return std::make_shared(fields); -// } -// -// std::shared_ptr -// TestRecords(int vec_size, GeneratedData& dataset) { -// arrow::Int64Builder pk_builder; -// arrow::Int64Builder ts_builder; -// arrow::FixedSizeBinaryBuilder vec_builder( -// arrow::fixed_size_binary(vec_size)); -// if (!is_binary) { -// xb_data = dataset.get_col(milvus::FieldId(100)); -// auto data = reinterpret_cast(xb_data.data()); -// for (auto i = 0; i < NB; ++i) { -// EXPECT_TRUE(pk_builder.Append(i).ok()); -// EXPECT_TRUE(ts_builder.Append(i).ok()); -// EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok()); -// } -// } else { -// xb_bin_data = dataset.get_col(milvus::FieldId(100)); -// for (auto i = 0; i < NB; ++i) { -// EXPECT_TRUE(pk_builder.Append(i).ok()); -// EXPECT_TRUE(ts_builder.Append(i).ok()); -// EXPECT_TRUE( -// vec_builder.Append(xb_bin_data.data() + i * vec_size).ok()); -// } -// } -// std::shared_ptr pk_array; -// EXPECT_TRUE(pk_builder.Finish(&pk_array).ok()); -// std::shared_ptr ts_array; -// EXPECT_TRUE(ts_builder.Finish(&ts_array).ok()); -// std::shared_ptr vec_array; -// EXPECT_TRUE(vec_builder.Finish(&vec_array).ok()); -// auto schema = TestSchema(vec_size); -// auto rec_batch = arrow::RecordBatch::Make( -// schema, NB, {pk_array, ts_array, vec_array}); -// auto reader = -// arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie(); -// return reader; -// } -// -// std::shared_ptr -// TestSpace(int vec_size, GeneratedData& dataset) { -// auto arrow_schema = TestSchema(vec_size); -// auto schema_options = std::make_shared(); -// schema_options->primary_column = "pk"; -// schema_options->version_column = "ts"; -// schema_options->vector_column = "vec"; -// auto schema = std::make_shared(arrow_schema, -// schema_options); -// EXPECT_TRUE(schema->Validate().ok()); -// -// auto space_res = milvus_storage::Space::Open( -// "file://" + boost::filesystem::canonical(temp_path).string(), -// milvus_storage::Options{schema}); -// EXPECT_TRUE(space_res.has_value()); -// -// auto space = std::move(space_res.value()); -// auto rec = TestRecords(vec_size, dataset); -// auto write_opt = milvus_storage::WriteOption{NB}; -// space->Write(rec.get(), &write_opt); -// return std::move(space); -// } -// -// void -// SetUp() override { -// temp_path = boost::filesystem::temp_directory_path() / -// boost::filesystem::unique_path(); -// boost::filesystem::create_directory(temp_path); -// storage_config_ = get_default_local_storage_config(); -// -// auto param = GetParam(); -// index_type = std::get<0>(param).first; -// metric_type = std::get<0>(param).second; -// file_slice_size = std::get<1>(param); -// enable_mmap = index_type != knowhere::IndexEnum::INDEX_DISKANN && -// std::get<2>(param); -// if (enable_mmap) { -// mmap_file_path = boost::filesystem::temp_directory_path() / -// boost::filesystem::unique_path(); -// } -// NB = 3000; -// -// // try to reduce the test time, -// // but the large dataset is needed for the case below. -// auto test_name = std::string( -// testing::UnitTest::GetInstance()->current_test_info()->name()); -// if (test_name == "Mmap" && -// index_type == knowhere::IndexEnum::INDEX_HNSW) { -// NB = 270000; -// } -// build_conf = generate_build_conf(index_type, metric_type); -// load_conf = generate_load_conf(index_type, metric_type, NB); -// search_conf = generate_search_conf(index_type, metric_type); -// range_search_conf = generate_range_search_conf(index_type, metric_type); -// -// std::map is_binary_map = { -// {knowhere::IndexEnum::INDEX_FAISS_IDMAP, false}, -// {knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false}, -// {knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, false}, -// {knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, false}, -// {knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true}, -// {knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, true}, -// {knowhere::IndexEnum::INDEX_HNSW, false}, -// {knowhere::IndexEnum::INDEX_DISKANN, false}, -// }; -// -// is_binary = is_binary_map[index_type]; -// int vec_size; -// if (is_binary) { -// vec_size = DIM / 8; -// vec_field_data_type = milvus::DataType::VECTOR_BINARY; -// } else { -// vec_size = DIM * 4; -// vec_field_data_type = milvus::DataType::VECTOR_FLOAT; -// } -// -// auto dataset = GenDataset(NB, metric_type, is_binary); -// space = TestSpace(vec_size, dataset); -// -// if (!is_binary) { -// xb_data = dataset.get_col(milvus::FieldId(100)); -// xq_dataset = knowhere::GenDataSet( -// NQ, DIM, xb_data.data() + DIM * query_offset); -// } else { -// xb_bin_data = dataset.get_col(milvus::FieldId(100)); -// xq_dataset = knowhere::GenDataSet( -// NQ, DIM, xb_bin_data.data() + DIM * query_offset); -// } -// } -// -// void -// TearDown() override { -// boost::filesystem::remove_all(temp_path); -// if (enable_mmap) { -// boost::filesystem::remove_all(mmap_file_path); -// } -// } -// -// protected: -// std::string index_type, metric_type; -// bool is_binary; -// milvus::Config build_conf; -// milvus::Config load_conf; -// milvus::Config search_conf; -// milvus::Config range_search_conf; -// milvus::DataType vec_field_data_type; -// knowhere::DataSetPtr xb_dataset; -// FixedVector xb_data; -// FixedVector xb_bin_data; -// knowhere::DataSetPtr xq_dataset; -// int64_t query_offset = 100; -// int64_t NB = 3000; -// StorageConfig storage_config_; -// -// boost::filesystem::path temp_path; -// std::shared_ptr space; -// int64_t file_slice_size = DEFAULT_INDEX_FILE_SLICE_SIZE; -// bool enable_mmap; -// boost::filesystem::path mmap_file_path; -//}; -// -//INSTANTIATE_TEST_SUITE_P( -// IndexTypeParameters, -// IndexTestV2, -// testing::Combine( -// ::testing::Values( -// std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, -// knowhere::metric::L2), -// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, -// knowhere::metric::L2), -// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, -// knowhere::metric::L2), -// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, -// knowhere::metric::L2), -// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, -// knowhere::metric::JACCARD), -// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, -// knowhere::metric::JACCARD), -//#ifdef BUILD_DISK_ANN -// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2), -//#endif -// std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2)), -// testing::Values(DEFAULT_INDEX_FILE_SLICE_SIZE, 5000L), -// testing::Bool())); -// -//TEST_P(IndexTestV2, BuildAndQuery) { -// FILE_SLICE_SIZE = file_slice_size; -// milvus::index::CreateIndexInfo create_index_info; -// create_index_info.index_type = index_type; -// create_index_info.metric_type = metric_type; -// create_index_info.field_type = vec_field_data_type; -// create_index_info.field_name = "vec"; -// create_index_info.dim = DIM; -// create_index_info.index_engine_version = -// knowhere::Version::GetCurrentVersion().VersionNumber(); -// index::IndexBasePtr index; -// -// milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100}; -// milvus::storage::IndexMeta index_meta{.segment_id = 3, -// .field_id = 100, -// .build_id = 1000, -// .index_version = 1, -// .field_name = "vec", -// .field_type = vec_field_data_type, -// .dim = DIM}; -// auto chunk_manager = milvus::storage::CreateChunkManager(storage_config_); -// milvus::storage::FileManagerContext file_manager_context( -// field_data_meta, index_meta, chunk_manager, space); -// index = milvus::index::IndexFactory::GetInstance().CreateIndex( -// create_index_info, file_manager_context, space); -// -// auto build_conf = generate_build_conf(index_type, metric_type); -// index->BuildV2(build_conf); -// milvus::index::IndexBasePtr new_index; -// milvus::index::VectorIndex* vec_index = nullptr; -// -// auto binary_set = index->UploadV2(); -// index.reset(); -// -// new_index = milvus::index::IndexFactory::GetInstance().CreateIndex( -// create_index_info, file_manager_context, space); -// vec_index = dynamic_cast(new_index.get()); -// -// load_conf = generate_load_conf(index_type, metric_type, 0); -// if (enable_mmap) { -// load_conf[kMmapFilepath] = mmap_file_path.string(); -// } -// ASSERT_NO_THROW(vec_index->LoadV2(load_conf)); -// EXPECT_EQ(vec_index->Count(), NB); -// EXPECT_EQ(vec_index->GetDim(), DIM); -// -// milvus::SearchInfo search_info; -// search_info.topk_ = K; -// search_info.metric_type_ = metric_type; -// search_info.search_params_ = search_conf; -// auto result = vec_index->Query(xq_dataset, search_info, nullptr); -// EXPECT_EQ(result->total_nq_, NQ); -// EXPECT_EQ(result->unity_topK_, K); -// EXPECT_EQ(result->distances_.size(), NQ * K); -// EXPECT_EQ(result->seg_offsets_.size(), NQ * K); -// if (!is_binary) { -// EXPECT_EQ(result->seg_offsets_[0], query_offset); -// } -// search_info.search_params_ = range_search_conf; -// vec_index->Query(xq_dataset, search_info, nullptr); -//} +#endif \ No newline at end of file diff --git a/internal/core/unittest/test_scalar_index.cpp b/internal/core/unittest/test_scalar_index.cpp index 2d3e6bb213..d620628541 100644 --- a/internal/core/unittest/test_scalar_index.cpp +++ b/internal/core/unittest/test_scalar_index.cpp @@ -301,31 +301,6 @@ TestRecords(int vec_size, GeneratedData& dataset, std::vector& scalars) { return reader; } -template -std::shared_ptr -TestSpace(boost::filesystem::path& temp_path, - int vec_size, - GeneratedData& dataset, - std::vector& scalars) { - auto arrow_schema = TestSchema(vec_size); - milvus_storage::SchemaOptions schema_options{ - .primary_column = "pk", .version_column = "ts", .vector_column = "vec"}; - auto schema = - std::make_shared(arrow_schema, schema_options); - EXPECT_TRUE(schema->Validate().ok()); - - auto space_res = milvus_storage::Space::Open( - "file://" + boost::filesystem::canonical(temp_path).string(), - milvus_storage::Options{schema}); - EXPECT_TRUE(space_res.has_value()); - - auto space = std::move(space_res.value()); - auto rec = TestRecords(vec_size, dataset, scalars); - auto write_opt = milvus_storage::WriteOption{nb}; - space->Write(*rec, write_opt); - return std::move(space); -} - template <> struct TypedScalarIndexTestV2::Helper { using C = arrow::Int8Type; diff --git a/internal/core/unittest/test_string_index.cpp b/internal/core/unittest/test_string_index.cpp index bd006a5caf..c9d91481cb 100644 --- a/internal/core/unittest/test_string_index.cpp +++ b/internal/core/unittest/test_string_index.cpp @@ -349,116 +349,5 @@ TEST_F(StringIndexMarisaTest, BaseIndexCodec) { } } } - -using milvus::segcore::GeneratedData; -class StringIndexMarisaTestV2 : public StringIndexBaseTest { - std::shared_ptr - TestSchema(int vec_size) { - arrow::FieldVector fields; - fields.push_back(arrow::field("pk", arrow::int64())); - fields.push_back(arrow::field("ts", arrow::int64())); - fields.push_back(arrow::field("scalar", arrow::utf8())); - fields.push_back( - arrow::field("vec", arrow::fixed_size_binary(vec_size))); - return std::make_shared(fields); - } - - std::shared_ptr - TestRecords(int vec_size, - GeneratedData& dataset, - std::vector& scalars) { - arrow::Int64Builder pk_builder; - arrow::Int64Builder ts_builder; - arrow::StringBuilder scalar_builder; - arrow::FixedSizeBinaryBuilder vec_builder( - arrow::fixed_size_binary(vec_size)); - auto xb_data = dataset.get_col(milvus::FieldId(100)); - auto data = reinterpret_cast(xb_data.data()); - for (auto i = 0; i < nb; ++i) { - EXPECT_TRUE(pk_builder.Append(i).ok()); - EXPECT_TRUE(ts_builder.Append(i).ok()); - EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok()); - } - for (auto& v : scalars) { - EXPECT_TRUE(scalar_builder.Append(v).ok()); - } - std::shared_ptr pk_array; - EXPECT_TRUE(pk_builder.Finish(&pk_array).ok()); - std::shared_ptr ts_array; - EXPECT_TRUE(ts_builder.Finish(&ts_array).ok()); - std::shared_ptr scalar_array; - EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok()); - std::shared_ptr vec_array; - EXPECT_TRUE(vec_builder.Finish(&vec_array).ok()); - auto schema = TestSchema(vec_size); - auto rec_batch = arrow::RecordBatch::Make( - schema, nb, {pk_array, ts_array, scalar_array, vec_array}); - auto reader = - arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie(); - return reader; - } - - std::shared_ptr - TestSpace(int vec_size, - GeneratedData& dataset, - std::vector& scalars) { - auto arrow_schema = TestSchema(vec_size); - milvus_storage::SchemaOptions schema_options{.primary_column = "pk", - .version_column = "ts", - .vector_column = "vec"}; - auto schema = std::make_shared(arrow_schema, - schema_options); - EXPECT_TRUE(schema->Validate().ok()); - - auto space_res = milvus_storage::Space::Open( - "file://" + boost::filesystem::canonical(temp_path).string(), - milvus_storage::Options{schema}); - EXPECT_TRUE(space_res.has_value()); - - auto space = std::move(space_res.value()); - auto rec = TestRecords(vec_size, dataset, scalars); - auto write_opt = milvus_storage::WriteOption{nb}; - space->Write(*rec, write_opt); - return std::move(space); - } - void - SetUp() override { - StringIndexBaseTest::SetUp(); - temp_path = boost::filesystem::temp_directory_path() / - boost::filesystem::unique_path(); - boost::filesystem::create_directory(temp_path); - - auto vec_size = DIM * 4; - auto vec_field_data_type = milvus::DataType::VECTOR_FLOAT; - auto dataset = ::GenDataset(nb, knowhere::metric::L2, false); - - space = TestSpace(vec_size, dataset, strs); - } - void - TearDown() override { - boost::filesystem::remove_all(temp_path); - } - - protected: - boost::filesystem::path temp_path; - std::shared_ptr space; -}; - -TEST_F(StringIndexMarisaTestV2, Base) { - auto storage_config = get_default_local_storage_config(); - auto chunk_manager = milvus::storage::CreateChunkManager(storage_config); - milvus::storage::FileManagerContext file_manager_context( - {}, {.field_name = "scalar"}, chunk_manager, space); - auto index = - milvus::index::CreateStringIndexMarisa(file_manager_context, space); - index->BuildV2(); - index->UploadV2(); - - auto new_index = - milvus::index::CreateStringIndexMarisa(file_manager_context, space); - new_index->LoadV2(); - ASSERT_EQ(strs.size(), index->Count()); -} - } // namespace index } // namespace milvus diff --git a/internal/datacoord/services.go b/internal/datacoord/services.go index 513a9fdeea..b5f15505d7 100644 --- a/internal/datacoord/services.go +++ b/internal/datacoord/services.go @@ -543,10 +543,6 @@ func (s *Server) SaveBinlogPaths(ctx context.Context, req *datapb.SaveBinlogPath UpdateCheckPointOperator(req.GetSegmentID(), req.GetCheckPoints()), ) - if Params.CommonCfg.EnableStorageV2.GetAsBool() { - operators = append(operators, UpdateStorageVersionOperator(req.GetSegmentID(), req.GetStorageVersion())) - } - // Update segment info in memory and meta. if err := s.meta.UpdateSegmentsInfo(operators...); err != nil { log.Error("save binlog and checkpoints failed", zap.Error(err)) @@ -882,18 +878,6 @@ func (s *Server) GetRecoveryInfoV2(ctx context.Context, req *datapb.GetRecoveryI continue } - if Params.CommonCfg.EnableStorageV2.GetAsBool() { - segmentInfos = append(segmentInfos, &datapb.SegmentInfo{ - ID: segment.ID, - PartitionID: segment.PartitionID, - CollectionID: segment.CollectionID, - InsertChannel: segment.InsertChannel, - NumOfRows: segment.NumOfRows, - Level: segment.GetLevel(), - }) - continue - } - binlogs := segment.GetBinlogs() if len(binlogs) == 0 && segment.GetLevel() != datapb.SegmentLevel_L0 { continue diff --git a/internal/datacoord/task_index.go b/internal/datacoord/task_index.go index bf3b87b80f..ab23f656a1 100644 --- a/internal/datacoord/task_index.go +++ b/internal/datacoord/task_index.go @@ -25,10 +25,8 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/indexpb" - "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/types" - itypeutil "github.com/milvus-io/milvus/internal/util/typeutil" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/indexparams" @@ -201,68 +199,27 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule } } - if Params.CommonCfg.EnableStorageV2.GetAsBool() { - storePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segment.GetID()) - if err != nil { - log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err)) - it.SetState(indexpb.JobState_JobStateInit, err.Error()) - return true - } - indexStorePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue()+"/index", segment.GetID()) - if err != nil { - log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err)) - it.SetState(indexpb.JobState_JobStateInit, err.Error()) - return true - } - - it.req = &indexpb.CreateJobRequest{ - ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(), - IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath), - BuildID: it.taskID, - IndexVersion: segIndex.IndexVersion + 1, - StorageConfig: storageConfig, - IndexParams: indexParams, - TypeParams: typeParams, - NumRows: segIndex.NumRows, - CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(), - CollectionID: segment.GetCollectionID(), - PartitionID: segment.GetPartitionID(), - SegmentID: segment.GetID(), - FieldID: fieldID, - FieldName: field.GetName(), - FieldType: field.GetDataType(), - StorePath: storePath, - StoreVersion: segment.GetStorageVersion(), - IndexStorePath: indexStorePath, - Dim: int64(dim), - DataIds: binlogIDs, - OptionalScalarFields: optionalFields, - Field: field, - PartitionKeyIsolation: partitionKeyIsolation, - } - } else { - it.req = &indexpb.CreateJobRequest{ - ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(), - IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath), - BuildID: it.taskID, - IndexVersion: segIndex.IndexVersion + 1, - StorageConfig: storageConfig, - IndexParams: indexParams, - TypeParams: typeParams, - NumRows: segIndex.NumRows, - CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(), - CollectionID: segment.GetCollectionID(), - PartitionID: segment.GetPartitionID(), - SegmentID: segment.GetID(), - FieldID: fieldID, - FieldName: field.GetName(), - FieldType: field.GetDataType(), - Dim: int64(dim), - DataIds: binlogIDs, - OptionalScalarFields: optionalFields, - Field: field, - PartitionKeyIsolation: partitionKeyIsolation, - } + it.req = &indexpb.CreateJobRequest{ + ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(), + IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath), + BuildID: it.taskID, + IndexVersion: segIndex.IndexVersion + 1, + StorageConfig: storageConfig, + IndexParams: indexParams, + TypeParams: typeParams, + NumRows: segIndex.NumRows, + CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(), + CollectionID: segment.GetCollectionID(), + PartitionID: segment.GetPartitionID(), + SegmentID: segment.GetID(), + FieldID: fieldID, + FieldName: field.GetName(), + FieldType: field.GetDataType(), + Dim: int64(dim), + DataIds: binlogIDs, + OptionalScalarFields: optionalFields, + Field: field, + PartitionKeyIsolation: partitionKeyIsolation, } log.Ctx(ctx).Info("index task pre check successfully", zap.Int64("taskID", it.GetTaskID())) diff --git a/internal/datacoord/task_scheduler_test.go b/internal/datacoord/task_scheduler_test.go index 8281023efd..dd6994e904 100644 --- a/internal/datacoord/task_scheduler_test.go +++ b/internal/datacoord/task_scheduler_test.go @@ -911,15 +911,6 @@ func (s *taskSchedulerSuite) Test_scheduler() { defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false") s.scheduler(handler) }) - - s.Run("test scheduler with indexBuilderV2", func() { - paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("true") - defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false") - paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true") - defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false") - - s.scheduler(handler) - }) } func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() { @@ -1289,26 +1280,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() { paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("True") defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("False") - err := Params.Save("common.storage.scheme", "fake") - defer Params.Reset("common.storage.scheme") - Params.CommonCfg.EnableStorageV2.SwapTempValue("True") - defer Params.CommonCfg.EnableStorageV2.SwapTempValue("False") scheduler.Start() // get collection info failed --> init handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once() - // partition key field is nil, get collection info failed --> init - handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{ - ID: collID, - Schema: &schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - {FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}}, - }, - }, - }, nil).Once() - handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once() - // get collection info success, get dim failed --> init handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{ ID: collID, @@ -1318,38 +1294,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() { {FieldID: s.fieldID, Name: "vec"}, }, }, - }, nil).Twice() - - // peek client success, update version success, get collection info success, get dim success, get storage uri failed --> init - s.NoError(err) - handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) { - return &collectionInfo{ - ID: collID, - Schema: &schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - {FieldID: 100, Name: "pk", IsPrimaryKey: true, IsPartitionKey: true, DataType: schemapb.DataType_Int64}, - {FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}}, - }, - }, - }, nil - }).Twice() - s.NoError(err) + }, nil).Once() // assign failed --> retry workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once() catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once() - handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) { - Params.Reset("common.storage.scheme") - return &collectionInfo{ - ID: collID, - Schema: &schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - {FieldID: 100, Name: "pk", IsPrimaryKey: true, IsPartitionKey: true, DataType: schemapb.DataType_Int64}, - {FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}}, - }, - }, - }, nil - }).Once() in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once() // retry --> init diff --git a/internal/datanode/compaction/clustering_compactor_test.go b/internal/datanode/compaction/clustering_compactor_test.go index b98e97192e..98ee467340 100644 --- a/internal/datanode/compaction/clustering_compactor_test.go +++ b/internal/datanode/compaction/clustering_compactor_test.go @@ -30,12 +30,12 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus-storage/go/common/log" "github.com/milvus-io/milvus/internal/datanode/allocator" "github.com/milvus-io/milvus/internal/datanode/io" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/tsoutil" diff --git a/internal/datanode/compaction/load_stats.go b/internal/datanode/compaction/load_stats.go index a762a60135..9961ba2c17 100644 --- a/internal/datanode/compaction/load_stats.go +++ b/internal/datanode/compaction/load_stats.go @@ -24,8 +24,6 @@ import ( "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/flushcommon/metacache" - "github.com/milvus-io/milvus/internal/flushcommon/syncmgr" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/log" @@ -113,54 +111,3 @@ func LoadStats(ctx context.Context, chunkManager storage.ChunkManager, schema *s log.Info("Successfully load pk stats", zap.Any("time", time.Since(startTs)), zap.Uint("size", size)) return result, nil } - -func LoadStatsV2(storageCache *metacache.StorageV2Cache, segment *datapb.SegmentInfo, schema *schemapb.CollectionSchema) ([]*storage.PkStatistics, error) { - space, err := storageCache.GetOrCreateSpace(segment.ID, syncmgr.SpaceCreatorFunc(segment.ID, schema, storageCache.ArrowSchema())) - if err != nil { - return nil, err - } - - getResult := func(stats []*storage.PrimaryKeyStats) []*storage.PkStatistics { - result := make([]*storage.PkStatistics, 0, len(stats)) - for _, stat := range stats { - pkStat := &storage.PkStatistics{ - PkFilter: stat.BF, - MinPK: stat.MinPk, - MaxPK: stat.MaxPk, - } - result = append(result, pkStat) - } - return result - } - - blobs := space.StatisticsBlobs() - deserBlobs := make([]*storage.Blob, 0) - for _, b := range blobs { - if b.Name == storage.CompoundStatsType.LogIdx() { - blobData := make([]byte, b.Size) - _, err = space.ReadBlob(b.Name, blobData) - if err != nil { - return nil, err - } - stats, err := storage.DeserializeStatsList(&storage.Blob{Value: blobData}) - if err != nil { - return nil, err - } - return getResult(stats), nil - } - } - - for _, b := range blobs { - blobData := make([]byte, b.Size) - _, err = space.ReadBlob(b.Name, blobData) - if err != nil { - return nil, err - } - deserBlobs = append(deserBlobs, &storage.Blob{Value: blobData}) - } - stats, err := storage.DeserializeStats(deserBlobs) - if err != nil { - return nil, err - } - return getResult(stats), nil -} diff --git a/internal/datanode/importv2/util.go b/internal/datanode/importv2/util.go index c2679e3037..d88305d856 100644 --- a/internal/datanode/importv2/util.go +++ b/internal/datanode/importv2/util.go @@ -32,7 +32,6 @@ import ( "github.com/milvus-io/milvus/internal/flushcommon/metacache" "github.com/milvus-io/milvus/internal/flushcommon/syncmgr" "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" @@ -52,10 +51,6 @@ func NewSyncTask(ctx context.Context, insertData *storage.InsertData, deleteData *storage.DeleteData, ) (syncmgr.Task, error) { - if params.Params.CommonCfg.EnableStorageV2.GetAsBool() { - return nil, merr.WrapErrImportFailed("storage v2 is not supported") // TODO: dyh, resolve storage v2 - } - metaCache := metaCaches[vchannel] if _, ok := metaCache.GetSegmentByID(segmentID); !ok { metaCache.AddSegment(&datapb.SegmentInfo{ diff --git a/internal/flushcommon/metacache/storagev2_cache.go b/internal/flushcommon/metacache/storagev2_cache.go deleted file mode 100644 index 1e11fe5f60..0000000000 --- a/internal/flushcommon/metacache/storagev2_cache.go +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package metacache - -import ( - "sync" - - "github.com/apache/arrow/go/v12/arrow" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus/internal/util/typeutil" -) - -type StorageV2Cache struct { - arrowSchema *arrow.Schema - spaceMu sync.Mutex - spaces map[int64]*milvus_storage.Space -} - -func (s *StorageV2Cache) ArrowSchema() *arrow.Schema { - return s.arrowSchema -} - -func (s *StorageV2Cache) GetOrCreateSpace(segmentID int64, creator func() (*milvus_storage.Space, error)) (*milvus_storage.Space, error) { - s.spaceMu.Lock() - defer s.spaceMu.Unlock() - space, ok := s.spaces[segmentID] - if ok { - return space, nil - } - space, err := creator() - if err != nil { - return nil, err - } - s.spaces[segmentID] = space - return space, nil -} - -// only for unit test -func (s *StorageV2Cache) SetSpace(segmentID int64, space *milvus_storage.Space) { - s.spaceMu.Lock() - defer s.spaceMu.Unlock() - s.spaces[segmentID] = space -} - -func NewStorageV2Cache(schema *schemapb.CollectionSchema) (*StorageV2Cache, error) { - arrowSchema, err := typeutil.ConvertToArrowSchema(schema.Fields) - if err != nil { - return nil, err - } - return &StorageV2Cache{ - arrowSchema: arrowSchema, - spaces: make(map[int64]*milvus_storage.Space), - }, nil -} diff --git a/internal/flushcommon/pipeline/data_sync_service.go b/internal/flushcommon/pipeline/data_sync_service.go index 0e12ad64ce..ba60584445 100644 --- a/internal/flushcommon/pipeline/data_sync_service.go +++ b/internal/flushcommon/pipeline/data_sync_service.go @@ -30,7 +30,6 @@ import ( "github.com/milvus-io/milvus/internal/flushcommon/syncmgr" "github.com/milvus-io/milvus/internal/flushcommon/writebuffer" "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/flowgraph" "github.com/milvus-io/milvus/pkg/log" @@ -129,12 +128,12 @@ func (dsService *DataSyncService) GetMetaCache() metacache.MetaCache { return dsService.metacache } -func getMetaCacheWithTickler(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, tickler *util.Tickler, unflushed, flushed []*datapb.SegmentInfo, storageV2Cache *metacache.StorageV2Cache) (metacache.MetaCache, error) { +func getMetaCacheWithTickler(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, tickler *util.Tickler, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) { tickler.SetTotal(int32(len(unflushed) + len(flushed))) - return initMetaCache(initCtx, storageV2Cache, params.ChunkManager, info, tickler, unflushed, flushed) + return initMetaCache(initCtx, params.ChunkManager, info, tickler, unflushed, flushed) } -func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2Cache, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ Inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) { +func initMetaCache(initCtx context.Context, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ Inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) { // tickler will update addSegment progress to watchInfo futures := make([]*conc.Future[any], 0, len(unflushed)+len(flushed)) segmentPks := typeutil.NewConcurrentMap[int64, []*storage.PkStatistics]() @@ -152,11 +151,7 @@ func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2C future := io.GetOrCreateStatsPool().Submit(func() (any, error) { var stats []*storage.PkStatistics var err error - if params.Params.CommonCfg.EnableStorageV2.GetAsBool() { - stats, err = compaction.LoadStatsV2(storageV2Cache, segment, info.GetSchema()) - } else { - stats, err = compaction.LoadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetStatslogs()) - } + stats, err = compaction.LoadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetStatslogs()) if err != nil { return nil, err } @@ -190,7 +185,7 @@ func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2C return metacache, nil } -func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, unflushed, flushed []*datapb.SegmentInfo) (*DataSyncService, error) { +func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, unflushed, flushed []*datapb.SegmentInfo) (*DataSyncService, error) { var ( channelName = info.GetVchan().GetChannelName() collectionID = info.GetVchan().GetCollectionID() @@ -204,7 +199,7 @@ func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, serverID: params.Session.ServerID, } - err := params.WriteBufferManager.Register(channelName, metacache, storageV2Cache, + err := params.WriteBufferManager.Register(channelName, metacache, writebuffer.WithMetaWriter(syncmgr.BrokerMetaWriter(params.Broker, config.serverID)), writebuffer.WithIDAllocator(params.Allocator)) if err != nil { @@ -287,21 +282,13 @@ func NewDataSyncService(initCtx context.Context, pipelineParams *util.PipelinePa return nil, err } - var storageCache *metacache.StorageV2Cache - if params.Params.CommonCfg.EnableStorageV2.GetAsBool() { - storageCache, err = metacache.NewStorageV2Cache(info.Schema) - if err != nil { - return nil, err - } - } - // init metaCache meta - metaCache, err := getMetaCacheWithTickler(initCtx, pipelineParams, info, tickler, unflushedSegmentInfos, flushedSegmentInfos, storageCache) + metaCache, err := getMetaCacheWithTickler(initCtx, pipelineParams, info, tickler, unflushedSegmentInfos, flushedSegmentInfos) if err != nil { return nil, err } - return getServiceWithChannel(initCtx, pipelineParams, info, metaCache, storageCache, unflushedSegmentInfos, flushedSegmentInfos) + return getServiceWithChannel(initCtx, pipelineParams, info, metaCache, unflushedSegmentInfos, flushedSegmentInfos) } func NewDataSyncServiceWithMetaCache(metaCache metacache.MetaCache) *DataSyncService { diff --git a/internal/flushcommon/pipeline/data_sync_service_test.go b/internal/flushcommon/pipeline/data_sync_service_test.go index 9a5f3c2b1c..357fbbcf4a 100644 --- a/internal/flushcommon/pipeline/data_sync_service_test.go +++ b/internal/flushcommon/pipeline/data_sync_service_test.go @@ -289,7 +289,7 @@ func TestGetChannelWithTickler(t *testing.T) { }, } - metaCache, err := getMetaCacheWithTickler(context.TODO(), pipelineParams, info, util.NewTickler(), unflushed, flushed, nil) + metaCache, err := getMetaCacheWithTickler(context.TODO(), pipelineParams, info, util.NewTickler(), unflushed, flushed) assert.NoError(t, err) assert.NotNil(t, metaCache) assert.Equal(t, int64(1), metaCache.Collection()) diff --git a/internal/flushcommon/syncmgr/meta_writer.go b/internal/flushcommon/syncmgr/meta_writer.go index 97933988dd..19853f329d 100644 --- a/internal/flushcommon/syncmgr/meta_writer.go +++ b/internal/flushcommon/syncmgr/meta_writer.go @@ -20,7 +20,6 @@ import ( // MetaWriter is the interface for SyncManager to write segment sync meta. type MetaWriter interface { UpdateSync(context.Context, *SyncTask) error - UpdateSyncV2(*SyncTaskV2) error DropChannel(context.Context, string) error } @@ -138,82 +137,6 @@ func (b *brokerMetaWriter) UpdateSync(ctx context.Context, pack *SyncTask) error return nil } -func (b *brokerMetaWriter) UpdateSyncV2(pack *SyncTaskV2) error { - checkPoints := []*datapb.CheckPoint{} - - // only current segment checkpoint info, - segment, ok := pack.metacache.GetSegmentByID(pack.segmentID) - if !ok { - return merr.WrapErrSegmentNotFound(pack.segmentID) - } - checkPoints = append(checkPoints, &datapb.CheckPoint{ - SegmentID: pack.segmentID, - NumOfRows: segment.FlushedRows() + pack.batchSize, - Position: pack.checkpoint, - }) - - startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing), - metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { - return &datapb.SegmentStartPosition{ - SegmentID: info.SegmentID(), - StartPosition: info.StartPosition(), - } - }) - log.Info("SaveBinlogPath", - zap.Int64("SegmentID", pack.segmentID), - zap.Int64("CollectionID", pack.collectionID), - zap.Any("startPos", startPos), - zap.Any("checkPoints", checkPoints), - zap.String("vChannelName", pack.channelName), - ) - - req := &datapb.SaveBinlogPathsRequest{ - Base: commonpbutil.NewMsgBase( - commonpbutil.WithSourceID(b.serverID), - ), - SegmentID: pack.segmentID, - CollectionID: pack.collectionID, - - CheckPoints: checkPoints, - StorageVersion: pack.storageVersion, - - StartPositions: startPos, - Flushed: pack.isFlush, - Dropped: pack.isDrop, - Channel: pack.channelName, - } - err := retry.Do(context.Background(), func() error { - err := b.broker.SaveBinlogPaths(context.Background(), req) - // Segment not found during stale segment flush. Segment might get compacted already. - // Stop retry and still proceed to the end, ignoring this error. - if !pack.isFlush && errors.Is(err, merr.ErrSegmentNotFound) { - log.Warn("stale segment not found, could be compacted", - zap.Int64("segmentID", pack.segmentID)) - log.Warn("failed to SaveBinlogPaths", - zap.Int64("segmentID", pack.segmentID), - zap.Error(err)) - return nil - } - // meta error, datanode handles a virtual channel does not belong here - if errors.IsAny(err, merr.ErrSegmentNotFound, merr.ErrChannelNotFound) { - log.Warn("meta error found, skip sync and start to drop virtual channel", zap.String("channel", pack.channelName)) - return nil - } - - if err != nil { - return err - } - - return nil - }, b.opts...) - if err != nil { - log.Warn("failed to SaveBinlogPaths", - zap.Int64("segmentID", pack.segmentID), - zap.Error(err)) - } - return err -} - func (b *brokerMetaWriter) DropChannel(ctx context.Context, channelName string) error { err := retry.Handle(ctx, func() (bool, error) { status, err := b.broker.DropVirtualChannel(context.Background(), &datapb.DropVirtualChannelRequest{ diff --git a/internal/flushcommon/syncmgr/meta_writer_test.go b/internal/flushcommon/syncmgr/meta_writer_test.go index 6266d13cee..479b9fb375 100644 --- a/internal/flushcommon/syncmgr/meta_writer_test.go +++ b/internal/flushcommon/syncmgr/meta_writer_test.go @@ -67,34 +67,6 @@ func (s *MetaWriterSuite) TestReturnError() { s.Error(err) } -func (s *MetaWriterSuite) TestNormalSaveV2() { - s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil) - - bfs := metacache.NewBloomFilterSet() - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) - metacache.UpdateNumOfRows(1000)(seg) - s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) - task := NewSyncTaskV2() - task.WithMetaCache(s.metacache) - err := s.writer.UpdateSyncV2(task) - s.NoError(err) -} - -func (s *MetaWriterSuite) TestReturnErrorV2() { - s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(errors.New("mocked")) - - bfs := metacache.NewBloomFilterSet() - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) - metacache.UpdateNumOfRows(1000)(seg) - s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) - task := NewSyncTaskV2() - task.WithMetaCache(s.metacache) - err := s.writer.UpdateSyncV2(task) - s.Error(err) -} - func TestMetaWriter(t *testing.T) { suite.Run(t, new(MetaWriterSuite)) } diff --git a/internal/flushcommon/syncmgr/mock_meta_writer.go b/internal/flushcommon/syncmgr/mock_meta_writer.go index 7d64d0fe59..bacc91649a 100644 --- a/internal/flushcommon/syncmgr/mock_meta_writer.go +++ b/internal/flushcommon/syncmgr/mock_meta_writer.go @@ -107,48 +107,6 @@ func (_c *MockMetaWriter_UpdateSync_Call) RunAndReturn(run func(context.Context, return _c } -// UpdateSyncV2 provides a mock function with given fields: _a0 -func (_m *MockMetaWriter) UpdateSyncV2(_a0 *SyncTaskV2) error { - ret := _m.Called(_a0) - - var r0 error - if rf, ok := ret.Get(0).(func(*SyncTaskV2) error); ok { - r0 = rf(_a0) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// MockMetaWriter_UpdateSyncV2_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateSyncV2' -type MockMetaWriter_UpdateSyncV2_Call struct { - *mock.Call -} - -// UpdateSyncV2 is a helper method to define mock.On call -// - _a0 *SyncTaskV2 -func (_e *MockMetaWriter_Expecter) UpdateSyncV2(_a0 interface{}) *MockMetaWriter_UpdateSyncV2_Call { - return &MockMetaWriter_UpdateSyncV2_Call{Call: _e.mock.On("UpdateSyncV2", _a0)} -} - -func (_c *MockMetaWriter_UpdateSyncV2_Call) Run(run func(_a0 *SyncTaskV2)) *MockMetaWriter_UpdateSyncV2_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*SyncTaskV2)) - }) - return _c -} - -func (_c *MockMetaWriter_UpdateSyncV2_Call) Return(_a0 error) *MockMetaWriter_UpdateSyncV2_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockMetaWriter_UpdateSyncV2_Call) RunAndReturn(run func(*SyncTaskV2) error) *MockMetaWriter_UpdateSyncV2_Call { - _c.Call.Return(run) - return _c -} - // NewMockMetaWriter creates a new instance of MockMetaWriter. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewMockMetaWriter(t interface { diff --git a/internal/flushcommon/syncmgr/mock_task.go b/internal/flushcommon/syncmgr/mock_task.go index 7f4f59b7a1..6087e5ba3e 100644 --- a/internal/flushcommon/syncmgr/mock_task.go +++ b/internal/flushcommon/syncmgr/mock_task.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.30.1. DO NOT EDIT. +// Code generated by mockery v2.32.4. DO NOT EDIT. package syncmgr diff --git a/internal/flushcommon/syncmgr/storage_v2_serializer.go b/internal/flushcommon/syncmgr/storage_v2_serializer.go deleted file mode 100644 index 8147daad94..0000000000 --- a/internal/flushcommon/syncmgr/storage_v2_serializer.go +++ /dev/null @@ -1,256 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package syncmgr - -import ( - "context" - "fmt" - - "github.com/apache/arrow/go/v12/arrow" - "github.com/apache/arrow/go/v12/arrow/array" - "github.com/apache/arrow/go/v12/arrow/memory" - "go.uber.org/zap" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" - "github.com/milvus-io/milvus-storage/go/storage/schema" - "github.com/milvus-io/milvus/internal/allocator" - "github.com/milvus-io/milvus/internal/flushcommon/metacache" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/querycoordv2/params" - "github.com/milvus-io/milvus/internal/storage" - iTypeutil "github.com/milvus-io/milvus/internal/util/typeutil" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/metrics" - "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/milvus-io/milvus/pkg/util/paramtable" - "github.com/milvus-io/milvus/pkg/util/timerecord" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -type storageV2Serializer struct { - *storageV1Serializer - - arrowSchema *arrow.Schema - storageV2Cache *metacache.StorageV2Cache - inCodec *storage.InsertCodec - metacache metacache.MetaCache -} - -func NewStorageV2Serializer( - storageV2Cache *metacache.StorageV2Cache, - allocator allocator.Interface, - metacache metacache.MetaCache, - metaWriter MetaWriter, -) (*storageV2Serializer, error) { - v1Serializer, err := NewStorageSerializer(allocator, metacache, metaWriter) - if err != nil { - return nil, err - } - - return &storageV2Serializer{ - storageV1Serializer: v1Serializer, - storageV2Cache: storageV2Cache, - arrowSchema: storageV2Cache.ArrowSchema(), - metacache: metacache, - }, nil -} - -func (s *storageV2Serializer) EncodeBuffer(ctx context.Context, pack *SyncPack) (Task, error) { - task := NewSyncTaskV2() - tr := timerecord.NewTimeRecorder("storage_serializer_v2") - metricSegLevel := pack.level.String() - - space, err := s.storageV2Cache.GetOrCreateSpace(pack.segmentID, SpaceCreatorFunc(pack.segmentID, s.schema, s.arrowSchema)) - if err != nil { - log.Warn("failed to get or create space", zap.Error(err)) - return nil, err - } - - task.space = space - if len(pack.insertData) > 0 { - insertReader, err := s.serializeInsertData(pack) - if err != nil { - log.Warn("failed to serialize insert data with storagev2", zap.Error(err)) - return nil, err - } - - task.reader = insertReader - - singlePKStats, batchStatsBlob, err := s.serializeStatslog(pack) - if err != nil { - log.Warn("failed to serialized statslog", zap.Error(err)) - return nil, err - } - - task.statsBlob = batchStatsBlob - s.metacache.UpdateSegments(metacache.RollStats(singlePKStats), metacache.WithSegmentIDs(pack.segmentID)) - } - - if pack.isFlush { - if pack.level != datapb.SegmentLevel_L0 { - mergedStatsBlob, err := s.serializeMergedPkStats(pack) - if err != nil { - log.Warn("failed to serialize merged stats log", zap.Error(err)) - return nil, err - } - - task.mergedStatsBlob = mergedStatsBlob - } - task.WithFlush() - } - - if pack.deltaData != nil { - deltaReader, err := s.serializeDeltaData(pack) - if err != nil { - log.Warn("failed to serialize delta data", zap.Error(err)) - return nil, err - } - task.deleteReader = deltaReader - } - - if pack.isDrop { - task.WithDrop() - } - - s.setTaskMeta(task, pack) - metrics.DataNodeEncodeBufferLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metricSegLevel).Observe(float64(tr.RecordSpan().Milliseconds())) - return task, nil -} - -func (s *storageV2Serializer) setTaskMeta(task *SyncTaskV2, pack *SyncPack) { - task.WithCollectionID(pack.collectionID). - WithPartitionID(pack.partitionID). - WithChannelName(pack.channelName). - WithSegmentID(pack.segmentID). - WithBatchSize(pack.batchSize). - WithSchema(s.metacache.Schema()). - WithStartPosition(pack.startPosition). - WithCheckpoint(pack.checkpoint). - WithLevel(pack.level). - WithTimeRange(pack.tsFrom, pack.tsTo). - WithMetaCache(s.metacache). - WithMetaWriter(s.metaWriter). - WithFailureCallback(func(err error) { - // TODO could change to unsub channel in the future - panic(err) - }) -} - -func (s *storageV2Serializer) serializeInsertData(pack *SyncPack) (array.RecordReader, error) { - builder := array.NewRecordBuilder(memory.DefaultAllocator, s.arrowSchema) - defer builder.Release() - - for _, chunk := range pack.insertData { - if err := iTypeutil.BuildRecord(builder, chunk, s.schema.GetFields()); err != nil { - return nil, err - } - } - - rec := builder.NewRecord() - defer rec.Release() - - itr, err := array.NewRecordReader(s.arrowSchema, []arrow.Record{rec}) - if err != nil { - return nil, err - } - itr.Retain() - - return itr, nil -} - -func (s *storageV2Serializer) serializeDeltaData(pack *SyncPack) (array.RecordReader, error) { - fields := make([]*schemapb.FieldSchema, 0, 2) - tsField := &schemapb.FieldSchema{ - FieldID: common.TimeStampField, - Name: common.TimeStampFieldName, - DataType: schemapb.DataType_Int64, - } - fields = append(fields, s.pkField, tsField) - - deltaArrowSchema, err := iTypeutil.ConvertToArrowSchema(fields) - if err != nil { - return nil, err - } - - builder := array.NewRecordBuilder(memory.DefaultAllocator, deltaArrowSchema) - defer builder.Release() - - switch s.pkField.GetDataType() { - case schemapb.DataType_Int64: - pb := builder.Field(0).(*array.Int64Builder) - for _, pk := range pack.deltaData.Pks { - pb.Append(pk.GetValue().(int64)) - } - case schemapb.DataType_VarChar: - pb := builder.Field(0).(*array.StringBuilder) - for _, pk := range pack.deltaData.Pks { - pb.Append(pk.GetValue().(string)) - } - default: - return nil, merr.WrapErrParameterInvalidMsg("unexpected pk type %v", s.pkField.GetDataType()) - } - - for _, ts := range pack.deltaData.Tss { - builder.Field(1).(*array.Int64Builder).Append(int64(ts)) - } - - rec := builder.NewRecord() - defer rec.Release() - - reader, err := array.NewRecordReader(deltaArrowSchema, []arrow.Record{rec}) - if err != nil { - return nil, err - } - reader.Retain() - - return reader, nil -} - -func SpaceCreatorFunc(segmentID int64, collSchema *schemapb.CollectionSchema, arrowSchema *arrow.Schema) func() (*milvus_storage.Space, error) { - return func() (*milvus_storage.Space, error) { - url, err := iTypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segmentID) - if err != nil { - return nil, err - } - - pkSchema, err := typeutil.GetPrimaryFieldSchema(collSchema) - if err != nil { - return nil, err - } - vecSchema, err := typeutil.GetVectorFieldSchema(collSchema) - if err != nil { - return nil, err - } - space, err := milvus_storage.Open( - url, - options.NewSpaceOptionBuilder(). - SetSchema(schema.NewSchema( - arrowSchema, - &schema.SchemaOptions{ - PrimaryColumn: pkSchema.Name, - VectorColumn: vecSchema.Name, - VersionColumn: common.TimeStampFieldName, - }, - )). - Build(), - ) - return space, err - } -} diff --git a/internal/flushcommon/syncmgr/storage_v2_serializer_test.go b/internal/flushcommon/syncmgr/storage_v2_serializer_test.go deleted file mode 100644 index 27ccedb9ee..0000000000 --- a/internal/flushcommon/syncmgr/storage_v2_serializer_test.go +++ /dev/null @@ -1,366 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package syncmgr - -import ( - "context" - "fmt" - "math/rand" - "testing" - "time" - - "github.com/samber/lo" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" - "github.com/milvus-io/milvus-storage/go/storage/schema" - "github.com/milvus-io/milvus/internal/allocator" - "github.com/milvus-io/milvus/internal/flushcommon/metacache" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/util/paramtable" - "github.com/milvus-io/milvus/pkg/util/tsoutil" -) - -type StorageV2SerializerSuite struct { - suite.Suite - - collectionID int64 - partitionID int64 - segmentID int64 - channelName string - - schema *schemapb.CollectionSchema - storageCache *metacache.StorageV2Cache - mockAllocator *allocator.MockAllocator - mockCache *metacache.MockMetaCache - mockMetaWriter *MockMetaWriter - - serializer *storageV2Serializer -} - -func (s *StorageV2SerializerSuite) SetupSuite() { - paramtable.Get().Init(paramtable.NewBaseTable()) - - s.collectionID = rand.Int63n(100) + 1000 - s.partitionID = rand.Int63n(100) + 2000 - s.segmentID = rand.Int63n(1000) + 10000 - s.channelName = fmt.Sprintf("by-dev-rootcoord-dml0_%d_v1", s.collectionID) - s.schema = &schemapb.CollectionSchema{ - Name: "sync_task_test_col", - Fields: []*schemapb.FieldSchema{ - {FieldID: common.RowIDField, DataType: schemapb.DataType_Int64, Name: common.RowIDFieldName}, - {FieldID: common.TimeStampField, DataType: schemapb.DataType_Int64, Name: common.TimeStampFieldName}, - { - FieldID: 100, - Name: "pk", - DataType: schemapb.DataType_Int64, - IsPrimaryKey: true, - }, - { - FieldID: 101, - Name: "vector", - DataType: schemapb.DataType_FloatVector, - TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "128"}, - }, - }, - }, - } - - s.mockAllocator = allocator.NewMockAllocator(s.T()) - s.mockCache = metacache.NewMockMetaCache(s.T()) - s.mockMetaWriter = NewMockMetaWriter(s.T()) -} - -func (s *StorageV2SerializerSuite) SetupTest() { - storageCache, err := metacache.NewStorageV2Cache(s.schema) - s.Require().NoError(err) - s.storageCache = storageCache - - s.mockCache.EXPECT().Collection().Return(s.collectionID) - s.mockCache.EXPECT().Schema().Return(s.schema) - - s.serializer, err = NewStorageV2Serializer(storageCache, s.mockAllocator, s.mockCache, s.mockMetaWriter) - s.Require().NoError(err) -} - -func (s *StorageV2SerializerSuite) getSpace() *milvus_storage.Space { - tmpDir := s.T().TempDir() - space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder(). - SetSchema(schema.NewSchema(s.storageCache.ArrowSchema(), &schema.SchemaOptions{ - PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName, - })).Build()) - s.Require().NoError(err) - return space -} - -func (s *StorageV2SerializerSuite) getBasicPack() *SyncPack { - pack := &SyncPack{} - - pack.WithCollectionID(s.collectionID). - WithPartitionID(s.partitionID). - WithSegmentID(s.segmentID). - WithChannelName(s.channelName). - WithCheckpoint(&msgpb.MsgPosition{ - Timestamp: 1000, - ChannelName: s.channelName, - }) - - return pack -} - -func (s *StorageV2SerializerSuite) getEmptyInsertBuffer() *storage.InsertData { - buf, err := storage.NewInsertData(s.schema) - s.Require().NoError(err) - - return buf -} - -func (s *StorageV2SerializerSuite) getInsertBuffer() *storage.InsertData { - buf := s.getEmptyInsertBuffer() - - // generate data - for i := 0; i < 10; i++ { - data := make(map[storage.FieldID]any) - data[common.RowIDField] = int64(i + 1) - data[common.TimeStampField] = int64(i + 1) - data[100] = int64(i + 1) - vector := lo.RepeatBy(128, func(_ int) float32 { - return rand.Float32() - }) - data[101] = vector - err := buf.Append(data) - s.Require().NoError(err) - } - return buf -} - -func (s *StorageV2SerializerSuite) getDeleteBuffer() *storage.DeleteData { - buf := &storage.DeleteData{} - for i := 0; i < 10; i++ { - pk := storage.NewInt64PrimaryKey(int64(i + 1)) - ts := tsoutil.ComposeTSByTime(time.Now(), 0) - buf.Append(pk, ts) - } - return buf -} - -func (s *StorageV2SerializerSuite) getDeleteBufferZeroTs() *storage.DeleteData { - buf := &storage.DeleteData{} - for i := 0; i < 10; i++ { - pk := storage.NewInt64PrimaryKey(int64(i + 1)) - buf.Append(pk, 0) - } - return buf -} - -func (s *StorageV2SerializerSuite) getBfs() *metacache.BloomFilterSet { - bfs := metacache.NewBloomFilterSet() - fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{ - FieldID: 101, - Name: "ID", - IsPrimaryKey: true, - DataType: schemapb.DataType_Int64, - }, 16) - s.Require().NoError(err) - - ids := []int64{1, 2, 3, 4, 5, 6, 7} - for _, id := range ids { - err = fd.AppendRow(id) - s.Require().NoError(err) - } - - bfs.UpdatePKRange(fd) - return bfs -} - -func (s *StorageV2SerializerSuite) TestSerializeInsert() { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - s.storageCache.SetSpace(s.segmentID, s.getSpace()) - - s.Run("no_data", func() { - pack := s.getBasicPack() - pack.WithTimeRange(50, 100) - pack.WithDrop() - - task, err := s.serializer.EncodeBuffer(ctx, pack) - s.NoError(err) - taskV1, ok := task.(*SyncTaskV2) - s.Require().True(ok) - s.Equal(s.collectionID, taskV1.collectionID) - s.Equal(s.partitionID, taskV1.partitionID) - s.Equal(s.channelName, taskV1.channelName) - s.Equal(&msgpb.MsgPosition{ - Timestamp: 1000, - ChannelName: s.channelName, - }, taskV1.checkpoint) - s.EqualValues(50, taskV1.tsFrom) - s.EqualValues(100, taskV1.tsTo) - s.True(taskV1.isDrop) - }) - - s.Run("empty_insert_data", func() { - pack := s.getBasicPack() - pack.WithTimeRange(50, 100) - pack.WithInsertData([]*storage.InsertData{s.getEmptyInsertBuffer()}).WithBatchSize(0) - - _, err := s.serializer.EncodeBuffer(ctx, pack) - s.Error(err) - }) - - s.Run("with_normal_data", func() { - pack := s.getBasicPack() - pack.WithTimeRange(50, 100) - pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10) - - s.mockCache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return().Once() - - task, err := s.serializer.EncodeBuffer(ctx, pack) - s.NoError(err) - - taskV2, ok := task.(*SyncTaskV2) - s.Require().True(ok) - s.Equal(s.collectionID, taskV2.collectionID) - s.Equal(s.partitionID, taskV2.partitionID) - s.Equal(s.channelName, taskV2.channelName) - s.Equal(&msgpb.MsgPosition{ - Timestamp: 1000, - ChannelName: s.channelName, - }, taskV2.checkpoint) - s.EqualValues(50, taskV2.tsFrom) - s.EqualValues(100, taskV2.tsTo) - s.NotNil(taskV2.reader) - s.NotNil(taskV2.statsBlob) - }) - - s.Run("with_flush_segment_not_found", func() { - pack := s.getBasicPack() - pack.WithFlush() - - s.mockCache.EXPECT().GetSegmentByID(s.segmentID).Return(nil, false).Once() - _, err := s.serializer.EncodeBuffer(ctx, pack) - s.Error(err) - }) - - s.Run("with_flush", func() { - pack := s.getBasicPack() - pack.WithTimeRange(50, 100) - pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10) - pack.WithFlush() - - bfs := s.getBfs() - segInfo := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) - metacache.UpdateNumOfRows(1000)(segInfo) - s.mockCache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Run(func(action metacache.SegmentAction, filters ...metacache.SegmentFilter) { - action(segInfo) - }).Return().Once() - s.mockCache.EXPECT().GetSegmentByID(s.segmentID).Return(segInfo, true).Once() - - task, err := s.serializer.EncodeBuffer(ctx, pack) - s.NoError(err) - - taskV2, ok := task.(*SyncTaskV2) - s.Require().True(ok) - s.Equal(s.collectionID, taskV2.collectionID) - s.Equal(s.partitionID, taskV2.partitionID) - s.Equal(s.channelName, taskV2.channelName) - s.Equal(&msgpb.MsgPosition{ - Timestamp: 1000, - ChannelName: s.channelName, - }, taskV2.checkpoint) - s.EqualValues(50, taskV2.tsFrom) - s.EqualValues(100, taskV2.tsTo) - s.NotNil(taskV2.mergedStatsBlob) - }) -} - -func (s *StorageV2SerializerSuite) TestSerializeDelete() { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - s.Run("serialize_failed", func() { - pkField := s.serializer.pkField - s.serializer.pkField = &schemapb.FieldSchema{} - defer func() { - s.serializer.pkField = pkField - }() - pack := s.getBasicPack() - pack.WithDeleteData(s.getDeleteBufferZeroTs()) - pack.WithTimeRange(50, 100) - - _, err := s.serializer.EncodeBuffer(ctx, pack) - s.Error(err) - }) - - s.Run("serialize_failed_bad_pk", func() { - pkField := s.serializer.pkField - s.serializer.pkField = &schemapb.FieldSchema{ - DataType: schemapb.DataType_Array, - } - defer func() { - s.serializer.pkField = pkField - }() - pack := s.getBasicPack() - pack.WithDeleteData(s.getDeleteBufferZeroTs()) - pack.WithTimeRange(50, 100) - - _, err := s.serializer.EncodeBuffer(ctx, pack) - s.Error(err) - }) - - s.Run("serialize_normal", func() { - pack := s.getBasicPack() - pack.WithDeleteData(s.getDeleteBuffer()) - pack.WithTimeRange(50, 100) - - task, err := s.serializer.EncodeBuffer(ctx, pack) - s.NoError(err) - - taskV2, ok := task.(*SyncTaskV2) - s.Require().True(ok) - s.Equal(s.collectionID, taskV2.collectionID) - s.Equal(s.partitionID, taskV2.partitionID) - s.Equal(s.channelName, taskV2.channelName) - s.Equal(&msgpb.MsgPosition{ - Timestamp: 1000, - ChannelName: s.channelName, - }, taskV2.checkpoint) - s.EqualValues(50, taskV2.tsFrom) - s.EqualValues(100, taskV2.tsTo) - s.NotNil(taskV2.deleteReader) - }) -} - -func (s *StorageV2SerializerSuite) TestBadSchema() { - mockCache := metacache.NewMockMetaCache(s.T()) - mockCache.EXPECT().Collection().Return(s.collectionID).Once() - mockCache.EXPECT().Schema().Return(&schemapb.CollectionSchema{}).Once() - _, err := NewStorageV2Serializer(s.storageCache, s.mockAllocator, mockCache, s.mockMetaWriter) - s.Error(err) -} - -func TestStorageV2Serializer(t *testing.T) { - suite.Run(t, new(StorageV2SerializerSuite)) -} diff --git a/internal/flushcommon/syncmgr/sync_manager.go b/internal/flushcommon/syncmgr/sync_manager.go index bbf56d46f1..cac7d76827 100644 --- a/internal/flushcommon/syncmgr/sync_manager.go +++ b/internal/flushcommon/syncmgr/sync_manager.go @@ -99,7 +99,6 @@ func (mgr *syncManager) SyncData(ctx context.Context, task Task, callbacks ...fu switch t := task.(type) { case *SyncTask: t.WithChunkManager(mgr.chunkManager) - case *SyncTaskV2: } return mgr.safeSubmitTask(ctx, task, callbacks...) diff --git a/internal/flushcommon/syncmgr/taskv2.go b/internal/flushcommon/syncmgr/taskv2.go deleted file mode 100644 index 820ded3a0c..0000000000 --- a/internal/flushcommon/syncmgr/taskv2.go +++ /dev/null @@ -1,235 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package syncmgr - -import ( - "context" - - "github.com/apache/arrow/go/v12/arrow" - "github.com/apache/arrow/go/v12/arrow/array" - "go.uber.org/zap" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" - "github.com/milvus-io/milvus/internal/allocator" - "github.com/milvus-io/milvus/internal/flushcommon/metacache" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/milvus-io/milvus/pkg/util/retry" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -type SyncTaskV2 struct { - *SyncTask - arrowSchema *arrow.Schema - reader array.RecordReader - statsBlob *storage.Blob - deleteReader array.RecordReader - storageVersion int64 - space *milvus_storage.Space - - failureCallback func(err error) -} - -func (t *SyncTaskV2) getLogger() *log.MLogger { - return log.Ctx(context.Background()).With( - zap.Int64("collectionID", t.collectionID), - zap.Int64("partitionID", t.partitionID), - zap.Int64("segmentID", t.segmentID), - zap.String("channel", t.channelName), - ) -} - -func (t *SyncTaskV2) handleError(err error) { - if t.failureCallback != nil { - t.failureCallback(err) - } -} - -func (t *SyncTaskV2) Run(ctx context.Context) error { - log := t.getLogger() - var err error - - _, ok := t.metacache.GetSegmentByID(t.segmentID) - if !ok { - log.Warn("failed to sync data, segment not found in metacache") - t.handleError(err) - return merr.WrapErrSegmentNotFound(t.segmentID) - } - - if err = t.writeSpace(); err != nil { - t.handleError(err) - return err - } - - if err = t.writeMeta(); err != nil { - t.handleError(err) - return err - } - - actions := []metacache.SegmentAction{metacache.FinishSyncing(t.batchSize)} - switch { - case t.isDrop: - actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Dropped)) - case t.isFlush: - actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Flushed)) - } - - t.metacache.UpdateSegments(metacache.MergeSegmentAction(actions...), metacache.WithSegmentIDs(t.segmentID)) - - return nil -} - -func (t *SyncTaskV2) writeSpace() error { - defer func() { - if t.reader != nil { - t.reader.Release() - } - if t.deleteReader != nil { - t.deleteReader.Release() - } - }() - - txn := t.space.NewTransaction() - if t.reader != nil { - txn.Write(t.reader, &options.DefaultWriteOptions) - } - if t.deleteReader != nil { - txn.Delete(t.deleteReader) - } - if t.statsBlob != nil { - txn.WriteBlob(t.statsBlob.Value, t.statsBlob.Key, false) - } - - return txn.Commit() -} - -func (t *SyncTaskV2) writeMeta() error { - t.storageVersion = t.space.GetCurrentVersion() - return t.metaWriter.UpdateSyncV2(t) -} - -func NewSyncTaskV2() *SyncTaskV2 { - return &SyncTaskV2{ - SyncTask: NewSyncTask(), - } -} - -func (t *SyncTaskV2) WithChunkManager(cm storage.ChunkManager) *SyncTaskV2 { - t.chunkManager = cm - return t -} - -func (t *SyncTaskV2) WithAllocator(allocator allocator.Interface) *SyncTaskV2 { - t.allocator = allocator - return t -} - -func (t *SyncTaskV2) WithStartPosition(start *msgpb.MsgPosition) *SyncTaskV2 { - t.startPosition = start - return t -} - -func (t *SyncTaskV2) WithCheckpoint(cp *msgpb.MsgPosition) *SyncTaskV2 { - t.checkpoint = cp - return t -} - -func (t *SyncTaskV2) WithCollectionID(collID int64) *SyncTaskV2 { - t.collectionID = collID - return t -} - -func (t *SyncTaskV2) WithPartitionID(partID int64) *SyncTaskV2 { - t.partitionID = partID - return t -} - -func (t *SyncTaskV2) WithSegmentID(segID int64) *SyncTaskV2 { - t.segmentID = segID - return t -} - -func (t *SyncTaskV2) WithChannelName(chanName string) *SyncTaskV2 { - t.channelName = chanName - return t -} - -func (t *SyncTaskV2) WithSchema(schema *schemapb.CollectionSchema) *SyncTaskV2 { - t.schema = schema - return t -} - -func (t *SyncTaskV2) WithTimeRange(from, to typeutil.Timestamp) *SyncTaskV2 { - t.tsFrom, t.tsTo = from, to - return t -} - -func (t *SyncTaskV2) WithFlush() *SyncTaskV2 { - t.isFlush = true - return t -} - -func (t *SyncTaskV2) WithDrop() *SyncTaskV2 { - t.isDrop = true - return t -} - -func (t *SyncTaskV2) WithMetaCache(metacache metacache.MetaCache) *SyncTaskV2 { - t.metacache = metacache - return t -} - -func (t *SyncTaskV2) WithMetaWriter(metaWriter MetaWriter) *SyncTaskV2 { - t.metaWriter = metaWriter - return t -} - -func (t *SyncTaskV2) WithWriteRetryOptions(opts ...retry.Option) *SyncTaskV2 { - t.writeRetryOpts = opts - return t -} - -func (t *SyncTaskV2) WithFailureCallback(callback func(error)) *SyncTaskV2 { - t.failureCallback = callback - return t -} - -func (t *SyncTaskV2) WithBatchSize(batchSize int64) *SyncTaskV2 { - t.batchSize = batchSize - return t -} - -func (t *SyncTaskV2) WithSpace(space *milvus_storage.Space) *SyncTaskV2 { - t.space = space - return t -} - -func (t *SyncTaskV2) WithArrowSchema(arrowSchema *arrow.Schema) *SyncTaskV2 { - t.arrowSchema = arrowSchema - return t -} - -func (t *SyncTaskV2) WithLevel(level datapb.SegmentLevel) *SyncTaskV2 { - t.level = level - return t -} diff --git a/internal/flushcommon/syncmgr/taskv2_test.go b/internal/flushcommon/syncmgr/taskv2_test.go deleted file mode 100644 index 7ee82e259b..0000000000 --- a/internal/flushcommon/syncmgr/taskv2_test.go +++ /dev/null @@ -1,403 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package syncmgr - -import ( - "context" - "fmt" - "math/rand" - "testing" - "time" - - "github.com/apache/arrow/go/v12/arrow" - "github.com/apache/arrow/go/v12/arrow/array" - "github.com/apache/arrow/go/v12/arrow/memory" - "github.com/samber/lo" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" - "github.com/milvus-io/milvus-storage/go/storage/schema" - "github.com/milvus-io/milvus/internal/allocator" - "github.com/milvus-io/milvus/internal/datanode/broker" - "github.com/milvus-io/milvus/internal/flushcommon/metacache" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/internal/util/typeutil" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/util/paramtable" - "github.com/milvus-io/milvus/pkg/util/tsoutil" -) - -type SyncTaskSuiteV2 struct { - suite.Suite - - collectionID int64 - partitionID int64 - segmentID int64 - channelName string - - metacache *metacache.MockMetaCache - allocator *allocator.MockGIDAllocator - schema *schemapb.CollectionSchema - arrowSchema *arrow.Schema - broker *broker.MockBroker - - space *milvus_storage.Space -} - -func (s *SyncTaskSuiteV2) SetupSuite() { - paramtable.Get().Init(paramtable.NewBaseTable()) - - s.collectionID = 100 - s.partitionID = 101 - s.segmentID = 1001 - s.channelName = "by-dev-rootcoord-dml_0_100v0" - - s.schema = &schemapb.CollectionSchema{ - Name: "sync_task_test_col", - Fields: []*schemapb.FieldSchema{ - {FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64}, - { - FieldID: 100, - Name: "pk", - DataType: schemapb.DataType_Int64, - IsPrimaryKey: true, - }, - { - FieldID: 101, - Name: "vector", - DataType: schemapb.DataType_FloatVector, - TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "128"}, - }, - }, - }, - } - - arrowSchema, err := typeutil.ConvertToArrowSchema(s.schema.Fields) - s.NoError(err) - s.arrowSchema = arrowSchema -} - -func (s *SyncTaskSuiteV2) SetupTest() { - s.allocator = allocator.NewMockGIDAllocator() - s.allocator.AllocF = func(count uint32) (int64, int64, error) { - return time.Now().Unix(), int64(count), nil - } - s.allocator.AllocOneF = func() (allocator.UniqueID, error) { - return time.Now().Unix(), nil - } - - s.broker = broker.NewMockBroker(s.T()) - s.metacache = metacache.NewMockMetaCache(s.T()) - - tmpDir := s.T().TempDir() - space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder(). - SetSchema(schema.NewSchema(s.arrowSchema, &schema.SchemaOptions{ - PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName, - })).Build()) - s.Require().NoError(err) - s.space = space -} - -func (s *SyncTaskSuiteV2) getEmptyInsertBuffer() *storage.InsertData { - buf, err := storage.NewInsertData(s.schema) - s.Require().NoError(err) - - return buf -} - -func (s *SyncTaskSuiteV2) getInsertBuffer() *storage.InsertData { - buf := s.getEmptyInsertBuffer() - - // generate data - for i := 0; i < 10; i++ { - data := make(map[storage.FieldID]any) - data[common.RowIDField] = int64(i + 1) - data[common.TimeStampField] = int64(i + 1) - data[100] = int64(i + 1) - vector := lo.RepeatBy(128, func(_ int) float32 { - return rand.Float32() - }) - data[101] = vector - err := buf.Append(data) - s.Require().NoError(err) - } - return buf -} - -func (s *SyncTaskSuiteV2) getDeleteBuffer() *storage.DeleteData { - buf := &storage.DeleteData{} - for i := 0; i < 10; i++ { - pk := storage.NewInt64PrimaryKey(int64(i + 1)) - ts := tsoutil.ComposeTSByTime(time.Now(), 0) - buf.Append(pk, ts) - } - return buf -} - -func (s *SyncTaskSuiteV2) getDeleteBufferZeroTs() *storage.DeleteData { - buf := &storage.DeleteData{} - for i := 0; i < 10; i++ { - pk := storage.NewInt64PrimaryKey(int64(i + 1)) - buf.Append(pk, 0) - } - return buf -} - -func (s *SyncTaskSuiteV2) getSuiteSyncTask() *SyncTaskV2 { - pack := &SyncPack{} - - pack.WithCollectionID(s.collectionID). - WithPartitionID(s.partitionID). - WithSegmentID(s.segmentID). - WithChannelName(s.channelName). - WithCheckpoint(&msgpb.MsgPosition{ - Timestamp: 1000, - ChannelName: s.channelName, - }) - pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10) - pack.WithDeleteData(s.getDeleteBuffer()) - - storageCache, err := metacache.NewStorageV2Cache(s.schema) - s.Require().NoError(err) - - s.metacache.EXPECT().Collection().Return(s.collectionID) - s.metacache.EXPECT().Schema().Return(s.schema) - serializer, err := NewStorageV2Serializer(storageCache, s.allocator, s.metacache, nil) - s.Require().NoError(err) - task, err := serializer.EncodeBuffer(context.Background(), pack) - s.Require().NoError(err) - taskV2, ok := task.(*SyncTaskV2) - s.Require().True(ok) - taskV2.WithMetaCache(s.metacache) - - return taskV2 -} - -func (s *SyncTaskSuiteV2) TestRunNormal() { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil) - bfs := metacache.NewBloomFilterSet() - fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{ - FieldID: 101, - Name: "ID", - IsPrimaryKey: true, - DataType: schemapb.DataType_Int64, - }, 16) - s.Require().NoError(err) - - ids := []int64{1, 2, 3, 4, 5, 6, 7} - for _, id := range ids { - err = fd.AppendRow(id) - s.Require().NoError(err) - } - - bfs.UpdatePKRange(fd) - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) - metacache.UpdateNumOfRows(1000)(seg) - s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) - s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - - s.Run("without_insert_delete", func() { - task := s.getSuiteSyncTask() - task.WithMetaWriter(BrokerMetaWriter(s.broker, 1)) - task.WithTimeRange(50, 100) - task.WithCheckpoint(&msgpb.MsgPosition{ - ChannelName: s.channelName, - MsgID: []byte{1, 2, 3, 4}, - Timestamp: 100, - }) - - err := task.Run(ctx) - s.NoError(err) - }) - - s.Run("with_insert_delete_cp", func() { - task := s.getSuiteSyncTask() - task.WithTimeRange(50, 100) - task.WithMetaWriter(BrokerMetaWriter(s.broker, 1)) - task.WithCheckpoint(&msgpb.MsgPosition{ - ChannelName: s.channelName, - MsgID: []byte{1, 2, 3, 4}, - Timestamp: 100, - }) - - err := task.Run(ctx) - s.NoError(err) - }) -} - -func (s *SyncTaskSuiteV2) TestBuildRecord() { - fieldSchemas := []*schemapb.FieldSchema{ - {FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool}, - {FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8}, - {FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16}, - {FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32}, - {FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64}, - {FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float}, - {FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double}, - {FieldID: 8, Name: "field7", DataType: schemapb.DataType_String}, - {FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar}, - {FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}}, - {FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}}, - {FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32}, - {FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON}, - {FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}}, - } - - schema, err := typeutil.ConvertToArrowSchema(fieldSchemas) - s.NoError(err) - - b := array.NewRecordBuilder(memory.NewGoAllocator(), schema) - defer b.Release() - - data := &storage.InsertData{ - Data: map[int64]storage.FieldData{ - 1: &storage.BoolFieldData{Data: []bool{true, false}}, - 2: &storage.Int8FieldData{Data: []int8{3, 4}}, - 3: &storage.Int16FieldData{Data: []int16{3, 4}}, - 4: &storage.Int32FieldData{Data: []int32{3, 4}}, - 5: &storage.Int64FieldData{Data: []int64{3, 4}}, - 6: &storage.FloatFieldData{Data: []float32{3, 4}}, - 7: &storage.DoubleFieldData{Data: []float64{3, 4}}, - 8: &storage.StringFieldData{Data: []string{"3", "4"}}, - 9: &storage.StringFieldData{Data: []string{"3", "4"}}, - 10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8}, - 11: &storage.FloatVectorFieldData{ - Data: []float32{4, 5, 6, 7, 4, 5, 6, 7}, - Dim: 4, - }, - 12: &storage.ArrayFieldData{ - ElementType: schemapb.DataType_Int32, - Data: []*schemapb.ScalarField{ - { - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}}, - }, - }, - { - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}}, - }, - }, - }, - }, - 13: &storage.JSONFieldData{ - Data: [][]byte{ - []byte(`{"batch":2}`), - []byte(`{"key":"world"}`), - }, - }, - 14: &storage.Float16VectorFieldData{ - Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255}, - Dim: 4, - }, - }, - } - - err = typeutil.BuildRecord(b, data, fieldSchemas) - s.NoError(err) - s.EqualValues(2, b.NewRecord().NumRows()) -} - -func (s *SyncTaskSuiteV2) TestBuildRecordNullable() { - fieldSchemas := []*schemapb.FieldSchema{ - {FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool}, - {FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8}, - {FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16}, - {FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32}, - {FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64}, - {FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float}, - {FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double}, - {FieldID: 8, Name: "field7", DataType: schemapb.DataType_String}, - {FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar}, - {FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}}, - {FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}}, - {FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32}, - {FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON}, - {FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}}, - } - - schema, err := typeutil.ConvertToArrowSchema(fieldSchemas) - s.NoError(err) - - b := array.NewRecordBuilder(memory.NewGoAllocator(), schema) - defer b.Release() - - data := &storage.InsertData{ - Data: map[int64]storage.FieldData{ - 1: &storage.BoolFieldData{Data: []bool{true, false}, ValidData: []bool{true, true}}, - 2: &storage.Int8FieldData{Data: []int8{3, 4}, ValidData: []bool{true, true}}, - 3: &storage.Int16FieldData{Data: []int16{3, 4}, ValidData: []bool{true, true}}, - 4: &storage.Int32FieldData{Data: []int32{3, 4}, ValidData: []bool{true, true}}, - 5: &storage.Int64FieldData{Data: []int64{3, 4}, ValidData: []bool{true, true}}, - 6: &storage.FloatFieldData{Data: []float32{3, 4}, ValidData: []bool{true, true}}, - 7: &storage.DoubleFieldData{Data: []float64{3, 4}, ValidData: []bool{true, true}}, - 8: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}}, - 9: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}}, - 10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8}, - 11: &storage.FloatVectorFieldData{ - Data: []float32{4, 5, 6, 7, 4, 5, 6, 7}, - Dim: 4, - }, - 12: &storage.ArrayFieldData{ - ElementType: schemapb.DataType_Int32, - Data: []*schemapb.ScalarField{ - { - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}}, - }, - }, - { - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}}, - }, - }, - }, - ValidData: []bool{true, true}, - }, - 13: &storage.JSONFieldData{ - Data: [][]byte{ - []byte(`{"batch":2}`), - []byte(`{"key":"world"}`), - }, - ValidData: []bool{true, true}, - }, - 14: &storage.Float16VectorFieldData{ - Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255}, - Dim: 4, - }, - }, - } - - err = typeutil.BuildRecord(b, data, fieldSchemas) - s.NoError(err) - s.EqualValues(2, b.NewRecord().NumRows()) -} - -func TestSyncTaskV2(t *testing.T) { - suite.Run(t, new(SyncTaskSuiteV2)) -} diff --git a/internal/flushcommon/writebuffer/bf_write_buffer.go b/internal/flushcommon/writebuffer/bf_write_buffer.go index b8ecf6ffa8..3fcb5df30d 100644 --- a/internal/flushcommon/writebuffer/bf_write_buffer.go +++ b/internal/flushcommon/writebuffer/bf_write_buffer.go @@ -19,8 +19,8 @@ type bfWriteBuffer struct { metacache metacache.MetaCache } -func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) { - base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option) +func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) { + base, err := newWriteBufferBase(channel, metacache, syncMgr, option) if err != nil { return nil, err } diff --git a/internal/flushcommon/writebuffer/bf_write_buffer_test.go b/internal/flushcommon/writebuffer/bf_write_buffer_test.go index ef4ea6b97a..b8ec5a0c23 100644 --- a/internal/flushcommon/writebuffer/bf_write_buffer_test.go +++ b/internal/flushcommon/writebuffer/bf_write_buffer_test.go @@ -13,16 +13,11 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" - "github.com/milvus-io/milvus-storage/go/storage/schema" "github.com/milvus-io/milvus/internal/datanode/broker" "github.com/milvus-io/milvus/internal/flushcommon/metacache" "github.com/milvus-io/milvus/internal/flushcommon/syncmgr" "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/internal/util/typeutil" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/mq/msgstream" @@ -41,7 +36,6 @@ type BFWriteBufferSuite struct { metacacheInt64 *metacache.MockMetaCache metacacheVarchar *metacache.MockMetaCache broker *broker.MockBroker - storageV2Cache *metacache.StorageV2Cache } func (s *BFWriteBufferSuite) SetupSuite() { @@ -89,10 +83,6 @@ func (s *BFWriteBufferSuite) SetupSuite() { }, }, } - - storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema) - s.Require().NoError(err) - s.storageV2Cache = storageCache } func (s *BFWriteBufferSuite) composeInsertMsg(segmentID int64, rowCount int, dim int, pkType schemapb.DataType) ([]int64, *msgstream.InsertMsg) { @@ -201,16 +191,11 @@ func (s *BFWriteBufferSuite) SetupTest() { s.metacacheVarchar.EXPECT().Collection().Return(s.collID).Maybe() s.broker = broker.NewMockBroker(s.T()) - var err error - s.storageV2Cache, err = metacache.NewStorageV2Cache(s.collInt64Schema) - s.Require().NoError(err) } func (s *BFWriteBufferSuite) TestBufferData() { s.Run("normal_run_int64", func() { - storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema) - s.Require().NoError(err) - wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, storageCache, s.syncMgr, &writeBufferOption{}) + wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{}) s.NoError(err) seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) @@ -237,9 +222,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { }) s.Run("normal_run_varchar", func() { - storageCache, err := metacache.NewStorageV2Cache(s.collVarcharSchema) - s.Require().NoError(err) - wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, storageCache, s.syncMgr, &writeBufferOption{}) + wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, s.syncMgr, &writeBufferOption{}) s.NoError(err) seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) @@ -261,9 +244,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { }) s.Run("int_pk_type_not_match", func() { - storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema) - s.Require().NoError(err) - wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, storageCache, s.syncMgr, &writeBufferOption{}) + wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{}) s.NoError(err) seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) @@ -281,9 +262,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { }) s.Run("varchar_pk_not_match", func() { - storageCache, err := metacache.NewStorageV2Cache(s.collVarcharSchema) - s.Require().NoError(err) - wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, storageCache, s.syncMgr, &writeBufferOption{}) + wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, s.syncMgr, &writeBufferOption{}) s.NoError(err) seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) @@ -305,7 +284,7 @@ func (s *BFWriteBufferSuite) TestAutoSync() { paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1") s.Run("normal_auto_sync", func() { - wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, nil, s.syncMgr, &writeBufferOption{ + wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{ syncPolicies: []SyncPolicy{ GetFullBufferPolicy(), GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)), @@ -340,92 +319,11 @@ func (s *BFWriteBufferSuite) TestAutoSync() { }) } -func (s *BFWriteBufferSuite) TestBufferDataWithStorageV2() { - params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true") - defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false") - params.Params.CommonCfg.StorageScheme.SwapTempValue("file") - tmpDir := s.T().TempDir() - arrowSchema, err := typeutil.ConvertToArrowSchema(s.collInt64Schema.Fields) - s.Require().NoError(err) - space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder(). - SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{ - PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName, - })).Build()) - s.Require().NoError(err) - s.storageV2Cache.SetSpace(1000, space) - wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.storageV2Cache, s.syncMgr, &writeBufferOption{}) - s.NoError(err) - - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) - s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) - s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) - s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() - s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - - pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64) - delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) - - err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200}) - s.NoError(err) -} - -func (s *BFWriteBufferSuite) TestAutoSyncWithStorageV2() { - params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true") - defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false") - paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1") - tmpDir := s.T().TempDir() - arrowSchema, err := typeutil.ConvertToArrowSchema(s.collInt64Schema.Fields) - s.Require().NoError(err) - - space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder(). - SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{ - PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName, - })).Build()) - s.Require().NoError(err) - s.storageV2Cache.SetSpace(1002, space) - - s.Run("normal_auto_sync", func() { - wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.storageV2Cache, s.syncMgr, &writeBufferOption{ - syncPolicies: []SyncPolicy{ - GetFullBufferPolicy(), - GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)), - GetSealedSegmentsPolicy(s.metacacheInt64), - }, - }) - s.NoError(err) - - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) - seg1 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1002}, metacache.NewBloomFilterSet()) - segCompacted := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) - - s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg, segCompacted}) - s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false).Once() - s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(seg, true).Once() - s.metacacheInt64.EXPECT().GetSegmentByID(int64(1002)).Return(seg1, true) - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002}) - s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() - s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return() - s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything, mock.Anything).Return(nil) - - pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64) - delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) - - metrics.DataNodeFlowGraphBufferDataSize.Reset() - err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200}) - s.NoError(err) - - value, err := metrics.DataNodeFlowGraphBufferDataSize.GetMetricWithLabelValues(fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(s.metacacheInt64.Collection())) - s.NoError(err) - s.MetricsEqual(value, 0) - }) -} - func (s *BFWriteBufferSuite) TestCreateFailure() { metacache := metacache.NewMockMetaCache(s.T()) metacache.EXPECT().Collection().Return(s.collID) metacache.EXPECT().Schema().Return(&schemapb.CollectionSchema{}) - _, err := NewBFWriteBuffer(s.channelName, metacache, s.storageV2Cache, s.syncMgr, &writeBufferOption{}) + _, err := NewBFWriteBuffer(s.channelName, metacache, s.syncMgr, &writeBufferOption{}) s.Error(err) } diff --git a/internal/flushcommon/writebuffer/l0_write_buffer.go b/internal/flushcommon/writebuffer/l0_write_buffer.go index 12f3dc9841..67647a6255 100644 --- a/internal/flushcommon/writebuffer/l0_write_buffer.go +++ b/internal/flushcommon/writebuffer/l0_write_buffer.go @@ -33,11 +33,11 @@ type l0WriteBuffer struct { idAllocator allocator.Interface } -func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) { +func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) { if option.idAllocator == nil { return nil, merr.WrapErrServiceInternal("id allocator is nil when creating l0 write buffer") } - base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option) + base, err := newWriteBufferBase(channel, metacache, syncMgr, option) if err != nil { return nil, err } diff --git a/internal/flushcommon/writebuffer/l0_write_buffer_test.go b/internal/flushcommon/writebuffer/l0_write_buffer_test.go index 6fb1239e8b..ebb4985598 100644 --- a/internal/flushcommon/writebuffer/l0_write_buffer_test.go +++ b/internal/flushcommon/writebuffer/l0_write_buffer_test.go @@ -28,13 +28,12 @@ import ( type L0WriteBufferSuite struct { testutils.PromMetricsSuite - channelName string - collID int64 - collSchema *schemapb.CollectionSchema - syncMgr *syncmgr.MockSyncManager - metacache *metacache.MockMetaCache - allocator *allocator.MockGIDAllocator - storageCache *metacache.StorageV2Cache + channelName string + collID int64 + collSchema *schemapb.CollectionSchema + syncMgr *syncmgr.MockSyncManager + metacache *metacache.MockMetaCache + allocator *allocator.MockGIDAllocator } func (s *L0WriteBufferSuite) SetupSuite() { @@ -61,10 +60,6 @@ func (s *L0WriteBufferSuite) SetupSuite() { }, } s.channelName = "by-dev-rootcoord-dml_0v0" - - storageCache, err := metacache.NewStorageV2Cache(s.collSchema) - s.Require().NoError(err) - s.storageCache = storageCache } func (s *L0WriteBufferSuite) composeInsertMsg(segmentID int64, rowCount int, dim int, pkType schemapb.DataType) ([]int64, *msgstream.InsertMsg) { @@ -173,7 +168,7 @@ func (s *L0WriteBufferSuite) SetupTest() { func (s *L0WriteBufferSuite) TestBufferData() { s.Run("normal_run", func() { - wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{ + wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ idAllocator: s.allocator, }) s.NoError(err) @@ -202,7 +197,7 @@ func (s *L0WriteBufferSuite) TestBufferData() { }) s.Run("pk_type_not_match", func() { - wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{ + wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ idAllocator: s.allocator, }) s.NoError(err) @@ -225,7 +220,7 @@ func (s *L0WriteBufferSuite) TestCreateFailure() { metacache := metacache.NewMockMetaCache(s.T()) metacache.EXPECT().Collection().Return(s.collID) metacache.EXPECT().Schema().Return(&schemapb.CollectionSchema{}) - _, err := NewL0WriteBuffer(s.channelName, metacache, s.storageCache, s.syncMgr, &writeBufferOption{ + _, err := NewL0WriteBuffer(s.channelName, metacache, s.syncMgr, &writeBufferOption{ idAllocator: s.allocator, }) s.Error(err) diff --git a/internal/flushcommon/writebuffer/manager.go b/internal/flushcommon/writebuffer/manager.go index ff76da80d9..028c8e5503 100644 --- a/internal/flushcommon/writebuffer/manager.go +++ b/internal/flushcommon/writebuffer/manager.go @@ -23,7 +23,7 @@ import ( //go:generate mockery --name=BufferManager --structname=MockBufferManager --output=./ --filename=mock_manager.go --with-expecter --inpackage type BufferManager interface { // Register adds a WriteBuffer with provided schema & options. - Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error + Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error // SealSegments notifies writeBuffer corresponding to provided channel to seal segments. // which will cause segment start flush procedure. SealSegments(ctx context.Context, channel string, segmentIDs []int64) error @@ -140,7 +140,7 @@ func (m *bufferManager) Stop() { } // Register a new WriteBuffer for channel. -func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error { +func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error { m.mut.Lock() defer m.mut.Unlock() @@ -148,7 +148,7 @@ func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, if ok { return merr.WrapErrChannelReduplicate(channel) } - buf, err := NewWriteBuffer(channel, metacache, storageV2Cache, m.syncMgr, opts...) + buf, err := NewWriteBuffer(channel, metacache, m.syncMgr, opts...) if err != nil { return err } diff --git a/internal/flushcommon/writebuffer/manager_test.go b/internal/flushcommon/writebuffer/manager_test.go index a1004b479f..63cb015657 100644 --- a/internal/flushcommon/writebuffer/manager_test.go +++ b/internal/flushcommon/writebuffer/manager_test.go @@ -73,13 +73,10 @@ func (s *ManagerSuite) SetupTest() { func (s *ManagerSuite) TestRegister() { manager := s.manager - storageCache, err := metacache.NewStorageV2Cache(s.collSchema) - s.Require().NoError(err) - - err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator)) + err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator)) s.NoError(err) - err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator)) + err = manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator)) s.Error(err) s.ErrorIs(err, merr.ErrChannelReduplicate) } @@ -183,9 +180,7 @@ func (s *ManagerSuite) TestRemoveChannel() { }) s.Run("remove_channel", func() { - storageCache, err := metacache.NewStorageV2Cache(s.collSchema) - s.Require().NoError(err) - err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator)) + err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator)) s.Require().NoError(err) s.NotPanics(func() { diff --git a/internal/flushcommon/writebuffer/mock_manager.go b/internal/flushcommon/writebuffer/mock_manager.go index 9c2e1490a0..d58830cc1e 100644 --- a/internal/flushcommon/writebuffer/mock_manager.go +++ b/internal/flushcommon/writebuffer/mock_manager.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.30.1. DO NOT EDIT. +// Code generated by mockery v2.32.4. DO NOT EDIT. package writebuffer @@ -278,20 +278,20 @@ func (_c *MockBufferManager_NotifyCheckpointUpdated_Call) RunAndReturn(run func( return _c } -// Register provides a mock function with given fields: channel, _a1, storageV2Cache, opts -func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error { +// Register provides a mock function with given fields: channel, _a1, opts +func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption) error { _va := make([]interface{}, len(opts)) for _i := range opts { _va[_i] = opts[_i] } var _ca []interface{} - _ca = append(_ca, channel, _a1, storageV2Cache) + _ca = append(_ca, channel, _a1) _ca = append(_ca, _va...) ret := _m.Called(_ca...) var r0 error - if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error); ok { - r0 = rf(channel, _a1, storageV2Cache, opts...) + if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, ...WriteBufferOption) error); ok { + r0 = rf(channel, _a1, opts...) } else { r0 = ret.Error(0) } @@ -307,22 +307,21 @@ type MockBufferManager_Register_Call struct { // Register is a helper method to define mock.On call // - channel string // - _a1 metacache.MetaCache -// - storageV2Cache *metacache.StorageV2Cache // - opts ...WriteBufferOption -func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, storageV2Cache interface{}, opts ...interface{}) *MockBufferManager_Register_Call { +func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, opts ...interface{}) *MockBufferManager_Register_Call { return &MockBufferManager_Register_Call{Call: _e.mock.On("Register", - append([]interface{}{channel, _a1, storageV2Cache}, opts...)...)} + append([]interface{}{channel, _a1}, opts...)...)} } -func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call { +func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call { _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]WriteBufferOption, len(args)-3) - for i, a := range args[3:] { + variadicArgs := make([]WriteBufferOption, len(args)-2) + for i, a := range args[2:] { if a != nil { variadicArgs[i] = a.(WriteBufferOption) } } - run(args[0].(string), args[1].(metacache.MetaCache), args[2].(*metacache.StorageV2Cache), variadicArgs...) + run(args[0].(string), args[1].(metacache.MetaCache), variadicArgs...) }) return _c } @@ -332,7 +331,7 @@ func (_c *MockBufferManager_Register_Call) Return(_a0 error) *MockBufferManager_ return _c } -func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error) *MockBufferManager_Register_Call { +func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, ...WriteBufferOption) error) *MockBufferManager_Register_Call { _c.Call.Return(run) return _c } diff --git a/internal/flushcommon/writebuffer/write_buffer.go b/internal/flushcommon/writebuffer/write_buffer.go index 7788465af5..acd679583e 100644 --- a/internal/flushcommon/writebuffer/write_buffer.go +++ b/internal/flushcommon/writebuffer/write_buffer.go @@ -16,7 +16,6 @@ import ( "github.com/milvus-io/milvus/internal/flushcommon/metacache" "github.com/milvus-io/milvus/internal/flushcommon/syncmgr" "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" @@ -100,7 +99,7 @@ func (c *checkpointCandidates) GetEarliestWithDefault(def *checkpointCandidate) return result } -func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) { +func NewWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) { option := defaultWBOption(metacache) for _, opt := range opts { opt(option) @@ -108,9 +107,9 @@ func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cach switch option.deletePolicy { case DeletePolicyBFPkOracle: - return NewBFWriteBuffer(channel, metacache, storageV2Cache, syncMgr, option) + return NewBFWriteBuffer(channel, metacache, syncMgr, option) case DeletePolicyL0Delta: - return NewL0WriteBuffer(channel, metacache, storageV2Cache, syncMgr, option) + return NewL0WriteBuffer(channel, metacache, syncMgr, option) default: return nil, merr.WrapErrParameterInvalid("valid delete policy config", option.deletePolicy) } @@ -140,34 +139,23 @@ type writeBufferBase struct { checkpoint *msgpb.MsgPosition flushTimestamp *atomic.Uint64 - storagev2Cache *metacache.StorageV2Cache - // pre build logger logger *log.MLogger cpRatedLogger *log.MLogger } -func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) { +func newWriteBufferBase(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) { flushTs := atomic.NewUint64(nonFlushTS) flushTsPolicy := GetFlushTsPolicy(flushTs, metacache) option.syncPolicies = append(option.syncPolicies, flushTsPolicy) var serializer syncmgr.Serializer var err error - if params.Params.CommonCfg.EnableStorageV2.GetAsBool() { - serializer, err = syncmgr.NewStorageV2Serializer( - storageV2Cache, - option.idAllocator, - metacache, - option.metaWriter, - ) - } else { - serializer, err = syncmgr.NewStorageSerializer( - option.idAllocator, - metacache, - option.metaWriter, - ) - } + serializer, err = syncmgr.NewStorageSerializer( + option.idAllocator, + metacache, + option.metaWriter, + ) if err != nil { return nil, err } @@ -201,7 +189,6 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2 syncCheckpoint: newCheckpointCandiates(), syncPolicies: option.syncPolicies, flushTimestamp: flushTs, - storagev2Cache: storageV2Cache, } wb.logger = log.With(zap.Int64("collectionID", wb.collectionID), @@ -660,8 +647,6 @@ func (wb *writeBufferBase) Close(ctx context.Context, drop bool) { switch t := syncTask.(type) { case *syncmgr.SyncTask: t.WithDrop() - case *syncmgr.SyncTaskV2: - t.WithDrop() } f := wb.syncMgr.SyncData(ctx, syncTask, func(err error) error { diff --git a/internal/flushcommon/writebuffer/write_buffer_test.go b/internal/flushcommon/writebuffer/write_buffer_test.go index 09e5084161..f9812a9558 100644 --- a/internal/flushcommon/writebuffer/write_buffer_test.go +++ b/internal/flushcommon/writebuffer/write_buffer_test.go @@ -22,13 +22,12 @@ import ( type WriteBufferSuite struct { suite.Suite - collID int64 - channelName string - collSchema *schemapb.CollectionSchema - wb *writeBufferBase - syncMgr *syncmgr.MockSyncManager - metacache *metacache.MockMetaCache - storageCache *metacache.StorageV2Cache + collID int64 + channelName string + collSchema *schemapb.CollectionSchema + wb *writeBufferBase + syncMgr *syncmgr.MockSyncManager + metacache *metacache.MockMetaCache } func (s *WriteBufferSuite) SetupSuite() { @@ -47,14 +46,12 @@ func (s *WriteBufferSuite) SetupSuite() { } func (s *WriteBufferSuite) SetupTest() { - storageCache, err := metacache.NewStorageV2Cache(s.collSchema) - s.Require().NoError(err) - s.storageCache = storageCache s.syncMgr = syncmgr.NewMockSyncManager(s.T()) s.metacache = metacache.NewMockMetaCache(s.T()) s.metacache.EXPECT().Schema().Return(s.collSchema).Maybe() s.metacache.EXPECT().Collection().Return(s.collID).Maybe() - s.wb, err = newWriteBufferBase(s.channelName, s.metacache, storageCache, s.syncMgr, &writeBufferOption{ + var err error + s.wb, err = newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }, @@ -66,7 +63,7 @@ func (s *WriteBufferSuite) TestDefaultOption() { s.Run("default BFPkOracle", func() { paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key, "false") defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key) - wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr) + wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr) s.NoError(err) _, ok := wb.(*bfWriteBuffer) s.True(ok) @@ -75,7 +72,7 @@ func (s *WriteBufferSuite) TestDefaultOption() { s.Run("default L0Delta policy", func() { paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key, "true") defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key) - wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithIDAllocator(allocator.NewMockGIDAllocator())) + wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithIDAllocator(allocator.NewMockGIDAllocator())) s.NoError(err) _, ok := wb.(*l0WriteBuffer) s.True(ok) @@ -83,18 +80,18 @@ func (s *WriteBufferSuite) TestDefaultOption() { } func (s *WriteBufferSuite) TestWriteBufferType() { - wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)) + wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)) s.NoError(err) _, ok := wb.(*bfWriteBuffer) s.True(ok) - wb, err = NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator())) + wb, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator())) s.NoError(err) _, ok = wb.(*l0WriteBuffer) s.True(ok) - _, err = NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy("")) + _, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy("")) s.Error(err) } @@ -114,7 +111,7 @@ func (s *WriteBufferSuite) TestFlushSegments() { s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return() s.metacache.EXPECT().GetSegmentByID(mock.Anything, mock.Anything, mock.Anything).Return(nil, true) - wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)) + wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)) s.NoError(err) err = wb.SealSegments(context.Background(), []int64{segmentID}) @@ -265,7 +262,7 @@ func (s *WriteBufferSuite) TestGetCheckpoint() { } func (s *WriteBufferSuite) TestSyncSegmentsError() { - wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{ + wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }, @@ -298,7 +295,7 @@ func (s *WriteBufferSuite) TestSyncSegmentsError() { } func (s *WriteBufferSuite) TestEvictBuffer() { - wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{ + wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }, @@ -367,7 +364,7 @@ func (s *WriteBufferSuite) TestEvictBuffer() { } func (s *WriteBufferSuite) TestDropPartitions() { - wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{ + wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }, diff --git a/internal/indexnode/indexnode_service.go b/internal/indexnode/indexnode_service.go index e1eee6280c..cdc8f76901 100644 --- a/internal/indexnode/indexnode_service.go +++ b/internal/indexnode/indexnode_service.go @@ -97,12 +97,7 @@ func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc() return merr.Status(err), nil } - var task task - if Params.CommonCfg.EnableStorageV2.GetAsBool() { - task = newIndexBuildTaskV2(taskCtx, taskCancel, req, i) - } else { - task = newIndexBuildTask(taskCtx, taskCancel, req, cm, i) - } + task := newIndexBuildTask(taskCtx, taskCancel, req, cm, i) ret := merr.Success() if err := i.sched.TaskQueue.Enqueue(task); err != nil { log.Warn("IndexNode failed to schedule", @@ -327,12 +322,7 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc() return merr.Status(err), nil } - var task task - if Params.CommonCfg.EnableStorageV2.GetAsBool() { - task = newIndexBuildTaskV2(taskCtx, taskCancel, indexRequest, i) - } else { - task = newIndexBuildTask(taskCtx, taskCancel, indexRequest, cm, i) - } + task := newIndexBuildTask(taskCtx, taskCancel, indexRequest, cm, i) ret := merr.Success() if err := i.sched.TaskQueue.Enqueue(task); err != nil { log.Warn("IndexNode failed to schedule", diff --git a/internal/indexnode/task_index.go b/internal/indexnode/task_index.go index bc3843cc4a..1848fdd693 100644 --- a/internal/indexnode/task_index.go +++ b/internal/indexnode/task_index.go @@ -43,187 +43,6 @@ import ( "github.com/milvus-io/milvus/pkg/util/timerecord" ) -type indexBuildTaskV2 struct { - *indexBuildTask -} - -func newIndexBuildTaskV2(ctx context.Context, - cancel context.CancelFunc, - req *indexpb.CreateJobRequest, - node *IndexNode, -) *indexBuildTaskV2 { - t := &indexBuildTaskV2{ - indexBuildTask: &indexBuildTask{ - ident: fmt.Sprintf("%s/%d", req.GetClusterID(), req.GetBuildID()), - cancel: cancel, - ctx: ctx, - req: req, - tr: timerecord.NewTimeRecorder(fmt.Sprintf("IndexBuildID: %d, ClusterID: %s", req.GetBuildID(), req.GetClusterID())), - node: node, - }, - } - - t.parseParams() - return t -} - -func (it *indexBuildTaskV2) parseParams() { - // fill field for requests before v2.5.0 - if it.req.GetField() == nil || it.req.GetField().GetDataType() == schemapb.DataType_None { - it.req.Field = &schemapb.FieldSchema{ - FieldID: it.req.GetFieldID(), - Name: it.req.GetFieldName(), - DataType: it.req.GetFieldType(), - } - } -} - -func (it *indexBuildTaskV2) Execute(ctx context.Context) error { - log := log.Ctx(ctx).With(zap.String("clusterID", it.req.GetClusterID()), zap.Int64("buildID", it.req.GetBuildID()), - zap.Int64("collection", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()), - zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion())) - - indexType := it.newIndexParams[common.IndexTypeKey] - if indexType == indexparamcheck.IndexDISKANN { - // check index node support disk index - if !Params.IndexNodeCfg.EnableDisk.GetAsBool() { - log.Warn("IndexNode don't support build disk index", - zap.String("index type", it.newIndexParams[common.IndexTypeKey]), - zap.Bool("enable disk", Params.IndexNodeCfg.EnableDisk.GetAsBool())) - return merr.WrapErrIndexNotSupported("disk index") - } - - // check load size and size of field data - localUsedSize, err := indexcgowrapper.GetLocalUsedSize(paramtable.Get().LocalStorageCfg.Path.GetValue()) - if err != nil { - log.Warn("IndexNode get local used size failed") - return err - } - fieldDataSize, err := estimateFieldDataSize(it.req.GetDim(), it.req.GetNumRows(), it.req.GetField().GetDataType()) - if err != nil { - log.Warn("IndexNode get local used size failed") - return err - } - usedLocalSizeWhenBuild := int64(float64(fieldDataSize)*diskUsageRatio) + localUsedSize - maxUsedLocalSize := int64(Params.IndexNodeCfg.DiskCapacityLimit.GetAsFloat() * Params.IndexNodeCfg.MaxDiskUsagePercentage.GetAsFloat()) - - if usedLocalSizeWhenBuild > maxUsedLocalSize { - log.Warn("IndexNode don't has enough disk size to build disk ann index", - zap.Int64("usedLocalSizeWhenBuild", usedLocalSizeWhenBuild), - zap.Int64("maxUsedLocalSize", maxUsedLocalSize)) - return merr.WrapErrServiceDiskLimitExceeded(float32(usedLocalSizeWhenBuild), float32(maxUsedLocalSize)) - } - - err = indexparams.SetDiskIndexBuildParams(it.newIndexParams, int64(fieldDataSize)) - if err != nil { - log.Warn("failed to fill disk index params", zap.Error(err)) - return err - } - } - - storageConfig := &indexcgopb.StorageConfig{ - Address: it.req.GetStorageConfig().GetAddress(), - AccessKeyID: it.req.GetStorageConfig().GetAccessKeyID(), - SecretAccessKey: it.req.GetStorageConfig().GetSecretAccessKey(), - UseSSL: it.req.GetStorageConfig().GetUseSSL(), - BucketName: it.req.GetStorageConfig().GetBucketName(), - RootPath: it.req.GetStorageConfig().GetRootPath(), - UseIAM: it.req.GetStorageConfig().GetUseIAM(), - IAMEndpoint: it.req.GetStorageConfig().GetIAMEndpoint(), - StorageType: it.req.GetStorageConfig().GetStorageType(), - UseVirtualHost: it.req.GetStorageConfig().GetUseVirtualHost(), - Region: it.req.GetStorageConfig().GetRegion(), - CloudProvider: it.req.GetStorageConfig().GetCloudProvider(), - RequestTimeoutMs: it.req.GetStorageConfig().GetRequestTimeoutMs(), - SslCACert: it.req.GetStorageConfig().GetSslCACert(), - } - - optFields := make([]*indexcgopb.OptionalFieldInfo, 0, len(it.req.GetOptionalScalarFields())) - for _, optField := range it.req.GetOptionalScalarFields() { - optFields = append(optFields, &indexcgopb.OptionalFieldInfo{ - FieldID: optField.GetFieldID(), - FieldName: optField.GetFieldName(), - FieldType: optField.GetFieldType(), - DataPaths: optField.GetDataPaths(), - }) - } - - buildIndexParams := &indexcgopb.BuildIndexInfo{ - ClusterID: it.req.GetClusterID(), - BuildID: it.req.GetBuildID(), - CollectionID: it.req.GetCollectionID(), - PartitionID: it.req.GetPartitionID(), - SegmentID: it.req.GetSegmentID(), - IndexVersion: it.req.GetIndexVersion(), - CurrentIndexVersion: it.req.GetCurrentIndexVersion(), - NumRows: it.req.GetNumRows(), - Dim: it.req.GetDim(), - IndexFilePrefix: it.req.GetIndexFilePrefix(), - InsertFiles: it.req.GetDataPaths(), - FieldSchema: it.req.GetField(), - StorageConfig: storageConfig, - IndexParams: mapToKVPairs(it.newIndexParams), - TypeParams: mapToKVPairs(it.newTypeParams), - StorePath: it.req.GetStorePath(), - StoreVersion: it.req.GetStoreVersion(), - IndexStorePath: it.req.GetIndexStorePath(), - OptFields: optFields, - PartitionKeyIsolation: it.req.GetPartitionKeyIsolation(), - } - - var err error - it.index, err = indexcgowrapper.CreateIndexV2(ctx, buildIndexParams) - if err != nil { - if it.index != nil && it.index.CleanLocalData() != nil { - log.Warn("failed to clean cached data on disk after build index failed") - } - log.Warn("failed to build index", zap.Error(err)) - return err - } - - buildIndexLatency := it.tr.RecordSpan() - metrics.IndexNodeKnowhereBuildIndexLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(buildIndexLatency.Milliseconds())) - - log.Info("Successfully build index") - return nil -} - -func (it *indexBuildTaskV2) PostExecute(ctx context.Context) error { - log := log.Ctx(ctx).With(zap.String("clusterID", it.req.GetClusterID()), zap.Int64("buildID", it.req.GetBuildID()), - zap.Int64("collection", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()), - zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion())) - - gcIndex := func() { - if err := it.index.Delete(); err != nil { - log.Warn("IndexNode indexBuildTask Execute CIndexDelete failed", zap.Error(err)) - } - } - version, err := it.index.UpLoadV2() - if err != nil { - log.Warn("failed to upload index", zap.Error(err)) - gcIndex() - return err - } - - encodeIndexFileDur := it.tr.Record("index serialize and upload done") - metrics.IndexNodeEncodeIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(encodeIndexFileDur.Seconds()) - - // early release index for gc, and we can ensure that Delete is idempotent. - gcIndex() - - // use serialized size before encoding - var serializedSize uint64 - saveFileKeys := make([]string, 0) - - it.node.storeIndexFilesAndStatisticV2(it.req.GetClusterID(), it.req.GetBuildID(), saveFileKeys, serializedSize, it.req.GetCurrentIndexVersion(), version) - log.Debug("save index files done", zap.Strings("IndexFiles", saveFileKeys)) - saveIndexFileDur := it.tr.RecordSpan() - metrics.IndexNodeSaveIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(saveIndexFileDur.Seconds()) - it.tr.Elapse("index building all done") - log.Info("Successfully save index files") - return nil -} - // IndexBuildTask is used to record the information of the index tasks. type indexBuildTask struct { ident string diff --git a/internal/indexnode/task_test.go b/internal/indexnode/task_test.go index 28de64275f..6deacd5f7e 100644 --- a/internal/indexnode/task_test.go +++ b/internal/indexnode/task_test.go @@ -20,21 +20,14 @@ import ( "context" "testing" - "github.com/apache/arrow/go/v12/arrow" - "github.com/apache/arrow/go/v12/arrow/array" - "github.com/apache/arrow/go/v12/arrow/memory" "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" - "github.com/milvus-io/milvus-storage/go/storage/schema" "github.com/milvus-io/milvus/internal/proto/etcdpb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/dependency" - "github.com/milvus-io/milvus/internal/util/typeutil" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/metautil" "github.com/milvus-io/milvus/pkg/util/metric" @@ -139,105 +132,6 @@ func TestIndexBuildTask(t *testing.T) { suite.Run(t, new(IndexBuildTaskSuite)) } -type IndexBuildTaskV2Suite struct { - suite.Suite - schema *schemapb.CollectionSchema - arrowSchema *arrow.Schema - space *milvus_storage.Space -} - -func (suite *IndexBuildTaskV2Suite) SetupSuite() { - paramtable.Init() -} - -func (suite *IndexBuildTaskV2Suite) SetupTest() { - suite.schema = &schemapb.CollectionSchema{ - Name: "test", - Description: "test", - AutoID: false, - Fields: []*schemapb.FieldSchema{ - {FieldID: 1, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true}, - {FieldID: 2, Name: "ts", DataType: schemapb.DataType_Int64}, - {FieldID: 3, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}}, - }, - } - - var err error - suite.arrowSchema, err = typeutil.ConvertToArrowSchema(suite.schema.Fields) - suite.NoError(err) - - tmpDir := suite.T().TempDir() - opt := options.NewSpaceOptionBuilder(). - SetSchema(schema.NewSchema( - suite.arrowSchema, - &schema.SchemaOptions{ - PrimaryColumn: "pk", - VectorColumn: "vec", - VersionColumn: "ts", - })). - Build() - suite.space, err = milvus_storage.Open("file://"+tmpDir, opt) - suite.NoError(err) - - b := array.NewRecordBuilder(memory.DefaultAllocator, suite.arrowSchema) - defer b.Release() - b.Field(0).(*array.Int64Builder).AppendValues([]int64{1}, nil) - b.Field(1).(*array.Int64Builder).AppendValues([]int64{1}, nil) - fb := b.Field(2).(*array.FixedSizeBinaryBuilder) - fb.Reserve(1) - fb.Append([]byte{1, 2, 3, 4}) - - rec := b.NewRecord() - defer rec.Release() - reader, err := array.NewRecordReader(suite.arrowSchema, []arrow.Record{rec}) - suite.NoError(err) - err = suite.space.Write(reader, &options.DefaultWriteOptions) - suite.NoError(err) -} - -func (suite *IndexBuildTaskV2Suite) TestBuildIndex() { - req := &indexpb.CreateJobRequest{ - BuildID: 1, - IndexVersion: 1, - IndexID: 0, - IndexName: "", - IndexParams: []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}, {Key: common.DimKey, Value: "1"}}, - TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}, - NumRows: 10, - StorageConfig: &indexpb.StorageConfig{ - RootPath: "/tmp/milvus/data", - StorageType: "local", - }, - CollectionID: 1, - PartitionID: 1, - SegmentID: 1, - FieldID: 3, - FieldName: "vec", - FieldType: schemapb.DataType_FloatVector, - StorePath: "file://" + suite.space.Path(), - StoreVersion: suite.space.GetCurrentVersion(), - IndexStorePath: "file://" + suite.space.Path(), - Dim: 4, - OptionalScalarFields: []*indexpb.OptionalFieldInfo{ - {FieldID: 1, FieldName: "pk", FieldType: 5, DataIds: []int64{0}}, - }, - } - - task := newIndexBuildTaskV2(context.Background(), nil, req, NewIndexNode(context.Background(), dependency.NewDefaultFactory(true))) - - var err error - err = task.PreExecute(context.Background()) - suite.NoError(err) - err = task.Execute(context.Background()) - suite.NoError(err) - err = task.PostExecute(context.Background()) - suite.NoError(err) -} - -func TestIndexBuildTaskV2Suite(t *testing.T) { - suite.Run(t, new(IndexBuildTaskV2Suite)) -} - type AnalyzeTaskSuite struct { suite.Suite schema *schemapb.CollectionSchema diff --git a/internal/querynodev2/segments/load_index_info.go b/internal/querynodev2/segments/load_index_info.go index f733d8c181..5b9a072d02 100644 --- a/internal/querynodev2/segments/load_index_info.go +++ b/internal/querynodev2/segments/load_index_info.go @@ -222,13 +222,9 @@ func (li *LoadIndexInfo) appendIndexData(ctx context.Context, indexKeys []string var status C.CStatus GetLoadPool().Submit(func() (any, error) { - if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { - status = C.AppendIndexV3(li.cLoadIndexInfo) - } else { - traceCtx := ParseCTraceContext(ctx) - status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo) - runtime.KeepAlive(traceCtx) - } + traceCtx := ParseCTraceContext(ctx) + status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo) + runtime.KeepAlive(traceCtx) return nil, nil }).Await() @@ -265,13 +261,9 @@ func (li *LoadIndexInfo) finish(ctx context.Context, info *cgopb.LoadIndexInfo) } _, _ = GetLoadPool().Submit(func() (any, error) { - if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { - status = C.AppendIndexV3(li.cLoadIndexInfo) - } else { - traceCtx := ParseCTraceContext(ctx) - status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo) - runtime.KeepAlive(traceCtx) - } + traceCtx := ParseCTraceContext(ctx) + status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo) + runtime.KeepAlive(traceCtx) return nil, nil }).Await() diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index dd3ca8eb27..3941a3ed01 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -29,12 +29,10 @@ import "C" import ( "context" "fmt" - "io" "runtime" "strings" "unsafe" - "github.com/apache/arrow/go/v12/arrow/array" "github.com/cockroachdb/errors" "go.opentelemetry.io/otel" "go.uber.org/atomic" @@ -44,8 +42,6 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" "github.com/milvus-io/milvus/internal/proto/cgopb" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" @@ -55,7 +51,6 @@ import ( "github.com/milvus-io/milvus/internal/querynodev2/segments/state" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/cgo" - typeutil_internal "github.com/milvus-io/milvus/internal/util/typeutil" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" @@ -259,7 +254,6 @@ type LocalSegment struct { lastDeltaTimestamp *atomic.Uint64 fields *typeutil.ConcurrentMap[int64, *FieldInfo] fieldIndexes *typeutil.ConcurrentMap[int64, *IndexedFieldInfo] - space *milvus_storage.Space } func NewSegment(ctx context.Context, @@ -336,76 +330,6 @@ func NewSegment(ctx context.Context, return segment, nil } -func NewSegmentV2( - ctx context.Context, - collection *Collection, - segmentType SegmentType, - version int64, - loadInfo *querypb.SegmentLoadInfo, -) (Segment, error) { - /* - CSegmentInterface - NewSegment(CCollection collection, uint64_t segment_id, SegmentType seg_type); - */ - if loadInfo.GetLevel() == datapb.SegmentLevel_L0 { - return NewL0Segment(collection, segmentType, version, loadInfo) - } - base, err := newBaseSegment(collection, segmentType, version, loadInfo) - if err != nil { - return nil, err - } - var segmentPtr C.CSegmentInterface - var status C.CStatus - var locker *state.LoadStateLock - switch segmentType { - case SegmentTypeSealed: - status = C.NewSegment(collection.collectionPtr, C.Sealed, C.int64_t(loadInfo.GetSegmentID()), &segmentPtr) - locker = state.NewLoadStateLock(state.LoadStateOnlyMeta) - case SegmentTypeGrowing: - status = C.NewSegment(collection.collectionPtr, C.Growing, C.int64_t(loadInfo.GetSegmentID()), &segmentPtr) - locker = state.NewLoadStateLock(state.LoadStateDataLoaded) - default: - return nil, fmt.Errorf("illegal segment type %d when create segment %d", segmentType, loadInfo.GetSegmentID()) - } - - if err := HandleCStatus(ctx, &status, "NewSegmentFailed"); err != nil { - return nil, err - } - - log.Info("create segment", - zap.Int64("collectionID", loadInfo.GetCollectionID()), - zap.Int64("partitionID", loadInfo.GetPartitionID()), - zap.Int64("segmentID", loadInfo.GetSegmentID()), - zap.String("segmentType", segmentType.String())) - - url, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), loadInfo.GetSegmentID()) - if err != nil { - return nil, err - } - space, err := milvus_storage.Open(url, options.NewSpaceOptionBuilder().SetVersion(loadInfo.GetStorageVersion()).Build()) - if err != nil { - return nil, err - } - - segment := &LocalSegment{ - baseSegment: base, - ptrLock: locker, - ptr: segmentPtr, - lastDeltaTimestamp: atomic.NewUint64(0), - fields: typeutil.NewConcurrentMap[int64, *FieldInfo](), - fieldIndexes: typeutil.NewConcurrentMap[int64, *IndexedFieldInfo](), - space: space, - memSize: atomic.NewInt64(-1), - rowNum: atomic.NewInt64(-1), - insertCount: atomic.NewInt64(0), - } - - if err := segment.initializeSegment(); err != nil { - return nil, err - } - return segment, nil -} - func (s *LocalSegment) initializeSegment() error { loadInfo := s.loadInfo.Load() indexedFieldInfos, fieldBinlogs := separateIndexAndBinlog(loadInfo) @@ -932,18 +856,7 @@ func (s *LocalSegment) LoadMultiFieldData(ctx context.Context) error { var status C.CStatus GetLoadPool().Submit(func() (any, error) { - if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { - uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID()) - if err != nil { - return nil, err - } - - loadFieldDataInfo.appendURI(uri) - loadFieldDataInfo.appendStorageVersion(s.space.GetCurrentVersion()) - status = C.LoadFieldDataV2(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo) - } else { - status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo) - } + status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo) return nil, nil }).Await() if err := HandleCStatus(ctx, &status, "LoadMultiFieldData failed", @@ -1019,18 +932,7 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun var status C.CStatus GetLoadPool().Submit(func() (any, error) { log.Info("submitted loadFieldData task to load pool") - if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { - uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID()) - if err != nil { - return nil, err - } - - loadFieldDataInfo.appendURI(uri) - loadFieldDataInfo.appendStorageVersion(s.space.GetCurrentVersion()) - status = C.LoadFieldDataV2(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo) - } else { - status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo) - } + status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo) return nil, nil }).Await() if err := HandleCStatus(ctx, &status, "LoadFieldData failed", @@ -1046,95 +948,6 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun return nil } -func (s *LocalSegment) LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error { - deleteReader, err := s.space.ScanDelete() - if err != nil { - return err - } - if !deleteReader.Schema().HasField(common.TimeStampFieldName) { - return fmt.Errorf("can not read timestamp field in space") - } - pkFieldSchema, err := typeutil.GetPrimaryFieldSchema(schema) - if err != nil { - return err - } - ids := &schemapb.IDs{} - var pkint64s []int64 - var pkstrings []string - var tss []int64 - for deleteReader.Next() { - rec := deleteReader.Record() - indices := rec.Schema().FieldIndices(common.TimeStampFieldName) - tss = append(tss, rec.Column(indices[0]).(*array.Int64).Int64Values()...) - indices = rec.Schema().FieldIndices(pkFieldSchema.Name) - switch pkFieldSchema.DataType { - case schemapb.DataType_Int64: - pkint64s = append(pkint64s, rec.Column(indices[0]).(*array.Int64).Int64Values()...) - case schemapb.DataType_VarChar: - columnData := rec.Column(indices[0]).(*array.String) - for i := 0; i < columnData.Len(); i++ { - pkstrings = append(pkstrings, columnData.Value(i)) - } - default: - return fmt.Errorf("unknown data type %v", pkFieldSchema.DataType) - } - } - if err := deleteReader.Err(); err != nil && err != io.EOF { - return err - } - - switch pkFieldSchema.DataType { - case schemapb.DataType_Int64: - ids.IdField = &schemapb.IDs_IntId{ - IntId: &schemapb.LongArray{ - Data: pkint64s, - }, - } - case schemapb.DataType_VarChar: - ids.IdField = &schemapb.IDs_StrId{ - StrId: &schemapb.StringArray{ - Data: pkstrings, - }, - } - default: - return fmt.Errorf("unknown data type %v", pkFieldSchema.DataType) - } - - idsBlob, err := proto.Marshal(ids) - if err != nil { - return err - } - - if len(tss) == 0 { - return nil - } - - loadInfo := C.CLoadDeletedRecordInfo{ - timestamps: unsafe.Pointer(&tss[0]), - primary_keys: (*C.uint8_t)(unsafe.Pointer(&idsBlob[0])), - primary_keys_size: C.uint64_t(len(idsBlob)), - row_count: C.int64_t(len(tss)), - } - /* - CStatus - LoadDeletedRecord(CSegmentInterface c_segment, CLoadDeletedRecordInfo deleted_record_info) - */ - var status C.CStatus - GetDynamicPool().Submit(func() (any, error) { - status = C.LoadDeletedRecord(s.ptr, loadInfo) - return nil, nil - }).Await() - - if err := HandleCStatus(ctx, &status, "LoadDeletedRecord failed"); err != nil { - return err - } - - log.Info("load deleted record done", - zap.Int("rowNum", len(tss)), - zap.String("segmentType", s.Type().String())) - return nil -} - func (s *LocalSegment) AddFieldDataInfo(ctx context.Context, rowCount int64, fields []*datapb.FieldBinlog) error { if !s.ptrLock.RLockIf(state.IsNotReleased) { return merr.WrapErrSegmentNotLoaded(s.ID(), "segment released") @@ -1331,13 +1144,6 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn IndexStoreVersion: indexInfo.GetIndexStoreVersion(), } - if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { - uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID()) - if err != nil { - return err - } - indexInfoProto.Uri = uri - } newLoadIndexInfoSpan := tr.RecordSpan() // 2. diff --git a/internal/querynodev2/segments/segment_interface.go b/internal/querynodev2/segments/segment_interface.go index 9489f87b32..164395b206 100644 --- a/internal/querynodev2/segments/segment_interface.go +++ b/internal/querynodev2/segments/segment_interface.go @@ -20,7 +20,6 @@ import ( "context" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/proto/segcorepb" @@ -79,7 +78,6 @@ type Segment interface { Insert(ctx context.Context, rowIDs []int64, timestamps []typeutil.Timestamp, record *segcorepb.InsertRecord) error Delete(ctx context.Context, primaryKeys []storage.PrimaryKey, timestamps []typeutil.Timestamp) error LoadDeltaData(ctx context.Context, deltaData *storage.DeleteData) error - LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error // storageV2 LastDeltaTimestamp() uint64 Release(ctx context.Context, opts ...releaseOption) diff --git a/internal/querynodev2/segments/segment_l0.go b/internal/querynodev2/segments/segment_l0.go index 8a41f5316a..5119e64c66 100644 --- a/internal/querynodev2/segments/segment_l0.go +++ b/internal/querynodev2/segments/segment_l0.go @@ -23,7 +23,6 @@ import ( "github.com/samber/lo" "go.uber.org/zap" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/proto/segcorepb" @@ -161,10 +160,6 @@ func (s *L0Segment) LoadDeltaData(ctx context.Context, deltaData *storage.Delete return nil } -func (s *L0Segment) LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error { - return merr.WrapErrServiceInternal("not implemented") -} - func (s *L0Segment) DeleteRecords() ([]storage.PrimaryKey, []uint64) { s.dataGuard.RLock() defer s.dataGuard.RUnlock() diff --git a/internal/querynodev2/segments/segment_loader.go b/internal/querynodev2/segments/segment_loader.go index 97db9173d4..0b455ec1cf 100644 --- a/internal/querynodev2/segments/segment_loader.go +++ b/internal/querynodev2/segments/segment_loader.go @@ -27,7 +27,6 @@ import "C" import ( "context" "fmt" - "io" "path" "runtime/debug" "strconv" @@ -43,14 +42,11 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/querynodev2/pkoracle" "github.com/milvus-io/milvus/internal/storage" - typeutil_internal "github.com/milvus-io/milvus/internal/util/typeutil" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" @@ -126,406 +122,6 @@ type resourceEstimateFactor struct { deltaDataExpansionFactor float64 } -type segmentLoaderV2 struct { - *segmentLoader -} - -func NewLoaderV2( - manager *Manager, - cm storage.ChunkManager, -) *segmentLoaderV2 { - return &segmentLoaderV2{ - segmentLoader: NewLoader(manager, cm), - } -} - -func (loader *segmentLoaderV2) LoadDelta(ctx context.Context, collectionID int64, segment Segment) error { - collection := loader.manager.Collection.Get(collectionID) - if collection == nil { - err := merr.WrapErrCollectionNotFound(collectionID) - log.Warn("failed to get collection while loading delta", zap.Error(err)) - return err - } - return segment.LoadDeltaData2(ctx, collection.Schema()) -} - -func (loader *segmentLoaderV2) Load(ctx context.Context, - collectionID int64, - segmentType SegmentType, - version int64, - segments ...*querypb.SegmentLoadInfo, -) ([]Segment, error) { - log := log.Ctx(ctx).With( - zap.Int64("collectionID", collectionID), - zap.String("segmentType", segmentType.String()), - ) - - if len(segments) == 0 { - log.Info("no segment to load") - return nil, nil - } - // Filter out loaded & loading segments - infos := loader.prepare(ctx, segmentType, segments...) - defer loader.unregister(infos...) - - log = log.With( - zap.Int64s("requestSegments", lo.Map(segments, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })), - zap.Int64s("preparedSegments", lo.Map(infos, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })), - ) - - // continue to wait other task done - log.Info("start loading...", zap.Int("segmentNum", len(segments)), zap.Int("afterFilter", len(infos))) - - // Check memory & storage limit - requestResourceResult, err := loader.requestResource(ctx, infos...) - if err != nil { - log.Warn("request resource failed", zap.Error(err)) - return nil, err - } - defer loader.freeRequest(requestResourceResult.Resource) - - newSegments := typeutil.NewConcurrentMap[int64, Segment]() - loaded := typeutil.NewConcurrentMap[int64, Segment]() - defer func() { - newSegments.Range(func(_ int64, s Segment) bool { - s.Release(context.Background()) - return true - }) - debug.FreeOSMemory() - }() - - for _, info := range infos { - loadInfo := info - - collection := loader.manager.Collection.Get(loadInfo.GetCollectionID()) - if collection == nil { - err := merr.WrapErrCollectionNotFound(loadInfo.GetCollectionID()) - log.Warn("failed to get collection", zap.Error(err)) - return nil, err - } - - segment, err := NewSegmentV2(ctx, collection, segmentType, version, loadInfo) - if err != nil { - log.Warn("load segment failed when create new segment", - zap.Int64("partitionID", loadInfo.GetPartitionID()), - zap.Int64("segmentID", loadInfo.GetSegmentID()), - zap.Error(err), - ) - return nil, err - } - - newSegments.Insert(loadInfo.GetSegmentID(), segment) - } - - loadSegmentFunc := func(idx int) error { - loadInfo := infos[idx] - partitionID := loadInfo.PartitionID - segmentID := loadInfo.SegmentID - segment, _ := newSegments.Get(segmentID) - - metrics.QueryNodeLoadSegmentConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), "LoadSegment").Inc() - defer metrics.QueryNodeLoadSegmentConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), "LoadSegment").Dec() - tr := timerecord.NewTimeRecorder("loadDurationPerSegment") - - var err error - if loadInfo.GetLevel() == datapb.SegmentLevel_L0 { - err = loader.LoadDelta(ctx, collectionID, segment) - } else { - err = loader.LoadSegment(ctx, segment.(*LocalSegment), loadInfo) - } - if err != nil { - log.Warn("load segment failed when load data into memory", - zap.Int64("partitionID", partitionID), - zap.Int64("segmentID", segmentID), - zap.Error(err), - ) - return err - } - loader.manager.Segment.Put(ctx, segmentType, segment) - newSegments.GetAndRemove(segmentID) - loaded.Insert(segmentID, segment) - log.Info("load segment done", zap.Int64("segmentID", segmentID)) - loader.notifyLoadFinish(loadInfo) - - metrics.QueryNodeLoadSegmentLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(tr.ElapseSpan().Milliseconds())) - return nil - } - - // Start to load, - // Make sure we can always benefit from concurrency, and not spawn too many idle goroutines - log.Info("start to load segments in parallel", - zap.Int("segmentNum", len(infos)), - zap.Int("concurrencyLevel", requestResourceResult.ConcurrencyLevel)) - err = funcutil.ProcessFuncParallel(len(infos), - requestResourceResult.ConcurrencyLevel, loadSegmentFunc, "loadSegmentFunc") - if err != nil { - log.Warn("failed to load some segments", zap.Error(err)) - return nil, err - } - - // Wait for all segments loaded - segmentIDs := lo.Map(segments, func(info *querypb.SegmentLoadInfo, _ int) int64 { return info.GetSegmentID() }) - if err := loader.waitSegmentLoadDone(ctx, segmentType, segmentIDs, version); err != nil { - log.Warn("failed to wait the filtered out segments load done", zap.Error(err)) - return nil, err - } - - log.Info("all segment load done") - var result []Segment - loaded.Range(func(_ int64, s Segment) bool { - result = append(result, s) - return true - }) - return result, nil -} - -func (loader *segmentLoaderV2) LoadBloomFilterSet(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) ([]*pkoracle.BloomFilterSet, error) { - log := log.Ctx(ctx).With( - zap.Int64("collectionID", collectionID), - zap.Int64s("segmentIDs", lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) int64 { - return info.GetSegmentID() - })), - ) - - segmentNum := len(infos) - if segmentNum == 0 { - log.Info("no segment to load") - return nil, nil - } - - collection := loader.manager.Collection.Get(collectionID) - if collection == nil { - err := merr.WrapErrCollectionNotFound(collectionID) - log.Warn("failed to get collection while loading segment", zap.Error(err)) - return nil, err - } - - log.Info("start loading remote...", zap.Int("segmentNum", segmentNum)) - - loadedBfs := typeutil.NewConcurrentSet[*pkoracle.BloomFilterSet]() - // TODO check memory for bf size - loadRemoteFunc := func(idx int) error { - loadInfo := infos[idx] - partitionID := loadInfo.PartitionID - segmentID := loadInfo.SegmentID - bfs := pkoracle.NewBloomFilterSet(segmentID, partitionID, commonpb.SegmentState_Sealed) - - log.Info("loading bloom filter for remote...") - err := loader.loadBloomFilter(ctx, segmentID, bfs, loadInfo.StorageVersion) - if err != nil { - log.Warn("load remote segment bloom filter failed", - zap.Int64("partitionID", partitionID), - zap.Int64("segmentID", segmentID), - zap.Error(err), - ) - return err - } - loadedBfs.Insert(bfs) - - return nil - } - - err := funcutil.ProcessFuncParallel(segmentNum, segmentNum, loadRemoteFunc, "loadRemoteFunc") - if err != nil { - // no partial success here - log.Warn("failed to load remote segment", zap.Error(err)) - return nil, err - } - - return loadedBfs.Collect(), nil -} - -func (loader *segmentLoaderV2) loadBloomFilter(ctx context.Context, segmentID int64, bfs *pkoracle.BloomFilterSet, - storeVersion int64, -) error { - log := log.Ctx(ctx).With( - zap.Int64("segmentID", segmentID), - ) - - startTs := time.Now() - - url, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), segmentID) - if err != nil { - return err - } - space, err := milvus_storage.Open(url, options.NewSpaceOptionBuilder().SetVersion(storeVersion).Build()) - if err != nil { - return err - } - - statsBlobs := space.StatisticsBlobs() - blobs := []*storage.Blob{} - - for _, statsBlob := range statsBlobs { - blob := make([]byte, statsBlob.Size) - _, err := space.ReadBlob(statsBlob.Name, blob) - if err != nil && err != io.EOF { - return err - } - - blobs = append(blobs, &storage.Blob{Value: blob}) - } - - var stats []*storage.PrimaryKeyStats - - stats, err = storage.DeserializeStats(blobs) - if err != nil { - log.Warn("failed to deserialize stats", zap.Error(err)) - return err - } - - var size uint - for _, stat := range stats { - pkStat := &storage.PkStatistics{ - PkFilter: stat.BF, - MinPK: stat.MinPk, - MaxPK: stat.MaxPk, - } - size += stat.BF.Cap() - bfs.AddHistoricalStats(pkStat) - } - log.Info("Successfully load pk stats", zap.Duration("time", time.Since(startTs)), zap.Uint("size", size), zap.Int("BFNum", len(stats))) - return nil -} - -func (loader *segmentLoaderV2) LoadSegment(ctx context.Context, - seg Segment, - loadInfo *querypb.SegmentLoadInfo, -) (err error) { - segment := seg.(*LocalSegment) - // TODO: we should create a transaction-like api to load segment for segment interface, - // but not do many things in segment loader. - stateLockGuard, err := segment.StartLoadData() - // segment can not do load now. - if err != nil { - return err - } - defer func() { - // segment is already loaded. - // TODO: if stateLockGuard is nil, we should not call LoadSegment anymore. - // but current Load is not clear enough to do an actual state transition, keep previous logic to avoid introduced bug. - if stateLockGuard != nil { - stateLockGuard.Done(err) - } - }() - - log := log.Ctx(ctx).With( - zap.Int64("collectionID", segment.Collection()), - zap.Int64("partitionID", segment.Partition()), - zap.String("shard", segment.Shard().VirtualName()), - zap.Int64("segmentID", segment.ID()), - ) - log.Info("start loading segment files", - zap.Int64("rowNum", loadInfo.GetNumOfRows()), - zap.String("segmentType", segment.Type().String())) - - collection := loader.manager.Collection.Get(segment.Collection()) - if collection == nil { - err := merr.WrapErrCollectionNotFound(segment.Collection()) - log.Warn("failed to get collection while loading segment", zap.Error(err)) - return err - } - // pkField := GetPkField(collection.Schema()) - - // TODO(xige-16): Optimize the data loading process and reduce data copying - // for now, there will be multiple copies in the process of data loading into segCore - defer debug.FreeOSMemory() - - if segment.Type() == SegmentTypeSealed { - fieldsMap := typeutil.NewConcurrentMap[int64, *schemapb.FieldSchema]() - for _, field := range collection.Schema().GetFields() { - fieldsMap.Insert(field.FieldID, field) - } - // fieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo) - indexedFieldInfos := make(map[int64]*IndexedFieldInfo) - for _, indexInfo := range loadInfo.IndexInfos { - if indexInfo.GetIndexStoreVersion() > 0 { - fieldID := indexInfo.FieldID - fieldInfo := &IndexedFieldInfo{ - IndexInfo: indexInfo, - } - indexedFieldInfos[fieldID] = fieldInfo - fieldsMap.Remove(fieldID) - // fieldID2IndexInfo[fieldID] = indexInfo - } - } - - if err := segment.AddFieldDataInfo(ctx, loadInfo.GetNumOfRows(), loadInfo.GetBinlogPaths()); err != nil { - return err - } - - log.Info("load fields...", - zap.Int("fieldNum", fieldsMap.Len()), - zap.Int64s("indexedFields", lo.Keys(indexedFieldInfos)), - ) - - schemaHelper, err := typeutil.CreateSchemaHelper(collection.Schema()) - if err != nil { - return err - } - tr := timerecord.NewTimeRecorder("segmentLoader.LoadIndex") - if err := loader.loadFieldsIndex(ctx, schemaHelper, segment, loadInfo.GetNumOfRows(), indexedFieldInfos); err != nil { - return err - } - metrics.QueryNodeLoadIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(tr.ElapseSpan().Milliseconds())) - - if err := loader.loadSealedSegmentFields(ctx, segment, fieldsMap, loadInfo.GetNumOfRows()); err != nil { - return err - } - // https://github.com/milvus-io/milvus/23654 - // legacy entry num = 0 - if err := loader.patchEntryNumber(ctx, segment, loadInfo); err != nil { - return err - } - } else { - if err := segment.LoadMultiFieldData(ctx); err != nil { - return err - } - } - - // load statslog if it's growing segment - if segment.segmentType == SegmentTypeGrowing { - log.Info("loading statslog...") - // pkStatsBinlogs, logType := loader.filterPKStatsBinlogs(loadInfo.Statslogs, pkField.GetFieldID()) - err := loader.loadBloomFilter(ctx, segment.ID(), segment.bloomFilterSet, loadInfo.StorageVersion) - if err != nil { - return err - } - } - - log.Info("loading delta...") - return loader.LoadDelta(ctx, segment.Collection(), segment) -} - -func (loader *segmentLoaderV2) LoadLazySegment(ctx context.Context, - segment Segment, - loadInfo *querypb.SegmentLoadInfo, -) (err error) { - return merr.ErrOperationNotSupported -} - -func (loader *segmentLoaderV2) loadSealedSegmentFields(ctx context.Context, segment *LocalSegment, fields *typeutil.ConcurrentMap[int64, *schemapb.FieldSchema], rowCount int64) error { - runningGroup, _ := errgroup.WithContext(ctx) - fields.Range(func(fieldID int64, field *schemapb.FieldSchema) bool { - runningGroup.Go(func() error { - return segment.LoadFieldData(ctx, fieldID, rowCount, nil, false) - }) - return true - }) - - err := runningGroup.Wait() - if err != nil { - return err - } - - log.Ctx(ctx).Info("load field binlogs done for sealed segment", - zap.Int64("collection", segment.Collection()), - zap.Int64("segment", segment.ID()), - zap.String("segmentType", segment.Type().String())) - - return nil -} - func NewLoader( manager *Manager, cm storage.ChunkManager, diff --git a/internal/querynodev2/segments/segment_loader_test.go b/internal/querynodev2/segments/segment_loader_test.go index 03c53cce32..ebb282d50e 100644 --- a/internal/querynodev2/segments/segment_loader_test.go +++ b/internal/querynodev2/segments/segment_loader_test.go @@ -23,9 +23,6 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v12/arrow" - "github.com/apache/arrow/go/v12/arrow/array" - "github.com/apache/arrow/go/v12/arrow/memory" "github.com/cockroachdb/errors" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" @@ -33,14 +30,10 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - milvus_storage "github.com/milvus-io/milvus-storage/go/storage" - "github.com/milvus-io/milvus-storage/go/storage/options" - "github.com/milvus-io/milvus-storage/go/storage/schema" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/initcore" - "github.com/milvus-io/milvus/internal/util/typeutil" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/contextutil" "github.com/milvus-io/milvus/pkg/util/funcutil" @@ -911,152 +904,3 @@ func TestSegmentLoader(t *testing.T) { suite.Run(t, &SegmentLoaderSuite{}) suite.Run(t, &SegmentLoaderDetailSuite{}) } - -type SegmentLoaderV2Suite struct { - suite.Suite - loader *segmentLoaderV2 - - // Dependencies - manager *Manager - rootPath string - chunkManager storage.ChunkManager - - // Data - collectionID int64 - partitionID int64 - segmentID int64 - schema *schemapb.CollectionSchema - segmentNum int -} - -func (suite *SegmentLoaderV2Suite) SetupSuite() { - paramtable.Init() - suite.rootPath = suite.T().Name() - suite.collectionID = rand.Int63() - suite.partitionID = rand.Int63() - suite.segmentID = rand.Int63() - suite.segmentNum = 5 -} - -func (suite *SegmentLoaderV2Suite) SetupTest() { - paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("true") - // Dependencies - suite.manager = NewManager() - ctx := context.Background() - // TODO:: cpp chunk manager not support local chunk manager - // suite.chunkManager = storage.NewLocalChunkManager(storage.RootPath( - // fmt.Sprintf("/tmp/milvus-ut/%d", rand.Int63()))) - chunkManagerFactory := storage.NewTestChunkManagerFactory(paramtable.Get(), suite.rootPath) - suite.chunkManager, _ = chunkManagerFactory.NewPersistentStorageChunkManager(ctx) - suite.loader = NewLoaderV2(suite.manager, suite.chunkManager) - initcore.InitRemoteChunkManager(paramtable.Get()) - - // Data - suite.schema = GenTestCollectionSchema("test", schemapb.DataType_Int64, false) - indexMeta := GenTestIndexMeta(suite.collectionID, suite.schema) - loadMeta := &querypb.LoadMetaInfo{ - LoadType: querypb.LoadType_LoadCollection, - CollectionID: suite.collectionID, - PartitionIDs: []int64{suite.partitionID}, - } - suite.manager.Collection.PutOrRef(suite.collectionID, suite.schema, indexMeta, loadMeta) -} - -func (suite *SegmentLoaderV2Suite) TearDownTest() { - ctx := context.Background() - for i := 0; i < suite.segmentNum; i++ { - suite.manager.Segment.Remove(context.Background(), suite.segmentID+int64(i), querypb.DataScope_All) - } - suite.chunkManager.RemoveWithPrefix(ctx, suite.rootPath) - paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false") -} - -func (suite *SegmentLoaderV2Suite) TestLoad() { - tmpDir := suite.T().TempDir() - paramtable.Get().CommonCfg.StorageScheme.SwapTempValue("file") - paramtable.Get().CommonCfg.StoragePathPrefix.SwapTempValue(tmpDir) - ctx := context.Background() - - msgLength := 4 - - arrowSchema, err := typeutil.ConvertToArrowSchema(suite.schema.Fields) - suite.NoError(err) - opt := options.NewSpaceOptionBuilder(). - SetSchema(schema.NewSchema( - arrowSchema, - &schema.SchemaOptions{ - PrimaryColumn: "int64Field", - VectorColumn: "floatVectorField", - VersionColumn: "Timestamp", - })). - Build() - uri, err := typeutil.GetStorageURI("file", tmpDir, suite.segmentID) - suite.NoError(err) - space, err := milvus_storage.Open(uri, opt) - suite.NoError(err) - - b := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema) - defer b.Release() - insertData, err := genInsertData(msgLength, suite.schema) - suite.NoError(err) - - err = typeutil.BuildRecord(b, insertData, suite.schema.Fields) - suite.NoError(err) - rec := b.NewRecord() - defer rec.Release() - reader, err := array.NewRecordReader(arrowSchema, []arrow.Record{rec}) - suite.NoError(err) - err = space.Write(reader, &options.DefaultWriteOptions) - suite.NoError(err) - - collMeta := genCollectionMeta(suite.collectionID, suite.partitionID, suite.schema) - inCodec := storage.NewInsertCodecWithSchema(collMeta) - statsLog, err := inCodec.SerializePkStatsByData(insertData) - suite.NoError(err) - - err = space.WriteBlob(statsLog.Value, statsLog.Key, false) - suite.NoError(err) - - dschema := space.Manifest().GetSchema().DeleteSchema() - dbuilder := array.NewRecordBuilder(memory.DefaultAllocator, dschema) - defer dbuilder.Release() - dbuilder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2}, nil) - dbuilder.Field(1).(*array.Int64Builder).AppendValues([]int64{100, 200}, nil) - - drec := dbuilder.NewRecord() - defer drec.Release() - - dreader, err := array.NewRecordReader(dschema, []arrow.Record{drec}) - suite.NoError(err) - - err = space.Delete(dreader) - suite.NoError(err) - - segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, &querypb.SegmentLoadInfo{ - SegmentID: suite.segmentID, - PartitionID: suite.partitionID, - CollectionID: suite.collectionID, - NumOfRows: int64(msgLength), - StorageVersion: 3, - InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID), - }) - suite.NoError(err) - - _, err = suite.loader.LoadBloomFilterSet(ctx, suite.collectionID, 0, &querypb.SegmentLoadInfo{ - SegmentID: suite.segmentID, - PartitionID: suite.partitionID, - CollectionID: suite.collectionID, - NumOfRows: int64(msgLength), - StorageVersion: 3, - InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID), - }) - suite.NoError(err) - - segment := segments[0] - suite.EqualValues(4, segment.InsertCount()) - suite.Equal(int64(msgLength-2), segment.RowNum()) -} - -func TestSegmentLoaderV2(t *testing.T) { - suite.Run(t, &SegmentLoaderV2Suite{}) -} diff --git a/internal/querynodev2/server.go b/internal/querynodev2/server.go index e048c94b35..508eb6e484 100644 --- a/internal/querynodev2/server.go +++ b/internal/querynodev2/server.go @@ -348,11 +348,7 @@ func (node *QueryNode) Init() error { node.subscribingChannels = typeutil.NewConcurrentSet[string]() node.unsubscribingChannels = typeutil.NewConcurrentSet[string]() node.manager = segments.NewManager() - if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { - node.loader = segments.NewLoaderV2(node.manager, node.chunkManager) - } else { - node.loader = segments.NewLoader(node.manager, node.chunkManager) - } + node.loader = segments.NewLoader(node.manager, node.chunkManager) node.manager.SetLoader(node.loader) node.dispClient = msgdispatcher.NewClient(node.factory, typeutil.QueryNodeRole, node.GetNodeID()) // init pipeline manager diff --git a/internal/util/bloomfilter/bloom_filter_test.go b/internal/util/bloomfilter/bloom_filter_test.go index df65ecffbd..44d78fa075 100644 --- a/internal/util/bloomfilter/bloom_filter_test.go +++ b/internal/util/bloomfilter/bloom_filter_test.go @@ -25,7 +25,7 @@ import ( "github.com/stretchr/testify/assert" "go.uber.org/zap" - "github.com/milvus-io/milvus-storage/go/common/log" + "github.com/milvus-io/milvus/pkg/log" ) func TestPerformance(t *testing.T) { diff --git a/internal/util/importutilv2/binlog/l0_reader.go b/internal/util/importutilv2/binlog/l0_reader.go index cdf75b0643..ca45c27388 100644 --- a/internal/util/importutilv2/binlog/l0_reader.go +++ b/internal/util/importutilv2/binlog/l0_reader.go @@ -24,9 +24,9 @@ import ( "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus-storage/go/common/log" "github.com/milvus-io/milvus/internal/proto/internalpb" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" ) diff --git a/internal/util/indexcgowrapper/index.go b/internal/util/indexcgowrapper/index.go index 523a2ac1ec..f2458e1c86 100644 --- a/internal/util/indexcgowrapper/index.go +++ b/internal/util/indexcgowrapper/index.go @@ -41,7 +41,6 @@ type CodecIndex interface { Delete() error CleanLocalData() error UpLoad() (map[string]int64, error) - UpLoadV2() (int64, error) } var _ CodecIndex = (*CgoIndex)(nil) @@ -127,35 +126,6 @@ func CreateIndex(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) return index, nil } -func CreateIndexV2(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) { - buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo) - if err != nil { - log.Ctx(ctx).Warn("marshal buildIndexInfo failed", - zap.String("clusterID", buildIndexInfo.GetClusterID()), - zap.Int64("buildID", buildIndexInfo.GetBuildID()), - zap.Error(err)) - return nil, err - } - var indexPtr C.CIndex - status := C.CreateIndexV2(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob))) - if err := HandleCStatus(&status, "failed to create index"); err != nil { - return nil, err - } - - index := &CgoIndex{ - indexPtr: indexPtr, - close: false, - } - - runtime.SetFinalizer(index, func(index *CgoIndex) { - if index != nil && !index.close { - log.Error("there is leakage in index object, please check.") - } - }) - - return index, nil -} - // TODO: this seems to be used only for test. We should mark the method // name with ForTest, or maybe move to test file. func (index *CgoIndex) Build(dataset *Dataset) error { @@ -426,34 +396,3 @@ func (index *CgoIndex) UpLoad() (map[string]int64, error) { return res, nil } - -func (index *CgoIndex) UpLoadV2() (int64, error) { - var cBinarySet C.CBinarySet - - status := C.SerializeIndexAndUpLoadV2(index.indexPtr, &cBinarySet) - defer func() { - if cBinarySet != nil { - C.DeleteBinarySet(cBinarySet) - } - }() - if err := HandleCStatus(&status, "failed to serialize index and upload index"); err != nil { - return -1, err - } - - buffer, err := GetBinarySetValue(cBinarySet, "index_store_version") - if err != nil { - return -1, err - } - var version int64 - - version = int64(buffer[7]) - version = (version << 8) + int64(buffer[6]) - version = (version << 8) + int64(buffer[5]) - version = (version << 8) + int64(buffer[4]) - version = (version << 8) + int64(buffer[3]) - version = (version << 8) + int64(buffer[2]) - version = (version << 8) + int64(buffer[1]) - version = (version << 8) + int64(buffer[0]) - - return version, nil -}