enhance: remove unused code for StorageV2 (#35132)

issue: https://github.com/milvus-io/milvus/issues/34168

Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>
pull/35207/head
zhenshan.cao 2024-08-01 12:08:13 +08:00 committed by GitHub
parent 9412002d7d
commit aa247f192d
99 changed files with 148 additions and 5882 deletions
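The deletions below strip the StorageV2 code path out of the index layer: every constructor overload that takes a std::shared_ptr<milvus_storage::Space>, the BuildV2 / LoadV2 / UploadV2 entry points, and the milvus-io/milvus-storage dependency in go.mod, go.sum, and the CMake link lines. As a rough orientation, here is a condensed sketch of the index surface that survives, distilled from the IndexBase hunks further down; it is an illustration, not the complete header.

namespace milvus::index {

// Condensed sketch (not the full interface): only the classic V1
// storage path remains on the index base class after this PR.
class IndexBase {
 public:
    virtual ~IndexBase() = default;

    virtual void
    Load(milvus::tracer::TraceContext ctx, const Config& config = {}) = 0;

    virtual void
    Build(const Config& config = {}) = 0;

    virtual BinarySet
    Upload(const Config& config = {}) = 0;

    // Removed by this PR: LoadV2, BuildV2, UploadV2 and every overload
    // taking std::shared_ptr<milvus_storage::Space>.
};

}  // namespace milvus::index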


@ -335,6 +335,16 @@ test-querycoord:
@echo "Running go unittests..."
@(env bash $(PWD)/scripts/run_go_unittest.sh -t querycoord)
generate-mockery-flushcommon: getdeps
$(INSTALL_PATH)/mockery --name=MetaCache --dir=$(PWD)/internal/flushcommon/metacache --output=$(PWD)/internal/flushcommon/metacache --filename=mock_meta_cache.go --with-expecter --structname=MockMetaCache --outpkg=metacache --inpackage
$(INSTALL_PATH)/mockery --name=SyncManager --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_sync_manager.go --with-expecter --structname=MockSyncManager --outpkg=syncmgr --inpackage
$(INSTALL_PATH)/mockery --name=MetaWriter --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_meta_writer.go --with-expecter --structname=MockMetaWriter --outpkg=syncmgr --inpackage
$(INSTALL_PATH)/mockery --name=Serializer --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_serializer.go --with-expecter --structname=MockSerializer --outpkg=syncmgr --inpackage
$(INSTALL_PATH)/mockery --name=Task --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_task.go --with-expecter --structname=MockTask --outpkg=syncmgr --inpackage
$(INSTALL_PATH)/mockery --name=WriteBuffer --dir=$(PWD)/internal/flushcommon/writebuffer --output=$(PWD)/internal/flushcommon/writebuffer --filename=mock_write_buffer.go --with-expecter --structname=MockWriteBuffer --outpkg=writebuffer --inpackage
$(INSTALL_PATH)/mockery --name=BufferManager --dir=$(PWD)/internal/flushcommon/writebuffer --output=$(PWD)/internal/flushcommon/writebuffer --filename=mock_manager.go --with-expecter --structname=MockBufferManager --outpkg=writebuffer --inpackage
$(INSTALL_PATH)/mockery --name=FlowgraphManager --dir=$(PWD)/internal/flushcommon/pipeline --output=$(PWD)/internal/flushcommon/pipeline --filename=mock_fgmanager.go --with-expecter --structname=MockFlowgraphManager --outpkg=pipeline --inpackage
test-metastore:
@echo "Running go unittests..."
@(env bash $(PWD)/scripts/run_go_unittest.sh -t metastore)

go.mod

@ -55,8 +55,6 @@ require (
google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f
)
require github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70
require (
github.com/bits-and-blooms/bitset v1.10.0
github.com/cenkalti/backoff/v4 v4.2.1

go.sum

@ -608,8 +608,6 @@ github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZz
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240717062137-3ffb1db01632 h1:CXig0DNtUsCLzchCFe3PR2KgOdobbz9gK2nSV7195PM=
github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240717062137-3ffb1db01632/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70 h1:Z+sp64fmAOxAG7mU0dfVOXvAXlwRB0c8a96rIM5HevI=
github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70/go.mod h1:GPETMcTZq1gLY1WA6Na5kiNAKnq8SEMMiVKUZrM3sho=
github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A=
github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=


@ -21,7 +21,6 @@
#include <vector>
#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"
#include "pb/clustering.pb.h"
#include "knowhere/cluster/cluster_factory.h"


@ -25,7 +25,6 @@
#include "storage/ChunkManager.h"
#include "storage/DataCodec.h"
#include "storage/Types.h"
#include "storage/space.h"
namespace milvus::clustering {


@ -30,4 +30,4 @@ set(MILVUS_EXEC_SRCS
add_library(milvus_exec STATIC ${MILVUS_EXEC_SRCS})
target_link_libraries(milvus_exec milvus_common milvus-storage ${CONAN_LIBS})
target_link_libraries(milvus_exec milvus_common ${CONAN_LIBS})


@ -25,7 +25,6 @@
#include "index/ScalarIndex.h"
#include "index/Utils.h"
#include "storage/Util.h"
#include "storage/space.h"
namespace milvus {
namespace index {
@ -42,20 +41,6 @@ BitmapIndex<T>::BitmapIndex(
}
}
template <typename T>
BitmapIndex<T>::BitmapIndex(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: is_built_(false),
schema_(file_manager_context.fieldDataMeta.field_schema),
space_(space) {
if (file_manager_context.Valid()) {
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
file_manager_context, space);
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
}
}
template <typename T>
void
BitmapIndex<T>::Build(const Config& config) {
@ -101,32 +86,6 @@ BitmapIndex<T>::Build(size_t n, const T* data) {
is_built_ = true;
}
template <typename T>
void
BitmapIndex<T>::BuildV2(const Config& config) {
if (is_built_) {
return;
}
auto field_name = file_manager_->GetIndexMeta().field_name;
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {
PanicInfo(DataFormatBroken, "failed to read data");
}
auto data = rec.ValueUnsafe();
auto total_num_rows = data->num_rows();
auto col_data = data->GetColumnByName(field_name);
// todo: support nullable index
auto field_data = storage::CreateFieldData(
DataType(GetDType<T>()), false, 0, total_num_rows);
field_data->FillFieldData(col_data);
field_datas.push_back(field_data);
}
BuildWithFieldData(field_datas);
}
template <typename T>
void
BitmapIndex<T>::BuildPrimitiveField(
@ -302,21 +261,6 @@ BitmapIndex<T>::Upload(const Config& config) {
return ret;
}
template <typename T>
BinarySet
BitmapIndex<T>::UploadV2(const Config& config) {
auto binary_set = Serialize(config);
file_manager_->AddFileV2(binary_set);
auto remote_path_to_size = file_manager_->GetRemotePathsToFileSize();
BinarySet ret;
for (auto& file : remote_path_to_size) {
ret.Append(file.first, nullptr, file.second);
}
return ret;
}
template <typename T>
void
BitmapIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
@ -420,48 +364,6 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
is_built_ = true;
}
template <typename T>
void
BitmapIndex<T>::LoadV2(const Config& config) {
auto blobs = space_->StatisticsBlobs();
std::vector<std::string> index_files;
auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
for (auto& b : blobs) {
if (b.name.rfind(prefix, 0) == 0) {
index_files.push_back(b.name);
}
}
std::map<std::string, FieldDataPtr> index_datas{};
for (auto& file_name : index_files) {
auto res = space_->GetBlobByteSize(file_name);
if (!res.ok()) {
PanicInfo(S3Error, "unable to read index blob");
}
auto index_blob_data =
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
auto status = space_->ReadBlob(file_name, index_blob_data.get());
if (!status.ok()) {
PanicInfo(S3Error, "unable to read index blob");
}
auto raw_index_blob =
storage::DeserializeFileData(index_blob_data, res.value());
auto key = file_name.substr(file_name.find_last_of('/') + 1);
index_datas[key] = raw_index_blob->GetFieldData();
}
AssembleIndexDatas(index_datas);
BinarySet binary_set;
for (auto& [key, data] : index_datas) {
auto size = data->Size();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
auto buf = std::shared_ptr<uint8_t[]>(
(uint8_t*)const_cast<void*>(data->Data()), deleter);
binary_set.Append(key, buf, size);
}
LoadWithoutAssemble(binary_set, config);
}
template <typename T>
void
BitmapIndex<T>::Load(milvus::tracer::TraceContext ctx, const Config& config) {


@ -25,7 +25,6 @@
#include "storage/FileManager.h"
#include "storage/DiskFileManagerImpl.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"
namespace milvus {
namespace index {
@ -46,10 +45,6 @@ class BitmapIndex : public ScalarIndex<T> {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
explicit BitmapIndex(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
~BitmapIndex() override = default;
BinarySet
@ -61,9 +56,6 @@ class BitmapIndex : public ScalarIndex<T> {
void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
void
LoadV2(const Config& config = {}) override;
int64_t
Count() override {
return total_num_rows_;
@ -83,9 +75,6 @@ class BitmapIndex : public ScalarIndex<T> {
void
BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;
void
BuildV2(const Config& config = {}) override;
const TargetBitmap
In(size_t n, const T* values) override;
@ -112,9 +101,6 @@ class BitmapIndex : public ScalarIndex<T> {
BinarySet
Upload(const Config& config = {}) override;
BinarySet
UploadV2(const Config& config = {}) override;
const bool
HasRawData() const override {
if (schema_.data_type() == proto::schema::DataType::Array) {
@ -195,7 +181,6 @@ class BitmapIndex : public ScalarIndex<T> {
size_t total_num_rows_{0};
proto::schema::FieldSchema schema_;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
std::shared_ptr<milvus_storage::Space> space_;
};
} // namespace index


@ -26,6 +26,6 @@ set(INDEX_FILES
milvus_add_pkg_config("milvus_index")
add_library(milvus_index SHARED ${INDEX_FILES})
target_link_libraries(milvus_index milvus_storage milvus-storage tantivy_binding)
target_link_libraries(milvus_index milvus_storage tantivy_binding)
install(TARGETS milvus_index DESTINATION "${CMAKE_INSTALL_LIBDIR}")


@ -23,7 +23,6 @@
#include "index/ScalarIndex.h"
#include "index/Utils.h"
#include "storage/Util.h"
#include "storage/space.h"
namespace milvus {
namespace index {
@ -43,23 +42,6 @@ HybridScalarIndex<T>::HybridScalarIndex(
internal_index_type_ = ScalarIndexType::NONE;
}
template <typename T>
HybridScalarIndex<T>::HybridScalarIndex(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: is_built_(false),
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
file_manager_context_(file_manager_context),
space_(space) {
if (file_manager_context.Valid()) {
mem_file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
file_manager_context, space);
AssertInfo(mem_file_manager_ != nullptr, "create file manager failed!");
}
field_type_ = file_manager_context.fieldDataMeta.field_schema.data_type();
internal_index_type_ = ScalarIndexType::NONE;
}
template <typename T>
ScalarIndexType
HybridScalarIndex<T>::SelectIndexBuildType(size_t n, const T* values) {
@ -274,39 +256,6 @@ HybridScalarIndex<T>::Build(const Config& config) {
is_built_ = true;
}
template <typename T>
void
HybridScalarIndex<T>::BuildV2(const Config& config) {
if (is_built_) {
return;
}
bitmap_index_cardinality_limit_ =
GetBitmapCardinalityLimitFromConfig(config);
LOG_INFO("config bitmap cardinality limit to {}",
bitmap_index_cardinality_limit_);
auto field_name = mem_file_manager_->GetIndexMeta().field_name;
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {
PanicInfo(DataFormatBroken, "failed to read data");
}
auto data = rec.ValueUnsafe();
auto total_num_rows = data->num_rows();
auto col_data = data->GetColumnByName(field_name);
// todo: support nullable index
auto field_data = storage::CreateFieldData(
DataType(GetDType<T>()), false, 0, total_num_rows);
field_data->FillFieldData(col_data);
field_datas.push_back(field_data);
}
SelectIndexBuildType(field_datas);
BuildInternal(field_datas);
is_built_ = true;
}
template <typename T>
BinarySet
HybridScalarIndex<T>::Serialize(const Config& config) {
@ -356,21 +305,6 @@ HybridScalarIndex<T>::Upload(const Config& config) {
return index_ret;
}
template <typename T>
BinarySet
HybridScalarIndex<T>::UploadV2(const Config& config) {
auto internal_index = GetInternalIndex();
auto index_ret = internal_index->Upload(config);
auto index_type_ret = SerializeIndexType();
for (auto& [key, value] : index_type_ret.binary_map_) {
index_ret.Append(key, value);
}
return index_ret;
}
template <typename T>
void
HybridScalarIndex<T>::DeserializeIndexType(const BinarySet& binary_set) {
@ -380,12 +314,6 @@ HybridScalarIndex<T>::DeserializeIndexType(const BinarySet& binary_set) {
internal_index_type_ = static_cast<ScalarIndexType>(index_type);
}
template <typename T>
void
HybridScalarIndex<T>::LoadV2(const Config& config) {
PanicInfo(Unsupported, "HybridScalarIndex LoadV2 not implemented");
}
template <typename T>
std::string
HybridScalarIndex<T>::GetRemoteIndexTypeFile(


@ -28,7 +28,6 @@
#include "storage/FileManager.h"
#include "storage/DiskFileManagerImpl.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"
namespace milvus {
namespace index {
@ -46,10 +45,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
explicit HybridScalarIndex(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
~HybridScalarIndex() override = default;
BinarySet
@ -61,9 +56,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
void
LoadV2(const Config& config = {}) override;
int64_t
Count() override {
return internal_index_->Count();
@ -85,9 +77,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
void
Build(const Config& config = {}) override;
void
BuildV2(const Config& config = {}) override;
const TargetBitmap
In(size_t n, const T* values) override {
return internal_index_->In(n, values);
@ -133,9 +122,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
BinarySet
Upload(const Config& config = {}) override;
BinarySet
UploadV2(const Config& config = {}) override;
private:
ScalarIndexType
SelectBuildTypeForPrimitiveType(
@ -173,7 +159,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
std::shared_ptr<ScalarIndex<T>> internal_index_{nullptr};
storage::FileManagerContext file_manager_context_;
std::shared_ptr<storage::MemFileManagerImpl> mem_file_manager_{nullptr};
std::shared_ptr<milvus_storage::Space> space_{nullptr};
};
} // namespace index


@ -44,9 +44,6 @@ class IndexBase {
virtual void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) = 0;
virtual void
LoadV2(const Config& config = {}) = 0;
virtual void
BuildWithRawData(size_t n,
const void* values,
@ -58,18 +55,12 @@ class IndexBase {
virtual void
Build(const Config& config = {}) = 0;
virtual void
BuildV2(const Config& Config = {}) = 0;
virtual int64_t
Count() = 0;
virtual BinarySet
Upload(const Config& config = {}) = 0;
virtual BinarySet
UploadV2(const Config& config = {}) = 0;
virtual const bool
HasRawData() const = 0;


@ -78,51 +78,6 @@ IndexFactory::CreatePrimitiveScalarIndex<std::string>(
#endif
}
template <typename T>
ScalarIndexPtr<T>
IndexFactory::CreatePrimitiveScalarIndex(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
if (index_type == INVERTED_INDEX_TYPE) {
return std::make_unique<InvertedIndexTantivy<T>>(file_manager_context,
space);
}
if (index_type == BITMAP_INDEX_TYPE) {
return std::make_unique<BitmapIndex<T>>(file_manager_context, space);
}
if (index_type == HYBRID_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<T>>(file_manager_context,
space);
}
return CreateScalarIndexSort<T>(file_manager_context, space);
}
template <>
ScalarIndexPtr<std::string>
IndexFactory::CreatePrimitiveScalarIndex<std::string>(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
#if defined(__linux__) || defined(__APPLE__)
if (index_type == INVERTED_INDEX_TYPE) {
return std::make_unique<InvertedIndexTantivy<std::string>>(
file_manager_context, space);
}
if (index_type == BITMAP_INDEX_TYPE) {
return std::make_unique<BitmapIndex<std::string>>(file_manager_context,
space);
}
if (index_type == HYBRID_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<std::string>>(
file_manager_context, space);
}
return CreateStringIndexMarisa(file_manager_context, space);
#else
PanicInfo(Unsupported, "unsupported platform");
#endif
}
IndexBasePtr
IndexFactory::CreateIndex(
const CreateIndexInfo& create_index_info,
@ -134,19 +89,6 @@ IndexFactory::CreateIndex(
return CreateScalarIndex(create_index_info, file_manager_context);
}
IndexBasePtr
IndexFactory::CreateIndex(
const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
if (IsVectorDataType(create_index_info.field_type)) {
return CreateVectorIndex(
create_index_info, file_manager_context, space);
}
return CreateScalarIndex(create_index_info, file_manager_context, space);
}
IndexBasePtr
IndexFactory::CreatePrimitiveScalarIndex(
DataType data_type,
@ -307,90 +249,4 @@ IndexFactory::CreateVectorIndex(
}
}
}
IndexBasePtr
IndexFactory::CreateVectorIndex(
const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
auto data_type = create_index_info.field_type;
auto index_type = create_index_info.index_type;
auto metric_type = create_index_info.metric_type;
auto version = create_index_info.index_engine_version;
if (knowhere::UseDiskLoad(index_type, version)) {
switch (data_type) {
case DataType::VECTOR_FLOAT: {
return std::make_unique<VectorDiskAnnIndex<float>>(
index_type,
metric_type,
version,
space,
file_manager_context);
}
case DataType::VECTOR_FLOAT16: {
return std::make_unique<VectorDiskAnnIndex<float16>>(
index_type,
metric_type,
version,
space,
file_manager_context);
}
case DataType::VECTOR_BFLOAT16: {
return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
index_type,
metric_type,
version,
space,
file_manager_context);
}
case DataType::VECTOR_BINARY: {
return std::make_unique<VectorDiskAnnIndex<bin1>>(
index_type,
metric_type,
version,
space,
file_manager_context);
}
case DataType::VECTOR_SPARSE_FLOAT: {
return std::make_unique<VectorDiskAnnIndex<float>>(
index_type,
metric_type,
version,
space,
file_manager_context);
}
default:
PanicInfo(
DataTypeInvalid,
fmt::format("invalid data type to build disk index: {}",
data_type));
}
} else { // create mem index
switch (data_type) {
case DataType::VECTOR_FLOAT:
case DataType::VECTOR_SPARSE_FLOAT: {
return std::make_unique<VectorMemIndex<float>>(
create_index_info, file_manager_context, space);
}
case DataType::VECTOR_BINARY: {
return std::make_unique<VectorMemIndex<bin1>>(
create_index_info, file_manager_context, space);
}
case DataType::VECTOR_FLOAT16: {
return std::make_unique<VectorMemIndex<float16>>(
create_index_info, file_manager_context, space);
}
case DataType::VECTOR_BFLOAT16: {
return std::make_unique<VectorMemIndex<bfloat16>>(
create_index_info, file_manager_context, space);
}
default:
PanicInfo(
DataTypeInvalid,
fmt::format("invalid data type to build mem index: {}",
data_type));
}
}
}
} // namespace milvus::index
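With the Space-based factory overloads above removed, index construction funnels through the single remaining two-argument entry point. A minimal call-site sketch follows; the helper name and its placement are illustrative only, everything else is taken from the hunks above.

namespace milvus::index {

// Illustrative helper only: shows the surviving factory path, where
// callers no longer pass a std::shared_ptr<milvus_storage::Space>.
inline IndexBasePtr
CreateIndexV1Only(const CreateIndexInfo& create_index_info,
                  const storage::FileManagerContext& file_manager_context) {
    // Dispatches internally to CreateVectorIndex or CreateScalarIndex.
    return IndexFactory::GetInstance().CreateIndex(create_index_info,
                                                   file_manager_context);
}

}  // namespace milvus::index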


@ -32,7 +32,6 @@
#include "index/ScalarIndexSort.h"
#include "index/StringIndexMarisa.h"
#include "index/BoolIndex.h"
#include "storage/space.h"
namespace milvus::index {
@ -56,11 +55,6 @@ class IndexFactory {
CreateIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context);
IndexBasePtr
CreateIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
IndexBasePtr
CreateVectorIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context);
@ -92,19 +86,6 @@ class IndexFactory {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
IndexBasePtr
CreateVectorIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
IndexBasePtr
CreateScalarIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
PanicInfo(ErrorCode::Unsupported,
"CreateScalarIndexV2 not implemented");
}
// IndexBasePtr
// CreateIndex(DataType dtype, const IndexType& index_type);
private:
@ -115,12 +96,6 @@ class IndexFactory {
CreatePrimitiveScalarIndex(const IndexType& index_type,
const storage::FileManagerContext& file_manager =
storage::FileManagerContext());
template <typename T>
ScalarIndexPtr<T>
CreatePrimitiveScalarIndex(const IndexType& index_type,
const storage::FileManagerContext& file_manager,
std::shared_ptr<milvus_storage::Space> space);
};
} // namespace milvus::index


@ -65,11 +65,10 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
template <typename T>
InvertedIndexTantivy<T>::InvertedIndexTantivy(
const storage::FileManagerContext& ctx,
std::shared_ptr<milvus_storage::Space> space)
: space_(space), schema_(ctx.fieldDataMeta.field_schema) {
mem_file_manager_ = std::make_shared<MemFileManager>(ctx, ctx.space_);
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx, ctx.space_);
const storage::FileManagerContext& ctx)
: schema_(ctx.fieldDataMeta.field_schema) {
mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
auto field =
std::to_string(disk_file_manager_->GetFieldDataMeta().field_id);
auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix();
@ -139,12 +138,6 @@ InvertedIndexTantivy<T>::Upload(const Config& config) {
return ret;
}
template <typename T>
BinarySet
InvertedIndexTantivy<T>::UploadV2(const Config& config) {
return Upload(config);
}
template <typename T>
void
InvertedIndexTantivy<T>::Build(const Config& config) {
@ -156,28 +149,6 @@ InvertedIndexTantivy<T>::Build(const Config& config) {
BuildWithFieldData(field_datas);
}
template <typename T>
void
InvertedIndexTantivy<T>::BuildV2(const Config& config) {
auto field_name = mem_file_manager_->GetIndexMeta().field_name;
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {
PanicInfo(DataFormatBroken, "failed to read data");
}
auto data = rec.ValueUnsafe();
auto total_num_rows = data->num_rows();
auto col_data = data->GetColumnByName(field_name);
// todo: support nullable index
auto field_data = storage::CreateFieldData(
DataType(GetDType<T>()), false, 0, total_num_rows);
field_data->FillFieldData(col_data);
field_datas.push_back(field_data);
}
BuildWithFieldData(field_datas);
}
template <typename T>
void
InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
@ -201,14 +172,6 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
}
template <typename T>
void
InvertedIndexTantivy<T>::LoadV2(const Config& config) {
disk_file_manager_->CacheIndexToDisk();
auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix();
wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
}
inline void
apply_hits(TargetBitmap& bitset, const RustArrayWrapper& w, bool v) {
for (size_t j = 0; j < w.array_.len; j++) {


@ -18,7 +18,6 @@
#include "tantivy-binding.h"
#include "tantivy-wrapper.h"
#include "index/StringIndex.h"
#include "storage/space.h"
namespace milvus::index {
@ -34,13 +33,7 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
using DiskFileManagerPtr = std::shared_ptr<DiskFileManager>;
InvertedIndexTantivy() = default;
explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx)
: InvertedIndexTantivy(ctx, nullptr) {
}
explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx,
std::shared_ptr<milvus_storage::Space> space);
explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx);
~InvertedIndexTantivy();
@ -56,9 +49,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
void
LoadV2(const Config& config = {}) override;
/*
* deprecated.
* TODO: why not remove this?
@ -78,9 +68,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
void
Build(const Config& config = {}) override;
void
BuildV2(const Config& config = {}) override;
int64_t
Count() override {
return wrapper_->count();
@ -102,9 +89,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
BinarySet
Upload(const Config& config = {}) override;
BinarySet
UploadV2(const Config& config = {}) override;
/*
* deprecated, only used in small chunk index.
*/
@ -196,6 +180,5 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
*/
MemFileManagerPtr mem_file_manager_;
DiskFileManagerPtr disk_file_manager_;
std::shared_ptr<milvus_storage::Space> space_;
};
} // namespace milvus::index


@ -44,73 +44,6 @@ ScalarIndexSort<T>::ScalarIndexSort(
}
}
template <typename T>
inline ScalarIndexSort<T>::ScalarIndexSort(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: is_built_(false), data_(), space_(space) {
if (file_manager_context.Valid()) {
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
file_manager_context, space);
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
}
}
template <typename T>
inline void
ScalarIndexSort<T>::BuildV2(const Config& config) {
if (is_built_) {
return;
}
auto field_name = file_manager_->GetIndexMeta().field_name;
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {
PanicInfo(DataFormatBroken, "failed to read data");
}
auto data = rec.ValueUnsafe();
auto total_num_rows = data->num_rows();
auto col_data = data->GetColumnByName(field_name);
auto nullable =
col_data->type()->id() == arrow::Type::NA ? true : false;
// will support build scalar index when nullable in the future just skip it
// now, not support to build index in nullable field_data
// todo: support nullable index
AssertInfo(!nullable,
"not support to build index in nullable field_data");
auto field_data = storage::CreateFieldData(
DataType(GetDType<T>()), nullable, 0, total_num_rows);
field_data->FillFieldData(col_data);
field_datas.push_back(field_data);
}
int64_t total_num_rows = 0;
for (const auto& data : field_datas) {
total_num_rows += data->get_num_rows();
}
if (total_num_rows == 0) {
PanicInfo(DataIsEmpty, "ScalarIndexSort cannot build null values!");
}
data_.reserve(total_num_rows);
int64_t offset = 0;
for (const auto& data : field_datas) {
auto slice_num = data->get_num_rows();
for (size_t i = 0; i < slice_num; ++i) {
auto value = reinterpret_cast<const T*>(data->RawValue(i));
data_.emplace_back(IndexStructure(*value, offset));
offset++;
}
}
std::sort(data_.begin(), data_.end());
idx_to_offsets_.resize(total_num_rows);
for (size_t i = 0; i < total_num_rows; ++i) {
idx_to_offsets_[data_[i].idx_] = i;
}
is_built_ = true;
}
template <typename T>
void
ScalarIndexSort<T>::Build(const Config& config) {
@ -215,21 +148,6 @@ ScalarIndexSort<T>::Upload(const Config& config) {
return ret;
}
template <typename T>
BinarySet
ScalarIndexSort<T>::UploadV2(const Config& config) {
auto binary_set = Serialize(config);
file_manager_->AddFileV2(binary_set);
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
BinarySet ret;
for (auto& file : remote_paths_to_size) {
ret.Append(file.first, nullptr, file.second);
}
return ret;
}
template <typename T>
void
ScalarIndexSort<T>::LoadWithoutAssemble(const BinarySet& index_binary,
@ -277,47 +195,6 @@ ScalarIndexSort<T>::Load(milvus::tracer::TraceContext ctx,
LoadWithoutAssemble(binary_set, config);
}
template <typename T>
void
ScalarIndexSort<T>::LoadV2(const Config& config) {
auto blobs = space_->StatisticsBlobs();
std::vector<std::string> index_files;
auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
for (auto& b : blobs) {
if (b.name.rfind(prefix, 0) == 0) {
index_files.push_back(b.name);
}
}
std::map<std::string, FieldDataPtr> index_datas{};
for (auto& file_name : index_files) {
auto res = space_->GetBlobByteSize(file_name);
if (!res.ok()) {
PanicInfo(S3Error, "unable to read index blob");
}
auto index_blob_data =
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
auto status = space_->ReadBlob(file_name, index_blob_data.get());
if (!status.ok()) {
PanicInfo(S3Error, "unable to read index blob");
}
auto raw_index_blob =
storage::DeserializeFileData(index_blob_data, res.value());
auto key = file_name.substr(file_name.find_last_of('/') + 1);
index_datas[key] = raw_index_blob->GetFieldData();
}
AssembleIndexDatas(index_datas);
BinarySet binary_set;
for (auto& [key, data] : index_datas) {
auto size = data->Size();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
auto buf = std::shared_ptr<uint8_t[]>(
(uint8_t*)const_cast<void*>(data->Data()), deleter);
binary_set.Append(key, buf, size);
}
LoadWithoutAssemble(binary_set, config);
}
template <typename T>
const TargetBitmap
ScalarIndexSort<T>::In(const size_t n, const T* values) {


@ -26,7 +26,6 @@
#include "index/IndexStructure.h"
#include "index/ScalarIndex.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"
namespace milvus::index {
@ -37,10 +36,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
explicit ScalarIndexSort(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
BinarySet
Serialize(const Config& config) override;
@ -50,9 +45,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
void
LoadV2(const Config& config = {}) override;
int64_t
Count() override {
return data_.size();
@ -69,9 +61,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
void
Build(const Config& config = {}) override;
void
BuildV2(const Config& config = {}) override;
const TargetBitmap
In(size_t n, const T* values) override;
@ -97,8 +86,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
BinarySet
Upload(const Config& config = {}) override;
BinarySet
UploadV2(const Config& config = {}) override;
const bool
HasRawData() const override {
@ -133,7 +120,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
std::vector<int32_t> idx_to_offsets_; // used to retrieve.
std::vector<IndexStructure<T>> data_;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
std::shared_ptr<milvus_storage::Space> space_;
};
template <typename T>
@ -148,11 +134,4 @@ CreateScalarIndexSort(const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext()) {
return std::make_unique<ScalarIndexSort<T>>(file_manager_context);
}
template <typename T>
inline ScalarIndexSortPtr<T>
CreateScalarIndexSort(const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
return std::make_unique<ScalarIndexSort<T>>(file_manager_context, space);
}
} // namespace milvus::index


@ -36,7 +36,6 @@
#include "index/Utils.h"
#include "index/Index.h"
#include "storage/Util.h"
#include "storage/space.h"
namespace milvus::index {
@ -48,16 +47,6 @@ StringIndexMarisa::StringIndexMarisa(
}
}
StringIndexMarisa::StringIndexMarisa(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: space_(space) {
if (file_manager_context.Valid()) {
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
file_manager_context, space_);
}
}
int64_t
StringIndexMarisa::Size() {
return trie_.size();
@ -68,65 +57,6 @@ valid_str_id(size_t str_id) {
return str_id >= 0 && str_id != MARISA_INVALID_KEY_ID;
}
void
StringIndexMarisa::BuildV2(const Config& config) {
if (built_) {
throw std::runtime_error("index has been built");
}
auto field_name = file_manager_->GetIndexMeta().field_name;
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {
PanicInfo(DataFormatBroken, "failed to read data");
}
auto data = rec.ValueUnsafe();
auto total_num_rows = data->num_rows();
auto col_data = data->GetColumnByName(field_name);
auto nullable =
col_data->type()->id() == arrow::Type::NA ? true : false;
// will support build scalar index when nullable in the future just skip it
// now, not support to build index in nullable field_data
// todo: support nullable index
AssertInfo(!nullable,
"not support to build index in nullable field_data");
auto field_data = storage::CreateFieldData(
DataType::STRING, nullable, 0, total_num_rows);
field_data->FillFieldData(col_data);
field_datas.push_back(field_data);
}
int64_t total_num_rows = 0;
// fill key set.
marisa::Keyset keyset;
for (auto data : field_datas) {
auto slice_num = data->get_num_rows();
for (size_t i = 0; i < slice_num; ++i) {
keyset.push_back(
(*static_cast<const std::string*>(data->RawValue(i))).c_str());
}
total_num_rows += slice_num;
}
trie_.build(keyset);
// fill str_ids_
str_ids_.resize(total_num_rows);
int64_t offset = 0;
for (auto data : field_datas) {
auto slice_num = data->get_num_rows();
for (size_t i = 0; i < slice_num; ++i) {
auto str_id =
lookup(*static_cast<const std::string*>(data->RawValue(i)));
AssertInfo(valid_str_id(str_id), "invalid marisa key");
str_ids_[offset++] = str_id;
}
}
// fill str_ids_to_offsets_
fill_offsets();
built_ = true;
}
void
StringIndexMarisa::Build(const Config& config) {
if (built_) {
@ -245,20 +175,6 @@ StringIndexMarisa::Upload(const Config& config) {
return ret;
}
BinarySet
StringIndexMarisa::UploadV2(const Config& config) {
auto binary_set = Serialize(config);
file_manager_->AddFileV2(binary_set);
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
BinarySet ret;
for (auto& file : remote_paths_to_size) {
ret.Append(file.first, nullptr, file.second);
}
return ret;
}
void
StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set,
const Config& config) {
@ -322,46 +238,6 @@ StringIndexMarisa::Load(milvus::tracer::TraceContext ctx,
LoadWithoutAssemble(binary_set, config);
}
void
StringIndexMarisa::LoadV2(const Config& config) {
auto blobs = space_->StatisticsBlobs();
std::vector<std::string> index_files;
auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
for (auto& b : blobs) {
if (b.name.rfind(prefix, 0) == 0) {
index_files.push_back(b.name);
}
}
std::map<std::string, FieldDataPtr> index_datas{};
for (auto& file_name : index_files) {
auto res = space_->GetBlobByteSize(file_name);
if (!res.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
auto index_blob_data =
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
auto status = space_->ReadBlob(file_name, index_blob_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
auto raw_index_blob =
storage::DeserializeFileData(index_blob_data, res.value());
index_datas[file_name] = raw_index_blob->GetFieldData();
}
AssembleIndexDatas(index_datas);
BinarySet binary_set;
for (auto& [key, data] : index_datas) {
auto size = data->Size();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
auto buf = std::shared_ptr<uint8_t[]>(
(uint8_t*)const_cast<void*>(data->Data()), deleter);
auto file_name = key.substr(key.find_last_of('/') + 1);
binary_set.Append(file_name, buf, size);
}
LoadWithoutAssemble(binary_set, config);
}
const TargetBitmap
StringIndexMarisa::In(size_t n, const std::string* values) {
TargetBitmap bitset(str_ids_.size());


@ -23,7 +23,6 @@
#include <map>
#include <memory>
#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"
namespace milvus::index {
@ -33,10 +32,6 @@ class StringIndexMarisa : public StringIndex {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
explicit StringIndexMarisa(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
int64_t
Size() override;
@ -49,9 +44,6 @@ class StringIndexMarisa : public StringIndex {
void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
void
LoadV2(const Config& config = {}) override;
int64_t
Count() override {
return str_ids_.size();
@ -71,9 +63,6 @@ class StringIndexMarisa : public StringIndex {
void
BuildWithFieldData(const std::vector<FieldDataPtr>& field_datas) override;
void
BuildV2(const Config& Config = {}) override;
const TargetBitmap
In(size_t n, const std::string* values) override;
@ -98,9 +87,6 @@ class StringIndexMarisa : public StringIndex {
BinarySet
Upload(const Config& config = {}) override;
BinarySet
UploadV2(const Config& config = {});
const bool
HasRawData() const override {
return true;
@ -131,7 +117,6 @@ class StringIndexMarisa : public StringIndex {
std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
bool built_ = false;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
std::shared_ptr<milvus_storage::Space> space_;
};
using StringIndexMarisaPtr = std::unique_ptr<StringIndexMarisa>;
@ -142,10 +127,4 @@ CreateStringIndexMarisa(
storage::FileManagerContext()) {
return std::make_unique<StringIndexMarisa>(file_manager_context);
}
inline StringIndexPtr
CreateStringIndexMarisa(const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
return std::make_unique<StringIndexMarisa>(file_manager_context, space);
}
} // namespace milvus::index
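Only the single-argument CreateStringIndexMarisa helper is left after this change. A minimal usage sketch of the surviving V1 build-and-upload path follows; the wrapper function is hypothetical, and ctx and config are assumed to be prepared by the caller exactly as before.

namespace milvus::index {

// Hypothetical wrapper: build and upload a marisa string index through
// the remaining V1 path. BuildV2/UploadV2 no longer exist on this class.
inline BinarySet
BuildAndUploadStringIndex(const storage::FileManagerContext& ctx,
                          const Config& config) {
    auto index = CreateStringIndexMarisa(ctx);
    index->Build(config);
    return index->Upload(config);
}

}  // namespace milvus::index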


@ -73,45 +73,6 @@ VectorDiskAnnIndex<T>::VectorDiskAnnIndex(
}
}
template <typename T>
VectorDiskAnnIndex<T>::VectorDiskAnnIndex(
const IndexType& index_type,
const MetricType& metric_type,
const IndexVersion& version,
std::shared_ptr<milvus_storage::Space> space,
const storage::FileManagerContext& file_manager_context)
: space_(space), VectorIndex(index_type, metric_type) {
CheckMetricTypeSupport<T>(metric_type);
file_manager_ = std::make_shared<storage::DiskFileManagerImpl>(
file_manager_context, file_manager_context.space_);
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
auto local_chunk_manager =
storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
// As we have guarded dup-load in QueryNode,
// this assertion failed only if the Milvus rebooted in the same pod,
// need to remove these files then re-load the segment
if (local_chunk_manager->Exist(local_index_path_prefix)) {
local_chunk_manager->RemoveDir(local_index_path_prefix);
}
CheckCompatible(version);
local_chunk_manager->CreateDir(local_index_path_prefix);
auto diskann_index_pack =
knowhere::Pack(std::shared_ptr<knowhere::FileManager>(file_manager_));
auto get_index_obj = knowhere::IndexFactory::Instance().Create<T>(
GetIndexType(), version, diskann_index_pack);
if (get_index_obj.has_value()) {
index_ = get_index_obj.value();
} else {
auto err = get_index_obj.error();
if (err == knowhere::Status::invalid_index_error) {
PanicInfo(ErrorCode::Unsupported, get_index_obj.what());
}
PanicInfo(ErrorCode::KnowhereError, get_index_obj.what());
}
}
template <typename T>
void
VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */,
@ -153,21 +114,6 @@ VectorDiskAnnIndex<T>::Load(milvus::tracer::TraceContext ctx,
SetDim(index_.Dim());
}
template <typename T>
void
VectorDiskAnnIndex<T>::LoadV2(const Config& config) {
knowhere::Json load_config = update_load_json(config);
file_manager_->CacheIndexToDisk();
auto stat = index_.Deserialize(knowhere::BinarySet(), load_config);
if (stat != knowhere::Status::success)
PanicInfo(ErrorCode::UnexpectedError,
"failed to Deserialize index, " + KnowhereStatusString(stat));
SetDim(index_.Dim());
}
template <typename T>
BinarySet
VectorDiskAnnIndex<T>::Upload(const Config& config) {
@ -185,53 +131,6 @@ VectorDiskAnnIndex<T>::Upload(const Config& config) {
return ret;
}
template <typename T>
BinarySet
VectorDiskAnnIndex<T>::UploadV2(const Config& config) {
return Upload(config);
}
template <typename T>
void
VectorDiskAnnIndex<T>::BuildV2(const Config& config) {
knowhere::Json build_config;
build_config.update(config);
auto local_data_path = file_manager_->CacheRawDataToDisk<T>(space_);
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
build_config[DISK_ANN_PREFIX_PATH] = local_index_path_prefix;
if (GetIndexType() == knowhere::IndexEnum::INDEX_DISKANN) {
auto num_threads = GetValueFromConfig<std::string>(
build_config, DISK_ANN_BUILD_THREAD_NUM);
AssertInfo(
num_threads.has_value(),
"param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
build_config[DISK_ANN_THREADS_NUM] =
std::atoi(num_threads.value().c_str());
}
auto opt_fields = GetValueFromConfig<OptFieldT>(config, VEC_OPT_FIELDS);
if (opt_fields.has_value() && index_.IsAdditionalScalarSupported()) {
build_config[VEC_OPT_FIELDS_PATH] =
file_manager_->CacheOptFieldToDisk(opt_fields.value());
// `partition_key_isolation` is already in the config, so it falls through
// into the index Build call directly
}
build_config.erase("insert_files");
build_config.erase(VEC_OPT_FIELDS);
index_.Build({}, build_config);
auto local_chunk_manager =
storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto segment_id = file_manager_->GetFieldDataMeta().segment_id;
local_chunk_manager->RemoveDir(
storage::GetSegmentRawDataPathPrefix(local_chunk_manager, segment_id));
}
template <typename T>
void
VectorDiskAnnIndex<T>::Build(const Config& config) {


@ -21,7 +21,6 @@
#include "index/VectorIndex.h"
#include "storage/DiskFileManagerImpl.h"
#include "storage/space.h"
namespace milvus::index {
@ -35,14 +34,6 @@ class VectorDiskAnnIndex : public VectorIndex {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
explicit VectorDiskAnnIndex(
const IndexType& index_type,
const MetricType& metric_type,
const IndexVersion& version,
std::shared_ptr<milvus_storage::Space> space,
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
BinarySet
Serialize(const Config& config) override { // deprecated
BinarySet binary_set;
@ -58,9 +49,6 @@ class VectorDiskAnnIndex : public VectorIndex {
BinarySet
Upload(const Config& config = {}) override;
BinarySet
UploadV2(const Config& config = {}) override;
int64_t
Count() override {
return index_.Count();
@ -73,9 +61,6 @@ class VectorDiskAnnIndex : public VectorIndex {
void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
void
LoadV2(const Config& config = {}) override;
void
BuildWithDataset(const DatasetPtr& dataset,
const Config& config = {}) override;
@ -83,9 +68,6 @@ class VectorDiskAnnIndex : public VectorIndex {
void
Build(const Config& config = {}) override;
void
BuildV2(const Config& config = {}) override;
void
Query(const DatasetPtr dataset,
const SearchInfo& search_info,
@ -119,7 +101,6 @@ class VectorDiskAnnIndex : public VectorIndex {
knowhere::Index<knowhere::IndexNode> index_;
std::shared_ptr<storage::DiskFileManagerImpl> file_manager_;
uint32_t search_beamwidth_ = 8;
std::shared_ptr<milvus_storage::Space> space_;
};
template <typename T>


@ -48,7 +48,6 @@
#include "storage/DataCodec.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/ThreadPools.h"
#include "storage/space.h"
#include "storage/Util.h"
#include "monitor/prometheus_client.h"
@ -83,69 +82,6 @@ VectorMemIndex<T>::VectorMemIndex(
}
}
template <typename T>
VectorMemIndex<T>::VectorMemIndex(
const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: VectorIndex(create_index_info.index_type, create_index_info.metric_type),
space_(space),
create_index_info_(create_index_info) {
CheckMetricTypeSupport<T>(create_index_info.metric_type);
AssertInfo(!is_unsupported(create_index_info.index_type,
create_index_info.metric_type),
create_index_info.index_type +
" doesn't support metric: " + create_index_info.metric_type);
if (file_manager_context.Valid()) {
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
file_manager_context, file_manager_context.space_);
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
}
auto version = create_index_info.index_engine_version;
CheckCompatible(version);
auto get_index_obj =
knowhere::IndexFactory::Instance().Create<T>(GetIndexType(), version);
if (get_index_obj.has_value()) {
index_ = get_index_obj.value();
} else {
auto err = get_index_obj.error();
if (err == knowhere::Status::invalid_index_error) {
PanicInfo(ErrorCode::Unsupported, get_index_obj.what());
}
PanicInfo(ErrorCode::KnowhereError, get_index_obj.what());
}
}
template <typename T>
BinarySet
VectorMemIndex<T>::UploadV2(const Config& config) {
auto binary_set = Serialize(config);
file_manager_->AddFileV2(binary_set);
auto store_version = file_manager_->space()->GetCurrentVersion();
std::shared_ptr<uint8_t[]> store_version_data(
new uint8_t[sizeof(store_version)]);
store_version_data[0] = store_version & 0x00000000000000FF;
store_version = store_version >> 8;
store_version_data[1] = store_version & 0x00000000000000FF;
store_version = store_version >> 8;
store_version_data[2] = store_version & 0x00000000000000FF;
store_version = store_version >> 8;
store_version_data[3] = store_version & 0x00000000000000FF;
store_version = store_version >> 8;
store_version_data[4] = store_version & 0x00000000000000FF;
store_version = store_version >> 8;
store_version_data[5] = store_version & 0x00000000000000FF;
store_version = store_version >> 8;
store_version_data[6] = store_version & 0x00000000000000FF;
store_version = store_version >> 8;
store_version_data[7] = store_version & 0x00000000000000FF;
BinarySet ret;
ret.Append("index_store_version", store_version_data, 8);
return ret;
}
template <typename T>
knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
VectorMemIndex<T>::VectorIterators(const milvus::DatasetPtr dataset,
@ -202,105 +138,6 @@ VectorMemIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
LoadWithoutAssemble(binary_set, config);
}
template <typename T>
void
VectorMemIndex<T>::LoadV2(const Config& config) {
if (config.contains(kMmapFilepath)) {
return LoadFromFileV2(config);
}
auto blobs = space_->StatisticsBlobs();
std::unordered_set<std::string> pending_index_files;
auto index_prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
for (auto& blob : blobs) {
if (blob.name.rfind(index_prefix, 0) == 0) {
pending_index_files.insert(blob.name);
}
}
auto slice_meta_file = index_prefix + "/" + INDEX_FILE_SLICE_META;
auto res = space_->GetBlobByteSize(std::string(slice_meta_file));
std::map<std::string, FieldDataPtr> index_datas{};
if (!res.ok() && !res.status().IsFileNotFound()) {
PanicInfo(DataFormatBroken, "failed to read blob");
}
bool slice_meta_exist = res.ok();
auto read_blob = [&](const std::string& file_name)
-> std::unique_ptr<storage::DataCodec> {
auto res = space_->GetBlobByteSize(file_name);
if (!res.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
auto index_blob_data =
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
auto status = space_->ReadBlob(file_name, index_blob_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
return storage::DeserializeFileData(index_blob_data, res.value());
};
if (slice_meta_exist) {
pending_index_files.erase(slice_meta_file);
auto slice_meta_sz = res.value();
auto slice_meta_data =
std::shared_ptr<uint8_t[]>(new uint8_t[slice_meta_sz]);
auto status = space_->ReadBlob(slice_meta_file, slice_meta_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read slice meta");
}
auto raw_slice_meta =
storage::DeserializeFileData(slice_meta_data, slice_meta_sz);
Config meta_data = Config::parse(std::string(
static_cast<const char*>(raw_slice_meta->GetFieldData()->Data()),
raw_slice_meta->GetFieldData()->Size()));
for (auto& item : meta_data[META]) {
std::string prefix = item[NAME];
int slice_num = item[SLICE_NUM];
auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
// todo: support nullable index
auto new_field_data = milvus::storage::CreateFieldData(
DataType::INT8, false, 1, total_len);
for (auto i = 0; i < slice_num; ++i) {
std::string file_name =
index_prefix + "/" + GenSlicedFileName(prefix, i);
auto raw_index_blob = read_blob(file_name);
new_field_data->FillFieldData(
raw_index_blob->GetFieldData()->Data(),
raw_index_blob->GetFieldData()->Size());
pending_index_files.erase(file_name);
}
AssertInfo(
new_field_data->IsFull(),
"index len is inconsistent after disassemble and assemble");
index_datas[prefix] = new_field_data;
}
}
if (!pending_index_files.empty()) {
for (auto& file_name : pending_index_files) {
auto raw_index_blob = read_blob(file_name);
index_datas.insert({file_name, raw_index_blob->GetFieldData()});
}
}
LOG_INFO("construct binary set...");
BinarySet binary_set;
for (auto& [key, data] : index_datas) {
LOG_INFO("add index data to binary set: {}", key);
auto size = data->Size();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
auto buf = std::shared_ptr<uint8_t[]>(
(uint8_t*)const_cast<void*>(data->Data()), deleter);
auto file_name = key.substr(key.find_last_of('/') + 1);
binary_set.Append(file_name, buf, size);
}
LOG_INFO("load index into Knowhere...");
LoadWithoutAssemble(binary_set, config);
LOG_INFO("load vector index done");
}
template <typename T>
void
VectorMemIndex<T>::Load(milvus::tracer::TraceContext ctx,
@ -442,58 +279,6 @@ VectorMemIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
SetDim(index_.Dim());
}
template <typename T>
void
VectorMemIndex<T>::BuildV2(const Config& config) {
auto field_name = create_index_info_.field_name;
auto field_type = create_index_info_.field_type;
auto dim = create_index_info_.dim;
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec : *reader) {
if (!rec.ok()) {
PanicInfo(IndexBuildError,
"failed to read data: {}",
rec.status().ToString());
}
auto data = rec.ValueUnsafe();
if (data == nullptr) {
break;
}
auto total_num_rows = data->num_rows();
auto col_data = data->GetColumnByName(field_name);
// todo: support nullable index
auto field_data =
storage::CreateFieldData(field_type, false, dim, total_num_rows);
field_data->FillFieldData(col_data);
field_datas.push_back(field_data);
}
int64_t total_size = 0;
int64_t total_num_rows = 0;
for (const auto& data : field_datas) {
total_size += data->Size();
total_num_rows += data->get_num_rows();
AssertInfo(dim == 0 || dim == data->get_dim(),
"inconsistent dim value between field datas!");
}
auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[total_size]);
int64_t offset = 0;
for (auto data : field_datas) {
std::memcpy(buf.get() + offset, data->Data(), data->Size());
offset += data->Size();
data.reset();
}
field_datas.clear();
Config build_config;
build_config.update(config);
build_config.erase("insert_files");
auto dataset = GenDataset(total_num_rows, dim, buf.get());
BuildWithDataset(dataset, build_config);
}
template <typename T>
void
VectorMemIndex<T>::Build(const Config& config) {
@ -852,109 +637,6 @@ void VectorMemIndex<T>::LoadFromFile(const Config& config) {
.count());
}
template <typename T>
void
VectorMemIndex<T>::LoadFromFileV2(const Config& config) {
auto filepath = GetValueFromConfig<std::string>(config, kMmapFilepath);
AssertInfo(filepath.has_value(), "mmap filepath is empty when load index");
std::filesystem::create_directories(
std::filesystem::path(filepath.value()).parent_path());
auto file = File::Open(filepath.value(), O_CREAT | O_TRUNC | O_RDWR);
auto blobs = space_->StatisticsBlobs();
std::unordered_set<std::string> pending_index_files;
auto index_prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
for (auto& blob : blobs) {
if (blob.name.rfind(index_prefix, 0) == 0) {
pending_index_files.insert(blob.name);
}
}
auto slice_meta_file = index_prefix + "/" + INDEX_FILE_SLICE_META;
auto res = space_->GetBlobByteSize(std::string(slice_meta_file));
if (!res.ok() && !res.status().IsFileNotFound()) {
PanicInfo(DataFormatBroken, "failed to read blob");
}
bool slice_meta_exist = res.ok();
auto read_blob = [&](const std::string& file_name)
-> std::unique_ptr<storage::DataCodec> {
auto res = space_->GetBlobByteSize(file_name);
if (!res.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
auto index_blob_data =
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
auto status = space_->ReadBlob(file_name, index_blob_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
return storage::DeserializeFileData(index_blob_data, res.value());
};
if (slice_meta_exist) {
pending_index_files.erase(slice_meta_file);
auto slice_meta_sz = res.value();
auto slice_meta_data =
std::shared_ptr<uint8_t[]>(new uint8_t[slice_meta_sz]);
auto status = space_->ReadBlob(slice_meta_file, slice_meta_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read slice meta");
}
auto raw_slice_meta =
storage::DeserializeFileData(slice_meta_data, slice_meta_sz);
Config meta_data = Config::parse(std::string(
static_cast<const char*>(raw_slice_meta->GetFieldData()->Data()),
raw_slice_meta->GetFieldData()->Size()));
for (auto& item : meta_data[META]) {
std::string prefix = item[NAME];
int slice_num = item[SLICE_NUM];
auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
for (auto i = 0; i < slice_num; ++i) {
std::string file_name =
index_prefix + "/" + GenSlicedFileName(prefix, i);
auto raw_index_blob = read_blob(file_name);
auto written =
file.Write(raw_index_blob->GetFieldData()->Data(),
raw_index_blob->GetFieldData()->Size());
pending_index_files.erase(file_name);
}
}
}
if (!pending_index_files.empty()) {
for (auto& file_name : pending_index_files) {
auto raw_index_blob = read_blob(file_name);
file.Write(raw_index_blob->GetFieldData()->Data(),
raw_index_blob->GetFieldData()->Size());
}
}
file.Close();
LOG_INFO("load index into Knowhere...");
auto conf = config;
conf.erase(kMmapFilepath);
conf[kEnableMmap] = true;
auto stat = index_.DeserializeFromFile(filepath.value(), conf);
if (stat != knowhere::Status::success) {
PanicInfo(DataFormatBroken,
"failed to Deserialize index: {}",
KnowhereStatusString(stat));
}
auto dim = index_.Dim();
this->SetDim(index_.Dim());
auto ok = unlink(filepath->data());
AssertInfo(ok == 0,
"failed to unlink mmap index file {}: {}",
filepath.value(),
strerror(errno));
LOG_INFO("load vector index done");
}
template class VectorMemIndex<float>;
template class VectorMemIndex<bin1>;
template class VectorMemIndex<float16>;


@ -25,7 +25,6 @@
#include "knowhere/index/index_factory.h"
#include "index/VectorIndex.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"
#include "index/IndexInfo.h"
namespace milvus::index {
@ -40,9 +39,6 @@ class VectorMemIndex : public VectorIndex {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
explicit VectorMemIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager,
std::shared_ptr<milvus_storage::Space> space);
BinarySet
Serialize(const Config& config) override;
@ -52,9 +48,6 @@ class VectorMemIndex : public VectorIndex {
void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
void
LoadV2(const Config& config = {}) override;
void
BuildWithDataset(const DatasetPtr& dataset,
const Config& config = {}) override;
@ -62,9 +55,6 @@ class VectorMemIndex : public VectorIndex {
void
Build(const Config& config = {}) override;
void
BuildV2(const Config& config = {}) override;
void
AddWithDataset(const DatasetPtr& dataset, const Config& config) override;
@ -91,9 +81,6 @@ class VectorMemIndex : public VectorIndex {
BinarySet
Upload(const Config& config = {}) override;
BinarySet
UploadV2(const Config& config = {}) override;
knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
VectorIterators(const DatasetPtr dataset,
const knowhere::Json& json,
@ -107,14 +94,10 @@ class VectorMemIndex : public VectorIndex {
void
LoadFromFile(const Config& config);
void
LoadFromFileV2(const Config& config);
protected:
Config config_;
knowhere::Index<knowhere::IndexNode> index_;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
std::shared_ptr<milvus_storage::Space> space_;
CreateIndexInfo create_index_info_;
};


@ -26,9 +26,6 @@ class IndexCreatorBase {
virtual void
Build() = 0;
virtual void
BuildV2() = 0;
virtual milvus::BinarySet
Serialize() = 0;
@ -38,9 +35,6 @@ class IndexCreatorBase {
virtual BinarySet
Upload() = 0;
virtual BinarySet
UploadV2() = 0;
};
using IndexCreatorBasePtr = std::unique_ptr<IndexCreatorBase>;


@ -23,7 +23,6 @@
#include "indexbuilder/type_c.h"
#include "storage/Types.h"
#include "storage/FileManager.h"
#include "storage/space.h"
namespace milvus::indexbuilder {
@ -74,41 +73,6 @@ class IndexFactory {
fmt::format("invalid type is {}", invalid_dtype_msg));
}
}
IndexCreatorBasePtr
CreateIndex(DataType type,
const std::string& field_name,
const int64_t dim,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
auto invalid_dtype_msg =
std::string("invalid data type: ") + std::to_string(int(type));
switch (type) {
case DataType::BOOL:
case DataType::INT8:
case DataType::INT16:
case DataType::INT32:
case DataType::INT64:
case DataType::FLOAT:
case DataType::DOUBLE:
case DataType::VARCHAR:
case DataType::STRING:
return CreateScalarIndex(
type, config, file_manager_context, space);
case DataType::VECTOR_FLOAT:
case DataType::VECTOR_BINARY:
case DataType::VECTOR_FLOAT16:
case DataType::VECTOR_BFLOAT16:
case DataType::VECTOR_SPARSE_FLOAT:
return std::make_unique<VecIndexCreator>(
type, field_name, dim, config, file_manager_context, space);
default:
PanicInfo(ErrorCode::DataTypeInvalid, invalid_dtype_msg);
}
}
};
} // namespace milvus::indexbuilder


@ -36,18 +36,6 @@ ScalarIndexCreator::ScalarIndexCreator(
index_info, file_manager_context);
}
ScalarIndexCreator::ScalarIndexCreator(
DataType dtype,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: config_(config), dtype_(dtype) {
milvus::index::CreateIndexInfo index_info;
index_info.field_type = dtype_;
index_info.index_type = index_type();
index_ = index::IndexFactory::GetInstance().CreateIndex(
index_info, file_manager_context, std::move(space));
}
void
ScalarIndexCreator::Build(const milvus::DatasetPtr& dataset) {
auto size = dataset->GetRows();
@ -60,11 +48,6 @@ ScalarIndexCreator::Build() {
index_->Build(config_);
}
void
ScalarIndexCreator::BuildV2() {
index_->BuildV2(config_);
}
milvus::BinarySet
ScalarIndexCreator::Serialize() {
return index_->Serialize(config_);
@ -84,10 +67,4 @@ BinarySet
ScalarIndexCreator::Upload() {
return index_->Upload();
}
BinarySet
ScalarIndexCreator::UploadV2() {
return index_->UploadV2();
}
} // namespace milvus::indexbuilder


@ -17,7 +17,6 @@
#include <common/CDataType.h>
#include "index/Index.h"
#include "index/ScalarIndex.h"
#include "storage/space.h"
namespace milvus::indexbuilder {
@ -27,19 +26,12 @@ class ScalarIndexCreator : public IndexCreatorBase {
Config& config,
const storage::FileManagerContext& file_manager_context);
ScalarIndexCreator(DataType data_type,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
void
Build(const milvus::DatasetPtr& dataset) override;
void
Build() override;
void
BuildV2() override;
milvus::BinarySet
Serialize() override;
@ -49,9 +41,6 @@ class ScalarIndexCreator : public IndexCreatorBase {
BinarySet
Upload() override;
BinarySet
UploadV2() override;
private:
std::string
index_type();
@ -72,13 +61,4 @@ CreateScalarIndex(DataType dtype,
return std::make_unique<ScalarIndexCreator>(
dtype, config, file_manager_context);
}
inline ScalarIndexCreatorPtr
CreateScalarIndex(DataType dtype,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
return std::make_unique<ScalarIndexCreator>(
dtype, config, file_manager_context, space);
}
} // namespace milvus::indexbuilder


@ -24,7 +24,7 @@ VecIndexCreator::VecIndexCreator(
DataType data_type,
Config& config,
const storage::FileManagerContext& file_manager_context)
: VecIndexCreator(data_type, "", 0, config, file_manager_context, nullptr) {
: VecIndexCreator(data_type, "", 0, config, file_manager_context) {
}
VecIndexCreator::VecIndexCreator(
@ -32,9 +32,8 @@ VecIndexCreator::VecIndexCreator(
const std::string& field_name,
const int64_t dim,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: config_(config), data_type_(data_type), space_(std::move(space)) {
const storage::FileManagerContext& file_manager_context)
: config_(config), data_type_(data_type) {
index::CreateIndexInfo index_info;
index_info.field_type = data_type_;
index_info.index_type = index::GetIndexTypeFromConfig(config_);
@ -45,7 +44,7 @@ VecIndexCreator::VecIndexCreator(
index_info.dim = dim;
index_ = index::IndexFactory::GetInstance().CreateIndex(
index_info, file_manager_context, space_);
index_info, file_manager_context);
AssertInfo(index_ != nullptr,
"[VecIndexCreator]Index is null after create index");
}
@ -65,11 +64,6 @@ VecIndexCreator::Build() {
index_->Build(config_);
}
void
VecIndexCreator::BuildV2() {
index_->BuildV2(config_);
}
milvus::BinarySet
VecIndexCreator::Serialize() {
return index_->Serialize(config_);
@ -95,11 +89,6 @@ VecIndexCreator::Upload() {
return index_->Upload();
}
BinarySet
VecIndexCreator::UploadV2() {
return index_->UploadV2();
}
void
VecIndexCreator::CleanLocalData() {
auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());


@ -20,7 +20,6 @@
#include "index/VectorIndex.h"
#include "index/IndexInfo.h"
#include "storage/Types.h"
#include "storage/space.h"
namespace milvus::indexbuilder {
@ -37,17 +36,14 @@ class VecIndexCreator : public IndexCreatorBase {
const std::string& field_name,
const int64_t dim,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
const storage::FileManagerContext& file_manager_context);
void
Build(const milvus::DatasetPtr& dataset) override;
void
Build() override;
void
BuildV2() override;
milvus::BinarySet
Serialize() override;
@ -65,9 +61,6 @@ class VecIndexCreator : public IndexCreatorBase {
BinarySet
Upload() override;
BinarySet
UploadV2() override;
public:
void
CleanLocalData();
@ -76,8 +69,6 @@ class VecIndexCreator : public IndexCreatorBase {
milvus::index::IndexBasePtr index_ = nullptr;
Config config_;
DataType data_type_;
std::shared_ptr<milvus_storage::Space> space_;
};
} // namespace milvus::indexbuilder


@ -15,7 +15,6 @@
#include "fmt/core.h"
#include "indexbuilder/type_c.h"
#include "log/Log.h"
#include "storage/options.h"
#ifdef __linux__
#include <malloc.h>
@ -31,7 +30,6 @@
#include "index/Utils.h"
#include "pb/index_cgo_msg.pb.h"
#include "storage/Util.h"
#include "storage/space.h"
#include "index/Meta.h"
using namespace milvus;
@ -234,107 +232,6 @@ CreateIndex(CIndex* res_index,
}
}
CStatus
CreateIndexV2(CIndex* res_index,
const uint8_t* serialized_build_index_info,
const uint64_t len) {
try {
auto build_index_info =
std::make_unique<milvus::proto::indexcgo::BuildIndexInfo>();
auto res =
build_index_info->ParseFromArray(serialized_build_index_info, len);
AssertInfo(res, "Unmarshall build index info failed");
auto field_type =
static_cast<DataType>(build_index_info->field_schema().data_type());
milvus::index::CreateIndexInfo index_info;
index_info.field_type = field_type;
index_info.dim = build_index_info->dim();
auto storage_config =
get_storage_config(build_index_info->storage_config());
auto config = get_config(build_index_info);
// get index type
auto index_type = milvus::index::GetValueFromConfig<std::string>(
config, "index_type");
AssertInfo(index_type.has_value(), "index type is empty");
index_info.index_type = index_type.value();
auto engine_version = build_index_info->current_index_version();
index_info.index_engine_version = engine_version;
config[milvus::index::INDEX_ENGINE_VERSION] =
std::to_string(engine_version);
// get metric type
if (milvus::IsVectorDataType(field_type)) {
auto metric_type = milvus::index::GetValueFromConfig<std::string>(
config, "metric_type");
AssertInfo(metric_type.has_value(), "metric type is empty");
index_info.metric_type = metric_type.value();
}
milvus::storage::FieldDataMeta field_meta{
build_index_info->collectionid(),
build_index_info->partitionid(),
build_index_info->segmentid(),
build_index_info->field_schema().fieldid(),
build_index_info->field_schema()};
milvus::storage::IndexMeta index_meta{
build_index_info->segmentid(),
build_index_info->field_schema().fieldid(),
build_index_info->buildid(),
build_index_info->index_version(),
"",
build_index_info->field_schema().name(),
field_type,
build_index_info->dim(),
};
auto store_space = milvus_storage::Space::Open(
build_index_info->store_path(),
milvus_storage::Options{nullptr,
build_index_info->store_version()});
AssertInfo(store_space.ok() && store_space.has_value(),
"create space failed: {}",
store_space.status().ToString());
auto index_space = milvus_storage::Space::Open(
build_index_info->index_store_path(),
milvus_storage::Options{.schema = store_space.value()->schema()});
AssertInfo(index_space.ok() && index_space.has_value(),
"create space failed: {}",
index_space.status().ToString());
LOG_INFO("init space success");
auto chunk_manager =
milvus::storage::CreateChunkManager(storage_config);
milvus::storage::FileManagerContext fileManagerContext(
field_meta,
index_meta,
chunk_manager,
std::move(index_space.value()));
auto index =
milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
field_type,
build_index_info->field_schema().name(),
build_index_info->dim(),
config,
fileManagerContext,
std::move(store_space.value()));
index->BuildV2();
*res_index = index.release();
return milvus::SuccessCStatus();
} catch (SegcoreError& e) {
auto status = CStatus();
status.error_code = e.get_error_code();
status.error_msg = strdup(e.what());
return status;
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}
CStatus
DeleteIndex(CIndex index) {
auto status = CStatus();
@ -823,29 +720,6 @@ SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set) {
return status;
}
CStatus
SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set) {
auto status = CStatus();
try {
AssertInfo(
index,
"failed to serialize index to binary set, passed index was null");
auto real_index =
reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(index);
auto binary =
std::make_unique<knowhere::BinarySet>(real_index->UploadV2());
*c_binary_set = binary.release();
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
}
return status;
}
CStatus
AppendOptionalFieldDataPath(CBuildIndexInfo c_build_index_info,
const int64_t field_id,


@ -128,14 +128,6 @@ AppendOptionalFieldDataPath(CBuildIndexInfo c_build_index_info,
CStatus
SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set);
CStatus
SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set);
CStatus
CreateIndexV2(CIndex* res_index,
const uint8_t* serialized_build_index_info,
const uint64_t len);
CStatus
AppendIndexStorageInfo(CBuildIndexInfo c_build_index_info,
const char* c_data_store_path,


@ -43,6 +43,6 @@ set(SEGCORE_FILES
reduce/GroupReduce.cpp)
add_library(milvus_segcore SHARED ${SEGCORE_FILES})
target_link_libraries(milvus_segcore milvus_query milvus_bitset milvus_exec ${OpenMP_CXX_FLAGS} milvus-storage milvus_futures)
target_link_libraries(milvus_segcore milvus_query milvus_bitset milvus_exec ${OpenMP_CXX_FLAGS} milvus_futures)
install(TARGETS milvus_segcore DESTINATION "${CMAKE_INSTALL_LIBDIR}")


@ -33,8 +33,6 @@
#include "storage/RemoteChunkManagerSingleton.h"
#include "storage/Util.h"
#include "storage/ThreadPools.h"
#include "storage/options.h"
#include "storage/space.h"
namespace milvus::segcore {
@ -280,89 +278,6 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) {
reserved_offset + num_rows);
}
void
SegmentGrowingImpl::LoadFieldDataV2(const LoadFieldDataInfo& infos) {
// schema don't include system field
AssertInfo(infos.field_infos.size() == schema_->size() + 2,
"lost some field data when load for growing segment");
AssertInfo(infos.field_infos.find(TimestampFieldID.get()) !=
infos.field_infos.end(),
"timestamps field data should be included");
AssertInfo(
infos.field_infos.find(RowFieldID.get()) != infos.field_infos.end(),
"rowID field data should be included");
auto primary_field_id =
schema_->get_primary_field_id().value_or(FieldId(-1));
AssertInfo(primary_field_id.get() != INVALID_FIELD_ID, "Primary key is -1");
AssertInfo(infos.field_infos.find(primary_field_id.get()) !=
infos.field_infos.end(),
"primary field data should be included");
size_t num_rows = storage::GetNumRowsForLoadInfo(infos);
auto reserved_offset = PreInsert(num_rows);
for (auto& [id, info] : infos.field_infos) {
auto field_id = FieldId(id);
auto field_data_info = FieldDataInfo(field_id.get(), num_rows);
auto& pool =
ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
auto res = milvus_storage::Space::Open(
infos.url, milvus_storage::Options{nullptr, infos.storage_version});
AssertInfo(res.ok(), "init space failed");
std::shared_ptr<milvus_storage::Space> space = std::move(res.value());
auto load_future = pool.Submit(
LoadFieldDatasFromRemote2, space, schema_, field_data_info);
auto field_data =
milvus::storage::CollectFieldDataChannel(field_data_info.channel);
if (field_id == TimestampFieldID) {
// step 2: sort timestamp
// query node already guarantees that the timestamp is ordered, avoid field data copy in c++
// step 3: fill into Segment.ConcurrentVector
insert_record_.timestamps_.set_data_raw(reserved_offset,
field_data);
continue;
}
if (field_id == RowFieldID) {
continue;
}
if (!indexing_record_.SyncDataWithIndex(field_id)) {
insert_record_.get_data_base(field_id)->set_data_raw(
reserved_offset, field_data);
}
if (segcore_config_.get_enable_interim_segment_index()) {
auto offset = reserved_offset;
for (auto& data : field_data) {
auto row_count = data->get_num_rows();
indexing_record_.AppendingIndex(
offset, row_count, field_id, data, insert_record_);
offset += row_count;
}
}
try_remove_chunks(field_id);
if (field_id == primary_field_id) {
insert_record_.insert_pks(field_data);
}
// update average row data size
auto field_meta = (*schema_)[field_id];
if (IsVariableDataType(field_meta.get_data_type())) {
SegmentInternalInterface::set_field_avg_size(
field_id,
num_rows,
storage::GetByteSizeOfFieldDatas(field_data));
}
// update the mem size
stats_.mem_size += storage::GetByteSizeOfFieldDatas(field_data);
}
// step 5: update small indexes
insert_record_.ack_responder_.AddSegment(reserved_offset,
reserved_offset + num_rows);
}
SegcoreError
SegmentGrowingImpl::Delete(int64_t reserved_begin,
int64_t size,


@ -64,8 +64,6 @@ class SegmentGrowingImpl : public SegmentGrowing {
void
LoadFieldData(const LoadFieldDataInfo& info) override;
void
LoadFieldDataV2(const LoadFieldDataInfo& info) override;
void
RemoveDuplicatePkRecords() override;


@ -115,9 +115,6 @@ class SegmentInterface {
virtual void
LoadFieldData(const LoadFieldDataInfo& info) = 0;
virtual void
LoadFieldDataV2(const LoadFieldDataInfo& info) = 0;
virtual void
RemoveDuplicatePkRecords() = 0;


@ -281,59 +281,6 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& load_info) {
}
}
void
SegmentSealedImpl::LoadFieldDataV2(const LoadFieldDataInfo& load_info) {
// TODO(SPARSE): support storage v2
// NOTE: lock only when data is ready to avoid starvation
// only one field for now, parallel load field data in golang
size_t num_rows = storage::GetNumRowsForLoadInfo(load_info);
for (auto& [id, info] : load_info.field_infos) {
AssertInfo(info.row_count > 0, "The row count of field data is 0");
auto field_id = FieldId(id);
auto insert_files = info.insert_files;
auto field_data_info =
FieldDataInfo(field_id.get(), num_rows, load_info.mmap_dir_path);
LOG_INFO("segment {} loads field {} with num_rows {}",
this->get_segment_id(),
field_id.get(),
num_rows);
auto parallel_degree = static_cast<uint64_t>(
DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
field_data_info.channel->set_capacity(parallel_degree * 2);
auto& pool =
ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
// auto load_future = pool.Submit(
// LoadFieldDatasFromRemote, insert_files, field_data_info.channel);
auto res = milvus_storage::Space::Open(
load_info.url,
milvus_storage::Options{nullptr, load_info.storage_version});
AssertInfo(res.ok(),
fmt::format("init space failed: {}, error: {}",
load_info.url,
res.status().ToString()));
std::shared_ptr<milvus_storage::Space> space = std::move(res.value());
auto load_future = pool.Submit(
LoadFieldDatasFromRemote2, space, schema_, field_data_info);
LOG_INFO("segment {} submits load field {} task to thread pool",
this->get_segment_id(),
field_id.get());
if (load_info.mmap_dir_path.empty() ||
SystemProperty::Instance().IsSystem(field_id)) {
LoadFieldData(field_id, field_data_info);
} else {
MapFieldData(field_id, field_data_info);
}
LOG_INFO("segment {} loads field {} done",
this->get_segment_id(),
field_id.get());
}
}
void
SegmentSealedImpl::RemoveDuplicatePkRecords() {
std::unique_lock lck(mutex_);


@ -50,8 +50,6 @@ class SegmentSealedImpl : public SegmentSealed {
LoadIndex(const LoadIndexInfo& info) override;
void
LoadFieldData(const LoadFieldDataInfo& info) override;
void
LoadFieldDataV2(const LoadFieldDataInfo& info) override;
// erase duplicate records when sealed segment loaded done
void
RemoveDuplicatePkRecords() override;


@ -780,35 +780,7 @@ ReverseDataFromIndex(const index::IndexBase* index,
return data_array;
}
void
LoadFieldDatasFromRemote2(std::shared_ptr<milvus_storage::Space> space,
SchemaPtr schema,
FieldDataInfo& field_data_info) {
auto reader = space->ScanData();
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {
PanicInfo(DataFormatBroken, "failed to read data");
}
auto data = rec.ValueUnsafe();
auto total_num_rows = data->num_rows();
for (auto& field : schema->get_fields()) {
if (field.second.get_id().get() != field_data_info.field_id) {
continue;
}
auto col_data =
data->GetColumnByName(field.second.get_name().get());
auto field_data = storage::CreateFieldData(
field.second.get_data_type(),
field.second.is_nullable(),
field.second.is_vector() ? field.second.get_dim() : 0,
total_num_rows);
field_data->FillFieldData(col_data);
field_data_info.channel->push(field_data);
}
}
field_data_info.channel->close();
}
// init segcore storage config first, and create default remote chunk manager
// segcore use default remote chunk manager to load data from minio/s3
void


@ -28,7 +28,6 @@
#include "log/Log.h"
#include "segcore/DeletedRecord.h"
#include "segcore/InsertRecord.h"
#include "storage/space.h"
namespace milvus::segcore {
@ -119,10 +118,6 @@ void
LoadFieldDatasFromRemote(const std::vector<std::string>& remote_files,
FieldDataChannelPtr channel);
void
LoadFieldDatasFromRemote2(std::shared_ptr<milvus_storage::Space> space,
SchemaPtr schema,
FieldDataInfo& field_data_info);
/**
* Returns an index pointing to the first element in the range [first, last) such that `value < element` is true
* (i.e. that is strictly greater than value), or last if no such element is found.


@ -318,77 +318,6 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) {
}
}
CStatus
AppendIndexV3(CLoadIndexInfo c_load_index_info) {
try {
auto load_index_info =
(milvus::segcore::LoadIndexInfo*)c_load_index_info;
auto& index_params = load_index_info->index_params;
auto field_type = load_index_info->field_type;
milvus::index::CreateIndexInfo index_info;
index_info.field_type = load_index_info->field_type;
// get index type
AssertInfo(index_params.find("index_type") != index_params.end(),
"index type is empty");
index_info.index_type = index_params.at("index_type");
// get metric type
if (milvus::IsVectorDataType(field_type)) {
AssertInfo(index_params.find("metric_type") != index_params.end(),
"metric type is empty for vector index");
index_info.metric_type = index_params.at("metric_type");
}
milvus::storage::FieldDataMeta field_meta{
load_index_info->collection_id,
load_index_info->partition_id,
load_index_info->segment_id,
load_index_info->field_id};
milvus::storage::IndexMeta index_meta{load_index_info->segment_id,
load_index_info->field_id,
load_index_info->index_build_id,
load_index_info->index_version};
auto config = milvus::index::ParseConfigFromIndexParams(
load_index_info->index_params);
auto res = milvus_storage::Space::Open(
load_index_info->uri,
milvus_storage::Options{nullptr,
load_index_info->index_store_version});
AssertInfo(res.ok(), "init space failed");
std::shared_ptr<milvus_storage::Space> space = std::move(res.value());
milvus::storage::FileManagerContext fileManagerContext(
field_meta, index_meta, nullptr, space);
load_index_info->index =
milvus::index::IndexFactory::GetInstance().CreateIndex(
index_info, fileManagerContext, space);
if (!load_index_info->mmap_dir_path.empty() &&
load_index_info->index->IsMmapSupported()) {
auto filepath =
std::filesystem::path(load_index_info->mmap_dir_path) /
std::to_string(load_index_info->segment_id) /
std::to_string(load_index_info->field_id) /
std::to_string(load_index_info->index_id);
config[kMmapFilepath] = filepath.string();
}
load_index_info->index->LoadV2(config);
auto status = CStatus();
status.error_code = milvus::Success;
status.error_msg = "";
return status;
} catch (std::exception& e) {
auto status = CStatus();
status.error_code = milvus::UnexpectedError;
status.error_msg = strdup(e.what());
return status;
}
}
CStatus
AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* c_file_path) {
try {


@ -62,9 +62,6 @@ AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* file_path);
CStatus
AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info);
CStatus
AppendIndexV3(CLoadIndexInfo c_load_index_info);
CStatus
AppendIndexEngineVersionToLoadInfo(CLoadIndexInfo c_load_index_info,
int32_t index_engine_version);


@ -29,7 +29,6 @@
#include "storage/Util.h"
#include "futures/Future.h"
#include "futures/Executor.h"
#include "storage/space.h"
////////////////////////////// common interfaces //////////////////////////////
CStatus
@ -339,20 +338,6 @@ RemoveDuplicatePkRecords(CSegmentInterface c_segment) {
}
}
CStatus
LoadFieldDataV2(CSegmentInterface c_segment,
CLoadFieldDataInfo c_load_field_data_info) {
try {
auto segment =
reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
AssertInfo(segment != nullptr, "segment conversion failed");
auto load_info = (LoadFieldDataInfo*)c_load_field_data_info;
segment->LoadFieldDataV2(*load_info);
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}
// just for test
CStatus
LoadFieldRawData(CSegmentInterface c_segment,


@ -102,10 +102,6 @@ CStatus
LoadFieldData(CSegmentInterface c_segment,
CLoadFieldDataInfo load_field_data_info);
CStatus
LoadFieldDataV2(CSegmentInterface c_segment,
CLoadFieldDataInfo load_field_data_info);
CStatus
RemoveDuplicatePkRecords(CSegmentInterface c_segment);


@ -69,7 +69,6 @@ if (DEFINED AZURE_BUILD_DIR)
"-L${AZURE_BUILD_DIR} -lblob-chunk-manager"
blob-chunk-manager
milvus_common
milvus-storage
milvus_monitor
pthread
${CONAN_LIBS}
@ -77,7 +76,6 @@ if (DEFINED AZURE_BUILD_DIR)
else ()
target_link_libraries(milvus_storage PUBLIC
milvus_common
milvus-storage
milvus_monitor
pthread
${CONAN_LIBS}


@ -45,16 +45,6 @@
#include "storage/Util.h"
namespace milvus::storage {
DiskFileManagerImpl::DiskFileManagerImpl(
const FileManagerContext& fileManagerContext,
std::shared_ptr<milvus_storage::Space> space)
: FileManagerImpl(fileManagerContext.fieldDataMeta,
fileManagerContext.indexMeta),
space_(space) {
rcm_ = fileManagerContext.chunkManagerPtr;
}
DiskFileManagerImpl::DiskFileManagerImpl(
const FileManagerContext& fileManagerContext)
: FileManagerImpl(fileManagerContext.fieldDataMeta,
@ -78,39 +68,10 @@ std::string
DiskFileManagerImpl::GetRemoteIndexPath(const std::string& file_name,
int64_t slice_num) const {
std::string remote_prefix;
if (space_ != nullptr) {
remote_prefix = GetRemoteIndexObjectPrefixV2();
} else {
remote_prefix = GetRemoteIndexObjectPrefix();
}
remote_prefix = GetRemoteIndexObjectPrefix();
return remote_prefix + "/" + file_name + "_" + std::to_string(slice_num);
}
bool
DiskFileManagerImpl::AddFileUsingSpace(
const std::string& local_file_name,
const std::vector<int64_t>& local_file_offsets,
const std::vector<std::string>& remote_files,
const std::vector<int64_t>& remote_file_sizes) {
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
for (int64_t i = 0; i < remote_files.size(); ++i) {
auto buf =
std::shared_ptr<uint8_t[]>(new uint8_t[remote_file_sizes[i]]);
local_chunk_manager->Read(local_file_name,
local_file_offsets[i],
buf.get(),
remote_file_sizes[i]);
auto status =
space_->WriteBlob(remote_files[i], buf.get(), remote_file_sizes[i]);
if (!status.ok()) {
return false;
}
}
return true;
}
bool
DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
auto local_chunk_manager =
@ -204,85 +165,17 @@ DiskFileManagerImpl::AddBatchIndexFiles(
}
std::map<std::string, int64_t> res;
if (space_ != nullptr) {
res = PutIndexData(space_,
data_slices,
remote_file_sizes,
remote_files,
field_meta_,
index_meta_);
} else {
res = PutIndexData(rcm_.get(),
data_slices,
remote_file_sizes,
remote_files,
field_meta_,
index_meta_);
}
res = PutIndexData(rcm_.get(),
data_slices,
remote_file_sizes,
remote_files,
field_meta_,
index_meta_);
for (auto& re : res) {
remote_paths_to_size_[re.first] = re.second;
}
}
void
DiskFileManagerImpl::CacheIndexToDisk() {
auto blobs = space_->StatisticsBlobs();
std::vector<std::string> remote_files;
for (auto& blob : blobs) {
remote_files.push_back(blob.name);
}
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
std::map<std::string, std::vector<int>> index_slices;
for (auto& file_path : remote_files) {
auto pos = file_path.find_last_of("_");
index_slices[file_path.substr(0, pos)].emplace_back(
std::stoi(file_path.substr(pos + 1)));
}
for (auto& slices : index_slices) {
std::sort(slices.second.begin(), slices.second.end());
}
auto EstimateParallelDegree = [&](const std::string& file) -> uint64_t {
auto fileSize = space_->GetBlobByteSize(file);
return uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / fileSize.value());
};
for (auto& slices : index_slices) {
auto prefix = slices.first;
auto local_index_file_name =
GetLocalIndexObjectPrefix() +
prefix.substr(prefix.find_last_of('/') + 1);
local_chunk_manager->CreateFile(local_index_file_name);
int64_t offset = 0;
std::vector<std::string> batch_remote_files;
uint64_t max_parallel_degree = INT_MAX;
for (int& iter : slices.second) {
if (batch_remote_files.size() == max_parallel_degree) {
auto next_offset = CacheBatchIndexFilesToDiskV2(
batch_remote_files, local_index_file_name, offset);
offset = next_offset;
batch_remote_files.clear();
}
auto origin_file = prefix + "_" + std::to_string(iter);
if (batch_remote_files.size() == 0) {
// Use first file size as average size to estimate
max_parallel_degree = EstimateParallelDegree(origin_file);
}
batch_remote_files.push_back(origin_file);
}
if (batch_remote_files.size() > 0) {
auto next_offset = CacheBatchIndexFilesToDiskV2(
batch_remote_files, local_index_file_name, offset);
offset = next_offset;
batch_remote_files.clear();
}
local_paths_.emplace_back(local_index_file_name);
}
}
void
DiskFileManagerImpl::CacheIndexToDisk(
const std::vector<std::string>& remote_files) {
@ -329,111 +222,6 @@ DiskFileManagerImpl::CacheIndexToDisk(
}
}
uint64_t
DiskFileManagerImpl::CacheBatchIndexFilesToDisk(
const std::vector<std::string>& remote_files,
const std::string& local_file_name,
uint64_t local_file_init_offfset) {
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto index_datas = GetObjectData(rcm_.get(), remote_files);
int batch_size = remote_files.size();
AssertInfo(index_datas.size() == batch_size,
"inconsistent file num and index data num!");
uint64_t offset = local_file_init_offfset;
for (int i = 0; i < batch_size; ++i) {
auto index_data = index_datas[i].get()->GetFieldData();
auto index_size = index_data->Size();
auto uint8_data =
reinterpret_cast<uint8_t*>(const_cast<void*>(index_data->Data()));
local_chunk_manager->Write(
local_file_name, offset, uint8_data, index_size);
offset += index_size;
}
return offset;
}
uint64_t
DiskFileManagerImpl::CacheBatchIndexFilesToDiskV2(
const std::vector<std::string>& remote_files,
const std::string& local_file_name,
uint64_t local_file_init_offfset) {
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto index_datas = GetObjectData(space_, remote_files);
int batch_size = remote_files.size();
AssertInfo(index_datas.size() == batch_size,
"inconsistent file num and index data num!");
uint64_t offset = local_file_init_offfset;
for (int i = 0; i < batch_size; ++i) {
auto index_data = index_datas[i];
auto index_size = index_data->Size();
auto uint8_data =
reinterpret_cast<uint8_t*>(const_cast<void*>(index_data->Data()));
local_chunk_manager->Write(
local_file_name, offset, uint8_data, index_size);
offset += index_size;
}
return offset;
}
template <typename DataType>
std::string
DiskFileManagerImpl::CacheRawDataToDisk(
std::shared_ptr<milvus_storage::Space> space) {
auto segment_id = GetFieldDataMeta().segment_id;
auto field_id = GetFieldDataMeta().field_id;
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto local_data_path = storage::GenFieldRawDataPathPrefix(
local_chunk_manager, segment_id, field_id) +
"raw_data";
local_chunk_manager->CreateFile(local_data_path);
// file format
// num_rows(uint32) | dim(uint32) | index_data ([]uint8_t)
uint32_t num_rows = 0;
uint32_t dim = 0;
int64_t write_offset = sizeof(num_rows) + sizeof(dim);
auto reader = space->ScanData();
for (auto rec : *reader) {
if (!rec.ok()) {
PanicInfo(IndexBuildError,
fmt::format("failed to read data: {}",
rec.status().ToString()));
}
auto data = rec.ValueUnsafe();
if (data == nullptr) {
break;
}
auto total_num_rows = data->num_rows();
num_rows += total_num_rows;
auto col_data = data->GetColumnByName(index_meta_.field_name);
auto field_data = storage::CreateFieldData(
index_meta_.field_type, false, index_meta_.dim, total_num_rows);
field_data->FillFieldData(col_data);
dim = field_data->get_dim();
auto data_size =
field_data->get_num_rows() * milvus::GetVecRowSize<DataType>(dim);
local_chunk_manager->Write(local_data_path,
write_offset,
const_cast<void*>(field_data->Data()),
data_size);
write_offset += data_size;
}
// write num_rows and dim value to file header
write_offset = 0;
local_chunk_manager->Write(
local_data_path, write_offset, &num_rows, sizeof(num_rows));
write_offset += sizeof(num_rows);
local_chunk_manager->Write(
local_data_path, write_offset, &dim, sizeof(dim));
return local_data_path;
}
void
SortByPath(std::vector<std::string>& paths) {
std::sort(paths.begin(),
@ -682,92 +470,6 @@ WriteOptFieldsIvfMeta(
write_offset += sizeof(num_of_fields);
}
// write optional scalar fields ivf info in the following format without space among them
// | (meta)
// | version (uint8_t) | num_of_fields (uint32_t) |
// | (field_0)
// | field_id (int64_t) | num_of_unique_field_data (uint32_t)
// | size_0 (uint32_t) | offset_0 (uint32_t)...
// | size_1 | offset_0, offset_1, ...
std::string
DiskFileManagerImpl::CacheOptFieldToDisk(
std::shared_ptr<milvus_storage::Space> space, OptFieldT& fields_map) {
const uint32_t num_of_fields = fields_map.size();
if (0 == num_of_fields) {
return "";
} else if (num_of_fields > 1) {
PanicInfo(
ErrorCode::NotImplemented,
"vector index build with multiple fields is not supported yet");
}
if (nullptr == space) {
LOG_ERROR("Failed to cache optional field. Space is null");
return "";
}
auto segment_id = GetFieldDataMeta().segment_id;
auto vec_field_id = GetFieldDataMeta().field_id;
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto local_data_path = storage::GenFieldRawDataPathPrefix(
local_chunk_manager, segment_id, vec_field_id) +
std::string(VEC_OPT_FIELDS);
local_chunk_manager->CreateFile(local_data_path);
uint64_t write_offset = 0;
WriteOptFieldsIvfMeta(
local_chunk_manager, local_data_path, num_of_fields, write_offset);
std::unordered_set<int64_t> actual_field_ids;
auto reader = space->ScanData();
for (auto& [field_id, tup] : fields_map) {
const auto& field_name = std::get<0>(tup);
const auto& field_type = std::get<1>(tup);
std::vector<FieldDataPtr> field_datas;
for (auto rec : *reader) {
if (!rec.ok()) {
PanicInfo(IndexBuildError,
fmt::format("failed to read optional field data: {}",
rec.status().ToString()));
}
auto data = rec.ValueUnsafe();
if (data == nullptr) {
break;
}
auto total_num_rows = data->num_rows();
if (0 == total_num_rows) {
LOG_WARN("optional field {} has no data", field_name);
return "";
}
auto col_data = data->GetColumnByName(field_name);
auto field_data =
storage::CreateFieldData(field_type, false, 1, total_num_rows);
field_data->FillFieldData(col_data);
field_datas.emplace_back(field_data);
}
if (WriteOptFieldIvfData(field_type,
field_id,
local_chunk_manager,
local_data_path,
field_datas,
write_offset)) {
actual_field_ids.insert(field_id);
}
}
if (actual_field_ids.size() != num_of_fields) {
write_offset = 0;
WriteOptFieldsIvfMeta(local_chunk_manager,
local_data_path,
actual_field_ids.size(),
write_offset);
if (actual_field_ids.empty()) {
return "";
}
}
return local_data_path;
}
std::string
DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
const uint32_t num_of_fields = fields_map.size();
@ -904,17 +606,4 @@ DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<bin1>(
std::vector<std::string> remote_files);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<float>(
std::shared_ptr<milvus_storage::Space> space);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<float16>(
std::shared_ptr<milvus_storage::Space> space);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
std::shared_ptr<milvus_storage::Space> space);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<bin1>(
std::shared_ptr<milvus_storage::Space> space);
} // namespace milvus::storage
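
The raw-data cache files written above use a small fixed header, num_rows (uint32) followed by dim (uint32), that is back-filled at offset 0 once the scan finishes, with the packed vector bytes following it. A minimal sketch of reading that header back; the names here are illustrative only:

// Sketch: read the header of a cached raw-data file laid out as
//   num_rows (uint32) | dim (uint32) | packed vector data
#include <cstdint>
#include <fstream>
#include <stdexcept>
#include <string>

struct RawDataHeader {
    uint32_t num_rows = 0;
    uint32_t dim = 0;
};

RawDataHeader ReadRawDataHeader(const std::string& path) {
    std::ifstream in(path, std::ios::binary);
    if (!in) {
        throw std::runtime_error("cannot open " + path);
    }
    RawDataHeader header;
    in.read(reinterpret_cast<char*>(&header.num_rows), sizeof(header.num_rows));
    in.read(reinterpret_cast<char*>(&header.dim), sizeof(header.dim));
    if (!in) {
        throw std::runtime_error("truncated header in " + path);
    }
    // The vector payload starts right after this 8-byte header.
    return header;
}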


@ -25,8 +25,6 @@
#include "storage/IndexData.h"
#include "storage/FileManager.h"
#include "storage/ChunkManager.h"
#include "storage/space.h"
#include "common/Consts.h"
namespace milvus::storage {
@ -35,9 +33,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
public:
explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext);
explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext,
std::shared_ptr<milvus_storage::Space> space);
virtual ~DiskFileManagerImpl();
virtual bool
@ -77,19 +72,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
void
CacheIndexToDisk(const std::vector<std::string>& remote_files);
void
CacheIndexToDisk();
uint64_t
CacheBatchIndexFilesToDisk(const std::vector<std::string>& remote_files,
const std::string& local_file_name,
uint64_t local_file_init_offfset);
uint64_t
CacheBatchIndexFilesToDiskV2(const std::vector<std::string>& remote_files,
const std::string& local_file_name,
uint64_t local_file_init_offfset);
void
AddBatchIndexFiles(const std::string& local_file_name,
const std::vector<int64_t>& local_file_offsets,
@ -100,27 +82,12 @@ class DiskFileManagerImpl : public FileManagerImpl {
std::string
CacheRawDataToDisk(std::vector<std::string> remote_files);
template <typename DataType>
std::string
CacheRawDataToDisk(std::shared_ptr<milvus_storage::Space> space);
std::string
CacheOptFieldToDisk(OptFieldT& fields_map);
std::string
CacheOptFieldToDisk(std::shared_ptr<milvus_storage::Space> space,
OptFieldT& fields_map);
virtual bool
AddFileUsingSpace(const std::string& local_file_name,
const std::vector<int64_t>& local_file_offsets,
const std::vector<std::string>& remote_files,
const std::vector<int64_t>& remote_file_sizes);
std::string
GetRemoteIndexPrefix() const {
return space_ != nullptr ? GetRemoteIndexObjectPrefixV2()
: GetRemoteIndexObjectPrefix();
return GetRemoteIndexObjectPrefix();
}
private:
@ -141,8 +108,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
// remote file path
std::map<std::string, int64_t> remote_paths_to_size_;
std::shared_ptr<milvus_storage::Space> space_;
};
using DiskANNFileManagerImplPtr = std::shared_ptr<DiskFileManagerImpl>;


@ -25,7 +25,6 @@
#include "log/Log.h"
#include "storage/ChunkManager.h"
#include "storage/Types.h"
#include "storage/space.h"
namespace milvus::storage {
@ -40,15 +39,6 @@ struct FileManagerContext {
chunkManagerPtr(chunkManagerPtr) {
}
FileManagerContext(const FieldDataMeta& fieldDataMeta,
const IndexMeta& indexMeta,
const ChunkManagerPtr& chunkManagerPtr,
std::shared_ptr<milvus_storage::Space> space)
: fieldDataMeta(fieldDataMeta),
indexMeta(indexMeta),
chunkManagerPtr(chunkManagerPtr),
space_(space) {
}
bool
Valid() const {
return chunkManagerPtr != nullptr;
@ -57,7 +47,6 @@ struct FileManagerContext {
FieldDataMeta fieldDataMeta;
IndexMeta indexMeta;
ChunkManagerPtr chunkManagerPtr;
std::shared_ptr<milvus_storage::Space> space_;
};
#define FILEMANAGER_TRY try {


@ -26,15 +26,6 @@
namespace milvus::storage {
MemFileManagerImpl::MemFileManagerImpl(
const FileManagerContext& fileManagerContext,
std::shared_ptr<milvus_storage::Space> space)
: FileManagerImpl(fileManagerContext.fieldDataMeta,
fileManagerContext.indexMeta),
space_(space) {
rcm_ = fileManagerContext.chunkManagerPtr;
}
MemFileManagerImpl::MemFileManagerImpl(
const FileManagerContext& fileManagerContext)
: FileManagerImpl(fileManagerContext.fieldDataMeta,
@ -91,50 +82,6 @@ MemFileManagerImpl::AddFile(const BinarySet& binary_set) {
return true;
}
bool
MemFileManagerImpl::AddFileV2(const BinarySet& binary_set) {
std::vector<const uint8_t*> data_slices;
std::vector<int64_t> slice_sizes;
std::vector<std::string> slice_names;
auto AddBatchIndexFiles = [&]() {
auto res = PutIndexData(space_,
data_slices,
slice_sizes,
slice_names,
field_meta_,
index_meta_);
for (auto& [file, size] : res) {
remote_paths_to_size_[file] = size;
}
};
auto remotePrefix = GetRemoteIndexObjectPrefixV2();
int64_t batch_size = 0;
for (auto iter = binary_set.binary_map_.begin();
iter != binary_set.binary_map_.end();
iter++) {
if (batch_size >= DEFAULT_FIELD_MAX_MEMORY_LIMIT) {
AddBatchIndexFiles();
data_slices.clear();
slice_sizes.clear();
slice_names.clear();
batch_size = 0;
}
data_slices.emplace_back(iter->second->data.get());
slice_sizes.emplace_back(iter->second->size);
slice_names.emplace_back(remotePrefix + "/" + iter->first);
batch_size += iter->second->size;
}
if (data_slices.size() > 0) {
AddBatchIndexFiles();
}
return true;
}
bool
MemFileManagerImpl::LoadFile(const std::string& filename) noexcept {
return true;
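
The AddFile paths above follow one batching idea: accumulate slices until a byte budget (DEFAULT_FIELD_MAX_MEMORY_LIMIT) is reached, flush the batch, then flush whatever remains at the end. A std-only sketch of that loop; the budget argument and the flush callback are placeholders for the real PutIndexData call, not project API:

// Sketch: batch uploads by a byte budget, flushing whenever it is exceeded.
#include <cstdint>
#include <functional>
#include <string>
#include <utility>
#include <vector>

using Slice = std::pair<std::string, int64_t>;  // (remote name, byte size)

void UploadInBatches(const std::vector<Slice>& slices,
                     int64_t byte_budget,
                     const std::function<void(const std::vector<Slice>&)>& flush) {
    std::vector<Slice> batch;
    int64_t batch_bytes = 0;
    for (const auto& s : slices) {
        if (batch_bytes >= byte_budget && !batch.empty()) {
            flush(batch);      // the real code uploads the batch here
            batch.clear();
            batch_bytes = 0;
        }
        batch.push_back(s);
        batch_bytes += s.second;
    }
    if (!batch.empty()) {
        flush(batch);          // flush the tail batch
    }
}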


@ -25,7 +25,6 @@
#include "storage/IndexData.h"
#include "storage/FileManager.h"
#include "storage/ChunkManager.h"
#include "storage/space.h"
namespace milvus::storage {
@ -33,9 +32,6 @@ class MemFileManagerImpl : public FileManagerImpl {
public:
explicit MemFileManagerImpl(const FileManagerContext& fileManagerContext);
MemFileManagerImpl(const FileManagerContext& fileManagerContext,
std::shared_ptr<milvus_storage::Space> space);
virtual bool
LoadFile(const std::string& filename) noexcept;
@ -63,14 +59,6 @@ class MemFileManagerImpl : public FileManagerImpl {
bool
AddFile(const BinarySet& binary_set);
bool
AddFileV2(const BinarySet& binary_set);
std::shared_ptr<milvus_storage::Space>
space() const {
return space_;
}
std::map<std::string, int64_t>
GetRemotePathsToFileSize() const {
return remote_paths_to_size_;
@ -79,7 +67,6 @@ class MemFileManagerImpl : public FileManagerImpl {
private:
// remote file path
std::map<std::string, int64_t> remote_paths_to_size_;
std::shared_ptr<milvus_storage::Space> space_;
};
using MemFileManagerImplPtr = std::shared_ptr<MemFileManagerImpl>;


@ -516,22 +516,6 @@ DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager,
return DeserializeFileData(buf, fileSize);
}
std::unique_ptr<DataCodec>
DownloadAndDecodeRemoteFileV2(std::shared_ptr<milvus_storage::Space> space,
const std::string& file) {
auto fileSize = space->GetBlobByteSize(file);
if (!fileSize.ok()) {
PanicInfo(FileReadFailed, fileSize.status().ToString());
}
auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[fileSize.value()]);
auto status = space->ReadBlob(file, buf.get());
if (!status.ok()) {
PanicInfo(FileReadFailed, status.ToString());
}
return DeserializeFileData(buf, fileSize.value());
}
std::pair<std::string, size_t>
EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
uint8_t* buf,
@ -551,27 +535,6 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
return std::make_pair(std::move(object_key), serialized_index_size);
}
std::pair<std::string, size_t>
EncodeAndUploadIndexSlice2(std::shared_ptr<milvus_storage::Space> space,
uint8_t* buf,
int64_t batch_size,
IndexMeta index_meta,
FieldDataMeta field_meta,
std::string object_key) {
// todo: support nullable index
auto field_data = CreateFieldData(DataType::INT8, false);
field_data->FillFieldData(buf, batch_size);
auto indexData = std::make_shared<IndexData>(field_data);
indexData->set_index_meta(index_meta);
indexData->SetFieldDataMeta(field_meta);
auto serialized_index_data = indexData->serialize_to_remote_file();
auto serialized_index_size = serialized_index_data.size();
auto status = space->WriteBlob(
object_key, serialized_index_data.data(), serialized_index_size);
AssertInfo(status.ok(), "write to space error: {}", status.ToString());
return std::make_pair(std::move(object_key), serialized_index_size);
}
std::pair<std::string, size_t>
EncodeAndUploadFieldSlice(ChunkManager* chunk_manager,
void* buf,
@ -609,36 +572,6 @@ GetObjectData(ChunkManager* remote_chunk_manager,
return futures;
}
std::vector<FieldDataPtr>
GetObjectData(std::shared_ptr<milvus_storage::Space> space,
const std::vector<std::string>& remote_files) {
auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::HIGH);
std::vector<std::future<std::unique_ptr<DataCodec>>> futures;
for (auto& file : remote_files) {
futures.emplace_back(
pool.Submit(DownloadAndDecodeRemoteFileV2, space, file));
}
std::vector<FieldDataPtr> datas;
std::exception_ptr first_exception = nullptr;
for (auto& future : futures) {
try {
auto res = future.get();
datas.emplace_back(res->GetFieldData());
} catch (...) {
if (!first_exception) {
first_exception = std::current_exception();
}
}
}
ReleaseArrowUnused();
if (first_exception) {
std::rethrow_exception(first_exception);
}
return datas;
}
std::map<std::string, int64_t>
PutIndexData(ChunkManager* remote_chunk_manager,
const std::vector<const uint8_t*>& data_slices,
@ -687,54 +620,6 @@ PutIndexData(ChunkManager* remote_chunk_manager,
return remote_paths_to_size;
}
std::map<std::string, int64_t>
PutIndexData(std::shared_ptr<milvus_storage::Space> space,
const std::vector<const uint8_t*>& data_slices,
const std::vector<int64_t>& slice_sizes,
const std::vector<std::string>& slice_names,
FieldDataMeta& field_meta,
IndexMeta& index_meta) {
auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
std::vector<std::future<std::pair<std::string, size_t>>> futures;
AssertInfo(data_slices.size() == slice_sizes.size(),
"inconsistent data slices size {} with slice sizes {}",
data_slices.size(),
slice_sizes.size());
AssertInfo(data_slices.size() == slice_names.size(),
"inconsistent data slices size {} with slice names size {}",
data_slices.size(),
slice_names.size());
for (int64_t i = 0; i < data_slices.size(); ++i) {
futures.push_back(pool.Submit(EncodeAndUploadIndexSlice2,
space,
const_cast<uint8_t*>(data_slices[i]),
slice_sizes[i],
index_meta,
field_meta,
slice_names[i]));
}
std::map<std::string, int64_t> remote_paths_to_size;
std::exception_ptr first_exception = nullptr;
for (auto& future : futures) {
try {
auto res = future.get();
remote_paths_to_size[res.first] = res.second;
} catch (...) {
if (!first_exception) {
first_exception = std::current_exception();
}
}
}
ReleaseArrowUnused();
if (first_exception) {
std::rethrow_exception(first_exception);
}
return remote_paths_to_size;
}
int64_t
GetTotalNumRowsForFieldDatas(const std::vector<FieldDataPtr>& field_datas) {
int64_t count = 0;
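
GetObjectData and PutIndexData above share one error-handling pattern: wait on every future, remember only the first exception, and rethrow it after all workers have drained. A small sketch of the same pattern, using std::async as an illustrative stand-in for the project's thread pool:

// Sketch: run all jobs, keep the first exception, rethrow it only after
// every job has completed.
#include <exception>
#include <future>
#include <vector>

template <typename Job>
void RunAllThenRethrowFirstError(std::vector<Job>& jobs) {
    std::vector<std::future<void>> futures;
    futures.reserve(jobs.size());
    for (auto& job : jobs) {
        futures.emplace_back(std::async(std::launch::async, job));
    }
    std::exception_ptr first_exception = nullptr;
    for (auto& f : futures) {
        try {
            f.get();                       // wait for this job to finish
        } catch (...) {
            if (!first_exception) {
                first_exception = std::current_exception();
            }
        }
    }
    if (first_exception) {
        std::rethrow_exception(first_exception);  // surface the earliest failure
    }
}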


@ -31,7 +31,6 @@
#include "storage/ChunkManager.h"
#include "storage/DataCodec.h"
#include "storage/Types.h"
#include "storage/space.h"
namespace milvus::storage {
@ -89,10 +88,6 @@ std::unique_ptr<DataCodec>
DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager,
const std::string& file);
std::unique_ptr<DataCodec>
DownloadAndDecodeRemoteFileV2(std::shared_ptr<milvus_storage::Space> space,
const std::string& file);
std::pair<std::string, size_t>
EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
uint8_t* buf,
@ -102,13 +97,6 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
std::string object_key);
std::pair<std::string, size_t>
EncodeAndUploadIndexSlice2(std::shared_ptr<milvus_storage::Space> space,
uint8_t* buf,
int64_t batch_size,
IndexMeta index_meta,
FieldDataMeta field_meta,
std::string object_key);
std::pair<std::string, size_t>
EncodeAndUploadFieldSlice(ChunkManager* chunk_manager,
void* buf,
int64_t element_count,
@ -120,10 +108,6 @@ std::vector<std::future<std::unique_ptr<DataCodec>>>
GetObjectData(ChunkManager* remote_chunk_manager,
const std::vector<std::string>& remote_files);
std::vector<FieldDataPtr>
GetObjectData(std::shared_ptr<milvus_storage::Space> space,
const std::vector<std::string>& remote_files);
std::map<std::string, int64_t>
PutIndexData(ChunkManager* remote_chunk_manager,
const std::vector<const uint8_t*>& data_slices,
@ -132,13 +116,6 @@ PutIndexData(ChunkManager* remote_chunk_manager,
FieldDataMeta& field_meta,
IndexMeta& index_meta);
std::map<std::string, int64_t>
PutIndexData(std::shared_ptr<milvus_storage::Space> space,
const std::vector<const uint8_t*>& data_slices,
const std::vector<int64_t>& slice_sizes,
const std::vector<std::string>& slice_names,
FieldDataMeta& field_meta,
IndexMeta& index_meta);
int64_t
GetTotalNumRowsForFieldDatas(const std::vector<FieldDataPtr>& field_datas);


@ -41,8 +41,6 @@ if (USE_OPENDAL)
endif()
add_subdirectory(tantivy)
add_subdirectory(milvus-storage)
if (LINUX)
add_subdirectory(jemalloc)
endif()


@ -1,48 +0,0 @@
#-------------------------------------------------------------------------------
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------
set( MILVUS_STORAGE_VERSION 9d1ad9c)
message(STATUS "Building milvus-storage-${MILVUS_STORAGE_VERSION} from source")
message(STATUS ${CMAKE_BUILD_TYPE})
# message(FATAL_ERROR ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage.patch)
# set(milvus-storage-patch git apply --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage.patch)
set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} )
FetchContent_Declare(
milvus-storage
GIT_REPOSITORY "https://github.com/milvus-io/milvus-storage.git"
GIT_TAG ${MILVUS_STORAGE_VERSION}
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-src
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-build
SOURCE_SUBDIR cpp
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage_CMakeLists.txt <SOURCE_DIR>/cpp/CMakeLists.txt
DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} )
FetchContent_MakeAvailable(milvus-storage)
# target_compile_features(milvus-storage PUBLIC cxx_std_20)
# FetchContent_GetProperties( milvus-storage )
# if ( NOT milvus-storage_POPULATED )
# FetchContent_Populate( milvus-storage)
# # Adding the following target:
# add_subdirectory( ${milvus-storage_SOURCE_DIR}/cpp
# ${milvus-storage_BINARY_DIR} )
# endif()
# message(FATAL_ERROR ${milvus-storage_SOURCE_DIR} ${milvus-storage_BINARY_DIR})
# get prometheus COMPILE_OPTIONS
# get_property( var DIRECTORY "${milvus-storage_SOURCE_DIR}" PROPERTY COMPILE_OPTIONS )
message( STATUS "milvus-storage src compile options: ${var}" )
# unset(CMAKE_CXX_STANDARD)


@ -1,34 +0,0 @@
cmake_minimum_required(VERSION 3.20.0)
project(milvus-storage VERSION 0.1.0)
option(WITH_UT "Build the testing tree." ON)
option(WITH_ASAN "Build with address sanitizer." OFF)
option(USE_OPENDAL "Build with opendal." OFF)
if (USE_OPENDAL)
add_compile_definitions(MILVUS_OPENDAL)
endif()
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
find_package(Boost REQUIRED)
find_package(Arrow REQUIRED)
find_package(Protobuf REQUIRED)
find_package(glog REQUIRED)
find_package(AWSSDK REQUIRED)
file(GLOB_RECURSE SRC_FILES src/*.cpp src/*.cc)
message(STATUS "SRC_FILES: ${SRC_FILES}")
add_library(milvus-storage ${SRC_FILES})
target_include_directories(milvus-storage PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/milvus-storage ${CMAKE_CURRENT_SOURCE_DIR}/src)
target_link_libraries(milvus-storage PUBLIC arrow::arrow Boost::boost protobuf::protobuf AWS::aws-sdk-cpp-core glog::glog)
if (USE_OPENDAL)
target_link_libraries(milvus-storage PUBLIC opendal)
endif()
if (WITH_UT)
enable_testing()
add_subdirectory(test)
endif()


@ -36,9 +36,6 @@
#include "storage/InsertData.h"
#include "storage/ThreadPool.h"
#include "storage/Types.h"
#include "storage/options.h"
#include "storage/schema.h"
#include "storage/space.h"
#include "storage/Util.h"
#include "storage/DiskFileManagerImpl.h"
#include "storage/LocalChunkManagerSingleton.h"
@ -285,62 +282,6 @@ PrepareInsertData(const int64_t opt_field_data_range) -> std::string {
return path;
}
auto
PrepareInsertDataSpace(const int64_t opt_field_data_range)
-> std::pair<std::string, std::shared_ptr<milvus_storage::Space>> {
std::string path = kOptFieldPath + "space/" + std::to_string(kOptFieldId);
arrow::FieldVector arrow_fields{
arrow::field("pk", arrow::int64()),
arrow::field("ts", arrow::int64()),
arrow::field(kOptFieldName, arrow::int64()),
arrow::field("vec", arrow::fixed_size_binary(1))};
auto arrow_schema = std::make_shared<arrow::Schema>(arrow_fields);
milvus_storage::SchemaOptions schema_options = {
.primary_column = "pk", .version_column = "ts", .vector_column = "vec"};
auto schema =
std::make_shared<milvus_storage::Schema>(arrow_schema, schema_options);
boost::filesystem::remove_all(path);
boost::filesystem::create_directories(path);
EXPECT_TRUE(schema->Validate().ok());
auto opt_space = milvus_storage::Space::Open(
"file://" + boost::filesystem::canonical(path).string(),
milvus_storage::Options{schema});
EXPECT_TRUE(opt_space.has_value());
auto space = std::move(opt_space.value());
const auto data = PrepareRawFieldData<int64_t>(opt_field_data_range);
arrow::Int64Builder pk_builder;
arrow::Int64Builder ts_builder;
arrow::NumericBuilder<arrow::Int64Type> scalar_builder;
arrow::FixedSizeBinaryBuilder vec_builder(arrow::fixed_size_binary(1));
const uint8_t kByteZero = 0;
for (size_t i = 0; i < kEntityCnt; ++i) {
EXPECT_TRUE(pk_builder.Append(i).ok());
EXPECT_TRUE(ts_builder.Append(i).ok());
EXPECT_TRUE(vec_builder.Append(&kByteZero).ok());
}
for (size_t i = 0; i < kEntityCnt; ++i) {
EXPECT_TRUE(scalar_builder.Append(data[i]).ok());
}
std::shared_ptr<arrow::Array> pk_array;
EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
std::shared_ptr<arrow::Array> ts_array;
EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
std::shared_ptr<arrow::Array> scalar_array;
EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok());
std::shared_ptr<arrow::Array> vec_array;
EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
auto batch =
arrow::RecordBatch::Make(arrow_schema,
kEntityCnt,
{pk_array, ts_array, scalar_array, vec_array});
milvus_storage::WriteOption write_opt = {kEntityCnt};
space->Write(*arrow::RecordBatchReader::Make({batch}, arrow_schema)
.ValueOrDie()
.get(),
write_opt);
return {path, std::move(space)};
}
template <DataType DT>
auto
PrepareOptionalField(const std::shared_ptr<DiskFileManagerImpl>& file_manager,
@ -400,47 +341,24 @@ CheckOptFieldCorrectness(
}
} // namespace
TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskFieldEmpty) {
auto file_manager = CreateFileManager(cm_);
{
const auto& [insert_file_space_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
OptFieldT opt_fields;
EXPECT_TRUE(file_manager->CacheOptFieldToDisk(opt_fields).empty());
EXPECT_TRUE(
file_manager->CacheOptFieldToDisk(space, opt_fields).empty());
}
{
auto opt_fileds =
PrepareOptionalField<DataType::INT64>(file_manager, "");
auto res = file_manager->CacheOptFieldToDisk(nullptr, opt_fileds);
EXPECT_TRUE(res.empty());
}
}
TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOptFieldMoreThanOne) {
auto file_manager = CreateFileManager(cm_);
const auto insert_file_path =
PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
const auto& [insert_file_space_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
OptFieldT opt_fields =
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
opt_fields[kOptFieldId + 1] = {
kOptFieldName + "second", DataType::INT64, {insert_file_space_path}};
kOptFieldName + "second", DataType::INT64, {insert_file_path}};
EXPECT_THROW(file_manager->CacheOptFieldToDisk(opt_fields), SegcoreError);
EXPECT_THROW(file_manager->CacheOptFieldToDisk(space, opt_fields),
SegcoreError);
}
TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
auto file_manager = CreateFileManager(cm_);
const auto& [insert_file_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
const auto insert_file_path =
PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
auto opt_fileds =
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds);
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
ASSERT_FALSE(res.empty());
CheckOptFieldCorrectness(res);
}
@ -477,12 +395,4 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) {
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
ASSERT_TRUE(res.empty());
}
{
const auto& [insert_file_path, space] = PrepareInsertDataSpace(1);
auto opt_fileds = PrepareOptionalField<DataType::INT64>(
file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds);
ASSERT_TRUE(res.empty());
}
}
}


@ -32,7 +32,6 @@
#include "index/IndexFactory.h"
#include "common/QueryResult.h"
#include "segcore/Types.h"
#include "storage/options.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "test_utils/storage_test_utils.h"
#include "test_utils/DataGen.h"
@ -916,261 +915,4 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) {
SearchResult result;
EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
}
#endif
//class IndexTestV2
// : public ::testing::TestWithParam<std::tuple<Param, int64_t, bool>> {
// protected:
// std::shared_ptr<arrow::Schema>
// TestSchema(int vec_size) {
// arrow::FieldVector fields;
// fields.push_back(arrow::field("pk", arrow::int64()));
// fields.push_back(arrow::field("ts", arrow::int64()));
// fields.push_back(
// arrow::field("vec", arrow::fixed_size_binary(vec_size)));
// return std::make_shared<arrow::Schema>(fields);
// }
//
// std::shared_ptr<arrow::RecordBatchReader>
// TestRecords(int vec_size, GeneratedData& dataset) {
// arrow::Int64Builder pk_builder;
// arrow::Int64Builder ts_builder;
// arrow::FixedSizeBinaryBuilder vec_builder(
// arrow::fixed_size_binary(vec_size));
// if (!is_binary) {
// xb_data = dataset.get_col<float>(milvus::FieldId(100));
// auto data = reinterpret_cast<char*>(xb_data.data());
// for (auto i = 0; i < NB; ++i) {
// EXPECT_TRUE(pk_builder.Append(i).ok());
// EXPECT_TRUE(ts_builder.Append(i).ok());
// EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok());
// }
// } else {
// xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
// for (auto i = 0; i < NB; ++i) {
// EXPECT_TRUE(pk_builder.Append(i).ok());
// EXPECT_TRUE(ts_builder.Append(i).ok());
// EXPECT_TRUE(
// vec_builder.Append(xb_bin_data.data() + i * vec_size).ok());
// }
// }
// std::shared_ptr<arrow::Array> pk_array;
// EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
// std::shared_ptr<arrow::Array> ts_array;
// EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
// std::shared_ptr<arrow::Array> vec_array;
// EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
// auto schema = TestSchema(vec_size);
// auto rec_batch = arrow::RecordBatch::Make(
// schema, NB, {pk_array, ts_array, vec_array});
// auto reader =
// arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie();
// return reader;
// }
//
// std::shared_ptr<milvus_storage::Space>
// TestSpace(int vec_size, GeneratedData& dataset) {
// auto arrow_schema = TestSchema(vec_size);
// auto schema_options = std::make_shared<milvus_storage::SchemaOptions>();
// schema_options->primary_column = "pk";
// schema_options->version_column = "ts";
// schema_options->vector_column = "vec";
// auto schema = std::make_shared<milvus_storage::Schema>(arrow_schema,
// schema_options);
// EXPECT_TRUE(schema->Validate().ok());
//
// auto space_res = milvus_storage::Space::Open(
// "file://" + boost::filesystem::canonical(temp_path).string(),
// milvus_storage::Options{schema});
// EXPECT_TRUE(space_res.has_value());
//
// auto space = std::move(space_res.value());
// auto rec = TestRecords(vec_size, dataset);
// auto write_opt = milvus_storage::WriteOption{NB};
// space->Write(rec.get(), &write_opt);
// return std::move(space);
// }
//
// void
// SetUp() override {
// temp_path = boost::filesystem::temp_directory_path() /
// boost::filesystem::unique_path();
// boost::filesystem::create_directory(temp_path);
// storage_config_ = get_default_local_storage_config();
//
// auto param = GetParam();
// index_type = std::get<0>(param).first;
// metric_type = std::get<0>(param).second;
// file_slice_size = std::get<1>(param);
// enable_mmap = index_type != knowhere::IndexEnum::INDEX_DISKANN &&
// std::get<2>(param);
// if (enable_mmap) {
// mmap_file_path = boost::filesystem::temp_directory_path() /
// boost::filesystem::unique_path();
// }
// NB = 3000;
//
// // try to reduce the test time,
// // but the large dataset is needed for the case below.
// auto test_name = std::string(
// testing::UnitTest::GetInstance()->current_test_info()->name());
// if (test_name == "Mmap" &&
// index_type == knowhere::IndexEnum::INDEX_HNSW) {
// NB = 270000;
// }
// build_conf = generate_build_conf(index_type, metric_type);
// load_conf = generate_load_conf(index_type, metric_type, NB);
// search_conf = generate_search_conf(index_type, metric_type);
// range_search_conf = generate_range_search_conf(index_type, metric_type);
//
// std::map<knowhere::MetricType, bool> is_binary_map = {
// {knowhere::IndexEnum::INDEX_FAISS_IDMAP, false},
// {knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false},
// {knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, false},
// {knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, false},
// {knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true},
// {knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, true},
// {knowhere::IndexEnum::INDEX_HNSW, false},
// {knowhere::IndexEnum::INDEX_DISKANN, false},
// };
//
// is_binary = is_binary_map[index_type];
// int vec_size;
// if (is_binary) {
// vec_size = DIM / 8;
// vec_field_data_type = milvus::DataType::VECTOR_BINARY;
// } else {
// vec_size = DIM * 4;
// vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
// }
//
// auto dataset = GenDataset(NB, metric_type, is_binary);
// space = TestSpace(vec_size, dataset);
//
// if (!is_binary) {
// xb_data = dataset.get_col<float>(milvus::FieldId(100));
// xq_dataset = knowhere::GenDataSet(
// NQ, DIM, xb_data.data() + DIM * query_offset);
// } else {
// xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
// xq_dataset = knowhere::GenDataSet(
// NQ, DIM, xb_bin_data.data() + DIM * query_offset);
// }
// }
//
// void
// TearDown() override {
// boost::filesystem::remove_all(temp_path);
// if (enable_mmap) {
// boost::filesystem::remove_all(mmap_file_path);
// }
// }
//
// protected:
// std::string index_type, metric_type;
// bool is_binary;
// milvus::Config build_conf;
// milvus::Config load_conf;
// milvus::Config search_conf;
// milvus::Config range_search_conf;
// milvus::DataType vec_field_data_type;
// knowhere::DataSetPtr xb_dataset;
// FixedVector<float> xb_data;
// FixedVector<uint8_t> xb_bin_data;
// knowhere::DataSetPtr xq_dataset;
// int64_t query_offset = 100;
// int64_t NB = 3000;
// StorageConfig storage_config_;
//
// boost::filesystem::path temp_path;
// std::shared_ptr<milvus_storage::Space> space;
// int64_t file_slice_size = DEFAULT_INDEX_FILE_SLICE_SIZE;
// bool enable_mmap;
// boost::filesystem::path mmap_file_path;
//};
//
//INSTANTIATE_TEST_SUITE_P(
// IndexTypeParameters,
// IndexTestV2,
// testing::Combine(
// ::testing::Values(
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP,
// knowhere::metric::L2),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ,
// knowhere::metric::L2),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
// knowhere::metric::L2),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
// knowhere::metric::L2),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
// knowhere::metric::JACCARD),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
// knowhere::metric::JACCARD),
//#ifdef BUILD_DISK_ANN
// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2),
//#endif
// std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2)),
// testing::Values(DEFAULT_INDEX_FILE_SLICE_SIZE, 5000L),
// testing::Bool()));
//
//TEST_P(IndexTestV2, BuildAndQuery) {
// FILE_SLICE_SIZE = file_slice_size;
// milvus::index::CreateIndexInfo create_index_info;
// create_index_info.index_type = index_type;
// create_index_info.metric_type = metric_type;
// create_index_info.field_type = vec_field_data_type;
// create_index_info.field_name = "vec";
// create_index_info.dim = DIM;
// create_index_info.index_engine_version =
// knowhere::Version::GetCurrentVersion().VersionNumber();
// index::IndexBasePtr index;
//
// milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
// milvus::storage::IndexMeta index_meta{.segment_id = 3,
// .field_id = 100,
// .build_id = 1000,
// .index_version = 1,
// .field_name = "vec",
// .field_type = vec_field_data_type,
// .dim = DIM};
// auto chunk_manager = milvus::storage::CreateChunkManager(storage_config_);
// milvus::storage::FileManagerContext file_manager_context(
// field_data_meta, index_meta, chunk_manager, space);
// index = milvus::index::IndexFactory::GetInstance().CreateIndex(
// create_index_info, file_manager_context, space);
//
// auto build_conf = generate_build_conf(index_type, metric_type);
// index->BuildV2(build_conf);
// milvus::index::IndexBasePtr new_index;
// milvus::index::VectorIndex* vec_index = nullptr;
//
// auto binary_set = index->UploadV2();
// index.reset();
//
// new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
// create_index_info, file_manager_context, space);
// vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
//
// load_conf = generate_load_conf(index_type, metric_type, 0);
// if (enable_mmap) {
// load_conf[kMmapFilepath] = mmap_file_path.string();
// }
// ASSERT_NO_THROW(vec_index->LoadV2(load_conf));
// EXPECT_EQ(vec_index->Count(), NB);
// EXPECT_EQ(vec_index->GetDim(), DIM);
//
// milvus::SearchInfo search_info;
// search_info.topk_ = K;
// search_info.metric_type_ = metric_type;
// search_info.search_params_ = search_conf;
// auto result = vec_index->Query(xq_dataset, search_info, nullptr);
// EXPECT_EQ(result->total_nq_, NQ);
// EXPECT_EQ(result->unity_topK_, K);
// EXPECT_EQ(result->distances_.size(), NQ * K);
// EXPECT_EQ(result->seg_offsets_.size(), NQ * K);
// if (!is_binary) {
// EXPECT_EQ(result->seg_offsets_[0], query_offset);
// }
// search_info.search_params_ = range_search_conf;
// vec_index->Query(xq_dataset, search_info, nullptr);
//}
#endif

View File

@ -301,31 +301,6 @@ TestRecords(int vec_size, GeneratedData& dataset, std::vector<T>& scalars) {
return reader;
}
template <typename T>
std::shared_ptr<milvus_storage::Space>
TestSpace(boost::filesystem::path& temp_path,
int vec_size,
GeneratedData& dataset,
std::vector<T>& scalars) {
auto arrow_schema = TestSchema<T>(vec_size);
milvus_storage::SchemaOptions schema_options{
.primary_column = "pk", .version_column = "ts", .vector_column = "vec"};
auto schema =
std::make_shared<milvus_storage::Schema>(arrow_schema, schema_options);
EXPECT_TRUE(schema->Validate().ok());
auto space_res = milvus_storage::Space::Open(
"file://" + boost::filesystem::canonical(temp_path).string(),
milvus_storage::Options{schema});
EXPECT_TRUE(space_res.has_value());
auto space = std::move(space_res.value());
auto rec = TestRecords<T>(vec_size, dataset, scalars);
auto write_opt = milvus_storage::WriteOption{nb};
space->Write(*rec, write_opt);
return std::move(space);
}
template <>
struct TypedScalarIndexTestV2<int8_t>::Helper {
using C = arrow::Int8Type;

View File

@ -349,116 +349,5 @@ TEST_F(StringIndexMarisaTest, BaseIndexCodec) {
}
}
}
using milvus::segcore::GeneratedData;
class StringIndexMarisaTestV2 : public StringIndexBaseTest {
std::shared_ptr<arrow::Schema>
TestSchema(int vec_size) {
arrow::FieldVector fields;
fields.push_back(arrow::field("pk", arrow::int64()));
fields.push_back(arrow::field("ts", arrow::int64()));
fields.push_back(arrow::field("scalar", arrow::utf8()));
fields.push_back(
arrow::field("vec", arrow::fixed_size_binary(vec_size)));
return std::make_shared<arrow::Schema>(fields);
}
std::shared_ptr<arrow::RecordBatchReader>
TestRecords(int vec_size,
GeneratedData& dataset,
std::vector<std::string>& scalars) {
arrow::Int64Builder pk_builder;
arrow::Int64Builder ts_builder;
arrow::StringBuilder scalar_builder;
arrow::FixedSizeBinaryBuilder vec_builder(
arrow::fixed_size_binary(vec_size));
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
auto data = reinterpret_cast<char*>(xb_data.data());
for (auto i = 0; i < nb; ++i) {
EXPECT_TRUE(pk_builder.Append(i).ok());
EXPECT_TRUE(ts_builder.Append(i).ok());
EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok());
}
for (auto& v : scalars) {
EXPECT_TRUE(scalar_builder.Append(v).ok());
}
std::shared_ptr<arrow::Array> pk_array;
EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
std::shared_ptr<arrow::Array> ts_array;
EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
std::shared_ptr<arrow::Array> scalar_array;
EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok());
std::shared_ptr<arrow::Array> vec_array;
EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
auto schema = TestSchema(vec_size);
auto rec_batch = arrow::RecordBatch::Make(
schema, nb, {pk_array, ts_array, scalar_array, vec_array});
auto reader =
arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie();
return reader;
}
std::shared_ptr<milvus_storage::Space>
TestSpace(int vec_size,
GeneratedData& dataset,
std::vector<std::string>& scalars) {
auto arrow_schema = TestSchema(vec_size);
milvus_storage::SchemaOptions schema_options{.primary_column = "pk",
.version_column = "ts",
.vector_column = "vec"};
auto schema = std::make_shared<milvus_storage::Schema>(arrow_schema,
schema_options);
EXPECT_TRUE(schema->Validate().ok());
auto space_res = milvus_storage::Space::Open(
"file://" + boost::filesystem::canonical(temp_path).string(),
milvus_storage::Options{schema});
EXPECT_TRUE(space_res.has_value());
auto space = std::move(space_res.value());
auto rec = TestRecords(vec_size, dataset, scalars);
auto write_opt = milvus_storage::WriteOption{nb};
space->Write(*rec, write_opt);
return std::move(space);
}
void
SetUp() override {
StringIndexBaseTest::SetUp();
temp_path = boost::filesystem::temp_directory_path() /
boost::filesystem::unique_path();
boost::filesystem::create_directory(temp_path);
auto vec_size = DIM * 4;
auto vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
auto dataset = ::GenDataset(nb, knowhere::metric::L2, false);
space = TestSpace(vec_size, dataset, strs);
}
void
TearDown() override {
boost::filesystem::remove_all(temp_path);
}
protected:
boost::filesystem::path temp_path;
std::shared_ptr<milvus_storage::Space> space;
};
TEST_F(StringIndexMarisaTestV2, Base) {
auto storage_config = get_default_local_storage_config();
auto chunk_manager = milvus::storage::CreateChunkManager(storage_config);
milvus::storage::FileManagerContext file_manager_context(
{}, {.field_name = "scalar"}, chunk_manager, space);
auto index =
milvus::index::CreateStringIndexMarisa(file_manager_context, space);
index->BuildV2();
index->UploadV2();
auto new_index =
milvus::index::CreateStringIndexMarisa(file_manager_context, space);
new_index->LoadV2();
ASSERT_EQ(strs.size(), index->Count());
}
} // namespace index
} // namespace milvus

View File

@ -543,10 +543,6 @@ func (s *Server) SaveBinlogPaths(ctx context.Context, req *datapb.SaveBinlogPath
UpdateCheckPointOperator(req.GetSegmentID(), req.GetCheckPoints()),
)
if Params.CommonCfg.EnableStorageV2.GetAsBool() {
operators = append(operators, UpdateStorageVersionOperator(req.GetSegmentID(), req.GetStorageVersion()))
}
// Update segment info in memory and meta.
if err := s.meta.UpdateSegmentsInfo(operators...); err != nil {
log.Error("save binlog and checkpoints failed", zap.Error(err))
@ -882,18 +878,6 @@ func (s *Server) GetRecoveryInfoV2(ctx context.Context, req *datapb.GetRecoveryI
continue
}
if Params.CommonCfg.EnableStorageV2.GetAsBool() {
segmentInfos = append(segmentInfos, &datapb.SegmentInfo{
ID: segment.ID,
PartitionID: segment.PartitionID,
CollectionID: segment.CollectionID,
InsertChannel: segment.InsertChannel,
NumOfRows: segment.NumOfRows,
Level: segment.GetLevel(),
})
continue
}
binlogs := segment.GetBinlogs()
if len(binlogs) == 0 && segment.GetLevel() != datapb.SegmentLevel_L0 {
continue
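
Aside from the removed storage-v2 branch, the hunks above keep datacoord's operator-based update style: each change to a segment is expressed as a small function (UpdateCheckPointOperator, formerly UpdateStorageVersionOperator), collected into a slice, and applied in one UpdateSegmentsInfo call. A minimal stand-alone sketch of that shape, using hypothetical types and names rather than the real datacoord code:

package main

import "fmt"

// segmentInfo is a hypothetical stand-in for the datacoord segment metadata.
type segmentInfo struct {
	ID         int64
	NumOfRows  int64
	Checkpoint uint64
}

// updateOperator mutates one segment; the real operators also report whether they applied.
type updateOperator func(*segmentInfo)

func updateCheckPointOperator(cp uint64) updateOperator {
	return func(s *segmentInfo) { s.Checkpoint = cp }
}

func updateRowCountOperator(rows int64) updateOperator {
	return func(s *segmentInfo) { s.NumOfRows = rows }
}

// updateSegmentInfo applies all operators in order, so the caller sees one combined update.
func updateSegmentInfo(s *segmentInfo, ops ...updateOperator) {
	for _, op := range ops {
		op(s)
	}
}

func main() {
	seg := &segmentInfo{ID: 1}
	updateSegmentInfo(seg, updateCheckPointOperator(42), updateRowCountOperator(1000))
	fmt.Printf("%+v\n", seg) // {ID:1 NumOfRows:1000 Checkpoint:42}
}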

View File

@ -25,10 +25,8 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/types"
itypeutil "github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/indexparams"
@ -201,68 +199,27 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule
}
}
if Params.CommonCfg.EnableStorageV2.GetAsBool() {
storePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segment.GetID())
if err != nil {
log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err))
it.SetState(indexpb.JobState_JobStateInit, err.Error())
return true
}
indexStorePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue()+"/index", segment.GetID())
if err != nil {
log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err))
it.SetState(indexpb.JobState_JobStateInit, err.Error())
return true
}
it.req = &indexpb.CreateJobRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
BuildID: it.taskID,
IndexVersion: segIndex.IndexVersion + 1,
StorageConfig: storageConfig,
IndexParams: indexParams,
TypeParams: typeParams,
NumRows: segIndex.NumRows,
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
CollectionID: segment.GetCollectionID(),
PartitionID: segment.GetPartitionID(),
SegmentID: segment.GetID(),
FieldID: fieldID,
FieldName: field.GetName(),
FieldType: field.GetDataType(),
StorePath: storePath,
StoreVersion: segment.GetStorageVersion(),
IndexStorePath: indexStorePath,
Dim: int64(dim),
DataIds: binlogIDs,
OptionalScalarFields: optionalFields,
Field: field,
PartitionKeyIsolation: partitionKeyIsolation,
}
} else {
it.req = &indexpb.CreateJobRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
BuildID: it.taskID,
IndexVersion: segIndex.IndexVersion + 1,
StorageConfig: storageConfig,
IndexParams: indexParams,
TypeParams: typeParams,
NumRows: segIndex.NumRows,
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
CollectionID: segment.GetCollectionID(),
PartitionID: segment.GetPartitionID(),
SegmentID: segment.GetID(),
FieldID: fieldID,
FieldName: field.GetName(),
FieldType: field.GetDataType(),
Dim: int64(dim),
DataIds: binlogIDs,
OptionalScalarFields: optionalFields,
Field: field,
PartitionKeyIsolation: partitionKeyIsolation,
}
it.req = &indexpb.CreateJobRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
BuildID: it.taskID,
IndexVersion: segIndex.IndexVersion + 1,
StorageConfig: storageConfig,
IndexParams: indexParams,
TypeParams: typeParams,
NumRows: segIndex.NumRows,
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
CollectionID: segment.GetCollectionID(),
PartitionID: segment.GetPartitionID(),
SegmentID: segment.GetID(),
FieldID: fieldID,
FieldName: field.GetName(),
FieldType: field.GetDataType(),
Dim: int64(dim),
DataIds: binlogIDs,
OptionalScalarFields: optionalFields,
Field: field,
PartitionKeyIsolation: partitionKeyIsolation,
}
log.Ctx(ctx).Info("index task pre check successfully", zap.Int64("taskID", it.GetTaskID()))

View File

@ -911,15 +911,6 @@ func (s *taskSchedulerSuite) Test_scheduler() {
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
s.scheduler(handler)
})
s.Run("test scheduler with indexBuilderV2", func() {
paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("true")
defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
s.scheduler(handler)
})
}
func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
@ -1289,26 +1280,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("True")
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("False")
err := Params.Save("common.storage.scheme", "fake")
defer Params.Reset("common.storage.scheme")
Params.CommonCfg.EnableStorageV2.SwapTempValue("True")
defer Params.CommonCfg.EnableStorageV2.SwapTempValue("False")
scheduler.Start()
// get collection info failed --> init
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
// partition key field is nil, get collection info failed --> init
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
},
},
}, nil).Once()
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
// get collection info success, get dim failed --> init
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
ID: collID,
@ -1318,38 +1294,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
{FieldID: s.fieldID, Name: "vec"},
},
},
}, nil).Twice()
// peek client success, update version success, get collection info success, get dim success, get storage uri failed --> init
s.NoError(err)
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) {
return &collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{FieldID: 100, Name: "pk", IsPrimaryKey: true, IsPartitionKey: true, DataType: schemapb.DataType_Int64},
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
},
},
}, nil
}).Twice()
s.NoError(err)
}, nil).Once()
// assign failed --> retry
workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once()
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) {
Params.Reset("common.storage.scheme")
return &collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{FieldID: 100, Name: "pk", IsPrimaryKey: true, IsPartitionKey: true, DataType: schemapb.DataType_Int64},
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
},
},
}, nil
}).Once()
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
// retry --> init

View File

@ -30,12 +30,12 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus-storage/go/common/log"
"github.com/milvus-io/milvus/internal/datanode/allocator"
"github.com/milvus-io/milvus/internal/datanode/io"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/tsoutil"

View File

@ -24,8 +24,6 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
@ -113,54 +111,3 @@ func LoadStats(ctx context.Context, chunkManager storage.ChunkManager, schema *s
log.Info("Successfully load pk stats", zap.Any("time", time.Since(startTs)), zap.Uint("size", size))
return result, nil
}
func LoadStatsV2(storageCache *metacache.StorageV2Cache, segment *datapb.SegmentInfo, schema *schemapb.CollectionSchema) ([]*storage.PkStatistics, error) {
space, err := storageCache.GetOrCreateSpace(segment.ID, syncmgr.SpaceCreatorFunc(segment.ID, schema, storageCache.ArrowSchema()))
if err != nil {
return nil, err
}
getResult := func(stats []*storage.PrimaryKeyStats) []*storage.PkStatistics {
result := make([]*storage.PkStatistics, 0, len(stats))
for _, stat := range stats {
pkStat := &storage.PkStatistics{
PkFilter: stat.BF,
MinPK: stat.MinPk,
MaxPK: stat.MaxPk,
}
result = append(result, pkStat)
}
return result
}
blobs := space.StatisticsBlobs()
deserBlobs := make([]*storage.Blob, 0)
for _, b := range blobs {
if b.Name == storage.CompoundStatsType.LogIdx() {
blobData := make([]byte, b.Size)
_, err = space.ReadBlob(b.Name, blobData)
if err != nil {
return nil, err
}
stats, err := storage.DeserializeStatsList(&storage.Blob{Value: blobData})
if err != nil {
return nil, err
}
return getResult(stats), nil
}
}
for _, b := range blobs {
blobData := make([]byte, b.Size)
_, err = space.ReadBlob(b.Name, blobData)
if err != nil {
return nil, err
}
deserBlobs = append(deserBlobs, &storage.Blob{Value: blobData})
}
stats, err := storage.DeserializeStats(deserBlobs)
if err != nil {
return nil, err
}
return getResult(stats), nil
}

View File

@ -32,7 +32,6 @@ import (
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
@ -52,10 +51,6 @@ func NewSyncTask(ctx context.Context,
insertData *storage.InsertData,
deleteData *storage.DeleteData,
) (syncmgr.Task, error) {
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
return nil, merr.WrapErrImportFailed("storage v2 is not supported") // TODO: dyh, resolve storage v2
}
metaCache := metaCaches[vchannel]
if _, ok := metaCache.GetSegmentByID(segmentID); !ok {
metaCache.AddSegment(&datapb.SegmentInfo{

View File

@ -1,70 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metacache
import (
"sync"
"github.com/apache/arrow/go/v12/arrow"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus/internal/util/typeutil"
)
type StorageV2Cache struct {
arrowSchema *arrow.Schema
spaceMu sync.Mutex
spaces map[int64]*milvus_storage.Space
}
func (s *StorageV2Cache) ArrowSchema() *arrow.Schema {
return s.arrowSchema
}
func (s *StorageV2Cache) GetOrCreateSpace(segmentID int64, creator func() (*milvus_storage.Space, error)) (*milvus_storage.Space, error) {
s.spaceMu.Lock()
defer s.spaceMu.Unlock()
space, ok := s.spaces[segmentID]
if ok {
return space, nil
}
space, err := creator()
if err != nil {
return nil, err
}
s.spaces[segmentID] = space
return space, nil
}
// only for unit test
func (s *StorageV2Cache) SetSpace(segmentID int64, space *milvus_storage.Space) {
s.spaceMu.Lock()
defer s.spaceMu.Unlock()
s.spaces[segmentID] = space
}
func NewStorageV2Cache(schema *schemapb.CollectionSchema) (*StorageV2Cache, error) {
arrowSchema, err := typeutil.ConvertToArrowSchema(schema.Fields)
if err != nil {
return nil, err
}
return &StorageV2Cache{
arrowSchema: arrowSchema,
spaces: make(map[int64]*milvus_storage.Space),
}, nil
}
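
The deleted StorageV2Cache above is essentially a mutex-guarded lazy cache: GetOrCreateSpace returns an existing space or runs the creator under the lock and stores the result. A generic sketch of that get-or-create idiom, with hypothetical types in place of milvus-storage:

package main

import (
	"fmt"
	"sync"
)

// space stands in for *milvus_storage.Space.
type space struct{ uri string }

type spaceCache struct {
	mu     sync.Mutex
	spaces map[int64]*space
}

func newSpaceCache() *spaceCache {
	return &spaceCache{spaces: make(map[int64]*space)}
}

// getOrCreate returns the cached entry for segmentID, or builds one with creator.
// The creator runs while holding the lock, mirroring the removed code; a
// higher-concurrency variant would use singleflight or per-key locks instead.
func (c *spaceCache) getOrCreate(segmentID int64, creator func() (*space, error)) (*space, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if s, ok := c.spaces[segmentID]; ok {
		return s, nil
	}
	s, err := creator()
	if err != nil {
		return nil, err
	}
	c.spaces[segmentID] = s
	return s, nil
}

func main() {
	cache := newSpaceCache()
	s, _ := cache.getOrCreate(7, func() (*space, error) { return &space{uri: "file:///tmp/seg-7"}, nil })
	fmt.Println(s.uri)
}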

View File

@ -30,7 +30,6 @@ import (
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
"github.com/milvus-io/milvus/internal/flushcommon/writebuffer"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/flowgraph"
"github.com/milvus-io/milvus/pkg/log"
@ -129,12 +128,12 @@ func (dsService *DataSyncService) GetMetaCache() metacache.MetaCache {
return dsService.metacache
}
func getMetaCacheWithTickler(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, tickler *util.Tickler, unflushed, flushed []*datapb.SegmentInfo, storageV2Cache *metacache.StorageV2Cache) (metacache.MetaCache, error) {
func getMetaCacheWithTickler(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, tickler *util.Tickler, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
tickler.SetTotal(int32(len(unflushed) + len(flushed)))
return initMetaCache(initCtx, storageV2Cache, params.ChunkManager, info, tickler, unflushed, flushed)
return initMetaCache(initCtx, params.ChunkManager, info, tickler, unflushed, flushed)
}
func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2Cache, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ Inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
func initMetaCache(initCtx context.Context, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ Inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
// tickler will update addSegment progress to watchInfo
futures := make([]*conc.Future[any], 0, len(unflushed)+len(flushed))
segmentPks := typeutil.NewConcurrentMap[int64, []*storage.PkStatistics]()
@ -152,11 +151,7 @@ func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2C
future := io.GetOrCreateStatsPool().Submit(func() (any, error) {
var stats []*storage.PkStatistics
var err error
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
stats, err = compaction.LoadStatsV2(storageV2Cache, segment, info.GetSchema())
} else {
stats, err = compaction.LoadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetStatslogs())
}
stats, err = compaction.LoadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetStatslogs())
if err != nil {
return nil, err
}
@ -190,7 +185,7 @@ func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2C
return metacache, nil
}
func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, unflushed, flushed []*datapb.SegmentInfo) (*DataSyncService, error) {
func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, unflushed, flushed []*datapb.SegmentInfo) (*DataSyncService, error) {
var (
channelName = info.GetVchan().GetChannelName()
collectionID = info.GetVchan().GetCollectionID()
@ -204,7 +199,7 @@ func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams,
serverID: params.Session.ServerID,
}
err := params.WriteBufferManager.Register(channelName, metacache, storageV2Cache,
err := params.WriteBufferManager.Register(channelName, metacache,
writebuffer.WithMetaWriter(syncmgr.BrokerMetaWriter(params.Broker, config.serverID)),
writebuffer.WithIDAllocator(params.Allocator))
if err != nil {
@ -287,21 +282,13 @@ func NewDataSyncService(initCtx context.Context, pipelineParams *util.PipelinePa
return nil, err
}
var storageCache *metacache.StorageV2Cache
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
storageCache, err = metacache.NewStorageV2Cache(info.Schema)
if err != nil {
return nil, err
}
}
// init metaCache meta
metaCache, err := getMetaCacheWithTickler(initCtx, pipelineParams, info, tickler, unflushedSegmentInfos, flushedSegmentInfos, storageCache)
metaCache, err := getMetaCacheWithTickler(initCtx, pipelineParams, info, tickler, unflushedSegmentInfos, flushedSegmentInfos)
if err != nil {
return nil, err
}
return getServiceWithChannel(initCtx, pipelineParams, info, metaCache, storageCache, unflushedSegmentInfos, flushedSegmentInfos)
return getServiceWithChannel(initCtx, pipelineParams, info, metaCache, unflushedSegmentInfos, flushedSegmentInfos)
}
func NewDataSyncServiceWithMetaCache(metaCache metacache.MetaCache) *DataSyncService {

View File

@ -289,7 +289,7 @@ func TestGetChannelWithTickler(t *testing.T) {
},
}
metaCache, err := getMetaCacheWithTickler(context.TODO(), pipelineParams, info, util.NewTickler(), unflushed, flushed, nil)
metaCache, err := getMetaCacheWithTickler(context.TODO(), pipelineParams, info, util.NewTickler(), unflushed, flushed)
assert.NoError(t, err)
assert.NotNil(t, metaCache)
assert.Equal(t, int64(1), metaCache.Collection())

View File

@ -20,7 +20,6 @@ import (
// MetaWriter is the interface for SyncManager to write segment sync meta.
type MetaWriter interface {
UpdateSync(context.Context, *SyncTask) error
UpdateSyncV2(*SyncTaskV2) error
DropChannel(context.Context, string) error
}
@ -138,82 +137,6 @@ func (b *brokerMetaWriter) UpdateSync(ctx context.Context, pack *SyncTask) error
return nil
}
func (b *brokerMetaWriter) UpdateSyncV2(pack *SyncTaskV2) error {
checkPoints := []*datapb.CheckPoint{}
// only current segment checkpoint info,
segment, ok := pack.metacache.GetSegmentByID(pack.segmentID)
if !ok {
return merr.WrapErrSegmentNotFound(pack.segmentID)
}
checkPoints = append(checkPoints, &datapb.CheckPoint{
SegmentID: pack.segmentID,
NumOfRows: segment.FlushedRows() + pack.batchSize,
Position: pack.checkpoint,
})
startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing),
metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition {
return &datapb.SegmentStartPosition{
SegmentID: info.SegmentID(),
StartPosition: info.StartPosition(),
}
})
log.Info("SaveBinlogPath",
zap.Int64("SegmentID", pack.segmentID),
zap.Int64("CollectionID", pack.collectionID),
zap.Any("startPos", startPos),
zap.Any("checkPoints", checkPoints),
zap.String("vChannelName", pack.channelName),
)
req := &datapb.SaveBinlogPathsRequest{
Base: commonpbutil.NewMsgBase(
commonpbutil.WithSourceID(b.serverID),
),
SegmentID: pack.segmentID,
CollectionID: pack.collectionID,
CheckPoints: checkPoints,
StorageVersion: pack.storageVersion,
StartPositions: startPos,
Flushed: pack.isFlush,
Dropped: pack.isDrop,
Channel: pack.channelName,
}
err := retry.Do(context.Background(), func() error {
err := b.broker.SaveBinlogPaths(context.Background(), req)
// Segment not found during stale segment flush. Segment might get compacted already.
// Stop retry and still proceed to the end, ignoring this error.
if !pack.isFlush && errors.Is(err, merr.ErrSegmentNotFound) {
log.Warn("stale segment not found, could be compacted",
zap.Int64("segmentID", pack.segmentID))
log.Warn("failed to SaveBinlogPaths",
zap.Int64("segmentID", pack.segmentID),
zap.Error(err))
return nil
}
// meta error, datanode handles a virtual channel does not belong here
if errors.IsAny(err, merr.ErrSegmentNotFound, merr.ErrChannelNotFound) {
log.Warn("meta error found, skip sync and start to drop virtual channel", zap.String("channel", pack.channelName))
return nil
}
if err != nil {
return err
}
return nil
}, b.opts...)
if err != nil {
log.Warn("failed to SaveBinlogPaths",
zap.Int64("segmentID", pack.segmentID),
zap.Error(err))
}
return err
}
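
The removed UpdateSyncV2 above wraps the broker call in retry.Do and classifies errors: a stale (non-flush) segment that is already gone counts as success rather than a failure. A self-contained sketch of that retry-with-classification shape, with hypothetical helpers in place of the Milvus retry and merr packages:

package main

import (
	"errors"
	"fmt"
	"time"
)

var errSegmentNotFound = errors.New("segment not found")

// retryDo retries fn up to attempts times, sleeping between tries.
func retryDo(attempts int, interval time.Duration, fn func() error) error {
	var err error
	for i := 0; i < attempts; i++ {
		if err = fn(); err == nil {
			return nil
		}
		time.Sleep(interval)
	}
	return err
}

func saveBinlogPaths(segmentID int64, isFlush bool, call func() error) error {
	return retryDo(3, 10*time.Millisecond, func() error {
		err := call()
		// A stale (non-flush) segment may already be compacted away; treat it as done.
		if !isFlush && errors.Is(err, errSegmentNotFound) {
			fmt.Printf("stale segment %d not found, could be compacted\n", segmentID)
			return nil
		}
		return err
	})
}

func main() {
	err := saveBinlogPaths(100, false, func() error { return errSegmentNotFound })
	fmt.Println("result:", err) // nil: the stale-segment case is swallowed
}
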
func (b *brokerMetaWriter) DropChannel(ctx context.Context, channelName string) error {
err := retry.Handle(ctx, func() (bool, error) {
status, err := b.broker.DropVirtualChannel(context.Background(), &datapb.DropVirtualChannelRequest{

View File

@ -67,34 +67,6 @@ func (s *MetaWriterSuite) TestReturnError() {
s.Error(err)
}
func (s *MetaWriterSuite) TestNormalSaveV2() {
s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil)
bfs := metacache.NewBloomFilterSet()
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
metacache.UpdateNumOfRows(1000)(seg)
s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
task := NewSyncTaskV2()
task.WithMetaCache(s.metacache)
err := s.writer.UpdateSyncV2(task)
s.NoError(err)
}
func (s *MetaWriterSuite) TestReturnErrorV2() {
s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(errors.New("mocked"))
bfs := metacache.NewBloomFilterSet()
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
metacache.UpdateNumOfRows(1000)(seg)
s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
task := NewSyncTaskV2()
task.WithMetaCache(s.metacache)
err := s.writer.UpdateSyncV2(task)
s.Error(err)
}
func TestMetaWriter(t *testing.T) {
suite.Run(t, new(MetaWriterSuite))
}
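
The suites above drive mockery-generated mocks through the expecter pattern (broker.EXPECT().SaveBinlogPaths(...).Return(...)). A minimal sketch of how such a mock behaves, built directly on testify/mock with a hypothetical one-method MetaWriter interface rather than the generated code:

package example

import (
	"context"
	"testing"

	"github.com/stretchr/testify/mock"
)

// MetaWriter is a hypothetical interface for this sketch only.
type MetaWriter interface {
	UpdateSync(ctx context.Context) error
}

// MockMetaWriter is a hand-written testify mock; mockery generates the same
// shape plus typed EXPECT() helpers for each method.
type MockMetaWriter struct{ mock.Mock }

func (m *MockMetaWriter) UpdateSync(ctx context.Context) error {
	return m.Called(ctx).Error(0)
}

func TestUpdateSyncIsCalledOnce(t *testing.T) {
	w := &MockMetaWriter{}
	w.On("UpdateSync", mock.Anything).Return(nil).Once()

	if err := w.UpdateSync(context.Background()); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	w.AssertExpectations(t)
}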

View File

@ -107,48 +107,6 @@ func (_c *MockMetaWriter_UpdateSync_Call) RunAndReturn(run func(context.Context,
return _c
}
// UpdateSyncV2 provides a mock function with given fields: _a0
func (_m *MockMetaWriter) UpdateSyncV2(_a0 *SyncTaskV2) error {
ret := _m.Called(_a0)
var r0 error
if rf, ok := ret.Get(0).(func(*SyncTaskV2) error); ok {
r0 = rf(_a0)
} else {
r0 = ret.Error(0)
}
return r0
}
// MockMetaWriter_UpdateSyncV2_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateSyncV2'
type MockMetaWriter_UpdateSyncV2_Call struct {
*mock.Call
}
// UpdateSyncV2 is a helper method to define mock.On call
// - _a0 *SyncTaskV2
func (_e *MockMetaWriter_Expecter) UpdateSyncV2(_a0 interface{}) *MockMetaWriter_UpdateSyncV2_Call {
return &MockMetaWriter_UpdateSyncV2_Call{Call: _e.mock.On("UpdateSyncV2", _a0)}
}
func (_c *MockMetaWriter_UpdateSyncV2_Call) Run(run func(_a0 *SyncTaskV2)) *MockMetaWriter_UpdateSyncV2_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(*SyncTaskV2))
})
return _c
}
func (_c *MockMetaWriter_UpdateSyncV2_Call) Return(_a0 error) *MockMetaWriter_UpdateSyncV2_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *MockMetaWriter_UpdateSyncV2_Call) RunAndReturn(run func(*SyncTaskV2) error) *MockMetaWriter_UpdateSyncV2_Call {
_c.Call.Return(run)
return _c
}
// NewMockMetaWriter creates a new instance of MockMetaWriter. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
// The first argument is typically a *testing.T value.
func NewMockMetaWriter(t interface {

View File

@ -1,4 +1,4 @@
// Code generated by mockery v2.30.1. DO NOT EDIT.
// Code generated by mockery v2.32.4. DO NOT EDIT.
package syncmgr

View File

@ -1,256 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package syncmgr
import (
"context"
"fmt"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus-storage/go/storage/schema"
"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
iTypeutil "github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
type storageV2Serializer struct {
*storageV1Serializer
arrowSchema *arrow.Schema
storageV2Cache *metacache.StorageV2Cache
inCodec *storage.InsertCodec
metacache metacache.MetaCache
}
func NewStorageV2Serializer(
storageV2Cache *metacache.StorageV2Cache,
allocator allocator.Interface,
metacache metacache.MetaCache,
metaWriter MetaWriter,
) (*storageV2Serializer, error) {
v1Serializer, err := NewStorageSerializer(allocator, metacache, metaWriter)
if err != nil {
return nil, err
}
return &storageV2Serializer{
storageV1Serializer: v1Serializer,
storageV2Cache: storageV2Cache,
arrowSchema: storageV2Cache.ArrowSchema(),
metacache: metacache,
}, nil
}
func (s *storageV2Serializer) EncodeBuffer(ctx context.Context, pack *SyncPack) (Task, error) {
task := NewSyncTaskV2()
tr := timerecord.NewTimeRecorder("storage_serializer_v2")
metricSegLevel := pack.level.String()
space, err := s.storageV2Cache.GetOrCreateSpace(pack.segmentID, SpaceCreatorFunc(pack.segmentID, s.schema, s.arrowSchema))
if err != nil {
log.Warn("failed to get or create space", zap.Error(err))
return nil, err
}
task.space = space
if len(pack.insertData) > 0 {
insertReader, err := s.serializeInsertData(pack)
if err != nil {
log.Warn("failed to serialize insert data with storagev2", zap.Error(err))
return nil, err
}
task.reader = insertReader
singlePKStats, batchStatsBlob, err := s.serializeStatslog(pack)
if err != nil {
log.Warn("failed to serialized statslog", zap.Error(err))
return nil, err
}
task.statsBlob = batchStatsBlob
s.metacache.UpdateSegments(metacache.RollStats(singlePKStats), metacache.WithSegmentIDs(pack.segmentID))
}
if pack.isFlush {
if pack.level != datapb.SegmentLevel_L0 {
mergedStatsBlob, err := s.serializeMergedPkStats(pack)
if err != nil {
log.Warn("failed to serialize merged stats log", zap.Error(err))
return nil, err
}
task.mergedStatsBlob = mergedStatsBlob
}
task.WithFlush()
}
if pack.deltaData != nil {
deltaReader, err := s.serializeDeltaData(pack)
if err != nil {
log.Warn("failed to serialize delta data", zap.Error(err))
return nil, err
}
task.deleteReader = deltaReader
}
if pack.isDrop {
task.WithDrop()
}
s.setTaskMeta(task, pack)
metrics.DataNodeEncodeBufferLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metricSegLevel).Observe(float64(tr.RecordSpan().Milliseconds()))
return task, nil
}
func (s *storageV2Serializer) setTaskMeta(task *SyncTaskV2, pack *SyncPack) {
task.WithCollectionID(pack.collectionID).
WithPartitionID(pack.partitionID).
WithChannelName(pack.channelName).
WithSegmentID(pack.segmentID).
WithBatchSize(pack.batchSize).
WithSchema(s.metacache.Schema()).
WithStartPosition(pack.startPosition).
WithCheckpoint(pack.checkpoint).
WithLevel(pack.level).
WithTimeRange(pack.tsFrom, pack.tsTo).
WithMetaCache(s.metacache).
WithMetaWriter(s.metaWriter).
WithFailureCallback(func(err error) {
// TODO could change to unsub channel in the future
panic(err)
})
}
func (s *storageV2Serializer) serializeInsertData(pack *SyncPack) (array.RecordReader, error) {
builder := array.NewRecordBuilder(memory.DefaultAllocator, s.arrowSchema)
defer builder.Release()
for _, chunk := range pack.insertData {
if err := iTypeutil.BuildRecord(builder, chunk, s.schema.GetFields()); err != nil {
return nil, err
}
}
rec := builder.NewRecord()
defer rec.Release()
itr, err := array.NewRecordReader(s.arrowSchema, []arrow.Record{rec})
if err != nil {
return nil, err
}
itr.Retain()
return itr, nil
}
func (s *storageV2Serializer) serializeDeltaData(pack *SyncPack) (array.RecordReader, error) {
fields := make([]*schemapb.FieldSchema, 0, 2)
tsField := &schemapb.FieldSchema{
FieldID: common.TimeStampField,
Name: common.TimeStampFieldName,
DataType: schemapb.DataType_Int64,
}
fields = append(fields, s.pkField, tsField)
deltaArrowSchema, err := iTypeutil.ConvertToArrowSchema(fields)
if err != nil {
return nil, err
}
builder := array.NewRecordBuilder(memory.DefaultAllocator, deltaArrowSchema)
defer builder.Release()
switch s.pkField.GetDataType() {
case schemapb.DataType_Int64:
pb := builder.Field(0).(*array.Int64Builder)
for _, pk := range pack.deltaData.Pks {
pb.Append(pk.GetValue().(int64))
}
case schemapb.DataType_VarChar:
pb := builder.Field(0).(*array.StringBuilder)
for _, pk := range pack.deltaData.Pks {
pb.Append(pk.GetValue().(string))
}
default:
return nil, merr.WrapErrParameterInvalidMsg("unexpected pk type %v", s.pkField.GetDataType())
}
for _, ts := range pack.deltaData.Tss {
builder.Field(1).(*array.Int64Builder).Append(int64(ts))
}
rec := builder.NewRecord()
defer rec.Release()
reader, err := array.NewRecordReader(deltaArrowSchema, []arrow.Record{rec})
if err != nil {
return nil, err
}
reader.Retain()
return reader, nil
}
func SpaceCreatorFunc(segmentID int64, collSchema *schemapb.CollectionSchema, arrowSchema *arrow.Schema) func() (*milvus_storage.Space, error) {
return func() (*milvus_storage.Space, error) {
url, err := iTypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segmentID)
if err != nil {
return nil, err
}
pkSchema, err := typeutil.GetPrimaryFieldSchema(collSchema)
if err != nil {
return nil, err
}
vecSchema, err := typeutil.GetVectorFieldSchema(collSchema)
if err != nil {
return nil, err
}
space, err := milvus_storage.Open(
url,
options.NewSpaceOptionBuilder().
SetSchema(schema.NewSchema(
arrowSchema,
&schema.SchemaOptions{
PrimaryColumn: pkSchema.Name,
VectorColumn: vecSchema.Name,
VersionColumn: common.TimeStampFieldName,
},
)).
Build(),
)
return space, err
}
}

View File

@ -1,366 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package syncmgr
import (
"context"
"fmt"
"math/rand"
"testing"
"time"
"github.com/samber/lo"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus-storage/go/storage/schema"
"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
)
type StorageV2SerializerSuite struct {
suite.Suite
collectionID int64
partitionID int64
segmentID int64
channelName string
schema *schemapb.CollectionSchema
storageCache *metacache.StorageV2Cache
mockAllocator *allocator.MockAllocator
mockCache *metacache.MockMetaCache
mockMetaWriter *MockMetaWriter
serializer *storageV2Serializer
}
func (s *StorageV2SerializerSuite) SetupSuite() {
paramtable.Get().Init(paramtable.NewBaseTable())
s.collectionID = rand.Int63n(100) + 1000
s.partitionID = rand.Int63n(100) + 2000
s.segmentID = rand.Int63n(1000) + 10000
s.channelName = fmt.Sprintf("by-dev-rootcoord-dml0_%d_v1", s.collectionID)
s.schema = &schemapb.CollectionSchema{
Name: "sync_task_test_col",
Fields: []*schemapb.FieldSchema{
{FieldID: common.RowIDField, DataType: schemapb.DataType_Int64, Name: common.RowIDFieldName},
{FieldID: common.TimeStampField, DataType: schemapb.DataType_Int64, Name: common.TimeStampFieldName},
{
FieldID: 100,
Name: "pk",
DataType: schemapb.DataType_Int64,
IsPrimaryKey: true,
},
{
FieldID: 101,
Name: "vector",
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "128"},
},
},
},
}
s.mockAllocator = allocator.NewMockAllocator(s.T())
s.mockCache = metacache.NewMockMetaCache(s.T())
s.mockMetaWriter = NewMockMetaWriter(s.T())
}
func (s *StorageV2SerializerSuite) SetupTest() {
storageCache, err := metacache.NewStorageV2Cache(s.schema)
s.Require().NoError(err)
s.storageCache = storageCache
s.mockCache.EXPECT().Collection().Return(s.collectionID)
s.mockCache.EXPECT().Schema().Return(s.schema)
s.serializer, err = NewStorageV2Serializer(storageCache, s.mockAllocator, s.mockCache, s.mockMetaWriter)
s.Require().NoError(err)
}
func (s *StorageV2SerializerSuite) getSpace() *milvus_storage.Space {
tmpDir := s.T().TempDir()
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
SetSchema(schema.NewSchema(s.storageCache.ArrowSchema(), &schema.SchemaOptions{
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
})).Build())
s.Require().NoError(err)
return space
}
func (s *StorageV2SerializerSuite) getBasicPack() *SyncPack {
pack := &SyncPack{}
pack.WithCollectionID(s.collectionID).
WithPartitionID(s.partitionID).
WithSegmentID(s.segmentID).
WithChannelName(s.channelName).
WithCheckpoint(&msgpb.MsgPosition{
Timestamp: 1000,
ChannelName: s.channelName,
})
return pack
}
func (s *StorageV2SerializerSuite) getEmptyInsertBuffer() *storage.InsertData {
buf, err := storage.NewInsertData(s.schema)
s.Require().NoError(err)
return buf
}
func (s *StorageV2SerializerSuite) getInsertBuffer() *storage.InsertData {
buf := s.getEmptyInsertBuffer()
// generate data
for i := 0; i < 10; i++ {
data := make(map[storage.FieldID]any)
data[common.RowIDField] = int64(i + 1)
data[common.TimeStampField] = int64(i + 1)
data[100] = int64(i + 1)
vector := lo.RepeatBy(128, func(_ int) float32 {
return rand.Float32()
})
data[101] = vector
err := buf.Append(data)
s.Require().NoError(err)
}
return buf
}
func (s *StorageV2SerializerSuite) getDeleteBuffer() *storage.DeleteData {
buf := &storage.DeleteData{}
for i := 0; i < 10; i++ {
pk := storage.NewInt64PrimaryKey(int64(i + 1))
ts := tsoutil.ComposeTSByTime(time.Now(), 0)
buf.Append(pk, ts)
}
return buf
}
func (s *StorageV2SerializerSuite) getDeleteBufferZeroTs() *storage.DeleteData {
buf := &storage.DeleteData{}
for i := 0; i < 10; i++ {
pk := storage.NewInt64PrimaryKey(int64(i + 1))
buf.Append(pk, 0)
}
return buf
}
func (s *StorageV2SerializerSuite) getBfs() *metacache.BloomFilterSet {
bfs := metacache.NewBloomFilterSet()
fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{
FieldID: 101,
Name: "ID",
IsPrimaryKey: true,
DataType: schemapb.DataType_Int64,
}, 16)
s.Require().NoError(err)
ids := []int64{1, 2, 3, 4, 5, 6, 7}
for _, id := range ids {
err = fd.AppendRow(id)
s.Require().NoError(err)
}
bfs.UpdatePKRange(fd)
return bfs
}
func (s *StorageV2SerializerSuite) TestSerializeInsert() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
s.storageCache.SetSpace(s.segmentID, s.getSpace())
s.Run("no_data", func() {
pack := s.getBasicPack()
pack.WithTimeRange(50, 100)
pack.WithDrop()
task, err := s.serializer.EncodeBuffer(ctx, pack)
s.NoError(err)
taskV1, ok := task.(*SyncTaskV2)
s.Require().True(ok)
s.Equal(s.collectionID, taskV1.collectionID)
s.Equal(s.partitionID, taskV1.partitionID)
s.Equal(s.channelName, taskV1.channelName)
s.Equal(&msgpb.MsgPosition{
Timestamp: 1000,
ChannelName: s.channelName,
}, taskV1.checkpoint)
s.EqualValues(50, taskV1.tsFrom)
s.EqualValues(100, taskV1.tsTo)
s.True(taskV1.isDrop)
})
s.Run("empty_insert_data", func() {
pack := s.getBasicPack()
pack.WithTimeRange(50, 100)
pack.WithInsertData([]*storage.InsertData{s.getEmptyInsertBuffer()}).WithBatchSize(0)
_, err := s.serializer.EncodeBuffer(ctx, pack)
s.Error(err)
})
s.Run("with_normal_data", func() {
pack := s.getBasicPack()
pack.WithTimeRange(50, 100)
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
s.mockCache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return().Once()
task, err := s.serializer.EncodeBuffer(ctx, pack)
s.NoError(err)
taskV2, ok := task.(*SyncTaskV2)
s.Require().True(ok)
s.Equal(s.collectionID, taskV2.collectionID)
s.Equal(s.partitionID, taskV2.partitionID)
s.Equal(s.channelName, taskV2.channelName)
s.Equal(&msgpb.MsgPosition{
Timestamp: 1000,
ChannelName: s.channelName,
}, taskV2.checkpoint)
s.EqualValues(50, taskV2.tsFrom)
s.EqualValues(100, taskV2.tsTo)
s.NotNil(taskV2.reader)
s.NotNil(taskV2.statsBlob)
})
s.Run("with_flush_segment_not_found", func() {
pack := s.getBasicPack()
pack.WithFlush()
s.mockCache.EXPECT().GetSegmentByID(s.segmentID).Return(nil, false).Once()
_, err := s.serializer.EncodeBuffer(ctx, pack)
s.Error(err)
})
s.Run("with_flush", func() {
pack := s.getBasicPack()
pack.WithTimeRange(50, 100)
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
pack.WithFlush()
bfs := s.getBfs()
segInfo := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
metacache.UpdateNumOfRows(1000)(segInfo)
s.mockCache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Run(func(action metacache.SegmentAction, filters ...metacache.SegmentFilter) {
action(segInfo)
}).Return().Once()
s.mockCache.EXPECT().GetSegmentByID(s.segmentID).Return(segInfo, true).Once()
task, err := s.serializer.EncodeBuffer(ctx, pack)
s.NoError(err)
taskV2, ok := task.(*SyncTaskV2)
s.Require().True(ok)
s.Equal(s.collectionID, taskV2.collectionID)
s.Equal(s.partitionID, taskV2.partitionID)
s.Equal(s.channelName, taskV2.channelName)
s.Equal(&msgpb.MsgPosition{
Timestamp: 1000,
ChannelName: s.channelName,
}, taskV2.checkpoint)
s.EqualValues(50, taskV2.tsFrom)
s.EqualValues(100, taskV2.tsTo)
s.NotNil(taskV2.mergedStatsBlob)
})
}
func (s *StorageV2SerializerSuite) TestSerializeDelete() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
s.Run("serialize_failed", func() {
pkField := s.serializer.pkField
s.serializer.pkField = &schemapb.FieldSchema{}
defer func() {
s.serializer.pkField = pkField
}()
pack := s.getBasicPack()
pack.WithDeleteData(s.getDeleteBufferZeroTs())
pack.WithTimeRange(50, 100)
_, err := s.serializer.EncodeBuffer(ctx, pack)
s.Error(err)
})
s.Run("serialize_failed_bad_pk", func() {
pkField := s.serializer.pkField
s.serializer.pkField = &schemapb.FieldSchema{
DataType: schemapb.DataType_Array,
}
defer func() {
s.serializer.pkField = pkField
}()
pack := s.getBasicPack()
pack.WithDeleteData(s.getDeleteBufferZeroTs())
pack.WithTimeRange(50, 100)
_, err := s.serializer.EncodeBuffer(ctx, pack)
s.Error(err)
})
s.Run("serialize_normal", func() {
pack := s.getBasicPack()
pack.WithDeleteData(s.getDeleteBuffer())
pack.WithTimeRange(50, 100)
task, err := s.serializer.EncodeBuffer(ctx, pack)
s.NoError(err)
taskV2, ok := task.(*SyncTaskV2)
s.Require().True(ok)
s.Equal(s.collectionID, taskV2.collectionID)
s.Equal(s.partitionID, taskV2.partitionID)
s.Equal(s.channelName, taskV2.channelName)
s.Equal(&msgpb.MsgPosition{
Timestamp: 1000,
ChannelName: s.channelName,
}, taskV2.checkpoint)
s.EqualValues(50, taskV2.tsFrom)
s.EqualValues(100, taskV2.tsTo)
s.NotNil(taskV2.deleteReader)
})
}
func (s *StorageV2SerializerSuite) TestBadSchema() {
mockCache := metacache.NewMockMetaCache(s.T())
mockCache.EXPECT().Collection().Return(s.collectionID).Once()
mockCache.EXPECT().Schema().Return(&schemapb.CollectionSchema{}).Once()
_, err := NewStorageV2Serializer(s.storageCache, s.mockAllocator, mockCache, s.mockMetaWriter)
s.Error(err)
}
func TestStorageV2Serializer(t *testing.T) {
suite.Run(t, new(StorageV2SerializerSuite))
}

View File

@ -99,7 +99,6 @@ func (mgr *syncManager) SyncData(ctx context.Context, task Task, callbacks ...fu
switch t := task.(type) {
case *SyncTask:
t.WithChunkManager(mgr.chunkManager)
case *SyncTaskV2:
}
return mgr.safeSubmitTask(ctx, task, callbacks...)

View File

@ -1,235 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package syncmgr
import (
"context"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/retry"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
type SyncTaskV2 struct {
*SyncTask
arrowSchema *arrow.Schema
reader array.RecordReader
statsBlob *storage.Blob
deleteReader array.RecordReader
storageVersion int64
space *milvus_storage.Space
failureCallback func(err error)
}
func (t *SyncTaskV2) getLogger() *log.MLogger {
return log.Ctx(context.Background()).With(
zap.Int64("collectionID", t.collectionID),
zap.Int64("partitionID", t.partitionID),
zap.Int64("segmentID", t.segmentID),
zap.String("channel", t.channelName),
)
}
func (t *SyncTaskV2) handleError(err error) {
if t.failureCallback != nil {
t.failureCallback(err)
}
}
func (t *SyncTaskV2) Run(ctx context.Context) error {
log := t.getLogger()
var err error
_, ok := t.metacache.GetSegmentByID(t.segmentID)
if !ok {
log.Warn("failed to sync data, segment not found in metacache")
t.handleError(err)
return merr.WrapErrSegmentNotFound(t.segmentID)
}
if err = t.writeSpace(); err != nil {
t.handleError(err)
return err
}
if err = t.writeMeta(); err != nil {
t.handleError(err)
return err
}
actions := []metacache.SegmentAction{metacache.FinishSyncing(t.batchSize)}
switch {
case t.isDrop:
actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Dropped))
case t.isFlush:
actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Flushed))
}
t.metacache.UpdateSegments(metacache.MergeSegmentAction(actions...), metacache.WithSegmentIDs(t.segmentID))
return nil
}
func (t *SyncTaskV2) writeSpace() error {
defer func() {
if t.reader != nil {
t.reader.Release()
}
if t.deleteReader != nil {
t.deleteReader.Release()
}
}()
txn := t.space.NewTransaction()
if t.reader != nil {
txn.Write(t.reader, &options.DefaultWriteOptions)
}
if t.deleteReader != nil {
txn.Delete(t.deleteReader)
}
if t.statsBlob != nil {
txn.WriteBlob(t.statsBlob.Value, t.statsBlob.Key, false)
}
return txn.Commit()
}
func (t *SyncTaskV2) writeMeta() error {
t.storageVersion = t.space.GetCurrentVersion()
return t.metaWriter.UpdateSyncV2(t)
}
func NewSyncTaskV2() *SyncTaskV2 {
return &SyncTaskV2{
SyncTask: NewSyncTask(),
}
}
func (t *SyncTaskV2) WithChunkManager(cm storage.ChunkManager) *SyncTaskV2 {
t.chunkManager = cm
return t
}
func (t *SyncTaskV2) WithAllocator(allocator allocator.Interface) *SyncTaskV2 {
t.allocator = allocator
return t
}
func (t *SyncTaskV2) WithStartPosition(start *msgpb.MsgPosition) *SyncTaskV2 {
t.startPosition = start
return t
}
func (t *SyncTaskV2) WithCheckpoint(cp *msgpb.MsgPosition) *SyncTaskV2 {
t.checkpoint = cp
return t
}
func (t *SyncTaskV2) WithCollectionID(collID int64) *SyncTaskV2 {
t.collectionID = collID
return t
}
func (t *SyncTaskV2) WithPartitionID(partID int64) *SyncTaskV2 {
t.partitionID = partID
return t
}
func (t *SyncTaskV2) WithSegmentID(segID int64) *SyncTaskV2 {
t.segmentID = segID
return t
}
func (t *SyncTaskV2) WithChannelName(chanName string) *SyncTaskV2 {
t.channelName = chanName
return t
}
func (t *SyncTaskV2) WithSchema(schema *schemapb.CollectionSchema) *SyncTaskV2 {
t.schema = schema
return t
}
func (t *SyncTaskV2) WithTimeRange(from, to typeutil.Timestamp) *SyncTaskV2 {
t.tsFrom, t.tsTo = from, to
return t
}
func (t *SyncTaskV2) WithFlush() *SyncTaskV2 {
t.isFlush = true
return t
}
func (t *SyncTaskV2) WithDrop() *SyncTaskV2 {
t.isDrop = true
return t
}
func (t *SyncTaskV2) WithMetaCache(metacache metacache.MetaCache) *SyncTaskV2 {
t.metacache = metacache
return t
}
func (t *SyncTaskV2) WithMetaWriter(metaWriter MetaWriter) *SyncTaskV2 {
t.metaWriter = metaWriter
return t
}
func (t *SyncTaskV2) WithWriteRetryOptions(opts ...retry.Option) *SyncTaskV2 {
t.writeRetryOpts = opts
return t
}
func (t *SyncTaskV2) WithFailureCallback(callback func(error)) *SyncTaskV2 {
t.failureCallback = callback
return t
}
func (t *SyncTaskV2) WithBatchSize(batchSize int64) *SyncTaskV2 {
t.batchSize = batchSize
return t
}
func (t *SyncTaskV2) WithSpace(space *milvus_storage.Space) *SyncTaskV2 {
t.space = space
return t
}
func (t *SyncTaskV2) WithArrowSchema(arrowSchema *arrow.Schema) *SyncTaskV2 {
t.arrowSchema = arrowSchema
return t
}
func (t *SyncTaskV2) WithLevel(level datapb.SegmentLevel) *SyncTaskV2 {
t.level = level
return t
}
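
With sync_task_v2.go gone, the storage v1 serializer is the only remaining encode path. A minimal sketch of that path, assuming it sits inside the milvus module and that the placeholder inputs (ctx, pack, idAllocator, metaCache, metaWriter) are wired the same way the serializer tests elsewhere in this diff wire them:

package flushexample

import (
	"context"

	"github.com/milvus-io/milvus/internal/allocator"
	"github.com/milvus-io/milvus/internal/flushcommon/metacache"
	"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
)

// encodeSyncPack shows the surviving flow: build the v1 serializer, then encode the pack.
// The returned task is the plain storage v1 sync task; there is no SyncTaskV2 case left to handle.
func encodeSyncPack(ctx context.Context, pack *syncmgr.SyncPack,
	idAllocator allocator.Interface, metaCache metacache.MetaCache, metaWriter syncmgr.MetaWriter,
) (syncmgr.Task, error) {
	serializer, err := syncmgr.NewStorageSerializer(idAllocator, metaCache, metaWriter)
	if err != nil {
		return nil, err
	}
	return serializer.EncodeBuffer(ctx, pack)
}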

View File

@ -1,403 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package syncmgr
import (
"context"
"fmt"
"math/rand"
"testing"
"time"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/samber/lo"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus-storage/go/storage/schema"
"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/datanode/broker"
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
)
type SyncTaskSuiteV2 struct {
suite.Suite
collectionID int64
partitionID int64
segmentID int64
channelName string
metacache *metacache.MockMetaCache
allocator *allocator.MockGIDAllocator
schema *schemapb.CollectionSchema
arrowSchema *arrow.Schema
broker *broker.MockBroker
space *milvus_storage.Space
}
func (s *SyncTaskSuiteV2) SetupSuite() {
paramtable.Get().Init(paramtable.NewBaseTable())
s.collectionID = 100
s.partitionID = 101
s.segmentID = 1001
s.channelName = "by-dev-rootcoord-dml_0_100v0"
s.schema = &schemapb.CollectionSchema{
Name: "sync_task_test_col",
Fields: []*schemapb.FieldSchema{
{FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64},
{FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64},
{
FieldID: 100,
Name: "pk",
DataType: schemapb.DataType_Int64,
IsPrimaryKey: true,
},
{
FieldID: 101,
Name: "vector",
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "128"},
},
},
},
}
arrowSchema, err := typeutil.ConvertToArrowSchema(s.schema.Fields)
s.NoError(err)
s.arrowSchema = arrowSchema
}
func (s *SyncTaskSuiteV2) SetupTest() {
s.allocator = allocator.NewMockGIDAllocator()
s.allocator.AllocF = func(count uint32) (int64, int64, error) {
return time.Now().Unix(), int64(count), nil
}
s.allocator.AllocOneF = func() (allocator.UniqueID, error) {
return time.Now().Unix(), nil
}
s.broker = broker.NewMockBroker(s.T())
s.metacache = metacache.NewMockMetaCache(s.T())
tmpDir := s.T().TempDir()
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
SetSchema(schema.NewSchema(s.arrowSchema, &schema.SchemaOptions{
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
})).Build())
s.Require().NoError(err)
s.space = space
}
func (s *SyncTaskSuiteV2) getEmptyInsertBuffer() *storage.InsertData {
buf, err := storage.NewInsertData(s.schema)
s.Require().NoError(err)
return buf
}
func (s *SyncTaskSuiteV2) getInsertBuffer() *storage.InsertData {
buf := s.getEmptyInsertBuffer()
// generate data
for i := 0; i < 10; i++ {
data := make(map[storage.FieldID]any)
data[common.RowIDField] = int64(i + 1)
data[common.TimeStampField] = int64(i + 1)
data[100] = int64(i + 1)
vector := lo.RepeatBy(128, func(_ int) float32 {
return rand.Float32()
})
data[101] = vector
err := buf.Append(data)
s.Require().NoError(err)
}
return buf
}
func (s *SyncTaskSuiteV2) getDeleteBuffer() *storage.DeleteData {
buf := &storage.DeleteData{}
for i := 0; i < 10; i++ {
pk := storage.NewInt64PrimaryKey(int64(i + 1))
ts := tsoutil.ComposeTSByTime(time.Now(), 0)
buf.Append(pk, ts)
}
return buf
}
func (s *SyncTaskSuiteV2) getDeleteBufferZeroTs() *storage.DeleteData {
buf := &storage.DeleteData{}
for i := 0; i < 10; i++ {
pk := storage.NewInt64PrimaryKey(int64(i + 1))
buf.Append(pk, 0)
}
return buf
}
func (s *SyncTaskSuiteV2) getSuiteSyncTask() *SyncTaskV2 {
pack := &SyncPack{}
pack.WithCollectionID(s.collectionID).
WithPartitionID(s.partitionID).
WithSegmentID(s.segmentID).
WithChannelName(s.channelName).
WithCheckpoint(&msgpb.MsgPosition{
Timestamp: 1000,
ChannelName: s.channelName,
})
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
pack.WithDeleteData(s.getDeleteBuffer())
storageCache, err := metacache.NewStorageV2Cache(s.schema)
s.Require().NoError(err)
s.metacache.EXPECT().Collection().Return(s.collectionID)
s.metacache.EXPECT().Schema().Return(s.schema)
serializer, err := NewStorageV2Serializer(storageCache, s.allocator, s.metacache, nil)
s.Require().NoError(err)
task, err := serializer.EncodeBuffer(context.Background(), pack)
s.Require().NoError(err)
taskV2, ok := task.(*SyncTaskV2)
s.Require().True(ok)
taskV2.WithMetaCache(s.metacache)
return taskV2
}
func (s *SyncTaskSuiteV2) TestRunNormal() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil)
bfs := metacache.NewBloomFilterSet()
fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{
FieldID: 101,
Name: "ID",
IsPrimaryKey: true,
DataType: schemapb.DataType_Int64,
}, 16)
s.Require().NoError(err)
ids := []int64{1, 2, 3, 4, 5, 6, 7}
for _, id := range ids {
err = fd.AppendRow(id)
s.Require().NoError(err)
}
bfs.UpdatePKRange(fd)
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
metacache.UpdateNumOfRows(1000)(seg)
s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
s.Run("without_insert_delete", func() {
task := s.getSuiteSyncTask()
task.WithMetaWriter(BrokerMetaWriter(s.broker, 1))
task.WithTimeRange(50, 100)
task.WithCheckpoint(&msgpb.MsgPosition{
ChannelName: s.channelName,
MsgID: []byte{1, 2, 3, 4},
Timestamp: 100,
})
err := task.Run(ctx)
s.NoError(err)
})
s.Run("with_insert_delete_cp", func() {
task := s.getSuiteSyncTask()
task.WithTimeRange(50, 100)
task.WithMetaWriter(BrokerMetaWriter(s.broker, 1))
task.WithCheckpoint(&msgpb.MsgPosition{
ChannelName: s.channelName,
MsgID: []byte{1, 2, 3, 4},
Timestamp: 100,
})
err := task.Run(ctx)
s.NoError(err)
})
}
func (s *SyncTaskSuiteV2) TestBuildRecord() {
fieldSchemas := []*schemapb.FieldSchema{
{FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool},
{FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8},
{FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16},
{FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32},
{FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64},
{FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float},
{FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double},
{FieldID: 8, Name: "field7", DataType: schemapb.DataType_String},
{FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar},
{FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
{FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
{FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32},
{FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON},
{FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
}
schema, err := typeutil.ConvertToArrowSchema(fieldSchemas)
s.NoError(err)
b := array.NewRecordBuilder(memory.NewGoAllocator(), schema)
defer b.Release()
data := &storage.InsertData{
Data: map[int64]storage.FieldData{
1: &storage.BoolFieldData{Data: []bool{true, false}},
2: &storage.Int8FieldData{Data: []int8{3, 4}},
3: &storage.Int16FieldData{Data: []int16{3, 4}},
4: &storage.Int32FieldData{Data: []int32{3, 4}},
5: &storage.Int64FieldData{Data: []int64{3, 4}},
6: &storage.FloatFieldData{Data: []float32{3, 4}},
7: &storage.DoubleFieldData{Data: []float64{3, 4}},
8: &storage.StringFieldData{Data: []string{"3", "4"}},
9: &storage.StringFieldData{Data: []string{"3", "4"}},
10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8},
11: &storage.FloatVectorFieldData{
Data: []float32{4, 5, 6, 7, 4, 5, 6, 7},
Dim: 4,
},
12: &storage.ArrayFieldData{
ElementType: schemapb.DataType_Int32,
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}},
},
},
},
},
13: &storage.JSONFieldData{
Data: [][]byte{
[]byte(`{"batch":2}`),
[]byte(`{"key":"world"}`),
},
},
14: &storage.Float16VectorFieldData{
Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
Dim: 4,
},
},
}
err = typeutil.BuildRecord(b, data, fieldSchemas)
s.NoError(err)
s.EqualValues(2, b.NewRecord().NumRows())
}
func (s *SyncTaskSuiteV2) TestBuildRecordNullable() {
fieldSchemas := []*schemapb.FieldSchema{
{FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool},
{FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8},
{FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16},
{FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32},
{FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64},
{FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float},
{FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double},
{FieldID: 8, Name: "field7", DataType: schemapb.DataType_String},
{FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar},
{FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
{FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
{FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32},
{FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON},
{FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
}
schema, err := typeutil.ConvertToArrowSchema(fieldSchemas)
s.NoError(err)
b := array.NewRecordBuilder(memory.NewGoAllocator(), schema)
defer b.Release()
data := &storage.InsertData{
Data: map[int64]storage.FieldData{
1: &storage.BoolFieldData{Data: []bool{true, false}, ValidData: []bool{true, true}},
2: &storage.Int8FieldData{Data: []int8{3, 4}, ValidData: []bool{true, true}},
3: &storage.Int16FieldData{Data: []int16{3, 4}, ValidData: []bool{true, true}},
4: &storage.Int32FieldData{Data: []int32{3, 4}, ValidData: []bool{true, true}},
5: &storage.Int64FieldData{Data: []int64{3, 4}, ValidData: []bool{true, true}},
6: &storage.FloatFieldData{Data: []float32{3, 4}, ValidData: []bool{true, true}},
7: &storage.DoubleFieldData{Data: []float64{3, 4}, ValidData: []bool{true, true}},
8: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
9: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8},
11: &storage.FloatVectorFieldData{
Data: []float32{4, 5, 6, 7, 4, 5, 6, 7},
Dim: 4,
},
12: &storage.ArrayFieldData{
ElementType: schemapb.DataType_Int32,
Data: []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}},
},
},
},
ValidData: []bool{true, true},
},
13: &storage.JSONFieldData{
Data: [][]byte{
[]byte(`{"batch":2}`),
[]byte(`{"key":"world"}`),
},
ValidData: []bool{true, true},
},
14: &storage.Float16VectorFieldData{
Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
Dim: 4,
},
},
}
err = typeutil.BuildRecord(b, data, fieldSchemas)
s.NoError(err)
s.EqualValues(2, b.NewRecord().NumRows())
}
func TestSyncTaskV2(t *testing.T) {
suite.Run(t, new(SyncTaskSuiteV2))
}

View File

@ -19,8 +19,8 @@ type bfWriteBuffer struct {
metacache metacache.MetaCache
}
func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option)
func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
base, err := newWriteBufferBase(channel, metacache, syncMgr, option)
if err != nil {
return nil, err
}
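
The only change here is the dropped *metacache.StorageV2Cache parameter. A sketch of the updated call as it would look from inside package writebuffer (the helper name is hypothetical; metacache and syncmgr imports are assumed):

// newBFBuffer builds a bloom-filter based write buffer with the trimmed signature.
func newBFBuffer(channel string, mc metacache.MetaCache, syncMgr syncmgr.SyncManager) (WriteBuffer, error) {
	// the base buffer is now constructed from channel, metacache and sync manager alone
	return NewBFWriteBuffer(channel, mc, syncMgr, &writeBufferOption{})
}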

View File

@ -13,16 +13,11 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus-storage/go/storage/schema"
"github.com/milvus-io/milvus/internal/datanode/broker"
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/mq/msgstream"
@ -41,7 +36,6 @@ type BFWriteBufferSuite struct {
metacacheInt64 *metacache.MockMetaCache
metacacheVarchar *metacache.MockMetaCache
broker *broker.MockBroker
storageV2Cache *metacache.StorageV2Cache
}
func (s *BFWriteBufferSuite) SetupSuite() {
@ -89,10 +83,6 @@ func (s *BFWriteBufferSuite) SetupSuite() {
},
},
}
storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
s.Require().NoError(err)
s.storageV2Cache = storageCache
}
func (s *BFWriteBufferSuite) composeInsertMsg(segmentID int64, rowCount int, dim int, pkType schemapb.DataType) ([]int64, *msgstream.InsertMsg) {
@ -201,16 +191,11 @@ func (s *BFWriteBufferSuite) SetupTest() {
s.metacacheVarchar.EXPECT().Collection().Return(s.collID).Maybe()
s.broker = broker.NewMockBroker(s.T())
var err error
s.storageV2Cache, err = metacache.NewStorageV2Cache(s.collInt64Schema)
s.Require().NoError(err)
}
func (s *BFWriteBufferSuite) TestBufferData() {
s.Run("normal_run_int64", func() {
storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
s.Require().NoError(err)
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, storageCache, s.syncMgr, &writeBufferOption{})
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{})
s.NoError(err)
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
@ -237,9 +222,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
})
s.Run("normal_run_varchar", func() {
storageCache, err := metacache.NewStorageV2Cache(s.collVarcharSchema)
s.Require().NoError(err)
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, storageCache, s.syncMgr, &writeBufferOption{})
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, s.syncMgr, &writeBufferOption{})
s.NoError(err)
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
@ -261,9 +244,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
})
s.Run("int_pk_type_not_match", func() {
storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
s.Require().NoError(err)
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, storageCache, s.syncMgr, &writeBufferOption{})
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{})
s.NoError(err)
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
@ -281,9 +262,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
})
s.Run("varchar_pk_not_match", func() {
storageCache, err := metacache.NewStorageV2Cache(s.collVarcharSchema)
s.Require().NoError(err)
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, storageCache, s.syncMgr, &writeBufferOption{})
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, s.syncMgr, &writeBufferOption{})
s.NoError(err)
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
@ -305,7 +284,7 @@ func (s *BFWriteBufferSuite) TestAutoSync() {
paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1")
s.Run("normal_auto_sync", func() {
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, nil, s.syncMgr, &writeBufferOption{
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{
syncPolicies: []SyncPolicy{
GetFullBufferPolicy(),
GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)),
@ -340,92 +319,11 @@ func (s *BFWriteBufferSuite) TestAutoSync() {
})
}
func (s *BFWriteBufferSuite) TestBufferDataWithStorageV2() {
params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true")
defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
params.Params.CommonCfg.StorageScheme.SwapTempValue("file")
tmpDir := s.T().TempDir()
arrowSchema, err := typeutil.ConvertToArrowSchema(s.collInt64Schema.Fields)
s.Require().NoError(err)
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
})).Build())
s.Require().NoError(err)
s.storageV2Cache.SetSpace(1000, space)
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.storageV2Cache, s.syncMgr, &writeBufferOption{})
s.NoError(err)
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false)
s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return()
s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64)
delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) }))
err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
s.NoError(err)
}
func (s *BFWriteBufferSuite) TestAutoSyncWithStorageV2() {
params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true")
defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1")
tmpDir := s.T().TempDir()
arrowSchema, err := typeutil.ConvertToArrowSchema(s.collInt64Schema.Fields)
s.Require().NoError(err)
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
})).Build())
s.Require().NoError(err)
s.storageV2Cache.SetSpace(1002, space)
s.Run("normal_auto_sync", func() {
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.storageV2Cache, s.syncMgr, &writeBufferOption{
syncPolicies: []SyncPolicy{
GetFullBufferPolicy(),
GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)),
GetSealedSegmentsPolicy(s.metacacheInt64),
},
})
s.NoError(err)
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
seg1 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1002}, metacache.NewBloomFilterSet())
segCompacted := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg, segCompacted})
s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false).Once()
s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(seg, true).Once()
s.metacacheInt64.EXPECT().GetSegmentByID(int64(1002)).Return(seg1, true)
s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002})
s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return()
s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return()
s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything, mock.Anything).Return(nil)
pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64)
delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) }))
metrics.DataNodeFlowGraphBufferDataSize.Reset()
err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
s.NoError(err)
value, err := metrics.DataNodeFlowGraphBufferDataSize.GetMetricWithLabelValues(fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(s.metacacheInt64.Collection()))
s.NoError(err)
s.MetricsEqual(value, 0)
})
}
func (s *BFWriteBufferSuite) TestCreateFailure() {
metacache := metacache.NewMockMetaCache(s.T())
metacache.EXPECT().Collection().Return(s.collID)
metacache.EXPECT().Schema().Return(&schemapb.CollectionSchema{})
_, err := NewBFWriteBuffer(s.channelName, metacache, s.storageV2Cache, s.syncMgr, &writeBufferOption{})
_, err := NewBFWriteBuffer(s.channelName, metacache, s.syncMgr, &writeBufferOption{})
s.Error(err)
}

View File

@ -33,11 +33,11 @@ type l0WriteBuffer struct {
idAllocator allocator.Interface
}
func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
if option.idAllocator == nil {
return nil, merr.WrapErrServiceInternal("id allocator is nil when creating l0 write buffer")
}
base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option)
base, err := newWriteBufferBase(channel, metacache, syncMgr, option)
if err != nil {
return nil, err
}

View File

@ -28,13 +28,12 @@ import (
type L0WriteBufferSuite struct {
testutils.PromMetricsSuite
channelName string
collID int64
collSchema *schemapb.CollectionSchema
syncMgr *syncmgr.MockSyncManager
metacache *metacache.MockMetaCache
allocator *allocator.MockGIDAllocator
storageCache *metacache.StorageV2Cache
channelName string
collID int64
collSchema *schemapb.CollectionSchema
syncMgr *syncmgr.MockSyncManager
metacache *metacache.MockMetaCache
allocator *allocator.MockGIDAllocator
}
func (s *L0WriteBufferSuite) SetupSuite() {
@ -61,10 +60,6 @@ func (s *L0WriteBufferSuite) SetupSuite() {
},
}
s.channelName = "by-dev-rootcoord-dml_0v0"
storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
s.Require().NoError(err)
s.storageCache = storageCache
}
func (s *L0WriteBufferSuite) composeInsertMsg(segmentID int64, rowCount int, dim int, pkType schemapb.DataType) ([]int64, *msgstream.InsertMsg) {
@ -173,7 +168,7 @@ func (s *L0WriteBufferSuite) SetupTest() {
func (s *L0WriteBufferSuite) TestBufferData() {
s.Run("normal_run", func() {
wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
idAllocator: s.allocator,
})
s.NoError(err)
@ -202,7 +197,7 @@ func (s *L0WriteBufferSuite) TestBufferData() {
})
s.Run("pk_type_not_match", func() {
wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
idAllocator: s.allocator,
})
s.NoError(err)
@ -225,7 +220,7 @@ func (s *L0WriteBufferSuite) TestCreateFailure() {
metacache := metacache.NewMockMetaCache(s.T())
metacache.EXPECT().Collection().Return(s.collID)
metacache.EXPECT().Schema().Return(&schemapb.CollectionSchema{})
_, err := NewL0WriteBuffer(s.channelName, metacache, s.storageCache, s.syncMgr, &writeBufferOption{
_, err := NewL0WriteBuffer(s.channelName, metacache, s.syncMgr, &writeBufferOption{
idAllocator: s.allocator,
})
s.Error(err)

View File

@ -23,7 +23,7 @@ import (
//go:generate mockery --name=BufferManager --structname=MockBufferManager --output=./ --filename=mock_manager.go --with-expecter --inpackage
type BufferManager interface {
// Register adds a WriteBuffer with provided schema & options.
Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error
Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error
// SealSegments notifies writeBuffer corresponding to provided channel to seal segments.
// which will cause segment start flush procedure.
SealSegments(ctx context.Context, channel string, segmentIDs []int64) error
@ -140,7 +140,7 @@ func (m *bufferManager) Stop() {
}
// Register a new WriteBuffer for channel.
func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error {
func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error {
m.mut.Lock()
defer m.mut.Unlock()
@ -148,7 +148,7 @@ func (m *bufferManager) Register(channel string, metacache metacache.MetaCache,
if ok {
return merr.WrapErrChannelReduplicate(channel)
}
buf, err := NewWriteBuffer(channel, metacache, storageV2Cache, m.syncMgr, opts...)
buf, err := NewWriteBuffer(channel, metacache, m.syncMgr, opts...)
if err != nil {
return err
}
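
A short usage sketch of the two-argument Register, from a hypothetical caller inside package writebuffer; WithIDAllocator mirrors how the manager tests below register channels, and duplicate registration still surfaces merr.ErrChannelReduplicate:

// registerChannel wires a vchannel into the buffer manager without any storage v2 cache.
func registerChannel(mgr BufferManager, channel string, mc metacache.MetaCache, alloc allocator.Interface) error {
	if err := mgr.Register(channel, mc, WithIDAllocator(alloc)); err != nil {
		// a second Register on the same channel fails with merr.ErrChannelReduplicate
		return err
	}
	return nil
}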

View File

@ -73,13 +73,10 @@ func (s *ManagerSuite) SetupTest() {
func (s *ManagerSuite) TestRegister() {
manager := s.manager
storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
s.Require().NoError(err)
err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
s.NoError(err)
err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
err = manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
s.Error(err)
s.ErrorIs(err, merr.ErrChannelReduplicate)
}
@ -183,9 +180,7 @@ func (s *ManagerSuite) TestRemoveChannel() {
})
s.Run("remove_channel", func() {
storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
s.Require().NoError(err)
err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
s.Require().NoError(err)
s.NotPanics(func() {

View File

@ -1,4 +1,4 @@
// Code generated by mockery v2.30.1. DO NOT EDIT.
// Code generated by mockery v2.32.4. DO NOT EDIT.
package writebuffer
@ -278,20 +278,20 @@ func (_c *MockBufferManager_NotifyCheckpointUpdated_Call) RunAndReturn(run func(
return _c
}
// Register provides a mock function with given fields: channel, _a1, storageV2Cache, opts
func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error {
// Register provides a mock function with given fields: channel, _a1, opts
func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption) error {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
}
var _ca []interface{}
_ca = append(_ca, channel, _a1, storageV2Cache)
_ca = append(_ca, channel, _a1)
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
var r0 error
if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error); ok {
r0 = rf(channel, _a1, storageV2Cache, opts...)
if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, ...WriteBufferOption) error); ok {
r0 = rf(channel, _a1, opts...)
} else {
r0 = ret.Error(0)
}
@ -307,22 +307,21 @@ type MockBufferManager_Register_Call struct {
// Register is a helper method to define mock.On call
// - channel string
// - _a1 metacache.MetaCache
// - storageV2Cache *metacache.StorageV2Cache
// - opts ...WriteBufferOption
func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, storageV2Cache interface{}, opts ...interface{}) *MockBufferManager_Register_Call {
func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, opts ...interface{}) *MockBufferManager_Register_Call {
return &MockBufferManager_Register_Call{Call: _e.mock.On("Register",
append([]interface{}{channel, _a1, storageV2Cache}, opts...)...)}
append([]interface{}{channel, _a1}, opts...)...)}
}
func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call {
func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]WriteBufferOption, len(args)-3)
for i, a := range args[3:] {
variadicArgs := make([]WriteBufferOption, len(args)-2)
for i, a := range args[2:] {
if a != nil {
variadicArgs[i] = a.(WriteBufferOption)
}
}
run(args[0].(string), args[1].(metacache.MetaCache), args[2].(*metacache.StorageV2Cache), variadicArgs...)
run(args[0].(string), args[1].(metacache.MetaCache), variadicArgs...)
})
return _c
}
@ -332,7 +331,7 @@ func (_c *MockBufferManager_Register_Call) Return(_a0 error) *MockBufferManager_
return _c
}
func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error) *MockBufferManager_Register_Call {
func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, ...WriteBufferOption) error) *MockBufferManager_Register_Call {
_c.Call.Return(run)
return _c
}
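
For tests against the regenerated mock, a sketch of stubbing the new signature with the expecter API, assuming the mockery-generated NewMockBufferManager constructor and the "testing" and "github.com/stretchr/testify/mock" imports. One caveat: each expected variadic WriteBufferOption needs its own matcher, so the single trailing mock.Anything below assumes the code under test passes exactly one option:

// newStubbedBufferManager returns a MockBufferManager whose Register call succeeds.
func newStubbedBufferManager(t *testing.T) *MockBufferManager {
	bm := NewMockBufferManager(t)
	// channel matcher, metacache matcher, then one matcher per expected option
	bm.EXPECT().Register(mock.Anything, mock.Anything, mock.Anything).Return(nil)
	return bm
}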

View File

@ -16,7 +16,6 @@ import (
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -100,7 +99,7 @@ func (c *checkpointCandidates) GetEarliestWithDefault(def *checkpointCandidate)
return result
}
func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) {
func NewWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) {
option := defaultWBOption(metacache)
for _, opt := range opts {
opt(option)
@ -108,9 +107,9 @@ func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cach
switch option.deletePolicy {
case DeletePolicyBFPkOracle:
return NewBFWriteBuffer(channel, metacache, storageV2Cache, syncMgr, option)
return NewBFWriteBuffer(channel, metacache, syncMgr, option)
case DeletePolicyL0Delta:
return NewL0WriteBuffer(channel, metacache, storageV2Cache, syncMgr, option)
return NewL0WriteBuffer(channel, metacache, syncMgr, option)
default:
return nil, merr.WrapErrParameterInvalid("valid delete policy config", option.deletePolicy)
}
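
Taken together with the constructor changes above, callers go through NewWriteBuffer and let the delete policy pick the implementation. A sketch under the assumption that the option helpers behave as the suite tests later in this diff use them:

// buildWriteBuffer selects l0WriteBuffer via DeletePolicyL0Delta; the L0 path still
// requires an ID allocator, while DeletePolicyBFPkOracle would return a bfWriteBuffer.
func buildWriteBuffer(channel string, mc metacache.MetaCache, syncMgr syncmgr.SyncManager, alloc allocator.Interface) (WriteBuffer, error) {
	return NewWriteBuffer(channel, mc, syncMgr,
		WithDeletePolicy(DeletePolicyL0Delta),
		WithIDAllocator(alloc),
	)
}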
@ -140,34 +139,23 @@ type writeBufferBase struct {
checkpoint *msgpb.MsgPosition
flushTimestamp *atomic.Uint64
storagev2Cache *metacache.StorageV2Cache
// pre build logger
logger *log.MLogger
cpRatedLogger *log.MLogger
}
func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) {
func newWriteBufferBase(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) {
flushTs := atomic.NewUint64(nonFlushTS)
flushTsPolicy := GetFlushTsPolicy(flushTs, metacache)
option.syncPolicies = append(option.syncPolicies, flushTsPolicy)
var serializer syncmgr.Serializer
var err error
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
serializer, err = syncmgr.NewStorageV2Serializer(
storageV2Cache,
option.idAllocator,
metacache,
option.metaWriter,
)
} else {
serializer, err = syncmgr.NewStorageSerializer(
option.idAllocator,
metacache,
option.metaWriter,
)
}
serializer, err = syncmgr.NewStorageSerializer(
option.idAllocator,
metacache,
option.metaWriter,
)
if err != nil {
return nil, err
}
@ -201,7 +189,6 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2
syncCheckpoint: newCheckpointCandiates(),
syncPolicies: option.syncPolicies,
flushTimestamp: flushTs,
storagev2Cache: storageV2Cache,
}
wb.logger = log.With(zap.Int64("collectionID", wb.collectionID),
@ -660,8 +647,6 @@ func (wb *writeBufferBase) Close(ctx context.Context, drop bool) {
switch t := syncTask.(type) {
case *syncmgr.SyncTask:
t.WithDrop()
case *syncmgr.SyncTaskV2:
t.WithDrop()
}
f := wb.syncMgr.SyncData(ctx, syncTask, func(err error) error {

View File

@ -22,13 +22,12 @@ import (
type WriteBufferSuite struct {
suite.Suite
collID int64
channelName string
collSchema *schemapb.CollectionSchema
wb *writeBufferBase
syncMgr *syncmgr.MockSyncManager
metacache *metacache.MockMetaCache
storageCache *metacache.StorageV2Cache
collID int64
channelName string
collSchema *schemapb.CollectionSchema
wb *writeBufferBase
syncMgr *syncmgr.MockSyncManager
metacache *metacache.MockMetaCache
}
func (s *WriteBufferSuite) SetupSuite() {
@ -47,14 +46,12 @@ func (s *WriteBufferSuite) SetupSuite() {
}
func (s *WriteBufferSuite) SetupTest() {
storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
s.Require().NoError(err)
s.storageCache = storageCache
s.syncMgr = syncmgr.NewMockSyncManager(s.T())
s.metacache = metacache.NewMockMetaCache(s.T())
s.metacache.EXPECT().Schema().Return(s.collSchema).Maybe()
s.metacache.EXPECT().Collection().Return(s.collID).Maybe()
s.wb, err = newWriteBufferBase(s.channelName, s.metacache, storageCache, s.syncMgr, &writeBufferOption{
var err error
s.wb, err = newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
return metacache.NewBloomFilterSet()
},
@ -66,7 +63,7 @@ func (s *WriteBufferSuite) TestDefaultOption() {
s.Run("default BFPkOracle", func() {
paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key, "false")
defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key)
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr)
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr)
s.NoError(err)
_, ok := wb.(*bfWriteBuffer)
s.True(ok)
@ -75,7 +72,7 @@ func (s *WriteBufferSuite) TestDefaultOption() {
s.Run("default L0Delta policy", func() {
paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key, "true")
defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key)
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithIDAllocator(allocator.NewMockGIDAllocator()))
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithIDAllocator(allocator.NewMockGIDAllocator()))
s.NoError(err)
_, ok := wb.(*l0WriteBuffer)
s.True(ok)
@ -83,18 +80,18 @@ func (s *WriteBufferSuite) TestDefaultOption() {
}
func (s *WriteBufferSuite) TestWriteBufferType() {
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
s.NoError(err)
_, ok := wb.(*bfWriteBuffer)
s.True(ok)
wb, err = NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator()))
wb, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator()))
s.NoError(err)
_, ok = wb.(*l0WriteBuffer)
s.True(ok)
_, err = NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(""))
_, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(""))
s.Error(err)
}
@ -114,7 +111,7 @@ func (s *WriteBufferSuite) TestFlushSegments() {
s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return()
s.metacache.EXPECT().GetSegmentByID(mock.Anything, mock.Anything, mock.Anything).Return(nil, true)
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
s.NoError(err)
err = wb.SealSegments(context.Background(), []int64{segmentID})
@ -265,7 +262,7 @@ func (s *WriteBufferSuite) TestGetCheckpoint() {
}
func (s *WriteBufferSuite) TestSyncSegmentsError() {
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
return metacache.NewBloomFilterSet()
},
@ -298,7 +295,7 @@ func (s *WriteBufferSuite) TestSyncSegmentsError() {
}
func (s *WriteBufferSuite) TestEvictBuffer() {
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
return metacache.NewBloomFilterSet()
},
@ -367,7 +364,7 @@ func (s *WriteBufferSuite) TestEvictBuffer() {
}
func (s *WriteBufferSuite) TestDropPartitions() {
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
return metacache.NewBloomFilterSet()
},

View File

@ -97,12 +97,7 @@ func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest
metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
return merr.Status(err), nil
}
var task task
if Params.CommonCfg.EnableStorageV2.GetAsBool() {
task = newIndexBuildTaskV2(taskCtx, taskCancel, req, i)
} else {
task = newIndexBuildTask(taskCtx, taskCancel, req, cm, i)
}
task := newIndexBuildTask(taskCtx, taskCancel, req, cm, i)
ret := merr.Success()
if err := i.sched.TaskQueue.Enqueue(task); err != nil {
log.Warn("IndexNode failed to schedule",
@ -327,12 +322,7 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req
metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
return merr.Status(err), nil
}
var task task
if Params.CommonCfg.EnableStorageV2.GetAsBool() {
task = newIndexBuildTaskV2(taskCtx, taskCancel, indexRequest, i)
} else {
task = newIndexBuildTask(taskCtx, taskCancel, indexRequest, cm, i)
}
task := newIndexBuildTask(taskCtx, taskCancel, indexRequest, cm, i)
ret := merr.Success()
if err := i.sched.TaskQueue.Enqueue(task); err != nil {
log.Warn("IndexNode failed to schedule",

View File

@ -43,187 +43,6 @@ import (
"github.com/milvus-io/milvus/pkg/util/timerecord"
)
type indexBuildTaskV2 struct {
*indexBuildTask
}
func newIndexBuildTaskV2(ctx context.Context,
cancel context.CancelFunc,
req *indexpb.CreateJobRequest,
node *IndexNode,
) *indexBuildTaskV2 {
t := &indexBuildTaskV2{
indexBuildTask: &indexBuildTask{
ident: fmt.Sprintf("%s/%d", req.GetClusterID(), req.GetBuildID()),
cancel: cancel,
ctx: ctx,
req: req,
tr: timerecord.NewTimeRecorder(fmt.Sprintf("IndexBuildID: %d, ClusterID: %s", req.GetBuildID(), req.GetClusterID())),
node: node,
},
}
t.parseParams()
return t
}
func (it *indexBuildTaskV2) parseParams() {
// fill field for requests before v2.5.0
if it.req.GetField() == nil || it.req.GetField().GetDataType() == schemapb.DataType_None {
it.req.Field = &schemapb.FieldSchema{
FieldID: it.req.GetFieldID(),
Name: it.req.GetFieldName(),
DataType: it.req.GetFieldType(),
}
}
}
func (it *indexBuildTaskV2) Execute(ctx context.Context) error {
log := log.Ctx(ctx).With(zap.String("clusterID", it.req.GetClusterID()), zap.Int64("buildID", it.req.GetBuildID()),
zap.Int64("collection", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()),
zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion()))
indexType := it.newIndexParams[common.IndexTypeKey]
if indexType == indexparamcheck.IndexDISKANN {
// check index node support disk index
if !Params.IndexNodeCfg.EnableDisk.GetAsBool() {
log.Warn("IndexNode don't support build disk index",
zap.String("index type", it.newIndexParams[common.IndexTypeKey]),
zap.Bool("enable disk", Params.IndexNodeCfg.EnableDisk.GetAsBool()))
return merr.WrapErrIndexNotSupported("disk index")
}
// check load size and size of field data
localUsedSize, err := indexcgowrapper.GetLocalUsedSize(paramtable.Get().LocalStorageCfg.Path.GetValue())
if err != nil {
log.Warn("IndexNode get local used size failed")
return err
}
fieldDataSize, err := estimateFieldDataSize(it.req.GetDim(), it.req.GetNumRows(), it.req.GetField().GetDataType())
if err != nil {
log.Warn("IndexNode get local used size failed")
return err
}
usedLocalSizeWhenBuild := int64(float64(fieldDataSize)*diskUsageRatio) + localUsedSize
maxUsedLocalSize := int64(Params.IndexNodeCfg.DiskCapacityLimit.GetAsFloat() * Params.IndexNodeCfg.MaxDiskUsagePercentage.GetAsFloat())
if usedLocalSizeWhenBuild > maxUsedLocalSize {
log.Warn("IndexNode don't has enough disk size to build disk ann index",
zap.Int64("usedLocalSizeWhenBuild", usedLocalSizeWhenBuild),
zap.Int64("maxUsedLocalSize", maxUsedLocalSize))
return merr.WrapErrServiceDiskLimitExceeded(float32(usedLocalSizeWhenBuild), float32(maxUsedLocalSize))
}
err = indexparams.SetDiskIndexBuildParams(it.newIndexParams, int64(fieldDataSize))
if err != nil {
log.Warn("failed to fill disk index params", zap.Error(err))
return err
}
}
storageConfig := &indexcgopb.StorageConfig{
Address: it.req.GetStorageConfig().GetAddress(),
AccessKeyID: it.req.GetStorageConfig().GetAccessKeyID(),
SecretAccessKey: it.req.GetStorageConfig().GetSecretAccessKey(),
UseSSL: it.req.GetStorageConfig().GetUseSSL(),
BucketName: it.req.GetStorageConfig().GetBucketName(),
RootPath: it.req.GetStorageConfig().GetRootPath(),
UseIAM: it.req.GetStorageConfig().GetUseIAM(),
IAMEndpoint: it.req.GetStorageConfig().GetIAMEndpoint(),
StorageType: it.req.GetStorageConfig().GetStorageType(),
UseVirtualHost: it.req.GetStorageConfig().GetUseVirtualHost(),
Region: it.req.GetStorageConfig().GetRegion(),
CloudProvider: it.req.GetStorageConfig().GetCloudProvider(),
RequestTimeoutMs: it.req.GetStorageConfig().GetRequestTimeoutMs(),
SslCACert: it.req.GetStorageConfig().GetSslCACert(),
}
optFields := make([]*indexcgopb.OptionalFieldInfo, 0, len(it.req.GetOptionalScalarFields()))
for _, optField := range it.req.GetOptionalScalarFields() {
optFields = append(optFields, &indexcgopb.OptionalFieldInfo{
FieldID: optField.GetFieldID(),
FieldName: optField.GetFieldName(),
FieldType: optField.GetFieldType(),
DataPaths: optField.GetDataPaths(),
})
}
buildIndexParams := &indexcgopb.BuildIndexInfo{
ClusterID: it.req.GetClusterID(),
BuildID: it.req.GetBuildID(),
CollectionID: it.req.GetCollectionID(),
PartitionID: it.req.GetPartitionID(),
SegmentID: it.req.GetSegmentID(),
IndexVersion: it.req.GetIndexVersion(),
CurrentIndexVersion: it.req.GetCurrentIndexVersion(),
NumRows: it.req.GetNumRows(),
Dim: it.req.GetDim(),
IndexFilePrefix: it.req.GetIndexFilePrefix(),
InsertFiles: it.req.GetDataPaths(),
FieldSchema: it.req.GetField(),
StorageConfig: storageConfig,
IndexParams: mapToKVPairs(it.newIndexParams),
TypeParams: mapToKVPairs(it.newTypeParams),
StorePath: it.req.GetStorePath(),
StoreVersion: it.req.GetStoreVersion(),
IndexStorePath: it.req.GetIndexStorePath(),
OptFields: optFields,
PartitionKeyIsolation: it.req.GetPartitionKeyIsolation(),
}
var err error
it.index, err = indexcgowrapper.CreateIndexV2(ctx, buildIndexParams)
if err != nil {
if it.index != nil && it.index.CleanLocalData() != nil {
log.Warn("failed to clean cached data on disk after build index failed")
}
log.Warn("failed to build index", zap.Error(err))
return err
}
buildIndexLatency := it.tr.RecordSpan()
metrics.IndexNodeKnowhereBuildIndexLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(buildIndexLatency.Milliseconds()))
log.Info("Successfully build index")
return nil
}
func (it *indexBuildTaskV2) PostExecute(ctx context.Context) error {
log := log.Ctx(ctx).With(zap.String("clusterID", it.req.GetClusterID()), zap.Int64("buildID", it.req.GetBuildID()),
zap.Int64("collection", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()),
zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion()))
gcIndex := func() {
if err := it.index.Delete(); err != nil {
log.Warn("IndexNode indexBuildTask Execute CIndexDelete failed", zap.Error(err))
}
}
version, err := it.index.UpLoadV2()
if err != nil {
log.Warn("failed to upload index", zap.Error(err))
gcIndex()
return err
}
encodeIndexFileDur := it.tr.Record("index serialize and upload done")
metrics.IndexNodeEncodeIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(encodeIndexFileDur.Seconds())
// early release index for gc, and we can ensure that Delete is idempotent.
gcIndex()
// use serialized size before encoding
var serializedSize uint64
saveFileKeys := make([]string, 0)
it.node.storeIndexFilesAndStatisticV2(it.req.GetClusterID(), it.req.GetBuildID(), saveFileKeys, serializedSize, it.req.GetCurrentIndexVersion(), version)
log.Debug("save index files done", zap.Strings("IndexFiles", saveFileKeys))
saveIndexFileDur := it.tr.RecordSpan()
metrics.IndexNodeSaveIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(saveIndexFileDur.Seconds())
it.tr.Elapse("index building all done")
log.Info("Successfully save index files")
return nil
}
// IndexBuildTask is used to record the information of the index tasks.
type indexBuildTask struct {
ident string

View File

@ -20,21 +20,14 @@ import (
"context"
"testing"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus-storage/go/storage/schema"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/dependency"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/metautil"
"github.com/milvus-io/milvus/pkg/util/metric"
@ -139,105 +132,6 @@ func TestIndexBuildTask(t *testing.T) {
suite.Run(t, new(IndexBuildTaskSuite))
}
type IndexBuildTaskV2Suite struct {
suite.Suite
schema *schemapb.CollectionSchema
arrowSchema *arrow.Schema
space *milvus_storage.Space
}
func (suite *IndexBuildTaskV2Suite) SetupSuite() {
paramtable.Init()
}
func (suite *IndexBuildTaskV2Suite) SetupTest() {
suite.schema = &schemapb.CollectionSchema{
Name: "test",
Description: "test",
AutoID: false,
Fields: []*schemapb.FieldSchema{
{FieldID: 1, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
{FieldID: 2, Name: "ts", DataType: schemapb.DataType_Int64},
{FieldID: 3, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}},
},
}
var err error
suite.arrowSchema, err = typeutil.ConvertToArrowSchema(suite.schema.Fields)
suite.NoError(err)
tmpDir := suite.T().TempDir()
opt := options.NewSpaceOptionBuilder().
SetSchema(schema.NewSchema(
suite.arrowSchema,
&schema.SchemaOptions{
PrimaryColumn: "pk",
VectorColumn: "vec",
VersionColumn: "ts",
})).
Build()
suite.space, err = milvus_storage.Open("file://"+tmpDir, opt)
suite.NoError(err)
b := array.NewRecordBuilder(memory.DefaultAllocator, suite.arrowSchema)
defer b.Release()
b.Field(0).(*array.Int64Builder).AppendValues([]int64{1}, nil)
b.Field(1).(*array.Int64Builder).AppendValues([]int64{1}, nil)
fb := b.Field(2).(*array.FixedSizeBinaryBuilder)
fb.Reserve(1)
fb.Append([]byte{1, 2, 3, 4})
rec := b.NewRecord()
defer rec.Release()
reader, err := array.NewRecordReader(suite.arrowSchema, []arrow.Record{rec})
suite.NoError(err)
err = suite.space.Write(reader, &options.DefaultWriteOptions)
suite.NoError(err)
}
func (suite *IndexBuildTaskV2Suite) TestBuildIndex() {
req := &indexpb.CreateJobRequest{
BuildID: 1,
IndexVersion: 1,
IndexID: 0,
IndexName: "",
IndexParams: []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}, {Key: common.DimKey, Value: "1"}},
TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}},
NumRows: 10,
StorageConfig: &indexpb.StorageConfig{
RootPath: "/tmp/milvus/data",
StorageType: "local",
},
CollectionID: 1,
PartitionID: 1,
SegmentID: 1,
FieldID: 3,
FieldName: "vec",
FieldType: schemapb.DataType_FloatVector,
StorePath: "file://" + suite.space.Path(),
StoreVersion: suite.space.GetCurrentVersion(),
IndexStorePath: "file://" + suite.space.Path(),
Dim: 4,
OptionalScalarFields: []*indexpb.OptionalFieldInfo{
{FieldID: 1, FieldName: "pk", FieldType: 5, DataIds: []int64{0}},
},
}
task := newIndexBuildTaskV2(context.Background(), nil, req, NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)))
var err error
err = task.PreExecute(context.Background())
suite.NoError(err)
err = task.Execute(context.Background())
suite.NoError(err)
err = task.PostExecute(context.Background())
suite.NoError(err)
}
func TestIndexBuildTaskV2Suite(t *testing.T) {
suite.Run(t, new(IndexBuildTaskV2Suite))
}
type AnalyzeTaskSuite struct {
suite.Suite
schema *schemapb.CollectionSchema

@ -222,13 +222,9 @@ func (li *LoadIndexInfo) appendIndexData(ctx context.Context, indexKeys []string
var status C.CStatus
GetLoadPool().Submit(func() (any, error) {
if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
status = C.AppendIndexV3(li.cLoadIndexInfo)
} else {
traceCtx := ParseCTraceContext(ctx)
status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
runtime.KeepAlive(traceCtx)
}
traceCtx := ParseCTraceContext(ctx)
status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
runtime.KeepAlive(traceCtx)
return nil, nil
}).Await()
@ -265,13 +261,9 @@ func (li *LoadIndexInfo) finish(ctx context.Context, info *cgopb.LoadIndexInfo)
}
_, _ = GetLoadPool().Submit(func() (any, error) {
if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
status = C.AppendIndexV3(li.cLoadIndexInfo)
} else {
traceCtx := ParseCTraceContext(ctx)
status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
runtime.KeepAlive(traceCtx)
}
traceCtx := ParseCTraceContext(ctx)
status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
runtime.KeepAlive(traceCtx)
return nil, nil
}).Await()

@ -29,12 +29,10 @@ import "C"
import (
"context"
"fmt"
"io"
"runtime"
"strings"
"unsafe"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/cockroachdb/errors"
"go.opentelemetry.io/otel"
"go.uber.org/atomic"
@ -44,8 +42,6 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus/internal/proto/cgopb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
@ -55,7 +51,6 @@ import (
"github.com/milvus-io/milvus/internal/querynodev2/segments/state"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/cgo"
typeutil_internal "github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -259,7 +254,6 @@ type LocalSegment struct {
lastDeltaTimestamp *atomic.Uint64
fields *typeutil.ConcurrentMap[int64, *FieldInfo]
fieldIndexes *typeutil.ConcurrentMap[int64, *IndexedFieldInfo]
space *milvus_storage.Space
}
func NewSegment(ctx context.Context,
@ -336,76 +330,6 @@ func NewSegment(ctx context.Context,
return segment, nil
}
func NewSegmentV2(
ctx context.Context,
collection *Collection,
segmentType SegmentType,
version int64,
loadInfo *querypb.SegmentLoadInfo,
) (Segment, error) {
/*
CSegmentInterface
NewSegment(CCollection collection, uint64_t segment_id, SegmentType seg_type);
*/
if loadInfo.GetLevel() == datapb.SegmentLevel_L0 {
return NewL0Segment(collection, segmentType, version, loadInfo)
}
base, err := newBaseSegment(collection, segmentType, version, loadInfo)
if err != nil {
return nil, err
}
var segmentPtr C.CSegmentInterface
var status C.CStatus
var locker *state.LoadStateLock
switch segmentType {
case SegmentTypeSealed:
status = C.NewSegment(collection.collectionPtr, C.Sealed, C.int64_t(loadInfo.GetSegmentID()), &segmentPtr)
locker = state.NewLoadStateLock(state.LoadStateOnlyMeta)
case SegmentTypeGrowing:
status = C.NewSegment(collection.collectionPtr, C.Growing, C.int64_t(loadInfo.GetSegmentID()), &segmentPtr)
locker = state.NewLoadStateLock(state.LoadStateDataLoaded)
default:
return nil, fmt.Errorf("illegal segment type %d when create segment %d", segmentType, loadInfo.GetSegmentID())
}
if err := HandleCStatus(ctx, &status, "NewSegmentFailed"); err != nil {
return nil, err
}
log.Info("create segment",
zap.Int64("collectionID", loadInfo.GetCollectionID()),
zap.Int64("partitionID", loadInfo.GetPartitionID()),
zap.Int64("segmentID", loadInfo.GetSegmentID()),
zap.String("segmentType", segmentType.String()))
url, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), loadInfo.GetSegmentID())
if err != nil {
return nil, err
}
space, err := milvus_storage.Open(url, options.NewSpaceOptionBuilder().SetVersion(loadInfo.GetStorageVersion()).Build())
if err != nil {
return nil, err
}
segment := &LocalSegment{
baseSegment: base,
ptrLock: locker,
ptr: segmentPtr,
lastDeltaTimestamp: atomic.NewUint64(0),
fields: typeutil.NewConcurrentMap[int64, *FieldInfo](),
fieldIndexes: typeutil.NewConcurrentMap[int64, *IndexedFieldInfo](),
space: space,
memSize: atomic.NewInt64(-1),
rowNum: atomic.NewInt64(-1),
insertCount: atomic.NewInt64(0),
}
if err := segment.initializeSegment(); err != nil {
return nil, err
}
return segment, nil
}
func (s *LocalSegment) initializeSegment() error {
loadInfo := s.loadInfo.Load()
indexedFieldInfos, fieldBinlogs := separateIndexAndBinlog(loadInfo)
@ -932,18 +856,7 @@ func (s *LocalSegment) LoadMultiFieldData(ctx context.Context) error {
var status C.CStatus
GetLoadPool().Submit(func() (any, error) {
if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
if err != nil {
return nil, err
}
loadFieldDataInfo.appendURI(uri)
loadFieldDataInfo.appendStorageVersion(s.space.GetCurrentVersion())
status = C.LoadFieldDataV2(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
} else {
status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
}
status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
return nil, nil
}).Await()
if err := HandleCStatus(ctx, &status, "LoadMultiFieldData failed",
@ -1019,18 +932,7 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun
var status C.CStatus
GetLoadPool().Submit(func() (any, error) {
log.Info("submitted loadFieldData task to load pool")
if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
if err != nil {
return nil, err
}
loadFieldDataInfo.appendURI(uri)
loadFieldDataInfo.appendStorageVersion(s.space.GetCurrentVersion())
status = C.LoadFieldDataV2(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
} else {
status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
}
status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
return nil, nil
}).Await()
if err := HandleCStatus(ctx, &status, "LoadFieldData failed",
@ -1046,95 +948,6 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun
return nil
}
func (s *LocalSegment) LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error {
deleteReader, err := s.space.ScanDelete()
if err != nil {
return err
}
if !deleteReader.Schema().HasField(common.TimeStampFieldName) {
return fmt.Errorf("can not read timestamp field in space")
}
pkFieldSchema, err := typeutil.GetPrimaryFieldSchema(schema)
if err != nil {
return err
}
ids := &schemapb.IDs{}
var pkint64s []int64
var pkstrings []string
var tss []int64
for deleteReader.Next() {
rec := deleteReader.Record()
indices := rec.Schema().FieldIndices(common.TimeStampFieldName)
tss = append(tss, rec.Column(indices[0]).(*array.Int64).Int64Values()...)
indices = rec.Schema().FieldIndices(pkFieldSchema.Name)
switch pkFieldSchema.DataType {
case schemapb.DataType_Int64:
pkint64s = append(pkint64s, rec.Column(indices[0]).(*array.Int64).Int64Values()...)
case schemapb.DataType_VarChar:
columnData := rec.Column(indices[0]).(*array.String)
for i := 0; i < columnData.Len(); i++ {
pkstrings = append(pkstrings, columnData.Value(i))
}
default:
return fmt.Errorf("unknown data type %v", pkFieldSchema.DataType)
}
}
if err := deleteReader.Err(); err != nil && err != io.EOF {
return err
}
switch pkFieldSchema.DataType {
case schemapb.DataType_Int64:
ids.IdField = &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: pkint64s,
},
}
case schemapb.DataType_VarChar:
ids.IdField = &schemapb.IDs_StrId{
StrId: &schemapb.StringArray{
Data: pkstrings,
},
}
default:
return fmt.Errorf("unknown data type %v", pkFieldSchema.DataType)
}
idsBlob, err := proto.Marshal(ids)
if err != nil {
return err
}
if len(tss) == 0 {
return nil
}
loadInfo := C.CLoadDeletedRecordInfo{
timestamps: unsafe.Pointer(&tss[0]),
primary_keys: (*C.uint8_t)(unsafe.Pointer(&idsBlob[0])),
primary_keys_size: C.uint64_t(len(idsBlob)),
row_count: C.int64_t(len(tss)),
}
/*
CStatus
LoadDeletedRecord(CSegmentInterface c_segment, CLoadDeletedRecordInfo deleted_record_info)
*/
var status C.CStatus
GetDynamicPool().Submit(func() (any, error) {
status = C.LoadDeletedRecord(s.ptr, loadInfo)
return nil, nil
}).Await()
if err := HandleCStatus(ctx, &status, "LoadDeletedRecord failed"); err != nil {
return err
}
log.Info("load deleted record done",
zap.Int("rowNum", len(tss)),
zap.String("segmentType", s.Type().String()))
return nil
}
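The removed LoadDeltaData2 packed the scanned primary keys into a schemapb.IDs message and handed the marshaled blob to segcore through CLoadDeletedRecordInfo. Below is a minimal sketch of just that encoding step, assuming the go-api types shown above and the standard protobuf runtime; the cgo hand-off itself is omitted.

package main

import (
	"fmt"

	"google.golang.org/protobuf/proto"

	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

func main() {
	// Int64 primary keys collected from the delete log; the values are illustrative.
	pkint64s := []int64{1, 2}

	ids := &schemapb.IDs{
		IdField: &schemapb.IDs_IntId{
			IntId: &schemapb.LongArray{Data: pkint64s},
		},
	}

	// This blob, together with the matching timestamps, is what the removed
	// code passed to C.LoadDeletedRecord via CLoadDeletedRecordInfo.
	idsBlob, err := proto.Marshal(ids)
	if err != nil {
		panic(err)
	}
	fmt.Println("ids blob bytes:", len(idsBlob))
}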
func (s *LocalSegment) AddFieldDataInfo(ctx context.Context, rowCount int64, fields []*datapb.FieldBinlog) error {
if !s.ptrLock.RLockIf(state.IsNotReleased) {
return merr.WrapErrSegmentNotLoaded(s.ID(), "segment released")
@ -1331,13 +1144,6 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn
IndexStoreVersion: indexInfo.GetIndexStoreVersion(),
}
if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
if err != nil {
return err
}
indexInfoProto.Uri = uri
}
newLoadIndexInfoSpan := tr.RecordSpan()
// 2.

@ -20,7 +20,6 @@ import (
"context"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/proto/segcorepb"
@ -79,7 +78,6 @@ type Segment interface {
Insert(ctx context.Context, rowIDs []int64, timestamps []typeutil.Timestamp, record *segcorepb.InsertRecord) error
Delete(ctx context.Context, primaryKeys []storage.PrimaryKey, timestamps []typeutil.Timestamp) error
LoadDeltaData(ctx context.Context, deltaData *storage.DeleteData) error
LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error // storageV2
LastDeltaTimestamp() uint64
Release(ctx context.Context, opts ...releaseOption)

@ -23,7 +23,6 @@ import (
"github.com/samber/lo"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/proto/segcorepb"
@ -161,10 +160,6 @@ func (s *L0Segment) LoadDeltaData(ctx context.Context, deltaData *storage.Delete
return nil
}
func (s *L0Segment) LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error {
return merr.WrapErrServiceInternal("not implemented")
}
func (s *L0Segment) DeleteRecords() ([]storage.PrimaryKey, []uint64) {
s.dataGuard.RLock()
defer s.dataGuard.RUnlock()

@ -27,7 +27,6 @@ import "C"
import (
"context"
"fmt"
"io"
"path"
"runtime/debug"
"strconv"
@ -43,14 +42,11 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querynodev2/pkoracle"
"github.com/milvus-io/milvus/internal/storage"
typeutil_internal "github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -126,406 +122,6 @@ type resourceEstimateFactor struct {
deltaDataExpansionFactor float64
}
type segmentLoaderV2 struct {
*segmentLoader
}
func NewLoaderV2(
manager *Manager,
cm storage.ChunkManager,
) *segmentLoaderV2 {
return &segmentLoaderV2{
segmentLoader: NewLoader(manager, cm),
}
}
func (loader *segmentLoaderV2) LoadDelta(ctx context.Context, collectionID int64, segment Segment) error {
collection := loader.manager.Collection.Get(collectionID)
if collection == nil {
err := merr.WrapErrCollectionNotFound(collectionID)
log.Warn("failed to get collection while loading delta", zap.Error(err))
return err
}
return segment.LoadDeltaData2(ctx, collection.Schema())
}
func (loader *segmentLoaderV2) Load(ctx context.Context,
collectionID int64,
segmentType SegmentType,
version int64,
segments ...*querypb.SegmentLoadInfo,
) ([]Segment, error) {
log := log.Ctx(ctx).With(
zap.Int64("collectionID", collectionID),
zap.String("segmentType", segmentType.String()),
)
if len(segments) == 0 {
log.Info("no segment to load")
return nil, nil
}
// Filter out loaded & loading segments
infos := loader.prepare(ctx, segmentType, segments...)
defer loader.unregister(infos...)
log = log.With(
zap.Int64s("requestSegments", lo.Map(segments, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })),
zap.Int64s("preparedSegments", lo.Map(infos, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })),
)
// continue to wait other task done
log.Info("start loading...", zap.Int("segmentNum", len(segments)), zap.Int("afterFilter", len(infos)))
// Check memory & storage limit
requestResourceResult, err := loader.requestResource(ctx, infos...)
if err != nil {
log.Warn("request resource failed", zap.Error(err))
return nil, err
}
defer loader.freeRequest(requestResourceResult.Resource)
newSegments := typeutil.NewConcurrentMap[int64, Segment]()
loaded := typeutil.NewConcurrentMap[int64, Segment]()
defer func() {
newSegments.Range(func(_ int64, s Segment) bool {
s.Release(context.Background())
return true
})
debug.FreeOSMemory()
}()
for _, info := range infos {
loadInfo := info
collection := loader.manager.Collection.Get(loadInfo.GetCollectionID())
if collection == nil {
err := merr.WrapErrCollectionNotFound(loadInfo.GetCollectionID())
log.Warn("failed to get collection", zap.Error(err))
return nil, err
}
segment, err := NewSegmentV2(ctx, collection, segmentType, version, loadInfo)
if err != nil {
log.Warn("load segment failed when create new segment",
zap.Int64("partitionID", loadInfo.GetPartitionID()),
zap.Int64("segmentID", loadInfo.GetSegmentID()),
zap.Error(err),
)
return nil, err
}
newSegments.Insert(loadInfo.GetSegmentID(), segment)
}
loadSegmentFunc := func(idx int) error {
loadInfo := infos[idx]
partitionID := loadInfo.PartitionID
segmentID := loadInfo.SegmentID
segment, _ := newSegments.Get(segmentID)
metrics.QueryNodeLoadSegmentConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), "LoadSegment").Inc()
defer metrics.QueryNodeLoadSegmentConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), "LoadSegment").Dec()
tr := timerecord.NewTimeRecorder("loadDurationPerSegment")
var err error
if loadInfo.GetLevel() == datapb.SegmentLevel_L0 {
err = loader.LoadDelta(ctx, collectionID, segment)
} else {
err = loader.LoadSegment(ctx, segment.(*LocalSegment), loadInfo)
}
if err != nil {
log.Warn("load segment failed when load data into memory",
zap.Int64("partitionID", partitionID),
zap.Int64("segmentID", segmentID),
zap.Error(err),
)
return err
}
loader.manager.Segment.Put(ctx, segmentType, segment)
newSegments.GetAndRemove(segmentID)
loaded.Insert(segmentID, segment)
log.Info("load segment done", zap.Int64("segmentID", segmentID))
loader.notifyLoadFinish(loadInfo)
metrics.QueryNodeLoadSegmentLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(tr.ElapseSpan().Milliseconds()))
return nil
}
// Start to load,
// Make sure we can always benefit from concurrency, and not spawn too many idle goroutines
log.Info("start to load segments in parallel",
zap.Int("segmentNum", len(infos)),
zap.Int("concurrencyLevel", requestResourceResult.ConcurrencyLevel))
err = funcutil.ProcessFuncParallel(len(infos),
requestResourceResult.ConcurrencyLevel, loadSegmentFunc, "loadSegmentFunc")
if err != nil {
log.Warn("failed to load some segments", zap.Error(err))
return nil, err
}
// Wait for all segments loaded
segmentIDs := lo.Map(segments, func(info *querypb.SegmentLoadInfo, _ int) int64 { return info.GetSegmentID() })
if err := loader.waitSegmentLoadDone(ctx, segmentType, segmentIDs, version); err != nil {
log.Warn("failed to wait the filtered out segments load done", zap.Error(err))
return nil, err
}
log.Info("all segment load done")
var result []Segment
loaded.Range(func(_ int64, s Segment) bool {
result = append(result, s)
return true
})
return result, nil
}
func (loader *segmentLoaderV2) LoadBloomFilterSet(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) ([]*pkoracle.BloomFilterSet, error) {
log := log.Ctx(ctx).With(
zap.Int64("collectionID", collectionID),
zap.Int64s("segmentIDs", lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) int64 {
return info.GetSegmentID()
})),
)
segmentNum := len(infos)
if segmentNum == 0 {
log.Info("no segment to load")
return nil, nil
}
collection := loader.manager.Collection.Get(collectionID)
if collection == nil {
err := merr.WrapErrCollectionNotFound(collectionID)
log.Warn("failed to get collection while loading segment", zap.Error(err))
return nil, err
}
log.Info("start loading remote...", zap.Int("segmentNum", segmentNum))
loadedBfs := typeutil.NewConcurrentSet[*pkoracle.BloomFilterSet]()
// TODO check memory for bf size
loadRemoteFunc := func(idx int) error {
loadInfo := infos[idx]
partitionID := loadInfo.PartitionID
segmentID := loadInfo.SegmentID
bfs := pkoracle.NewBloomFilterSet(segmentID, partitionID, commonpb.SegmentState_Sealed)
log.Info("loading bloom filter for remote...")
err := loader.loadBloomFilter(ctx, segmentID, bfs, loadInfo.StorageVersion)
if err != nil {
log.Warn("load remote segment bloom filter failed",
zap.Int64("partitionID", partitionID),
zap.Int64("segmentID", segmentID),
zap.Error(err),
)
return err
}
loadedBfs.Insert(bfs)
return nil
}
err := funcutil.ProcessFuncParallel(segmentNum, segmentNum, loadRemoteFunc, "loadRemoteFunc")
if err != nil {
// no partial success here
log.Warn("failed to load remote segment", zap.Error(err))
return nil, err
}
return loadedBfs.Collect(), nil
}
func (loader *segmentLoaderV2) loadBloomFilter(ctx context.Context, segmentID int64, bfs *pkoracle.BloomFilterSet,
storeVersion int64,
) error {
log := log.Ctx(ctx).With(
zap.Int64("segmentID", segmentID),
)
startTs := time.Now()
url, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), segmentID)
if err != nil {
return err
}
space, err := milvus_storage.Open(url, options.NewSpaceOptionBuilder().SetVersion(storeVersion).Build())
if err != nil {
return err
}
statsBlobs := space.StatisticsBlobs()
blobs := []*storage.Blob{}
for _, statsBlob := range statsBlobs {
blob := make([]byte, statsBlob.Size)
_, err := space.ReadBlob(statsBlob.Name, blob)
if err != nil && err != io.EOF {
return err
}
blobs = append(blobs, &storage.Blob{Value: blob})
}
var stats []*storage.PrimaryKeyStats
stats, err = storage.DeserializeStats(blobs)
if err != nil {
log.Warn("failed to deserialize stats", zap.Error(err))
return err
}
var size uint
for _, stat := range stats {
pkStat := &storage.PkStatistics{
PkFilter: stat.BF,
MinPK: stat.MinPk,
MaxPK: stat.MaxPk,
}
size += stat.BF.Cap()
bfs.AddHistoricalStats(pkStat)
}
log.Info("Successfully load pk stats", zap.Duration("time", time.Since(startTs)), zap.Uint("size", size), zap.Int("BFNum", len(stats)))
return nil
}
func (loader *segmentLoaderV2) LoadSegment(ctx context.Context,
seg Segment,
loadInfo *querypb.SegmentLoadInfo,
) (err error) {
segment := seg.(*LocalSegment)
// TODO: we should create a transaction-like api to load segment for segment interface,
// but not do many things in segment loader.
stateLockGuard, err := segment.StartLoadData()
// segment can not do load now.
if err != nil {
return err
}
defer func() {
// segment is already loaded.
// TODO: if stateLockGuard is nil, we should not call LoadSegment anymore.
// but current Load is not clear enough to do an actual state transition, keep previous logic to avoid introduced bug.
if stateLockGuard != nil {
stateLockGuard.Done(err)
}
}()
log := log.Ctx(ctx).With(
zap.Int64("collectionID", segment.Collection()),
zap.Int64("partitionID", segment.Partition()),
zap.String("shard", segment.Shard().VirtualName()),
zap.Int64("segmentID", segment.ID()),
)
log.Info("start loading segment files",
zap.Int64("rowNum", loadInfo.GetNumOfRows()),
zap.String("segmentType", segment.Type().String()))
collection := loader.manager.Collection.Get(segment.Collection())
if collection == nil {
err := merr.WrapErrCollectionNotFound(segment.Collection())
log.Warn("failed to get collection while loading segment", zap.Error(err))
return err
}
// pkField := GetPkField(collection.Schema())
// TODO(xige-16): Optimize the data loading process and reduce data copying
// for now, there will be multiple copies in the process of data loading into segCore
defer debug.FreeOSMemory()
if segment.Type() == SegmentTypeSealed {
fieldsMap := typeutil.NewConcurrentMap[int64, *schemapb.FieldSchema]()
for _, field := range collection.Schema().GetFields() {
fieldsMap.Insert(field.FieldID, field)
}
// fieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo)
indexedFieldInfos := make(map[int64]*IndexedFieldInfo)
for _, indexInfo := range loadInfo.IndexInfos {
if indexInfo.GetIndexStoreVersion() > 0 {
fieldID := indexInfo.FieldID
fieldInfo := &IndexedFieldInfo{
IndexInfo: indexInfo,
}
indexedFieldInfos[fieldID] = fieldInfo
fieldsMap.Remove(fieldID)
// fieldID2IndexInfo[fieldID] = indexInfo
}
}
if err := segment.AddFieldDataInfo(ctx, loadInfo.GetNumOfRows(), loadInfo.GetBinlogPaths()); err != nil {
return err
}
log.Info("load fields...",
zap.Int("fieldNum", fieldsMap.Len()),
zap.Int64s("indexedFields", lo.Keys(indexedFieldInfos)),
)
schemaHelper, err := typeutil.CreateSchemaHelper(collection.Schema())
if err != nil {
return err
}
tr := timerecord.NewTimeRecorder("segmentLoader.LoadIndex")
if err := loader.loadFieldsIndex(ctx, schemaHelper, segment, loadInfo.GetNumOfRows(), indexedFieldInfos); err != nil {
return err
}
metrics.QueryNodeLoadIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(tr.ElapseSpan().Milliseconds()))
if err := loader.loadSealedSegmentFields(ctx, segment, fieldsMap, loadInfo.GetNumOfRows()); err != nil {
return err
}
// https://github.com/milvus-io/milvus/23654
// legacy entry num = 0
if err := loader.patchEntryNumber(ctx, segment, loadInfo); err != nil {
return err
}
} else {
if err := segment.LoadMultiFieldData(ctx); err != nil {
return err
}
}
// load statslog if it's growing segment
if segment.segmentType == SegmentTypeGrowing {
log.Info("loading statslog...")
// pkStatsBinlogs, logType := loader.filterPKStatsBinlogs(loadInfo.Statslogs, pkField.GetFieldID())
err := loader.loadBloomFilter(ctx, segment.ID(), segment.bloomFilterSet, loadInfo.StorageVersion)
if err != nil {
return err
}
}
log.Info("loading delta...")
return loader.LoadDelta(ctx, segment.Collection(), segment)
}
func (loader *segmentLoaderV2) LoadLazySegment(ctx context.Context,
segment Segment,
loadInfo *querypb.SegmentLoadInfo,
) (err error) {
return merr.ErrOperationNotSupported
}
func (loader *segmentLoaderV2) loadSealedSegmentFields(ctx context.Context, segment *LocalSegment, fields *typeutil.ConcurrentMap[int64, *schemapb.FieldSchema], rowCount int64) error {
runningGroup, _ := errgroup.WithContext(ctx)
fields.Range(func(fieldID int64, field *schemapb.FieldSchema) bool {
runningGroup.Go(func() error {
return segment.LoadFieldData(ctx, fieldID, rowCount, nil, false)
})
return true
})
err := runningGroup.Wait()
if err != nil {
return err
}
log.Ctx(ctx).Info("load field binlogs done for sealed segment",
zap.Int64("collection", segment.Collection()),
zap.Int64("segment", segment.ID()),
zap.String("segmentType", segment.Type().String()))
return nil
}
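loadSealedSegmentFields above is a plain errgroup fan-out: one goroutine per field, with Wait returning the first non-nil error. Here is a generic, self-contained sketch of that pattern; the field IDs and the loadField stub are placeholders rather than Milvus APIs.

package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// loadField stands in for segment.LoadFieldData in the deleted loader.
func loadField(ctx context.Context, fieldID int64) error {
	fmt.Println("loading field", fieldID)
	return nil
}

func main() {
	fieldIDs := []int64{100, 101, 102}

	g, ctx := errgroup.WithContext(context.Background())
	for _, fieldID := range fieldIDs {
		fieldID := fieldID // capture the per-iteration value (pre-Go 1.22 loop semantics)
		g.Go(func() error {
			return loadField(ctx, fieldID)
		})
	}
	// Wait returns the first non-nil error, mirroring runningGroup.Wait() above.
	if err := g.Wait(); err != nil {
		panic(err)
	}
}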
func NewLoader(
manager *Manager,
cm storage.ChunkManager,

@ -23,9 +23,6 @@ import (
"testing"
"time"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
@ -33,14 +30,10 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus-storage/go/storage/options"
"github.com/milvus-io/milvus-storage/go/storage/schema"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/initcore"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/contextutil"
"github.com/milvus-io/milvus/pkg/util/funcutil"
@ -911,152 +904,3 @@ func TestSegmentLoader(t *testing.T) {
suite.Run(t, &SegmentLoaderSuite{})
suite.Run(t, &SegmentLoaderDetailSuite{})
}
type SegmentLoaderV2Suite struct {
suite.Suite
loader *segmentLoaderV2
// Dependencies
manager *Manager
rootPath string
chunkManager storage.ChunkManager
// Data
collectionID int64
partitionID int64
segmentID int64
schema *schemapb.CollectionSchema
segmentNum int
}
func (suite *SegmentLoaderV2Suite) SetupSuite() {
paramtable.Init()
suite.rootPath = suite.T().Name()
suite.collectionID = rand.Int63()
suite.partitionID = rand.Int63()
suite.segmentID = rand.Int63()
suite.segmentNum = 5
}
func (suite *SegmentLoaderV2Suite) SetupTest() {
paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("true")
// Dependencies
suite.manager = NewManager()
ctx := context.Background()
// TODO:: cpp chunk manager not support local chunk manager
// suite.chunkManager = storage.NewLocalChunkManager(storage.RootPath(
// fmt.Sprintf("/tmp/milvus-ut/%d", rand.Int63())))
chunkManagerFactory := storage.NewTestChunkManagerFactory(paramtable.Get(), suite.rootPath)
suite.chunkManager, _ = chunkManagerFactory.NewPersistentStorageChunkManager(ctx)
suite.loader = NewLoaderV2(suite.manager, suite.chunkManager)
initcore.InitRemoteChunkManager(paramtable.Get())
// Data
suite.schema = GenTestCollectionSchema("test", schemapb.DataType_Int64, false)
indexMeta := GenTestIndexMeta(suite.collectionID, suite.schema)
loadMeta := &querypb.LoadMetaInfo{
LoadType: querypb.LoadType_LoadCollection,
CollectionID: suite.collectionID,
PartitionIDs: []int64{suite.partitionID},
}
suite.manager.Collection.PutOrRef(suite.collectionID, suite.schema, indexMeta, loadMeta)
}
func (suite *SegmentLoaderV2Suite) TearDownTest() {
ctx := context.Background()
for i := 0; i < suite.segmentNum; i++ {
suite.manager.Segment.Remove(context.Background(), suite.segmentID+int64(i), querypb.DataScope_All)
}
suite.chunkManager.RemoveWithPrefix(ctx, suite.rootPath)
paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
}
func (suite *SegmentLoaderV2Suite) TestLoad() {
tmpDir := suite.T().TempDir()
paramtable.Get().CommonCfg.StorageScheme.SwapTempValue("file")
paramtable.Get().CommonCfg.StoragePathPrefix.SwapTempValue(tmpDir)
ctx := context.Background()
msgLength := 4
arrowSchema, err := typeutil.ConvertToArrowSchema(suite.schema.Fields)
suite.NoError(err)
opt := options.NewSpaceOptionBuilder().
SetSchema(schema.NewSchema(
arrowSchema,
&schema.SchemaOptions{
PrimaryColumn: "int64Field",
VectorColumn: "floatVectorField",
VersionColumn: "Timestamp",
})).
Build()
uri, err := typeutil.GetStorageURI("file", tmpDir, suite.segmentID)
suite.NoError(err)
space, err := milvus_storage.Open(uri, opt)
suite.NoError(err)
b := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema)
defer b.Release()
insertData, err := genInsertData(msgLength, suite.schema)
suite.NoError(err)
err = typeutil.BuildRecord(b, insertData, suite.schema.Fields)
suite.NoError(err)
rec := b.NewRecord()
defer rec.Release()
reader, err := array.NewRecordReader(arrowSchema, []arrow.Record{rec})
suite.NoError(err)
err = space.Write(reader, &options.DefaultWriteOptions)
suite.NoError(err)
collMeta := genCollectionMeta(suite.collectionID, suite.partitionID, suite.schema)
inCodec := storage.NewInsertCodecWithSchema(collMeta)
statsLog, err := inCodec.SerializePkStatsByData(insertData)
suite.NoError(err)
err = space.WriteBlob(statsLog.Value, statsLog.Key, false)
suite.NoError(err)
dschema := space.Manifest().GetSchema().DeleteSchema()
dbuilder := array.NewRecordBuilder(memory.DefaultAllocator, dschema)
defer dbuilder.Release()
dbuilder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2}, nil)
dbuilder.Field(1).(*array.Int64Builder).AppendValues([]int64{100, 200}, nil)
drec := dbuilder.NewRecord()
defer drec.Release()
dreader, err := array.NewRecordReader(dschema, []arrow.Record{drec})
suite.NoError(err)
err = space.Delete(dreader)
suite.NoError(err)
segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, &querypb.SegmentLoadInfo{
SegmentID: suite.segmentID,
PartitionID: suite.partitionID,
CollectionID: suite.collectionID,
NumOfRows: int64(msgLength),
StorageVersion: 3,
InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID),
})
suite.NoError(err)
_, err = suite.loader.LoadBloomFilterSet(ctx, suite.collectionID, 0, &querypb.SegmentLoadInfo{
SegmentID: suite.segmentID,
PartitionID: suite.partitionID,
CollectionID: suite.collectionID,
NumOfRows: int64(msgLength),
StorageVersion: 3,
InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID),
})
suite.NoError(err)
segment := segments[0]
suite.EqualValues(4, segment.InsertCount())
suite.Equal(int64(msgLength-2), segment.RowNum())
}
func TestSegmentLoaderV2(t *testing.T) {
suite.Run(t, &SegmentLoaderV2Suite{})
}

@ -348,11 +348,7 @@ func (node *QueryNode) Init() error {
node.subscribingChannels = typeutil.NewConcurrentSet[string]()
node.unsubscribingChannels = typeutil.NewConcurrentSet[string]()
node.manager = segments.NewManager()
if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
node.loader = segments.NewLoaderV2(node.manager, node.chunkManager)
} else {
node.loader = segments.NewLoader(node.manager, node.chunkManager)
}
node.loader = segments.NewLoader(node.manager, node.chunkManager)
node.manager.SetLoader(node.loader)
node.dispClient = msgdispatcher.NewClient(node.factory, typeutil.QueryNodeRole, node.GetNodeID())
// init pipeline manager

@ -25,7 +25,7 @@ import (
"github.com/stretchr/testify/assert"
"go.uber.org/zap"
"github.com/milvus-io/milvus-storage/go/common/log"
"github.com/milvus-io/milvus/pkg/log"
)
func TestPerformance(t *testing.T) {

@ -24,9 +24,9 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus-storage/go/common/log"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
)

@ -41,7 +41,6 @@ type CodecIndex interface {
Delete() error
CleanLocalData() error
UpLoad() (map[string]int64, error)
UpLoadV2() (int64, error)
}
var _ CodecIndex = (*CgoIndex)(nil)
@ -127,35 +126,6 @@ func CreateIndex(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo)
return index, nil
}
func CreateIndexV2(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) {
buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo)
if err != nil {
log.Ctx(ctx).Warn("marshal buildIndexInfo failed",
zap.String("clusterID", buildIndexInfo.GetClusterID()),
zap.Int64("buildID", buildIndexInfo.GetBuildID()),
zap.Error(err))
return nil, err
}
var indexPtr C.CIndex
status := C.CreateIndexV2(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob)))
if err := HandleCStatus(&status, "failed to create index"); err != nil {
return nil, err
}
index := &CgoIndex{
indexPtr: indexPtr,
close: false,
}
runtime.SetFinalizer(index, func(index *CgoIndex) {
if index != nil && !index.close {
log.Error("there is leakage in index object, please check.")
}
})
return index, nil
}
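Like CreateIndex, the removed CreateIndexV2 attached a finalizer that warns when an index object is garbage collected without being closed. A toy, self-contained sketch of that leak-check pattern follows; the cgoIndex type and the message are illustrative, and finalizers are best-effort, running only after a GC cycle.

package main

import (
	"fmt"
	"runtime"
	"time"
)

type cgoIndex struct {
	closed bool
}

func main() {
	idx := &cgoIndex{}
	runtime.SetFinalizer(idx, func(i *cgoIndex) {
		if !i.closed {
			// The real code logs "there is leakage in index object, please check."
			fmt.Println("index object leaked: it was never closed")
		}
	})

	// Drop the only reference without closing it; the finalizer fires after GC.
	idx = nil
	runtime.GC()
	time.Sleep(100 * time.Millisecond) // give the finalizer goroutine time to run
}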
// TODO: this seems to be used only for test. We should mark the method
// name with ForTest, or maybe move to test file.
func (index *CgoIndex) Build(dataset *Dataset) error {
@ -426,34 +396,3 @@ func (index *CgoIndex) UpLoad() (map[string]int64, error) {
return res, nil
}
func (index *CgoIndex) UpLoadV2() (int64, error) {
var cBinarySet C.CBinarySet
status := C.SerializeIndexAndUpLoadV2(index.indexPtr, &cBinarySet)
defer func() {
if cBinarySet != nil {
C.DeleteBinarySet(cBinarySet)
}
}()
if err := HandleCStatus(&status, "failed to serialize index and upload index"); err != nil {
return -1, err
}
buffer, err := GetBinarySetValue(cBinarySet, "index_store_version")
if err != nil {
return -1, err
}
var version int64
version = int64(buffer[7])
version = (version << 8) + int64(buffer[6])
version = (version << 8) + int64(buffer[5])
version = (version << 8) + int64(buffer[4])
version = (version << 8) + int64(buffer[3])
version = (version << 8) + int64(buffer[2])
version = (version << 8) + int64(buffer[1])
version = (version << 8) + int64(buffer[0])
return version, nil
}
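The byte shifting in the removed UpLoadV2 is simply a little-endian uint64 decode of the index_store_version blob; encoding/binary expresses the same thing directly. A standalone sketch with a made-up buffer:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// 8 little-endian bytes as returned for the "index_store_version" blob;
	// the value 3 is illustrative.
	buffer := []byte{3, 0, 0, 0, 0, 0, 0, 0}

	// Equivalent to the removed byte-by-byte shifting: buffer[7] is the most
	// significant byte and buffer[0] the least significant.
	version := int64(binary.LittleEndian.Uint64(buffer[:8]))
	fmt.Println("index store version:", version)
}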