mirror of https://github.com/milvus-io/milvus.git
enhance: remove unused code for StorageV2 (#35132)
issue: https://github.com/milvus-io/milvus/issues/34168
Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>
Branch: pull/35207/head
parent 9412002d7d
commit aa247f192d
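
For context, the StorageV2 code being removed is the set of Space-based variants that paralleled the regular index I/O paths. A non-authoritative recap, using the pure-virtual declarations exactly as they appear in the IndexBase hunk further down (class body and the kept Build/Load/Upload counterparts omitted):

    // Removed from IndexBase, and overridden nowhere after this commit:
    virtual void
    LoadV2(const Config& config = {}) = 0;

    virtual void
    BuildV2(const Config& Config = {}) = 0;

    virtual BinarySet
    UploadV2(const Config& config = {}) = 0;

The concrete scalar and vector indexes likewise drop their std::shared_ptr<milvus_storage::Space> constructor overloads and space_ members, which is what most of the C++ hunks below consist of.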
Makefile (10 changes)
@@ -335,6 +335,16 @@ test-querycoord:
	@echo "Running go unittests..."
	@(env bash $(PWD)/scripts/run_go_unittest.sh -t querycoord)

generate-mockery-flushcommon: getdeps
	$(INSTALL_PATH)/mockery --name=MetaCache --dir=$(PWD)/internal/flushcommon/metacache --output=$(PWD)/internal/flushcommon/metacache --filename=mock_meta_cache.go --with-expecter --structname=MockMetaCache --outpkg=metacache --inpackage
	$(INSTALL_PATH)/mockery --name=SyncManager --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_sync_manager.go --with-expecter --structname=MockSyncManager --outpkg=syncmgr --inpackage
	$(INSTALL_PATH)/mockery --name=MetaWriter --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_meta_writer.go --with-expecter --structname=MockMetaWriter --outpkg=syncmgr --inpackage
	$(INSTALL_PATH)/mockery --name=Serializer --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_serializer.go --with-expecter --structname=MockSerializer --outpkg=syncmgr --inpackage
	$(INSTALL_PATH)/mockery --name=Task --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_task.go --with-expecter --structname=MockTask --outpkg=syncmgr --inpackage
	$(INSTALL_PATH)/mockery --name=WriteBuffer --dir=$(PWD)/internal/flushcommon/writebuffer --output=$(PWD)/internal/flushcommon/writebuffer --filename=mock_write_buffer.go --with-expecter --structname=MockWriteBuffer --outpkg=writebuffer --inpackage
	$(INSTALL_PATH)/mockery --name=BufferManager --dir=$(PWD)/internal/flushcommon/writebuffer --output=$(PWD)/internal/flushcommon/writebuffer --filename=mock_manager.go --with-expecter --structname=MockBufferManager --outpkg=writebuffer --inpackage
	$(INSTALL_PATH)/mockery --name=FlowgraphManager --dir=$(PWD)/internal/flushcommon/pipeline --output=$(PWD)/internal/flushcommon/pipeline --filename=mock_fgmanager.go --with-expecter --structname=MockFlowgraphManager --outpkg=pipeline --inpackage

test-metastore:
	@echo "Running go unittests..."
	@(env bash $(PWD)/scripts/run_go_unittest.sh -t metastore)
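The new generate-mockery-flushcommon target regenerates the flushcommon mocks in place; given that the getdeps prerequisite is expected to put the mockery binary under $(INSTALL_PATH), it would typically be invoked from the repository root as make generate-mockery-flushcommon.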
go.mod (2 changes)
@@ -55,8 +55,6 @@ require (
	google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f
)

require github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70

require (
	github.com/bits-and-blooms/bitset v1.10.0
	github.com/cenkalti/backoff/v4 v4.2.1
go.sum (2 changes)
@@ -608,8 +608,6 @@ github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZz
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240717062137-3ffb1db01632 h1:CXig0DNtUsCLzchCFe3PR2KgOdobbz9gK2nSV7195PM=
github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240717062137-3ffb1db01632/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70 h1:Z+sp64fmAOxAG7mU0dfVOXvAXlwRB0c8a96rIM5HevI=
github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70/go.mod h1:GPETMcTZq1gLY1WA6Na5kiNAKnq8SEMMiVKUZrM3sho=
github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A=
github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
@@ -21,7 +21,6 @@
#include <vector>

#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"
#include "pb/clustering.pb.h"
#include "knowhere/cluster/cluster_factory.h"

@@ -25,7 +25,6 @@
#include "storage/ChunkManager.h"
#include "storage/DataCodec.h"
#include "storage/Types.h"
#include "storage/space.h"

namespace milvus::clustering {

@@ -30,4 +30,4 @@ set(MILVUS_EXEC_SRCS

add_library(milvus_exec STATIC ${MILVUS_EXEC_SRCS})

target_link_libraries(milvus_exec milvus_common milvus-storage ${CONAN_LIBS})
target_link_libraries(milvus_exec milvus_common ${CONAN_LIBS})
@@ -25,7 +25,6 @@
#include "index/ScalarIndex.h"
#include "index/Utils.h"
#include "storage/Util.h"
#include "storage/space.h"

namespace milvus {
namespace index {

@@ -42,20 +41,6 @@ BitmapIndex<T>::BitmapIndex(
    }
}

template <typename T>
BitmapIndex<T>::BitmapIndex(
    const storage::FileManagerContext& file_manager_context,
    std::shared_ptr<milvus_storage::Space> space)
    : is_built_(false),
      schema_(file_manager_context.fieldDataMeta.field_schema),
      space_(space) {
    if (file_manager_context.Valid()) {
        file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
            file_manager_context, space);
        AssertInfo(file_manager_ != nullptr, "create file manager failed!");
    }
}

template <typename T>
void
BitmapIndex<T>::Build(const Config& config) {
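With the Space-taking overload gone (hunk above), constructing a BitmapIndex goes solely through storage::FileManagerContext. A minimal, hypothetical sketch of the remaining path; the context contents, the int64_t template argument, and the config variable are placeholders for illustration, not taken from this diff:

    // Hypothetical usage sketch; metadata inside the context is assumed to be
    // filled in by the caller as before.
    storage::FileManagerContext ctx;  // field/index meta + chunk manager
    auto bitmap_index = std::make_unique<index::BitmapIndex<int64_t>>(ctx);
    bitmap_index->Build(config);                      // non-V2 build path
    BinarySet remote_files = bitmap_index->Upload(config);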
@@ -101,32 +86,6 @@ BitmapIndex<T>::Build(size_t n, const T* data) {
    is_built_ = true;
}

template <typename T>
void
BitmapIndex<T>::BuildV2(const Config& config) {
    if (is_built_) {
        return;
    }
    auto field_name = file_manager_->GetIndexMeta().field_name;
    auto reader = space_->ScanData();
    std::vector<FieldDataPtr> field_datas;
    for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
        if (!rec.ok()) {
            PanicInfo(DataFormatBroken, "failed to read data");
        }
        auto data = rec.ValueUnsafe();
        auto total_num_rows = data->num_rows();
        auto col_data = data->GetColumnByName(field_name);
        // todo: support nullable index
        auto field_data = storage::CreateFieldData(
            DataType(GetDType<T>()), false, 0, total_num_rows);
        field_data->FillFieldData(col_data);
        field_datas.push_back(field_data);
    }

    BuildWithFieldData(field_datas);
}

template <typename T>
void
BitmapIndex<T>::BuildPrimitiveField(
@@ -302,21 +261,6 @@ BitmapIndex<T>::Upload(const Config& config) {
    return ret;
}

template <typename T>
BinarySet
BitmapIndex<T>::UploadV2(const Config& config) {
    auto binary_set = Serialize(config);

    file_manager_->AddFileV2(binary_set);

    auto remote_path_to_size = file_manager_->GetRemotePathsToFileSize();
    BinarySet ret;
    for (auto& file : remote_path_to_size) {
        ret.Append(file.first, nullptr, file.second);
    }
    return ret;
}

template <typename T>
void
BitmapIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
@@ -420,48 +364,6 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
    is_built_ = true;
}

template <typename T>
void
BitmapIndex<T>::LoadV2(const Config& config) {
    auto blobs = space_->StatisticsBlobs();
    std::vector<std::string> index_files;
    auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
    for (auto& b : blobs) {
        if (b.name.rfind(prefix, 0) == 0) {
            index_files.push_back(b.name);
        }
    }
    std::map<std::string, FieldDataPtr> index_datas{};
    for (auto& file_name : index_files) {
        auto res = space_->GetBlobByteSize(file_name);
        if (!res.ok()) {
            PanicInfo(S3Error, "unable to read index blob");
        }
        auto index_blob_data =
            std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
        auto status = space_->ReadBlob(file_name, index_blob_data.get());
        if (!status.ok()) {
            PanicInfo(S3Error, "unable to read index blob");
        }
        auto raw_index_blob =
            storage::DeserializeFileData(index_blob_data, res.value());
        auto key = file_name.substr(file_name.find_last_of('/') + 1);
        index_datas[key] = raw_index_blob->GetFieldData();
    }
    AssembleIndexDatas(index_datas);

    BinarySet binary_set;
    for (auto& [key, data] : index_datas) {
        auto size = data->Size();
        auto deleter = [&](uint8_t*) {};  // avoid repeated deconstruction
        auto buf = std::shared_ptr<uint8_t[]>(
            (uint8_t*)const_cast<void*>(data->Data()), deleter);
        binary_set.Append(key, buf, size);
    }

    LoadWithoutAssemble(binary_set, config);
}

template <typename T>
void
BitmapIndex<T>::Load(milvus::tracer::TraceContext ctx, const Config& config) {
@@ -25,7 +25,6 @@
#include "storage/FileManager.h"
#include "storage/DiskFileManagerImpl.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"

namespace milvus {
namespace index {

@@ -46,10 +45,6 @@ class BitmapIndex : public ScalarIndex<T> {
        const storage::FileManagerContext& file_manager_context =
            storage::FileManagerContext());

    explicit BitmapIndex(
        const storage::FileManagerContext& file_manager_context,
        std::shared_ptr<milvus_storage::Space> space);

    ~BitmapIndex() override = default;

    BinarySet

@@ -61,9 +56,6 @@ class BitmapIndex : public ScalarIndex<T> {
    void
    Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;

    void
    LoadV2(const Config& config = {}) override;

    int64_t
    Count() override {
        return total_num_rows_;

@@ -83,9 +75,6 @@ class BitmapIndex : public ScalarIndex<T> {
    void
    BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;

    void
    BuildV2(const Config& config = {}) override;

    const TargetBitmap
    In(size_t n, const T* values) override;

@@ -112,9 +101,6 @@ class BitmapIndex : public ScalarIndex<T> {
    BinarySet
    Upload(const Config& config = {}) override;

    BinarySet
    UploadV2(const Config& config = {}) override;

    const bool
    HasRawData() const override {
        if (schema_.data_type() == proto::schema::DataType::Array) {

@@ -195,7 +181,6 @@ class BitmapIndex : public ScalarIndex<T> {
    size_t total_num_rows_{0};
    proto::schema::FieldSchema schema_;
    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
    std::shared_ptr<milvus_storage::Space> space_;
};

}  // namespace index
@@ -26,6 +26,6 @@ set(INDEX_FILES
milvus_add_pkg_config("milvus_index")
add_library(milvus_index SHARED ${INDEX_FILES})

target_link_libraries(milvus_index milvus_storage milvus-storage tantivy_binding)
target_link_libraries(milvus_index milvus_storage tantivy_binding)

install(TARGETS milvus_index DESTINATION "${CMAKE_INSTALL_LIBDIR}")
@ -23,7 +23,6 @@
|
|||
#include "index/ScalarIndex.h"
|
||||
#include "index/Utils.h"
|
||||
#include "storage/Util.h"
|
||||
#include "storage/space.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace index {
|
||||
|
@ -43,23 +42,6 @@ HybridScalarIndex<T>::HybridScalarIndex(
|
|||
internal_index_type_ = ScalarIndexType::NONE;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
HybridScalarIndex<T>::HybridScalarIndex(
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space)
|
||||
: is_built_(false),
|
||||
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
|
||||
file_manager_context_(file_manager_context),
|
||||
space_(space) {
|
||||
if (file_manager_context.Valid()) {
|
||||
mem_file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
|
||||
file_manager_context, space);
|
||||
AssertInfo(mem_file_manager_ != nullptr, "create file manager failed!");
|
||||
}
|
||||
field_type_ = file_manager_context.fieldDataMeta.field_schema.data_type();
|
||||
internal_index_type_ = ScalarIndexType::NONE;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ScalarIndexType
|
||||
HybridScalarIndex<T>::SelectIndexBuildType(size_t n, const T* values) {
|
||||
|
@ -274,39 +256,6 @@ HybridScalarIndex<T>::Build(const Config& config) {
|
|||
is_built_ = true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
HybridScalarIndex<T>::BuildV2(const Config& config) {
|
||||
if (is_built_) {
|
||||
return;
|
||||
}
|
||||
bitmap_index_cardinality_limit_ =
|
||||
GetBitmapCardinalityLimitFromConfig(config);
|
||||
LOG_INFO("config bitmap cardinality limit to {}",
|
||||
bitmap_index_cardinality_limit_);
|
||||
|
||||
auto field_name = mem_file_manager_->GetIndexMeta().field_name;
|
||||
auto reader = space_->ScanData();
|
||||
std::vector<FieldDataPtr> field_datas;
|
||||
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
|
||||
if (!rec.ok()) {
|
||||
PanicInfo(DataFormatBroken, "failed to read data");
|
||||
}
|
||||
auto data = rec.ValueUnsafe();
|
||||
auto total_num_rows = data->num_rows();
|
||||
auto col_data = data->GetColumnByName(field_name);
|
||||
// todo: support nullable index
|
||||
auto field_data = storage::CreateFieldData(
|
||||
DataType(GetDType<T>()), false, 0, total_num_rows);
|
||||
field_data->FillFieldData(col_data);
|
||||
field_datas.push_back(field_data);
|
||||
}
|
||||
|
||||
SelectIndexBuildType(field_datas);
|
||||
BuildInternal(field_datas);
|
||||
is_built_ = true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
HybridScalarIndex<T>::Serialize(const Config& config) {
|
||||
|
@ -356,21 +305,6 @@ HybridScalarIndex<T>::Upload(const Config& config) {
|
|||
return index_ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
HybridScalarIndex<T>::UploadV2(const Config& config) {
|
||||
auto internal_index = GetInternalIndex();
|
||||
auto index_ret = internal_index->Upload(config);
|
||||
|
||||
auto index_type_ret = SerializeIndexType();
|
||||
|
||||
for (auto& [key, value] : index_type_ret.binary_map_) {
|
||||
index_ret.Append(key, value);
|
||||
}
|
||||
|
||||
return index_ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
HybridScalarIndex<T>::DeserializeIndexType(const BinarySet& binary_set) {
|
||||
|
@ -380,12 +314,6 @@ HybridScalarIndex<T>::DeserializeIndexType(const BinarySet& binary_set) {
|
|||
internal_index_type_ = static_cast<ScalarIndexType>(index_type);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
HybridScalarIndex<T>::LoadV2(const Config& config) {
|
||||
PanicInfo(Unsupported, "HybridScalarIndex LoadV2 not implemented");
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::string
|
||||
HybridScalarIndex<T>::GetRemoteIndexTypeFile(
|
||||
|
|
|
@ -28,7 +28,6 @@
|
|||
#include "storage/FileManager.h"
|
||||
#include "storage/DiskFileManagerImpl.h"
|
||||
#include "storage/MemFileManagerImpl.h"
|
||||
#include "storage/space.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace index {
|
||||
|
@ -46,10 +45,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
const storage::FileManagerContext& file_manager_context =
|
||||
storage::FileManagerContext());
|
||||
|
||||
explicit HybridScalarIndex(
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
|
||||
~HybridScalarIndex() override = default;
|
||||
|
||||
BinarySet
|
||||
|
@ -61,9 +56,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
void
|
||||
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
LoadV2(const Config& config = {}) override;
|
||||
|
||||
int64_t
|
||||
Count() override {
|
||||
return internal_index_->Count();
|
||||
|
@ -85,9 +77,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
void
|
||||
Build(const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildV2(const Config& config = {}) override;
|
||||
|
||||
const TargetBitmap
|
||||
In(size_t n, const T* values) override {
|
||||
return internal_index_->In(n, values);
|
||||
|
@ -133,9 +122,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
BinarySet
|
||||
Upload(const Config& config = {}) override;
|
||||
|
||||
BinarySet
|
||||
UploadV2(const Config& config = {}) override;
|
||||
|
||||
private:
|
||||
ScalarIndexType
|
||||
SelectBuildTypeForPrimitiveType(
|
||||
|
@ -173,7 +159,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
std::shared_ptr<ScalarIndex<T>> internal_index_{nullptr};
|
||||
storage::FileManagerContext file_manager_context_;
|
||||
std::shared_ptr<storage::MemFileManagerImpl> mem_file_manager_{nullptr};
|
||||
std::shared_ptr<milvus_storage::Space> space_{nullptr};
|
||||
};
|
||||
|
||||
} // namespace index
|
||||
|
|
|
@ -44,9 +44,6 @@ class IndexBase {
|
|||
virtual void
|
||||
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) = 0;
|
||||
|
||||
virtual void
|
||||
LoadV2(const Config& config = {}) = 0;
|
||||
|
||||
virtual void
|
||||
BuildWithRawData(size_t n,
|
||||
const void* values,
|
||||
|
@ -58,18 +55,12 @@ class IndexBase {
|
|||
virtual void
|
||||
Build(const Config& config = {}) = 0;
|
||||
|
||||
virtual void
|
||||
BuildV2(const Config& Config = {}) = 0;
|
||||
|
||||
virtual int64_t
|
||||
Count() = 0;
|
||||
|
||||
virtual BinarySet
|
||||
Upload(const Config& config = {}) = 0;
|
||||
|
||||
virtual BinarySet
|
||||
UploadV2(const Config& config = {}) = 0;
|
||||
|
||||
virtual const bool
|
||||
HasRawData() const = 0;
|
||||
|
||||
|
|
|
@ -78,51 +78,6 @@ IndexFactory::CreatePrimitiveScalarIndex<std::string>(
|
|||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ScalarIndexPtr<T>
|
||||
IndexFactory::CreatePrimitiveScalarIndex(
|
||||
const IndexType& index_type,
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
if (index_type == INVERTED_INDEX_TYPE) {
|
||||
return std::make_unique<InvertedIndexTantivy<T>>(file_manager_context,
|
||||
space);
|
||||
}
|
||||
if (index_type == BITMAP_INDEX_TYPE) {
|
||||
return std::make_unique<BitmapIndex<T>>(file_manager_context, space);
|
||||
}
|
||||
if (index_type == HYBRID_INDEX_TYPE) {
|
||||
return std::make_unique<HybridScalarIndex<T>>(file_manager_context,
|
||||
space);
|
||||
}
|
||||
return CreateScalarIndexSort<T>(file_manager_context, space);
|
||||
}
|
||||
|
||||
template <>
|
||||
ScalarIndexPtr<std::string>
|
||||
IndexFactory::CreatePrimitiveScalarIndex<std::string>(
|
||||
const IndexType& index_type,
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
#if defined(__linux__) || defined(__APPLE__)
|
||||
if (index_type == INVERTED_INDEX_TYPE) {
|
||||
return std::make_unique<InvertedIndexTantivy<std::string>>(
|
||||
file_manager_context, space);
|
||||
}
|
||||
if (index_type == BITMAP_INDEX_TYPE) {
|
||||
return std::make_unique<BitmapIndex<std::string>>(file_manager_context,
|
||||
space);
|
||||
}
|
||||
if (index_type == HYBRID_INDEX_TYPE) {
|
||||
return std::make_unique<HybridScalarIndex<std::string>>(
|
||||
file_manager_context, space);
|
||||
}
|
||||
return CreateStringIndexMarisa(file_manager_context, space);
|
||||
#else
|
||||
PanicInfo(Unsupported, "unsupported platform");
|
||||
#endif
|
||||
}
|
||||
|
||||
IndexBasePtr
|
||||
IndexFactory::CreateIndex(
|
||||
const CreateIndexInfo& create_index_info,
|
||||
|
@ -134,19 +89,6 @@ IndexFactory::CreateIndex(
|
|||
return CreateScalarIndex(create_index_info, file_manager_context);
|
||||
}
|
||||
|
||||
IndexBasePtr
|
||||
IndexFactory::CreateIndex(
|
||||
const CreateIndexInfo& create_index_info,
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
if (IsVectorDataType(create_index_info.field_type)) {
|
||||
return CreateVectorIndex(
|
||||
create_index_info, file_manager_context, space);
|
||||
}
|
||||
|
||||
return CreateScalarIndex(create_index_info, file_manager_context, space);
|
||||
}
|
||||
|
||||
IndexBasePtr
|
||||
IndexFactory::CreatePrimitiveScalarIndex(
|
||||
DataType data_type,
|
||||
|
@ -307,90 +249,4 @@ IndexFactory::CreateVectorIndex(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
IndexBasePtr
|
||||
IndexFactory::CreateVectorIndex(
|
||||
const CreateIndexInfo& create_index_info,
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
auto data_type = create_index_info.field_type;
|
||||
auto index_type = create_index_info.index_type;
|
||||
auto metric_type = create_index_info.metric_type;
|
||||
auto version = create_index_info.index_engine_version;
|
||||
|
||||
if (knowhere::UseDiskLoad(index_type, version)) {
|
||||
switch (data_type) {
|
||||
case DataType::VECTOR_FLOAT: {
|
||||
return std::make_unique<VectorDiskAnnIndex<float>>(
|
||||
index_type,
|
||||
metric_type,
|
||||
version,
|
||||
space,
|
||||
file_manager_context);
|
||||
}
|
||||
case DataType::VECTOR_FLOAT16: {
|
||||
return std::make_unique<VectorDiskAnnIndex<float16>>(
|
||||
index_type,
|
||||
metric_type,
|
||||
version,
|
||||
space,
|
||||
file_manager_context);
|
||||
}
|
||||
case DataType::VECTOR_BFLOAT16: {
|
||||
return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
||||
index_type,
|
||||
metric_type,
|
||||
version,
|
||||
space,
|
||||
file_manager_context);
|
||||
}
|
||||
case DataType::VECTOR_BINARY: {
|
||||
return std::make_unique<VectorDiskAnnIndex<bin1>>(
|
||||
index_type,
|
||||
metric_type,
|
||||
version,
|
||||
space,
|
||||
file_manager_context);
|
||||
}
|
||||
case DataType::VECTOR_SPARSE_FLOAT: {
|
||||
return std::make_unique<VectorDiskAnnIndex<float>>(
|
||||
index_type,
|
||||
metric_type,
|
||||
version,
|
||||
space,
|
||||
file_manager_context);
|
||||
}
|
||||
default:
|
||||
PanicInfo(
|
||||
DataTypeInvalid,
|
||||
fmt::format("invalid data type to build disk index: {}",
|
||||
data_type));
|
||||
}
|
||||
} else { // create mem index
|
||||
switch (data_type) {
|
||||
case DataType::VECTOR_FLOAT:
|
||||
case DataType::VECTOR_SPARSE_FLOAT: {
|
||||
return std::make_unique<VectorMemIndex<float>>(
|
||||
create_index_info, file_manager_context, space);
|
||||
}
|
||||
case DataType::VECTOR_BINARY: {
|
||||
return std::make_unique<VectorMemIndex<bin1>>(
|
||||
create_index_info, file_manager_context, space);
|
||||
}
|
||||
case DataType::VECTOR_FLOAT16: {
|
||||
return std::make_unique<VectorMemIndex<float16>>(
|
||||
create_index_info, file_manager_context, space);
|
||||
}
|
||||
case DataType::VECTOR_BFLOAT16: {
|
||||
return std::make_unique<VectorMemIndex<bfloat16>>(
|
||||
create_index_info, file_manager_context, space);
|
||||
}
|
||||
default:
|
||||
PanicInfo(
|
||||
DataTypeInvalid,
|
||||
fmt::format("invalid data type to build mem index: {}",
|
||||
data_type));
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
#include "index/ScalarIndexSort.h"
|
||||
#include "index/StringIndexMarisa.h"
|
||||
#include "index/BoolIndex.h"
|
||||
#include "storage/space.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
|
@ -56,11 +55,6 @@ class IndexFactory {
|
|||
CreateIndex(const CreateIndexInfo& create_index_info,
|
||||
const storage::FileManagerContext& file_manager_context);
|
||||
|
||||
IndexBasePtr
|
||||
CreateIndex(const CreateIndexInfo& create_index_info,
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
|
||||
IndexBasePtr
|
||||
CreateVectorIndex(const CreateIndexInfo& create_index_info,
|
||||
const storage::FileManagerContext& file_manager_context);
|
||||
|
@ -92,19 +86,6 @@ class IndexFactory {
|
|||
const storage::FileManagerContext& file_manager_context =
|
||||
storage::FileManagerContext());
|
||||
|
||||
IndexBasePtr
|
||||
CreateVectorIndex(const CreateIndexInfo& create_index_info,
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
|
||||
IndexBasePtr
|
||||
CreateScalarIndex(const CreateIndexInfo& create_index_info,
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"CreateScalarIndexV2 not implemented");
|
||||
}
|
||||
|
||||
// IndexBasePtr
|
||||
// CreateIndex(DataType dtype, const IndexType& index_type);
|
||||
private:
|
||||
|
@ -115,12 +96,6 @@ class IndexFactory {
|
|||
CreatePrimitiveScalarIndex(const IndexType& index_type,
|
||||
const storage::FileManagerContext& file_manager =
|
||||
storage::FileManagerContext());
|
||||
|
||||
template <typename T>
|
||||
ScalarIndexPtr<T>
|
||||
CreatePrimitiveScalarIndex(const IndexType& index_type,
|
||||
const storage::FileManagerContext& file_manager,
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
};
|
||||
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -65,11 +65,10 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
|
|||
|
||||
template <typename T>
|
||||
InvertedIndexTantivy<T>::InvertedIndexTantivy(
|
||||
const storage::FileManagerContext& ctx,
|
||||
std::shared_ptr<milvus_storage::Space> space)
|
||||
: space_(space), schema_(ctx.fieldDataMeta.field_schema) {
|
||||
mem_file_manager_ = std::make_shared<MemFileManager>(ctx, ctx.space_);
|
||||
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx, ctx.space_);
|
||||
const storage::FileManagerContext& ctx)
|
||||
: schema_(ctx.fieldDataMeta.field_schema) {
|
||||
mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
|
||||
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
|
||||
auto field =
|
||||
std::to_string(disk_file_manager_->GetFieldDataMeta().field_id);
|
||||
auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix();
|
||||
|
@ -139,12 +138,6 @@ InvertedIndexTantivy<T>::Upload(const Config& config) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
InvertedIndexTantivy<T>::UploadV2(const Config& config) {
|
||||
return Upload(config);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
InvertedIndexTantivy<T>::Build(const Config& config) {
|
||||
|
@ -156,28 +149,6 @@ InvertedIndexTantivy<T>::Build(const Config& config) {
|
|||
BuildWithFieldData(field_datas);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
InvertedIndexTantivy<T>::BuildV2(const Config& config) {
|
||||
auto field_name = mem_file_manager_->GetIndexMeta().field_name;
|
||||
auto reader = space_->ScanData();
|
||||
std::vector<FieldDataPtr> field_datas;
|
||||
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
|
||||
if (!rec.ok()) {
|
||||
PanicInfo(DataFormatBroken, "failed to read data");
|
||||
}
|
||||
auto data = rec.ValueUnsafe();
|
||||
auto total_num_rows = data->num_rows();
|
||||
auto col_data = data->GetColumnByName(field_name);
|
||||
// todo: support nullable index
|
||||
auto field_data = storage::CreateFieldData(
|
||||
DataType(GetDType<T>()), false, 0, total_num_rows);
|
||||
field_data->FillFieldData(col_data);
|
||||
field_datas.push_back(field_data);
|
||||
}
|
||||
BuildWithFieldData(field_datas);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
|
||||
|
@ -201,14 +172,6 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
|
|||
wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
InvertedIndexTantivy<T>::LoadV2(const Config& config) {
|
||||
disk_file_manager_->CacheIndexToDisk();
|
||||
auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix();
|
||||
wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
|
||||
}
|
||||
|
||||
inline void
|
||||
apply_hits(TargetBitmap& bitset, const RustArrayWrapper& w, bool v) {
|
||||
for (size_t j = 0; j < w.array_.len; j++) {
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#include "tantivy-binding.h"
|
||||
#include "tantivy-wrapper.h"
|
||||
#include "index/StringIndex.h"
|
||||
#include "storage/space.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
|
@ -34,13 +33,7 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
using DiskFileManagerPtr = std::shared_ptr<DiskFileManager>;
|
||||
|
||||
InvertedIndexTantivy() = default;
|
||||
|
||||
explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx)
|
||||
: InvertedIndexTantivy(ctx, nullptr) {
|
||||
}
|
||||
|
||||
explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx,
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx);
|
||||
|
||||
~InvertedIndexTantivy();
|
||||
|
||||
|
@ -56,9 +49,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
void
|
||||
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
LoadV2(const Config& config = {}) override;
|
||||
|
||||
/*
|
||||
* deprecated.
|
||||
* TODO: why not remove this?
|
||||
|
@ -78,9 +68,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
void
|
||||
Build(const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildV2(const Config& config = {}) override;
|
||||
|
||||
int64_t
|
||||
Count() override {
|
||||
return wrapper_->count();
|
||||
|
@ -102,9 +89,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
BinarySet
|
||||
Upload(const Config& config = {}) override;
|
||||
|
||||
BinarySet
|
||||
UploadV2(const Config& config = {}) override;
|
||||
|
||||
/*
|
||||
* deprecated, only used in small chunk index.
|
||||
*/
|
||||
|
@ -196,6 +180,5 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
*/
|
||||
MemFileManagerPtr mem_file_manager_;
|
||||
DiskFileManagerPtr disk_file_manager_;
|
||||
std::shared_ptr<milvus_storage::Space> space_;
|
||||
};
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -44,73 +44,6 @@ ScalarIndexSort<T>::ScalarIndexSort(
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline ScalarIndexSort<T>::ScalarIndexSort(
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space)
|
||||
: is_built_(false), data_(), space_(space) {
|
||||
if (file_manager_context.Valid()) {
|
||||
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
|
||||
file_manager_context, space);
|
||||
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void
|
||||
ScalarIndexSort<T>::BuildV2(const Config& config) {
|
||||
if (is_built_) {
|
||||
return;
|
||||
}
|
||||
auto field_name = file_manager_->GetIndexMeta().field_name;
|
||||
auto reader = space_->ScanData();
|
||||
std::vector<FieldDataPtr> field_datas;
|
||||
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
|
||||
if (!rec.ok()) {
|
||||
PanicInfo(DataFormatBroken, "failed to read data");
|
||||
}
|
||||
auto data = rec.ValueUnsafe();
|
||||
auto total_num_rows = data->num_rows();
|
||||
auto col_data = data->GetColumnByName(field_name);
|
||||
auto nullable =
|
||||
col_data->type()->id() == arrow::Type::NA ? true : false;
|
||||
// will support build scalar index when nullable in the future just skip it
|
||||
// now, not support to build index in nullable field_data
|
||||
// todo: support nullable index
|
||||
AssertInfo(!nullable,
|
||||
"not support to build index in nullable field_data");
|
||||
auto field_data = storage::CreateFieldData(
|
||||
DataType(GetDType<T>()), nullable, 0, total_num_rows);
|
||||
field_data->FillFieldData(col_data);
|
||||
field_datas.push_back(field_data);
|
||||
}
|
||||
int64_t total_num_rows = 0;
|
||||
for (const auto& data : field_datas) {
|
||||
total_num_rows += data->get_num_rows();
|
||||
}
|
||||
if (total_num_rows == 0) {
|
||||
PanicInfo(DataIsEmpty, "ScalarIndexSort cannot build null values!");
|
||||
}
|
||||
|
||||
data_.reserve(total_num_rows);
|
||||
int64_t offset = 0;
|
||||
for (const auto& data : field_datas) {
|
||||
auto slice_num = data->get_num_rows();
|
||||
for (size_t i = 0; i < slice_num; ++i) {
|
||||
auto value = reinterpret_cast<const T*>(data->RawValue(i));
|
||||
data_.emplace_back(IndexStructure(*value, offset));
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(data_.begin(), data_.end());
|
||||
idx_to_offsets_.resize(total_num_rows);
|
||||
for (size_t i = 0; i < total_num_rows; ++i) {
|
||||
idx_to_offsets_[data_[i].idx_] = i;
|
||||
}
|
||||
is_built_ = true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
ScalarIndexSort<T>::Build(const Config& config) {
|
||||
|
@ -215,21 +148,6 @@ ScalarIndexSort<T>::Upload(const Config& config) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
ScalarIndexSort<T>::UploadV2(const Config& config) {
|
||||
auto binary_set = Serialize(config);
|
||||
file_manager_->AddFileV2(binary_set);
|
||||
|
||||
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
|
||||
BinarySet ret;
|
||||
for (auto& file : remote_paths_to_size) {
|
||||
ret.Append(file.first, nullptr, file.second);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
ScalarIndexSort<T>::LoadWithoutAssemble(const BinarySet& index_binary,
|
||||
|
@ -277,47 +195,6 @@ ScalarIndexSort<T>::Load(milvus::tracer::TraceContext ctx,
|
|||
LoadWithoutAssemble(binary_set, config);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
ScalarIndexSort<T>::LoadV2(const Config& config) {
|
||||
auto blobs = space_->StatisticsBlobs();
|
||||
std::vector<std::string> index_files;
|
||||
auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
|
||||
for (auto& b : blobs) {
|
||||
if (b.name.rfind(prefix, 0) == 0) {
|
||||
index_files.push_back(b.name);
|
||||
}
|
||||
}
|
||||
std::map<std::string, FieldDataPtr> index_datas{};
|
||||
for (auto& file_name : index_files) {
|
||||
auto res = space_->GetBlobByteSize(file_name);
|
||||
if (!res.ok()) {
|
||||
PanicInfo(S3Error, "unable to read index blob");
|
||||
}
|
||||
auto index_blob_data =
|
||||
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
|
||||
auto status = space_->ReadBlob(file_name, index_blob_data.get());
|
||||
if (!status.ok()) {
|
||||
PanicInfo(S3Error, "unable to read index blob");
|
||||
}
|
||||
auto raw_index_blob =
|
||||
storage::DeserializeFileData(index_blob_data, res.value());
|
||||
auto key = file_name.substr(file_name.find_last_of('/') + 1);
|
||||
index_datas[key] = raw_index_blob->GetFieldData();
|
||||
}
|
||||
AssembleIndexDatas(index_datas);
|
||||
BinarySet binary_set;
|
||||
for (auto& [key, data] : index_datas) {
|
||||
auto size = data->Size();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
auto buf = std::shared_ptr<uint8_t[]>(
|
||||
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||
binary_set.Append(key, buf, size);
|
||||
}
|
||||
|
||||
LoadWithoutAssemble(binary_set, config);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const TargetBitmap
|
||||
ScalarIndexSort<T>::In(const size_t n, const T* values) {
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
#include "index/IndexStructure.h"
|
||||
#include "index/ScalarIndex.h"
|
||||
#include "storage/MemFileManagerImpl.h"
|
||||
#include "storage/space.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
|
@ -37,10 +36,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
const storage::FileManagerContext& file_manager_context =
|
||||
storage::FileManagerContext());
|
||||
|
||||
explicit ScalarIndexSort(
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
|
||||
BinarySet
|
||||
Serialize(const Config& config) override;
|
||||
|
||||
|
@ -50,9 +45,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
void
|
||||
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
LoadV2(const Config& config = {}) override;
|
||||
|
||||
int64_t
|
||||
Count() override {
|
||||
return data_.size();
|
||||
|
@ -69,9 +61,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
void
|
||||
Build(const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildV2(const Config& config = {}) override;
|
||||
|
||||
const TargetBitmap
|
||||
In(size_t n, const T* values) override;
|
||||
|
||||
|
@ -97,8 +86,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
|
||||
BinarySet
|
||||
Upload(const Config& config = {}) override;
|
||||
BinarySet
|
||||
UploadV2(const Config& config = {}) override;
|
||||
|
||||
const bool
|
||||
HasRawData() const override {
|
||||
|
@ -133,7 +120,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
std::vector<int32_t> idx_to_offsets_; // used to retrieve.
|
||||
std::vector<IndexStructure<T>> data_;
|
||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||
std::shared_ptr<milvus_storage::Space> space_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
|
@ -148,11 +134,4 @@ CreateScalarIndexSort(const storage::FileManagerContext& file_manager_context =
|
|||
storage::FileManagerContext()) {
|
||||
return std::make_unique<ScalarIndexSort<T>>(file_manager_context);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline ScalarIndexSortPtr<T>
|
||||
CreateScalarIndexSort(const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
return std::make_unique<ScalarIndexSort<T>>(file_manager_context, space);
|
||||
}
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -36,7 +36,6 @@
|
|||
#include "index/Utils.h"
|
||||
#include "index/Index.h"
|
||||
#include "storage/Util.h"
|
||||
#include "storage/space.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
|
@ -48,16 +47,6 @@ StringIndexMarisa::StringIndexMarisa(
|
|||
}
|
||||
}
|
||||
|
||||
StringIndexMarisa::StringIndexMarisa(
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space)
|
||||
: space_(space) {
|
||||
if (file_manager_context.Valid()) {
|
||||
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
|
||||
file_manager_context, space_);
|
||||
}
|
||||
}
|
||||
|
||||
int64_t
|
||||
StringIndexMarisa::Size() {
|
||||
return trie_.size();
|
||||
|
@ -68,65 +57,6 @@ valid_str_id(size_t str_id) {
|
|||
return str_id >= 0 && str_id != MARISA_INVALID_KEY_ID;
|
||||
}
|
||||
|
||||
void
|
||||
StringIndexMarisa::BuildV2(const Config& config) {
|
||||
if (built_) {
|
||||
throw std::runtime_error("index has been built");
|
||||
}
|
||||
auto field_name = file_manager_->GetIndexMeta().field_name;
|
||||
auto reader = space_->ScanData();
|
||||
std::vector<FieldDataPtr> field_datas;
|
||||
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
|
||||
if (!rec.ok()) {
|
||||
PanicInfo(DataFormatBroken, "failed to read data");
|
||||
}
|
||||
auto data = rec.ValueUnsafe();
|
||||
auto total_num_rows = data->num_rows();
|
||||
auto col_data = data->GetColumnByName(field_name);
|
||||
auto nullable =
|
||||
col_data->type()->id() == arrow::Type::NA ? true : false;
|
||||
// will support build scalar index when nullable in the future just skip it
|
||||
// now, not support to build index in nullable field_data
|
||||
// todo: support nullable index
|
||||
AssertInfo(!nullable,
|
||||
"not support to build index in nullable field_data");
|
||||
auto field_data = storage::CreateFieldData(
|
||||
DataType::STRING, nullable, 0, total_num_rows);
|
||||
field_data->FillFieldData(col_data);
|
||||
field_datas.push_back(field_data);
|
||||
}
|
||||
int64_t total_num_rows = 0;
|
||||
|
||||
// fill key set.
|
||||
marisa::Keyset keyset;
|
||||
for (auto data : field_datas) {
|
||||
auto slice_num = data->get_num_rows();
|
||||
for (size_t i = 0; i < slice_num; ++i) {
|
||||
keyset.push_back(
|
||||
(*static_cast<const std::string*>(data->RawValue(i))).c_str());
|
||||
}
|
||||
total_num_rows += slice_num;
|
||||
}
|
||||
trie_.build(keyset);
|
||||
|
||||
// fill str_ids_
|
||||
str_ids_.resize(total_num_rows);
|
||||
int64_t offset = 0;
|
||||
for (auto data : field_datas) {
|
||||
auto slice_num = data->get_num_rows();
|
||||
for (size_t i = 0; i < slice_num; ++i) {
|
||||
auto str_id =
|
||||
lookup(*static_cast<const std::string*>(data->RawValue(i)));
|
||||
AssertInfo(valid_str_id(str_id), "invalid marisa key");
|
||||
str_ids_[offset++] = str_id;
|
||||
}
|
||||
}
|
||||
|
||||
// fill str_ids_to_offsets_
|
||||
fill_offsets();
|
||||
|
||||
built_ = true;
|
||||
}
|
||||
void
|
||||
StringIndexMarisa::Build(const Config& config) {
|
||||
if (built_) {
|
||||
|
@ -245,20 +175,6 @@ StringIndexMarisa::Upload(const Config& config) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
BinarySet
|
||||
StringIndexMarisa::UploadV2(const Config& config) {
|
||||
auto binary_set = Serialize(config);
|
||||
file_manager_->AddFileV2(binary_set);
|
||||
|
||||
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
|
||||
BinarySet ret;
|
||||
for (auto& file : remote_paths_to_size) {
|
||||
ret.Append(file.first, nullptr, file.second);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set,
|
||||
const Config& config) {
|
||||
|
@ -322,46 +238,6 @@ StringIndexMarisa::Load(milvus::tracer::TraceContext ctx,
|
|||
LoadWithoutAssemble(binary_set, config);
|
||||
}
|
||||
|
||||
void
|
||||
StringIndexMarisa::LoadV2(const Config& config) {
|
||||
auto blobs = space_->StatisticsBlobs();
|
||||
std::vector<std::string> index_files;
|
||||
auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
|
||||
for (auto& b : blobs) {
|
||||
if (b.name.rfind(prefix, 0) == 0) {
|
||||
index_files.push_back(b.name);
|
||||
}
|
||||
}
|
||||
std::map<std::string, FieldDataPtr> index_datas{};
|
||||
for (auto& file_name : index_files) {
|
||||
auto res = space_->GetBlobByteSize(file_name);
|
||||
if (!res.ok()) {
|
||||
PanicInfo(DataFormatBroken, "unable to read index blob");
|
||||
}
|
||||
auto index_blob_data =
|
||||
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
|
||||
auto status = space_->ReadBlob(file_name, index_blob_data.get());
|
||||
if (!status.ok()) {
|
||||
PanicInfo(DataFormatBroken, "unable to read index blob");
|
||||
}
|
||||
auto raw_index_blob =
|
||||
storage::DeserializeFileData(index_blob_data, res.value());
|
||||
index_datas[file_name] = raw_index_blob->GetFieldData();
|
||||
}
|
||||
AssembleIndexDatas(index_datas);
|
||||
BinarySet binary_set;
|
||||
for (auto& [key, data] : index_datas) {
|
||||
auto size = data->Size();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
auto buf = std::shared_ptr<uint8_t[]>(
|
||||
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||
auto file_name = key.substr(key.find_last_of('/') + 1);
|
||||
binary_set.Append(file_name, buf, size);
|
||||
}
|
||||
|
||||
LoadWithoutAssemble(binary_set, config);
|
||||
}
|
||||
|
||||
const TargetBitmap
|
||||
StringIndexMarisa::In(size_t n, const std::string* values) {
|
||||
TargetBitmap bitset(str_ids_.size());
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#include <map>
|
||||
#include <memory>
|
||||
#include "storage/MemFileManagerImpl.h"
|
||||
#include "storage/space.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
|
@ -33,10 +32,6 @@ class StringIndexMarisa : public StringIndex {
|
|||
const storage::FileManagerContext& file_manager_context =
|
||||
storage::FileManagerContext());
|
||||
|
||||
explicit StringIndexMarisa(
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
|
||||
int64_t
|
||||
Size() override;
|
||||
|
||||
|
@ -49,9 +44,6 @@ class StringIndexMarisa : public StringIndex {
|
|||
void
|
||||
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
LoadV2(const Config& config = {}) override;
|
||||
|
||||
int64_t
|
||||
Count() override {
|
||||
return str_ids_.size();
|
||||
|
@ -71,9 +63,6 @@ class StringIndexMarisa : public StringIndex {
|
|||
void
|
||||
BuildWithFieldData(const std::vector<FieldDataPtr>& field_datas) override;
|
||||
|
||||
void
|
||||
BuildV2(const Config& Config = {}) override;
|
||||
|
||||
const TargetBitmap
|
||||
In(size_t n, const std::string* values) override;
|
||||
|
||||
|
@ -98,9 +87,6 @@ class StringIndexMarisa : public StringIndex {
|
|||
BinarySet
|
||||
Upload(const Config& config = {}) override;
|
||||
|
||||
BinarySet
|
||||
UploadV2(const Config& config = {});
|
||||
|
||||
const bool
|
||||
HasRawData() const override {
|
||||
return true;
|
||||
|
@ -131,7 +117,6 @@ class StringIndexMarisa : public StringIndex {
|
|||
std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
|
||||
bool built_ = false;
|
||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||
std::shared_ptr<milvus_storage::Space> space_;
|
||||
};
|
||||
|
||||
using StringIndexMarisaPtr = std::unique_ptr<StringIndexMarisa>;
|
||||
|
@ -142,10 +127,4 @@ CreateStringIndexMarisa(
|
|||
storage::FileManagerContext()) {
|
||||
return std::make_unique<StringIndexMarisa>(file_manager_context);
|
||||
}
|
||||
|
||||
inline StringIndexPtr
|
||||
CreateStringIndexMarisa(const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
return std::make_unique<StringIndexMarisa>(file_manager_context, space);
|
||||
}
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -73,45 +73,6 @@ VectorDiskAnnIndex<T>::VectorDiskAnnIndex(
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
VectorDiskAnnIndex<T>::VectorDiskAnnIndex(
|
||||
const IndexType& index_type,
|
||||
const MetricType& metric_type,
|
||||
const IndexVersion& version,
|
||||
std::shared_ptr<milvus_storage::Space> space,
|
||||
const storage::FileManagerContext& file_manager_context)
|
||||
: space_(space), VectorIndex(index_type, metric_type) {
|
||||
CheckMetricTypeSupport<T>(metric_type);
|
||||
file_manager_ = std::make_shared<storage::DiskFileManagerImpl>(
|
||||
file_manager_context, file_manager_context.space_);
|
||||
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
|
||||
auto local_chunk_manager =
|
||||
storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
||||
|
||||
// As we have guarded dup-load in QueryNode,
|
||||
// this assertion failed only if the Milvus rebooted in the same pod,
|
||||
// need to remove these files then re-load the segment
|
||||
if (local_chunk_manager->Exist(local_index_path_prefix)) {
|
||||
local_chunk_manager->RemoveDir(local_index_path_prefix);
|
||||
}
|
||||
CheckCompatible(version);
|
||||
local_chunk_manager->CreateDir(local_index_path_prefix);
|
||||
auto diskann_index_pack =
|
||||
knowhere::Pack(std::shared_ptr<knowhere::FileManager>(file_manager_));
|
||||
auto get_index_obj = knowhere::IndexFactory::Instance().Create<T>(
|
||||
GetIndexType(), version, diskann_index_pack);
|
||||
if (get_index_obj.has_value()) {
|
||||
index_ = get_index_obj.value();
|
||||
} else {
|
||||
auto err = get_index_obj.error();
|
||||
if (err == knowhere::Status::invalid_index_error) {
|
||||
PanicInfo(ErrorCode::Unsupported, get_index_obj.what());
|
||||
}
|
||||
PanicInfo(ErrorCode::KnowhereError, get_index_obj.what());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */,
|
||||
|
@ -153,21 +114,6 @@ VectorDiskAnnIndex<T>::Load(milvus::tracer::TraceContext ctx,
|
|||
SetDim(index_.Dim());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::LoadV2(const Config& config) {
|
||||
knowhere::Json load_config = update_load_json(config);
|
||||
|
||||
file_manager_->CacheIndexToDisk();
|
||||
|
||||
auto stat = index_.Deserialize(knowhere::BinarySet(), load_config);
|
||||
if (stat != knowhere::Status::success)
|
||||
PanicInfo(ErrorCode::UnexpectedError,
|
||||
"failed to Deserialize index, " + KnowhereStatusString(stat));
|
||||
|
||||
SetDim(index_.Dim());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
VectorDiskAnnIndex<T>::Upload(const Config& config) {
|
||||
|
@ -185,53 +131,6 @@ VectorDiskAnnIndex<T>::Upload(const Config& config) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
VectorDiskAnnIndex<T>::UploadV2(const Config& config) {
|
||||
return Upload(config);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::BuildV2(const Config& config) {
|
||||
knowhere::Json build_config;
|
||||
build_config.update(config);
|
||||
|
||||
auto local_data_path = file_manager_->CacheRawDataToDisk<T>(space_);
|
||||
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
|
||||
|
||||
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
||||
build_config[DISK_ANN_PREFIX_PATH] = local_index_path_prefix;
|
||||
|
||||
if (GetIndexType() == knowhere::IndexEnum::INDEX_DISKANN) {
|
||||
auto num_threads = GetValueFromConfig<std::string>(
|
||||
build_config, DISK_ANN_BUILD_THREAD_NUM);
|
||||
AssertInfo(
|
||||
num_threads.has_value(),
|
||||
"param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
|
||||
build_config[DISK_ANN_THREADS_NUM] =
|
||||
std::atoi(num_threads.value().c_str());
|
||||
}
|
||||
|
||||
auto opt_fields = GetValueFromConfig<OptFieldT>(config, VEC_OPT_FIELDS);
|
||||
if (opt_fields.has_value() && index_.IsAdditionalScalarSupported()) {
|
||||
build_config[VEC_OPT_FIELDS_PATH] =
|
||||
file_manager_->CacheOptFieldToDisk(opt_fields.value());
|
||||
// `partition_key_isolation` is already in the config, so it falls through
|
||||
// into the index Build call directly
|
||||
}
|
||||
|
||||
build_config.erase("insert_files");
|
||||
build_config.erase(VEC_OPT_FIELDS);
|
||||
index_.Build({}, build_config);
|
||||
|
||||
auto local_chunk_manager =
|
||||
storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
auto segment_id = file_manager_->GetFieldDataMeta().segment_id;
|
||||
local_chunk_manager->RemoveDir(
|
||||
storage::GetSegmentRawDataPathPrefix(local_chunk_manager, segment_id));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::Build(const Config& config) {
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
|
||||
#include "index/VectorIndex.h"
|
||||
#include "storage/DiskFileManagerImpl.h"
|
||||
#include "storage/space.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
|
@ -35,14 +34,6 @@ class VectorDiskAnnIndex : public VectorIndex {
|
|||
const storage::FileManagerContext& file_manager_context =
|
||||
storage::FileManagerContext());
|
||||
|
||||
explicit VectorDiskAnnIndex(
|
||||
const IndexType& index_type,
|
||||
const MetricType& metric_type,
|
||||
const IndexVersion& version,
|
||||
std::shared_ptr<milvus_storage::Space> space,
|
||||
const storage::FileManagerContext& file_manager_context =
|
||||
storage::FileManagerContext());
|
||||
|
||||
BinarySet
|
||||
Serialize(const Config& config) override { // deprecated
|
||||
BinarySet binary_set;
|
||||
|
@ -58,9 +49,6 @@ class VectorDiskAnnIndex : public VectorIndex {
|
|||
BinarySet
|
||||
Upload(const Config& config = {}) override;
|
||||
|
||||
BinarySet
|
||||
UploadV2(const Config& config = {}) override;
|
||||
|
||||
int64_t
|
||||
Count() override {
|
||||
return index_.Count();
|
||||
|
@ -73,9 +61,6 @@ class VectorDiskAnnIndex : public VectorIndex {
|
|||
void
|
||||
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
|
||||
|
||||
void
|
||||
LoadV2(const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildWithDataset(const DatasetPtr& dataset,
|
||||
const Config& config = {}) override;
|
||||
|
@ -83,9 +68,6 @@ class VectorDiskAnnIndex : public VectorIndex {
|
|||
void
|
||||
Build(const Config& config = {}) override;
|
||||
|
||||
void
|
||||
BuildV2(const Config& config = {}) override;
|
||||
|
||||
void
|
||||
Query(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
|
@ -119,7 +101,6 @@ class VectorDiskAnnIndex : public VectorIndex {
|
|||
knowhere::Index<knowhere::IndexNode> index_;
|
||||
std::shared_ptr<storage::DiskFileManagerImpl> file_manager_;
|
||||
uint32_t search_beamwidth_ = 8;
|
||||
std::shared_ptr<milvus_storage::Space> space_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -48,7 +48,6 @@
|
|||
#include "storage/DataCodec.h"
|
||||
#include "storage/MemFileManagerImpl.h"
|
||||
#include "storage/ThreadPools.h"
|
||||
#include "storage/space.h"
|
||||
#include "storage/Util.h"
|
||||
#include "monitor/prometheus_client.h"
|
||||
|
||||
|
@ -83,69 +82,6 @@ VectorMemIndex<T>::VectorMemIndex(
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
VectorMemIndex<T>::VectorMemIndex(
|
||||
const CreateIndexInfo& create_index_info,
|
||||
const storage::FileManagerContext& file_manager_context,
|
||||
std::shared_ptr<milvus_storage::Space> space)
|
||||
: VectorIndex(create_index_info.index_type, create_index_info.metric_type),
|
||||
space_(space),
|
||||
create_index_info_(create_index_info) {
|
||||
CheckMetricTypeSupport<T>(create_index_info.metric_type);
|
||||
AssertInfo(!is_unsupported(create_index_info.index_type,
|
||||
create_index_info.metric_type),
|
||||
create_index_info.index_type +
|
||||
" doesn't support metric: " + create_index_info.metric_type);
|
||||
if (file_manager_context.Valid()) {
|
||||
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
|
||||
file_manager_context, file_manager_context.space_);
|
||||
AssertInfo(file_manager_ != nullptr, "create file manager failed!");
|
||||
}
|
||||
auto version = create_index_info.index_engine_version;
|
||||
CheckCompatible(version);
|
||||
auto get_index_obj =
|
||||
knowhere::IndexFactory::Instance().Create<T>(GetIndexType(), version);
|
||||
if (get_index_obj.has_value()) {
|
||||
index_ = get_index_obj.value();
|
||||
} else {
|
||||
auto err = get_index_obj.error();
|
||||
if (err == knowhere::Status::invalid_index_error) {
|
||||
PanicInfo(ErrorCode::Unsupported, get_index_obj.what());
|
||||
}
|
||||
PanicInfo(ErrorCode::KnowhereError, get_index_obj.what());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
VectorMemIndex<T>::UploadV2(const Config& config) {
|
||||
auto binary_set = Serialize(config);
|
||||
file_manager_->AddFileV2(binary_set);
|
||||
|
||||
auto store_version = file_manager_->space()->GetCurrentVersion();
|
||||
std::shared_ptr<uint8_t[]> store_version_data(
|
||||
new uint8_t[sizeof(store_version)]);
|
||||
store_version_data[0] = store_version & 0x00000000000000FF;
|
||||
store_version = store_version >> 8;
|
||||
store_version_data[1] = store_version & 0x00000000000000FF;
|
||||
store_version = store_version >> 8;
|
||||
store_version_data[2] = store_version & 0x00000000000000FF;
|
||||
store_version = store_version >> 8;
|
||||
store_version_data[3] = store_version & 0x00000000000000FF;
|
||||
store_version = store_version >> 8;
|
||||
store_version_data[4] = store_version & 0x00000000000000FF;
|
||||
store_version = store_version >> 8;
|
||||
store_version_data[5] = store_version & 0x00000000000000FF;
|
||||
store_version = store_version >> 8;
|
||||
store_version_data[6] = store_version & 0x00000000000000FF;
|
||||
store_version = store_version >> 8;
|
||||
store_version_data[7] = store_version & 0x00000000000000FF;
|
||||
BinarySet ret;
|
||||
ret.Append("index_store_version", store_version_data, 8);
|
||||
|
||||
return ret;
|
||||
}
template <typename T>
knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
VectorMemIndex<T>::VectorIterators(const milvus::DatasetPtr dataset,

@ -202,105 +138,6 @@ VectorMemIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
LoadWithoutAssemble(binary_set, config);
}

template <typename T>
void
VectorMemIndex<T>::LoadV2(const Config& config) {
if (config.contains(kMmapFilepath)) {
return LoadFromFileV2(config);
}

auto blobs = space_->StatisticsBlobs();
std::unordered_set<std::string> pending_index_files;
auto index_prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
for (auto& blob : blobs) {
if (blob.name.rfind(index_prefix, 0) == 0) {
pending_index_files.insert(blob.name);
}
}

auto slice_meta_file = index_prefix + "/" + INDEX_FILE_SLICE_META;
auto res = space_->GetBlobByteSize(std::string(slice_meta_file));
std::map<std::string, FieldDataPtr> index_datas{};

if (!res.ok() && !res.status().IsFileNotFound()) {
PanicInfo(DataFormatBroken, "failed to read blob");
}
bool slice_meta_exist = res.ok();

auto read_blob = [&](const std::string& file_name)
-> std::unique_ptr<storage::DataCodec> {
auto res = space_->GetBlobByteSize(file_name);
if (!res.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
auto index_blob_data =
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
auto status = space_->ReadBlob(file_name, index_blob_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
return storage::DeserializeFileData(index_blob_data, res.value());
};
if (slice_meta_exist) {
pending_index_files.erase(slice_meta_file);
auto slice_meta_sz = res.value();
auto slice_meta_data =
std::shared_ptr<uint8_t[]>(new uint8_t[slice_meta_sz]);
auto status = space_->ReadBlob(slice_meta_file, slice_meta_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read slice meta");
}
auto raw_slice_meta =
storage::DeserializeFileData(slice_meta_data, slice_meta_sz);
Config meta_data = Config::parse(std::string(
static_cast<const char*>(raw_slice_meta->GetFieldData()->Data()),
raw_slice_meta->GetFieldData()->Size()));
for (auto& item : meta_data[META]) {
std::string prefix = item[NAME];
int slice_num = item[SLICE_NUM];
auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
// todo: support nullable index
auto new_field_data = milvus::storage::CreateFieldData(
DataType::INT8, false, 1, total_len);
for (auto i = 0; i < slice_num; ++i) {
std::string file_name =
index_prefix + "/" + GenSlicedFileName(prefix, i);
auto raw_index_blob = read_blob(file_name);
new_field_data->FillFieldData(
raw_index_blob->GetFieldData()->Data(),
raw_index_blob->GetFieldData()->Size());
pending_index_files.erase(file_name);
}
AssertInfo(
new_field_data->IsFull(),
"index len is inconsistent after disassemble and assemble");
index_datas[prefix] = new_field_data;
}
}

if (!pending_index_files.empty()) {
for (auto& file_name : pending_index_files) {
auto raw_index_blob = read_blob(file_name);
index_datas.insert({file_name, raw_index_blob->GetFieldData()});
}
}
LOG_INFO("construct binary set...");
BinarySet binary_set;
for (auto& [key, data] : index_datas) {
LOG_INFO("add index data to binary set: {}", key);
auto size = data->Size();
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
auto buf = std::shared_ptr<uint8_t[]>(
(uint8_t*)const_cast<void*>(data->Data()), deleter);
auto file_name = key.substr(key.find_last_of('/') + 1);
binary_set.Append(file_name, buf, size);
}

LOG_INFO("load index into Knowhere...");
LoadWithoutAssemble(binary_set, config);
LOG_INFO("load vector index done");
}
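An aside on the prefix filter used in LoadV2 (and again in LoadFromFileV2 further down): blob.name.rfind(index_prefix, 0) == 0 is the usual pre-C++20 idiom for "string starts with prefix", because rfind anchored at position 0 can only match at the very beginning. A self-contained sketch with made-up blob names, illustration only:

#include <cassert>
#include <string>

// rfind(prefix, 0) searches backwards from index 0, so it returns 0 exactly
// when the string begins with the prefix (C++20 adds starts_with for this).
static bool
StartsWith(const std::string& s, const std::string& prefix) {
    return s.rfind(prefix, 0) == 0;
}

int
main() {
    assert(StartsWith("index_files/100/1/part_0", "index_files/100"));
    assert(!StartsWith("stats_log/100/1", "index_files/100"));
    return 0;
}
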
template <typename T>
void
VectorMemIndex<T>::Load(milvus::tracer::TraceContext ctx,

@ -442,58 +279,6 @@ VectorMemIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
SetDim(index_.Dim());
}

template <typename T>
void
VectorMemIndex<T>::BuildV2(const Config& config) {
auto field_name = create_index_info_.field_name;
auto field_type = create_index_info_.field_type;
auto dim = create_index_info_.dim;
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec : *reader) {
if (!rec.ok()) {
PanicInfo(IndexBuildError,
"failed to read data: {}",
rec.status().ToString());
}
auto data = rec.ValueUnsafe();
if (data == nullptr) {
break;
}
auto total_num_rows = data->num_rows();
auto col_data = data->GetColumnByName(field_name);
// todo: support nullable index
auto field_data =
storage::CreateFieldData(field_type, false, dim, total_num_rows);
field_data->FillFieldData(col_data);
field_datas.push_back(field_data);
}
int64_t total_size = 0;
int64_t total_num_rows = 0;
for (const auto& data : field_datas) {
total_size += data->Size();
total_num_rows += data->get_num_rows();
AssertInfo(dim == 0 || dim == data->get_dim(),
"inconsistent dim value between field datas!");
}

auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[total_size]);
int64_t offset = 0;
for (auto data : field_datas) {
std::memcpy(buf.get() + offset, data->Data(), data->Size());
offset += data->Size();
data.reset();
}
field_datas.clear();

Config build_config;
build_config.update(config);
build_config.erase("insert_files");

auto dataset = GenDataset(total_num_rows, dim, buf.get());
BuildWithDataset(dataset, build_config);
}
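BuildV2 above copies every scanned field-data chunk into one contiguous buffer before handing it to BuildWithDataset. Stripped of the Milvus types, that step is the usual offset-tracking memcpy loop; a minimal sketch with plain byte vectors (names and types here are illustrative, not from the codebase):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

// Concatenate several chunks into one contiguous allocation, advancing a
// write offset after each copy, as the removed BuildV2 does with FieldData.
inline std::shared_ptr<uint8_t[]>
ConcatChunks(const std::vector<std::vector<uint8_t>>& chunks,
             size_t& total_size) {
    total_size = 0;
    for (const auto& chunk : chunks) {
        total_size += chunk.size();
    }
    std::shared_ptr<uint8_t[]> buf(new uint8_t[total_size]);
    size_t offset = 0;
    for (const auto& chunk : chunks) {
        std::memcpy(buf.get() + offset, chunk.data(), chunk.size());
        offset += chunk.size();
    }
    return buf;
}
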
template <typename T>
void
VectorMemIndex<T>::Build(const Config& config) {

@ -852,109 +637,6 @@ void VectorMemIndex<T>::LoadFromFile(const Config& config) {
.count());
}

template <typename T>
void
VectorMemIndex<T>::LoadFromFileV2(const Config& config) {
auto filepath = GetValueFromConfig<std::string>(config, kMmapFilepath);
AssertInfo(filepath.has_value(), "mmap filepath is empty when load index");

std::filesystem::create_directories(
std::filesystem::path(filepath.value()).parent_path());

auto file = File::Open(filepath.value(), O_CREAT | O_TRUNC | O_RDWR);

auto blobs = space_->StatisticsBlobs();
std::unordered_set<std::string> pending_index_files;
auto index_prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
for (auto& blob : blobs) {
if (blob.name.rfind(index_prefix, 0) == 0) {
pending_index_files.insert(blob.name);
}
}

auto slice_meta_file = index_prefix + "/" + INDEX_FILE_SLICE_META;
auto res = space_->GetBlobByteSize(std::string(slice_meta_file));

if (!res.ok() && !res.status().IsFileNotFound()) {
PanicInfo(DataFormatBroken, "failed to read blob");
}
bool slice_meta_exist = res.ok();

auto read_blob = [&](const std::string& file_name)
-> std::unique_ptr<storage::DataCodec> {
auto res = space_->GetBlobByteSize(file_name);
if (!res.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
auto index_blob_data =
std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
auto status = space_->ReadBlob(file_name, index_blob_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read index blob");
}
return storage::DeserializeFileData(index_blob_data, res.value());
};
if (slice_meta_exist) {
pending_index_files.erase(slice_meta_file);
auto slice_meta_sz = res.value();
auto slice_meta_data =
std::shared_ptr<uint8_t[]>(new uint8_t[slice_meta_sz]);
auto status = space_->ReadBlob(slice_meta_file, slice_meta_data.get());
if (!status.ok()) {
PanicInfo(DataFormatBroken, "unable to read slice meta");
}
auto raw_slice_meta =
storage::DeserializeFileData(slice_meta_data, slice_meta_sz);
Config meta_data = Config::parse(std::string(
static_cast<const char*>(raw_slice_meta->GetFieldData()->Data()),
raw_slice_meta->GetFieldData()->Size()));
for (auto& item : meta_data[META]) {
std::string prefix = item[NAME];
int slice_num = item[SLICE_NUM];
auto total_len = static_cast<size_t>(item[TOTAL_LEN]);

for (auto i = 0; i < slice_num; ++i) {
std::string file_name =
index_prefix + "/" + GenSlicedFileName(prefix, i);
auto raw_index_blob = read_blob(file_name);
auto written =
file.Write(raw_index_blob->GetFieldData()->Data(),
raw_index_blob->GetFieldData()->Size());
pending_index_files.erase(file_name);
}
}
}

if (!pending_index_files.empty()) {
for (auto& file_name : pending_index_files) {
auto raw_index_blob = read_blob(file_name);
file.Write(raw_index_blob->GetFieldData()->Data(),
raw_index_blob->GetFieldData()->Size());
}
}
file.Close();

LOG_INFO("load index into Knowhere...");
auto conf = config;
conf.erase(kMmapFilepath);
conf[kEnableMmap] = true;
auto stat = index_.DeserializeFromFile(filepath.value(), conf);
if (stat != knowhere::Status::success) {
PanicInfo(DataFormatBroken,
"failed to Deserialize index: {}",
KnowhereStatusString(stat));
}

auto dim = index_.Dim();
this->SetDim(index_.Dim());

auto ok = unlink(filepath->data());
AssertInfo(ok == 0,
"failed to unlink mmap index file {}: {}",
filepath.value(),
strerror(errno));
LOG_INFO("load vector index done");
}
template class VectorMemIndex<float>;
template class VectorMemIndex<bin1>;
template class VectorMemIndex<float16>;

@ -25,7 +25,6 @@
#include "knowhere/index/index_factory.h"
#include "index/VectorIndex.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/space.h"
#include "index/IndexInfo.h"

namespace milvus::index {

@ -40,9 +39,6 @@ class VectorMemIndex : public VectorIndex {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());

explicit VectorMemIndex(const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager,
std::shared_ptr<milvus_storage::Space> space);
BinarySet
Serialize(const Config& config) override;

@ -52,9 +48,6 @@ class VectorMemIndex : public VectorIndex {
void
Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;

void
LoadV2(const Config& config = {}) override;

void
BuildWithDataset(const DatasetPtr& dataset,
const Config& config = {}) override;

@ -62,9 +55,6 @@ class VectorMemIndex : public VectorIndex {
void
Build(const Config& config = {}) override;

void
BuildV2(const Config& config = {}) override;

void
AddWithDataset(const DatasetPtr& dataset, const Config& config) override;

@ -91,9 +81,6 @@ class VectorMemIndex : public VectorIndex {
BinarySet
Upload(const Config& config = {}) override;

BinarySet
UploadV2(const Config& config = {}) override;

knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
VectorIterators(const DatasetPtr dataset,
const knowhere::Json& json,

@ -107,14 +94,10 @@ class VectorMemIndex : public VectorIndex {
void
LoadFromFile(const Config& config);

void
LoadFromFileV2(const Config& config);

protected:
Config config_;
knowhere::Index<knowhere::IndexNode> index_;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
std::shared_ptr<milvus_storage::Space> space_;

CreateIndexInfo create_index_info_;
};

@ -26,9 +26,6 @@ class IndexCreatorBase {
virtual void
Build() = 0;

virtual void
BuildV2() = 0;

virtual milvus::BinarySet
Serialize() = 0;

@ -38,9 +35,6 @@ class IndexCreatorBase {

virtual BinarySet
Upload() = 0;

virtual BinarySet
UploadV2() = 0;
};

using IndexCreatorBasePtr = std::unique_ptr<IndexCreatorBase>;

@ -23,7 +23,6 @@
#include "indexbuilder/type_c.h"
#include "storage/Types.h"
#include "storage/FileManager.h"
#include "storage/space.h"

namespace milvus::indexbuilder {

@ -74,41 +73,6 @@ class IndexFactory {
fmt::format("invalid type is {}", invalid_dtype_msg));
}
}

IndexCreatorBasePtr
CreateIndex(DataType type,
const std::string& field_name,
const int64_t dim,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
auto invalid_dtype_msg =
std::string("invalid data type: ") + std::to_string(int(type));

switch (type) {
case DataType::BOOL:
case DataType::INT8:
case DataType::INT16:
case DataType::INT32:
case DataType::INT64:
case DataType::FLOAT:
case DataType::DOUBLE:
case DataType::VARCHAR:
case DataType::STRING:
return CreateScalarIndex(
type, config, file_manager_context, space);

case DataType::VECTOR_FLOAT:
case DataType::VECTOR_BINARY:
case DataType::VECTOR_FLOAT16:
case DataType::VECTOR_BFLOAT16:
case DataType::VECTOR_SPARSE_FLOAT:
return std::make_unique<VecIndexCreator>(
type, field_name, dim, config, file_manager_context, space);
default:
PanicInfo(ErrorCode::DataTypeInvalid, invalid_dtype_msg);
}
}
};

} // namespace milvus::indexbuilder

@ -36,18 +36,6 @@ ScalarIndexCreator::ScalarIndexCreator(
index_info, file_manager_context);
}

ScalarIndexCreator::ScalarIndexCreator(
DataType dtype,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: config_(config), dtype_(dtype) {
milvus::index::CreateIndexInfo index_info;
index_info.field_type = dtype_;
index_info.index_type = index_type();
index_ = index::IndexFactory::GetInstance().CreateIndex(
index_info, file_manager_context, std::move(space));
}
void
ScalarIndexCreator::Build(const milvus::DatasetPtr& dataset) {
auto size = dataset->GetRows();

@ -60,11 +48,6 @@ ScalarIndexCreator::Build() {
index_->Build(config_);
}

void
ScalarIndexCreator::BuildV2() {
index_->BuildV2(config_);
}

milvus::BinarySet
ScalarIndexCreator::Serialize() {
return index_->Serialize(config_);

@ -84,10 +67,4 @@ BinarySet
ScalarIndexCreator::Upload() {
return index_->Upload();
}

BinarySet
ScalarIndexCreator::UploadV2() {
return index_->UploadV2();
}

} // namespace milvus::indexbuilder

@ -17,7 +17,6 @@
#include <common/CDataType.h>
#include "index/Index.h"
#include "index/ScalarIndex.h"
#include "storage/space.h"

namespace milvus::indexbuilder {

@ -27,19 +26,12 @@ class ScalarIndexCreator : public IndexCreatorBase {
Config& config,
const storage::FileManagerContext& file_manager_context);

ScalarIndexCreator(DataType data_type,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
void
Build(const milvus::DatasetPtr& dataset) override;

void
Build() override;

void
BuildV2() override;

milvus::BinarySet
Serialize() override;

@ -49,9 +41,6 @@ class ScalarIndexCreator : public IndexCreatorBase {
BinarySet
Upload() override;

BinarySet
UploadV2() override;

private:
std::string
index_type();

@ -72,13 +61,4 @@ CreateScalarIndex(DataType dtype,
return std::make_unique<ScalarIndexCreator>(
dtype, config, file_manager_context);
}

inline ScalarIndexCreatorPtr
CreateScalarIndex(DataType dtype,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space) {
return std::make_unique<ScalarIndexCreator>(
dtype, config, file_manager_context, space);
}
} // namespace milvus::indexbuilder

@ -24,7 +24,7 @@ VecIndexCreator::VecIndexCreator(
DataType data_type,
Config& config,
const storage::FileManagerContext& file_manager_context)
: VecIndexCreator(data_type, "", 0, config, file_manager_context, nullptr) {
: VecIndexCreator(data_type, "", 0, config, file_manager_context) {
}

VecIndexCreator::VecIndexCreator(

@ -32,9 +32,8 @@ VecIndexCreator::VecIndexCreator(
const std::string& field_name,
const int64_t dim,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: config_(config), data_type_(data_type), space_(std::move(space)) {
const storage::FileManagerContext& file_manager_context)
: config_(config), data_type_(data_type) {
index::CreateIndexInfo index_info;
index_info.field_type = data_type_;
index_info.index_type = index::GetIndexTypeFromConfig(config_);

@ -45,7 +44,7 @@ VecIndexCreator::VecIndexCreator(
index_info.dim = dim;

index_ = index::IndexFactory::GetInstance().CreateIndex(
index_info, file_manager_context, space_);
index_info, file_manager_context);
AssertInfo(index_ != nullptr,
"[VecIndexCreator]Index is null after create index");
}

@ -65,11 +64,6 @@ VecIndexCreator::Build() {
index_->Build(config_);
}

void
VecIndexCreator::BuildV2() {
index_->BuildV2(config_);
}

milvus::BinarySet
VecIndexCreator::Serialize() {
return index_->Serialize(config_);

@ -95,11 +89,6 @@ VecIndexCreator::Upload() {
return index_->Upload();
}

BinarySet
VecIndexCreator::UploadV2() {
return index_->UploadV2();
}

void
VecIndexCreator::CleanLocalData() {
auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());

@ -20,7 +20,6 @@
#include "index/VectorIndex.h"
#include "index/IndexInfo.h"
#include "storage/Types.h"
#include "storage/space.h"

namespace milvus::indexbuilder {

@ -37,17 +36,14 @@ class VecIndexCreator : public IndexCreatorBase {
const std::string& field_name,
const int64_t dim,
Config& config,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);
const storage::FileManagerContext& file_manager_context);

void
Build(const milvus::DatasetPtr& dataset) override;

void
Build() override;

void
BuildV2() override;

milvus::BinarySet
Serialize() override;

@ -65,9 +61,6 @@ class VecIndexCreator : public IndexCreatorBase {
BinarySet
Upload() override;

BinarySet
UploadV2() override;

public:
void
CleanLocalData();

@ -76,8 +69,6 @@ class VecIndexCreator : public IndexCreatorBase {
milvus::index::IndexBasePtr index_ = nullptr;
Config config_;
DataType data_type_;

std::shared_ptr<milvus_storage::Space> space_;
};

} // namespace milvus::indexbuilder

@ -15,7 +15,6 @@
#include "fmt/core.h"
#include "indexbuilder/type_c.h"
#include "log/Log.h"
#include "storage/options.h"

#ifdef __linux__
#include <malloc.h>

@ -31,7 +30,6 @@
#include "index/Utils.h"
#include "pb/index_cgo_msg.pb.h"
#include "storage/Util.h"
#include "storage/space.h"
#include "index/Meta.h"

using namespace milvus;

@ -234,107 +232,6 @@ CreateIndex(CIndex* res_index,
}
}

CStatus
CreateIndexV2(CIndex* res_index,
const uint8_t* serialized_build_index_info,
const uint64_t len) {
try {
auto build_index_info =
std::make_unique<milvus::proto::indexcgo::BuildIndexInfo>();
auto res =
build_index_info->ParseFromArray(serialized_build_index_info, len);
AssertInfo(res, "Unmarshall build index info failed");
auto field_type =
static_cast<DataType>(build_index_info->field_schema().data_type());

milvus::index::CreateIndexInfo index_info;
index_info.field_type = field_type;
index_info.dim = build_index_info->dim();

auto storage_config =
get_storage_config(build_index_info->storage_config());
auto config = get_config(build_index_info);
// get index type
auto index_type = milvus::index::GetValueFromConfig<std::string>(
config, "index_type");
AssertInfo(index_type.has_value(), "index type is empty");
index_info.index_type = index_type.value();

auto engine_version = build_index_info->current_index_version();
index_info.index_engine_version = engine_version;
config[milvus::index::INDEX_ENGINE_VERSION] =
std::to_string(engine_version);

// get metric type
if (milvus::IsVectorDataType(field_type)) {
auto metric_type = milvus::index::GetValueFromConfig<std::string>(
config, "metric_type");
AssertInfo(metric_type.has_value(), "metric type is empty");
index_info.metric_type = metric_type.value();
}

milvus::storage::FieldDataMeta field_meta{
build_index_info->collectionid(),
build_index_info->partitionid(),
build_index_info->segmentid(),
build_index_info->field_schema().fieldid(),
build_index_info->field_schema()};
milvus::storage::IndexMeta index_meta{
build_index_info->segmentid(),
build_index_info->field_schema().fieldid(),
build_index_info->buildid(),
build_index_info->index_version(),
"",
build_index_info->field_schema().name(),
field_type,
build_index_info->dim(),
};

auto store_space = milvus_storage::Space::Open(
build_index_info->store_path(),
milvus_storage::Options{nullptr,
build_index_info->store_version()});
AssertInfo(store_space.ok() && store_space.has_value(),
"create space failed: {}",
store_space.status().ToString());

auto index_space = milvus_storage::Space::Open(
build_index_info->index_store_path(),
milvus_storage::Options{.schema = store_space.value()->schema()});
AssertInfo(index_space.ok() && index_space.has_value(),
"create space failed: {}",
index_space.status().ToString());

LOG_INFO("init space success");
auto chunk_manager =
milvus::storage::CreateChunkManager(storage_config);
milvus::storage::FileManagerContext fileManagerContext(
field_meta,
index_meta,
chunk_manager,
std::move(index_space.value()));

auto index =
milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
field_type,
build_index_info->field_schema().name(),
build_index_info->dim(),
config,
fileManagerContext,
std::move(store_space.value()));
index->BuildV2();
*res_index = index.release();
return milvus::SuccessCStatus();
} catch (SegcoreError& e) {
auto status = CStatus();
status.error_code = e.get_error_code();
status.error_msg = strdup(e.what());
return status;
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}
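CreateIndexV2 follows the error-handling convention of the surrounding C API: every C++ exception is caught at the boundary and translated into a status whose message is strdup'ed, so nothing throws across the CGo boundary. A stripped-down sketch of that pattern (the struct and error codes below are simplified stand-ins, not the real Milvus definitions):

#include <cstring>
#include <exception>

// Simplified stand-ins for the real CStatus and error codes.
struct CStatusLike {
    int error_code;
    const char* error_msg;
};
enum { kSuccess = 0, kUnexpectedError = 1 };

// Run a callable at the C boundary and translate exceptions into a status.
template <typename Fn>
CStatusLike
RunAtCBoundary(Fn&& fn) {
    try {
        fn();
        return CStatusLike{kSuccess, ""};
    } catch (const std::exception& e) {
        // The message is duplicated because the exception dies with this
        // frame; the caller is expected to free it.
        return CStatusLike{kUnexpectedError, strdup(e.what())};
    }
}
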
CStatus
DeleteIndex(CIndex index) {
auto status = CStatus();

@ -823,29 +720,6 @@ SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set) {
return status;
}

CStatus
SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set) {
auto status = CStatus();
try {
AssertInfo(
index,
"failed to serialize index to binary set, passed index was null");

auto real_index =
reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(index);

auto binary =
std::make_unique<knowhere::BinarySet>(real_index->UploadV2());
*c_binary_set = binary.release();
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
}
return status;
}

CStatus
AppendOptionalFieldDataPath(CBuildIndexInfo c_build_index_info,
const int64_t field_id,

@ -128,14 +128,6 @@ AppendOptionalFieldDataPath(CBuildIndexInfo c_build_index_info,
CStatus
SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set);

CStatus
SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set);

CStatus
CreateIndexV2(CIndex* res_index,
const uint8_t* serialized_build_index_info,
const uint64_t len);

CStatus
AppendIndexStorageInfo(CBuildIndexInfo c_build_index_info,
const char* c_data_store_path,

@ -43,6 +43,6 @@ set(SEGCORE_FILES
reduce/GroupReduce.cpp)
add_library(milvus_segcore SHARED ${SEGCORE_FILES})

target_link_libraries(milvus_segcore milvus_query milvus_bitset milvus_exec ${OpenMP_CXX_FLAGS} milvus-storage milvus_futures)
target_link_libraries(milvus_segcore milvus_query milvus_bitset milvus_exec ${OpenMP_CXX_FLAGS} milvus_futures)

install(TARGETS milvus_segcore DESTINATION "${CMAKE_INSTALL_LIBDIR}")

@ -33,8 +33,6 @@
#include "storage/RemoteChunkManagerSingleton.h"
#include "storage/Util.h"
#include "storage/ThreadPools.h"
#include "storage/options.h"
#include "storage/space.h"

namespace milvus::segcore {

@ -280,89 +278,6 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) {
reserved_offset + num_rows);
}

void
SegmentGrowingImpl::LoadFieldDataV2(const LoadFieldDataInfo& infos) {
// schema don't include system field
AssertInfo(infos.field_infos.size() == schema_->size() + 2,
"lost some field data when load for growing segment");
AssertInfo(infos.field_infos.find(TimestampFieldID.get()) !=
infos.field_infos.end(),
"timestamps field data should be included");
AssertInfo(
infos.field_infos.find(RowFieldID.get()) != infos.field_infos.end(),
"rowID field data should be included");
auto primary_field_id =
schema_->get_primary_field_id().value_or(FieldId(-1));
AssertInfo(primary_field_id.get() != INVALID_FIELD_ID, "Primary key is -1");
AssertInfo(infos.field_infos.find(primary_field_id.get()) !=
infos.field_infos.end(),
"primary field data should be included");

size_t num_rows = storage::GetNumRowsForLoadInfo(infos);
auto reserved_offset = PreInsert(num_rows);
for (auto& [id, info] : infos.field_infos) {
auto field_id = FieldId(id);
auto field_data_info = FieldDataInfo(field_id.get(), num_rows);
auto& pool =
ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
auto res = milvus_storage::Space::Open(
infos.url, milvus_storage::Options{nullptr, infos.storage_version});
AssertInfo(res.ok(), "init space failed");
std::shared_ptr<milvus_storage::Space> space = std::move(res.value());
auto load_future = pool.Submit(
LoadFieldDatasFromRemote2, space, schema_, field_data_info);
auto field_data =
milvus::storage::CollectFieldDataChannel(field_data_info.channel);
if (field_id == TimestampFieldID) {
// step 2: sort timestamp
// query node already guarantees that the timestamp is ordered, avoid field data copy in c++

// step 3: fill into Segment.ConcurrentVector
insert_record_.timestamps_.set_data_raw(reserved_offset,
field_data);
continue;
}

if (field_id == RowFieldID) {
continue;
}

if (!indexing_record_.SyncDataWithIndex(field_id)) {
insert_record_.get_data_base(field_id)->set_data_raw(
reserved_offset, field_data);
}
if (segcore_config_.get_enable_interim_segment_index()) {
auto offset = reserved_offset;
for (auto& data : field_data) {
auto row_count = data->get_num_rows();
indexing_record_.AppendingIndex(
offset, row_count, field_id, data, insert_record_);
offset += row_count;
}
}
try_remove_chunks(field_id);

if (field_id == primary_field_id) {
insert_record_.insert_pks(field_data);
}

// update average row data size
auto field_meta = (*schema_)[field_id];
if (IsVariableDataType(field_meta.get_data_type())) {
SegmentInternalInterface::set_field_avg_size(
field_id,
num_rows,
storage::GetByteSizeOfFieldDatas(field_data));
}

// update the mem size
stats_.mem_size += storage::GetByteSizeOfFieldDatas(field_data);
}

// step 5: update small indexes
insert_record_.ack_responder_.AddSegment(reserved_offset,
reserved_offset + num_rows);
}
SegcoreError
SegmentGrowingImpl::Delete(int64_t reserved_begin,
int64_t size,

@ -64,8 +64,6 @@ class SegmentGrowingImpl : public SegmentGrowing {

void
LoadFieldData(const LoadFieldDataInfo& info) override;
void
LoadFieldDataV2(const LoadFieldDataInfo& info) override;

void
RemoveDuplicatePkRecords() override;

@ -115,9 +115,6 @@ class SegmentInterface {
virtual void
LoadFieldData(const LoadFieldDataInfo& info) = 0;

virtual void
LoadFieldDataV2(const LoadFieldDataInfo& info) = 0;

virtual void
RemoveDuplicatePkRecords() = 0;

@ -281,59 +281,6 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& load_info) {
}
}

void
SegmentSealedImpl::LoadFieldDataV2(const LoadFieldDataInfo& load_info) {
// TODO(SPARSE): support storage v2
// NOTE: lock only when data is ready to avoid starvation
// only one field for now, parallel load field data in golang
size_t num_rows = storage::GetNumRowsForLoadInfo(load_info);

for (auto& [id, info] : load_info.field_infos) {
AssertInfo(info.row_count > 0, "The row count of field data is 0");

auto field_id = FieldId(id);
auto insert_files = info.insert_files;
auto field_data_info =
FieldDataInfo(field_id.get(), num_rows, load_info.mmap_dir_path);

LOG_INFO("segment {} loads field {} with num_rows {}",
this->get_segment_id(),
field_id.get(),
num_rows);

auto parallel_degree = static_cast<uint64_t>(
DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
field_data_info.channel->set_capacity(parallel_degree * 2);
auto& pool =
ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
// auto load_future = pool.Submit(
// LoadFieldDatasFromRemote, insert_files, field_data_info.channel);

auto res = milvus_storage::Space::Open(
load_info.url,
milvus_storage::Options{nullptr, load_info.storage_version});
AssertInfo(res.ok(),
fmt::format("init space failed: {}, error: {}",
load_info.url,
res.status().ToString()));
std::shared_ptr<milvus_storage::Space> space = std::move(res.value());
auto load_future = pool.Submit(
LoadFieldDatasFromRemote2, space, schema_, field_data_info);
LOG_INFO("segment {} submits load field {} task to thread pool",
this->get_segment_id(),
field_id.get());
if (load_info.mmap_dir_path.empty() ||
SystemProperty::Instance().IsSystem(field_id)) {
LoadFieldData(field_id, field_data_info);
} else {
MapFieldData(field_id, field_data_info);
}
LOG_INFO("segment {} loads field {} done",
this->get_segment_id(),
field_id.get());
}
}

void
SegmentSealedImpl::RemoveDuplicatePkRecords() {
std::unique_lock lck(mutex_);

@ -50,8 +50,6 @@ class SegmentSealedImpl : public SegmentSealed {
LoadIndex(const LoadIndexInfo& info) override;
void
LoadFieldData(const LoadFieldDataInfo& info) override;
void
LoadFieldDataV2(const LoadFieldDataInfo& info) override;
// erase duplicate records when sealed segment loaded done
void
RemoveDuplicatePkRecords() override;

@ -780,35 +780,7 @@ ReverseDataFromIndex(const index::IndexBase* index,

return data_array;
}
void
LoadFieldDatasFromRemote2(std::shared_ptr<milvus_storage::Space> space,
SchemaPtr schema,
FieldDataInfo& field_data_info) {
auto reader = space->ScanData();

for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {
PanicInfo(DataFormatBroken, "failed to read data");
}
auto data = rec.ValueUnsafe();
auto total_num_rows = data->num_rows();
for (auto& field : schema->get_fields()) {
if (field.second.get_id().get() != field_data_info.field_id) {
continue;
}
auto col_data =
data->GetColumnByName(field.second.get_name().get());
auto field_data = storage::CreateFieldData(
field.second.get_data_type(),
field.second.is_nullable(),
field.second.is_vector() ? field.second.get_dim() : 0,
total_num_rows);
field_data->FillFieldData(col_data);
field_data_info.channel->push(field_data);
}
}
field_data_info.channel->close();
}
// init segcore storage config first, and create default remote chunk manager
// segcore use default remote chunk manager to load data from minio/s3
void

@ -28,7 +28,6 @@
#include "log/Log.h"
#include "segcore/DeletedRecord.h"
#include "segcore/InsertRecord.h"
#include "storage/space.h"

namespace milvus::segcore {

@ -119,10 +118,6 @@ void
LoadFieldDatasFromRemote(const std::vector<std::string>& remote_files,
FieldDataChannelPtr channel);

void
LoadFieldDatasFromRemote2(std::shared_ptr<milvus_storage::Space> space,
SchemaPtr schema,
FieldDataInfo& field_data_info);
/**
* Returns an index pointing to the first element in the range [first, last) such that `value < element` is true
* (i.e. that is strictly greater than value), or last if no such element is found.

@ -318,77 +318,6 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) {
}
}

CStatus
AppendIndexV3(CLoadIndexInfo c_load_index_info) {
try {
auto load_index_info =
(milvus::segcore::LoadIndexInfo*)c_load_index_info;
auto& index_params = load_index_info->index_params;
auto field_type = load_index_info->field_type;

milvus::index::CreateIndexInfo index_info;
index_info.field_type = load_index_info->field_type;

// get index type
AssertInfo(index_params.find("index_type") != index_params.end(),
"index type is empty");
index_info.index_type = index_params.at("index_type");

// get metric type
if (milvus::IsVectorDataType(field_type)) {
AssertInfo(index_params.find("metric_type") != index_params.end(),
"metric type is empty for vector index");
index_info.metric_type = index_params.at("metric_type");
}

milvus::storage::FieldDataMeta field_meta{
load_index_info->collection_id,
load_index_info->partition_id,
load_index_info->segment_id,
load_index_info->field_id};
milvus::storage::IndexMeta index_meta{load_index_info->segment_id,
load_index_info->field_id,
load_index_info->index_build_id,
load_index_info->index_version};
auto config = milvus::index::ParseConfigFromIndexParams(
load_index_info->index_params);

auto res = milvus_storage::Space::Open(
load_index_info->uri,
milvus_storage::Options{nullptr,
load_index_info->index_store_version});
AssertInfo(res.ok(), "init space failed");
std::shared_ptr<milvus_storage::Space> space = std::move(res.value());

milvus::storage::FileManagerContext fileManagerContext(
field_meta, index_meta, nullptr, space);
load_index_info->index =
milvus::index::IndexFactory::GetInstance().CreateIndex(
index_info, fileManagerContext, space);

if (!load_index_info->mmap_dir_path.empty() &&
load_index_info->index->IsMmapSupported()) {
auto filepath =
std::filesystem::path(load_index_info->mmap_dir_path) /
std::to_string(load_index_info->segment_id) /
std::to_string(load_index_info->field_id) /
std::to_string(load_index_info->index_id);

config[kMmapFilepath] = filepath.string();
}

load_index_info->index->LoadV2(config);
auto status = CStatus();
status.error_code = milvus::Success;
status.error_msg = "";
return status;
} catch (std::exception& e) {
auto status = CStatus();
status.error_code = milvus::UnexpectedError;
status.error_msg = strdup(e.what());
return status;
}
}
CStatus
AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* c_file_path) {
try {

@ -62,9 +62,6 @@ AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* file_path);
CStatus
AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info);

CStatus
AppendIndexV3(CLoadIndexInfo c_load_index_info);

CStatus
AppendIndexEngineVersionToLoadInfo(CLoadIndexInfo c_load_index_info,
int32_t index_engine_version);

@ -29,7 +29,6 @@
#include "storage/Util.h"
#include "futures/Future.h"
#include "futures/Executor.h"
#include "storage/space.h"

////////////////////////////// common interfaces //////////////////////////////
CStatus

@ -339,20 +338,6 @@ RemoveDuplicatePkRecords(CSegmentInterface c_segment) {
}
}

CStatus
LoadFieldDataV2(CSegmentInterface c_segment,
CLoadFieldDataInfo c_load_field_data_info) {
try {
auto segment =
reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
AssertInfo(segment != nullptr, "segment conversion failed");
auto load_info = (LoadFieldDataInfo*)c_load_field_data_info;
segment->LoadFieldDataV2(*load_info);
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}
// just for test
CStatus
LoadFieldRawData(CSegmentInterface c_segment,

@ -102,10 +102,6 @@ CStatus
LoadFieldData(CSegmentInterface c_segment,
CLoadFieldDataInfo load_field_data_info);

CStatus
LoadFieldDataV2(CSegmentInterface c_segment,
CLoadFieldDataInfo load_field_data_info);

CStatus
RemoveDuplicatePkRecords(CSegmentInterface c_segment);

@ -69,7 +69,6 @@ if (DEFINED AZURE_BUILD_DIR)
"-L${AZURE_BUILD_DIR} -lblob-chunk-manager"
blob-chunk-manager
milvus_common
milvus-storage
milvus_monitor
pthread
${CONAN_LIBS}

@ -77,7 +76,6 @@ if (DEFINED AZURE_BUILD_DIR)
else ()
target_link_libraries(milvus_storage PUBLIC
milvus_common
milvus-storage
milvus_monitor
pthread
${CONAN_LIBS}

@ -45,16 +45,6 @@
#include "storage/Util.h"

namespace milvus::storage {

DiskFileManagerImpl::DiskFileManagerImpl(
const FileManagerContext& fileManagerContext,
std::shared_ptr<milvus_storage::Space> space)
: FileManagerImpl(fileManagerContext.fieldDataMeta,
fileManagerContext.indexMeta),
space_(space) {
rcm_ = fileManagerContext.chunkManagerPtr;
}

DiskFileManagerImpl::DiskFileManagerImpl(
const FileManagerContext& fileManagerContext)
: FileManagerImpl(fileManagerContext.fieldDataMeta,

@ -78,39 +68,10 @@ std::string
DiskFileManagerImpl::GetRemoteIndexPath(const std::string& file_name,
int64_t slice_num) const {
std::string remote_prefix;
if (space_ != nullptr) {
remote_prefix = GetRemoteIndexObjectPrefixV2();
} else {
remote_prefix = GetRemoteIndexObjectPrefix();
}
remote_prefix = GetRemoteIndexObjectPrefix();
return remote_prefix + "/" + file_name + "_" + std::to_string(slice_num);
}

bool
DiskFileManagerImpl::AddFileUsingSpace(
const std::string& local_file_name,
const std::vector<int64_t>& local_file_offsets,
const std::vector<std::string>& remote_files,
const std::vector<int64_t>& remote_file_sizes) {
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
for (int64_t i = 0; i < remote_files.size(); ++i) {
auto buf =
std::shared_ptr<uint8_t[]>(new uint8_t[remote_file_sizes[i]]);
local_chunk_manager->Read(local_file_name,
local_file_offsets[i],
buf.get(),
remote_file_sizes[i]);

auto status =
space_->WriteBlob(remote_files[i], buf.get(), remote_file_sizes[i]);
if (!status.ok()) {
return false;
}
}
return true;
}

bool
DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
auto local_chunk_manager =

@ -204,85 +165,17 @@ DiskFileManagerImpl::AddBatchIndexFiles(
}

std::map<std::string, int64_t> res;
if (space_ != nullptr) {
res = PutIndexData(space_,
data_slices,
remote_file_sizes,
remote_files,
field_meta_,
index_meta_);
} else {
res = PutIndexData(rcm_.get(),
data_slices,
remote_file_sizes,
remote_files,
field_meta_,
index_meta_);
}
res = PutIndexData(rcm_.get(),
data_slices,
remote_file_sizes,
remote_files,
field_meta_,
index_meta_);
for (auto& re : res) {
remote_paths_to_size_[re.first] = re.second;
}
}

void
DiskFileManagerImpl::CacheIndexToDisk() {
auto blobs = space_->StatisticsBlobs();
std::vector<std::string> remote_files;
for (auto& blob : blobs) {
remote_files.push_back(blob.name);
}
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();

std::map<std::string, std::vector<int>> index_slices;
for (auto& file_path : remote_files) {
auto pos = file_path.find_last_of("_");
index_slices[file_path.substr(0, pos)].emplace_back(
std::stoi(file_path.substr(pos + 1)));
}

for (auto& slices : index_slices) {
std::sort(slices.second.begin(), slices.second.end());
}

auto EstimateParallelDegree = [&](const std::string& file) -> uint64_t {
auto fileSize = space_->GetBlobByteSize(file);
return uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / fileSize.value());
};

for (auto& slices : index_slices) {
auto prefix = slices.first;
auto local_index_file_name =
GetLocalIndexObjectPrefix() +
prefix.substr(prefix.find_last_of('/') + 1);
local_chunk_manager->CreateFile(local_index_file_name);
int64_t offset = 0;
std::vector<std::string> batch_remote_files;
uint64_t max_parallel_degree = INT_MAX;
for (int& iter : slices.second) {
if (batch_remote_files.size() == max_parallel_degree) {
auto next_offset = CacheBatchIndexFilesToDiskV2(
batch_remote_files, local_index_file_name, offset);
offset = next_offset;
batch_remote_files.clear();
}
auto origin_file = prefix + "_" + std::to_string(iter);
if (batch_remote_files.size() == 0) {
// Use first file size as average size to estimate
max_parallel_degree = EstimateParallelDegree(origin_file);
}
batch_remote_files.push_back(origin_file);
}
if (batch_remote_files.size() > 0) {
auto next_offset = CacheBatchIndexFilesToDiskV2(
batch_remote_files, local_index_file_name, offset);
offset = next_offset;
batch_remote_files.clear();
}
local_paths_.emplace_back(local_index_file_name);
}
}
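The space-based CacheIndexToDisk above relies on a naming convention where each remote blob is "<prefix>_<slice_number>"; it groups blobs by prefix and sorts the slice numbers before downloading them in batches. A self-contained sketch of just that grouping step (illustration only; it assumes every name ends in an underscore followed by a decimal slice number):

#include <algorithm>
#include <map>
#include <string>
#include <vector>

// Group "<prefix>_<slice>" names by prefix and sort each group's slice numbers.
inline std::map<std::string, std::vector<int>>
GroupSlicesByPrefix(const std::vector<std::string>& remote_files) {
    std::map<std::string, std::vector<int>> index_slices;
    for (const auto& path : remote_files) {
        auto pos = path.find_last_of('_');
        index_slices[path.substr(0, pos)].push_back(
            std::stoi(path.substr(pos + 1)));
    }
    for (auto& entry : index_slices) {
        std::sort(entry.second.begin(), entry.second.end());
    }
    return index_slices;
}
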
|
||||
|
||||
void
|
||||
DiskFileManagerImpl::CacheIndexToDisk(
|
||||
const std::vector<std::string>& remote_files) {
|
||||
|
@ -329,111 +222,6 @@ DiskFileManagerImpl::CacheIndexToDisk(
|
|||
}
|
||||
}
|
||||
|
||||
uint64_t
|
||||
DiskFileManagerImpl::CacheBatchIndexFilesToDisk(
|
||||
const std::vector<std::string>& remote_files,
|
||||
const std::string& local_file_name,
|
||||
uint64_t local_file_init_offfset) {
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
auto index_datas = GetObjectData(rcm_.get(), remote_files);
|
||||
int batch_size = remote_files.size();
|
||||
AssertInfo(index_datas.size() == batch_size,
|
||||
"inconsistent file num and index data num!");
|
||||
|
||||
uint64_t offset = local_file_init_offfset;
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
auto index_data = index_datas[i].get()->GetFieldData();
|
||||
auto index_size = index_data->Size();
|
||||
auto uint8_data =
|
||||
reinterpret_cast<uint8_t*>(const_cast<void*>(index_data->Data()));
|
||||
local_chunk_manager->Write(
|
||||
local_file_name, offset, uint8_data, index_size);
|
||||
offset += index_size;
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
DiskFileManagerImpl::CacheBatchIndexFilesToDiskV2(
|
||||
const std::vector<std::string>& remote_files,
|
||||
const std::string& local_file_name,
|
||||
uint64_t local_file_init_offfset) {
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
auto index_datas = GetObjectData(space_, remote_files);
|
||||
int batch_size = remote_files.size();
|
||||
AssertInfo(index_datas.size() == batch_size,
|
||||
"inconsistent file num and index data num!");
|
||||
|
||||
uint64_t offset = local_file_init_offfset;
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
auto index_data = index_datas[i];
|
||||
auto index_size = index_data->Size();
|
||||
auto uint8_data =
|
||||
reinterpret_cast<uint8_t*>(const_cast<void*>(index_data->Data()));
|
||||
local_chunk_manager->Write(
|
||||
local_file_name, offset, uint8_data, index_size);
|
||||
offset += index_size;
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
template <typename DataType>
|
||||
std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk(
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
auto segment_id = GetFieldDataMeta().segment_id;
|
||||
auto field_id = GetFieldDataMeta().field_id;
|
||||
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
auto local_data_path = storage::GenFieldRawDataPathPrefix(
|
||||
local_chunk_manager, segment_id, field_id) +
|
||||
"raw_data";
|
||||
local_chunk_manager->CreateFile(local_data_path);
|
||||
// file format
|
||||
// num_rows(uint32) | dim(uint32) | index_data ([]uint8_t)
|
||||
uint32_t num_rows = 0;
|
||||
uint32_t dim = 0;
|
||||
int64_t write_offset = sizeof(num_rows) + sizeof(dim);
|
||||
auto reader = space->ScanData();
|
||||
for (auto rec : *reader) {
|
||||
if (!rec.ok()) {
|
||||
PanicInfo(IndexBuildError,
|
||||
fmt::format("failed to read data: {}",
|
||||
rec.status().ToString()));
|
||||
}
|
||||
auto data = rec.ValueUnsafe();
|
||||
if (data == nullptr) {
|
||||
break;
|
||||
}
|
||||
auto total_num_rows = data->num_rows();
|
||||
num_rows += total_num_rows;
|
||||
auto col_data = data->GetColumnByName(index_meta_.field_name);
|
||||
auto field_data = storage::CreateFieldData(
|
||||
index_meta_.field_type, false, index_meta_.dim, total_num_rows);
|
||||
field_data->FillFieldData(col_data);
|
||||
dim = field_data->get_dim();
|
||||
auto data_size =
|
||||
field_data->get_num_rows() * milvus::GetVecRowSize<DataType>(dim);
|
||||
local_chunk_manager->Write(local_data_path,
|
||||
write_offset,
|
||||
const_cast<void*>(field_data->Data()),
|
||||
data_size);
|
||||
write_offset += data_size;
|
||||
}
|
||||
|
||||
// write num_rows and dim value to file header
|
||||
write_offset = 0;
|
||||
local_chunk_manager->Write(
|
||||
local_data_path, write_offset, &num_rows, sizeof(num_rows));
|
||||
write_offset += sizeof(num_rows);
|
||||
local_chunk_manager->Write(
|
||||
local_data_path, write_offset, &dim, sizeof(dim));
|
||||
|
||||
return local_data_path;
|
||||
}
|
||||
|
||||
void
|
||||
SortByPath(std::vector<std::string>& paths) {
|
||||
std::sort(paths.begin(),
|
||||
|
@ -682,92 +470,6 @@ WriteOptFieldsIvfMeta(
|
|||
write_offset += sizeof(num_of_fields);
|
||||
}
|
||||
|
||||
// write optional scalar fields ivf info in the following format without space among them
|
||||
// | (meta)
|
||||
// | version (uint8_t) | num_of_fields (uint32_t) |
|
||||
// | (field_0)
|
||||
// | field_id (int64_t) | num_of_unique_field_data (uint32_t)
|
||||
// | size_0 (uint32_t) | offset_0 (uint32_t)...
|
||||
// | size_1 | offset_0, offset_1, ...
|
||||
std::string
|
||||
DiskFileManagerImpl::CacheOptFieldToDisk(
|
||||
std::shared_ptr<milvus_storage::Space> space, OptFieldT& fields_map) {
|
||||
const uint32_t num_of_fields = fields_map.size();
|
||||
if (0 == num_of_fields) {
|
||||
return "";
|
||||
} else if (num_of_fields > 1) {
|
||||
PanicInfo(
|
||||
ErrorCode::NotImplemented,
|
||||
"vector index build with multiple fields is not supported yet");
|
||||
}
|
||||
if (nullptr == space) {
|
||||
LOG_ERROR("Failed to cache optional field. Space is null");
|
||||
return "";
|
||||
}
|
||||
|
||||
auto segment_id = GetFieldDataMeta().segment_id;
|
||||
auto vec_field_id = GetFieldDataMeta().field_id;
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
auto local_data_path = storage::GenFieldRawDataPathPrefix(
|
||||
local_chunk_manager, segment_id, vec_field_id) +
|
||||
std::string(VEC_OPT_FIELDS);
|
||||
local_chunk_manager->CreateFile(local_data_path);
|
||||
|
||||
uint64_t write_offset = 0;
|
||||
WriteOptFieldsIvfMeta(
|
||||
local_chunk_manager, local_data_path, num_of_fields, write_offset);
|
||||
|
||||
std::unordered_set<int64_t> actual_field_ids;
|
||||
auto reader = space->ScanData();
|
||||
for (auto& [field_id, tup] : fields_map) {
|
||||
const auto& field_name = std::get<0>(tup);
|
||||
const auto& field_type = std::get<1>(tup);
|
||||
std::vector<FieldDataPtr> field_datas;
|
||||
for (auto rec : *reader) {
|
||||
if (!rec.ok()) {
|
||||
PanicInfo(IndexBuildError,
|
||||
fmt::format("failed to read optional field data: {}",
|
||||
rec.status().ToString()));
|
||||
}
|
||||
auto data = rec.ValueUnsafe();
|
||||
if (data == nullptr) {
|
||||
break;
|
||||
}
|
||||
auto total_num_rows = data->num_rows();
|
||||
if (0 == total_num_rows) {
|
||||
LOG_WARN("optional field {} has no data", field_name);
|
||||
return "";
|
||||
}
|
||||
auto col_data = data->GetColumnByName(field_name);
|
||||
auto field_data =
|
||||
storage::CreateFieldData(field_type, false, 1, total_num_rows);
|
||||
field_data->FillFieldData(col_data);
|
||||
field_datas.emplace_back(field_data);
|
||||
}
|
||||
if (WriteOptFieldIvfData(field_type,
|
||||
field_id,
|
||||
local_chunk_manager,
|
||||
local_data_path,
|
||||
field_datas,
|
||||
write_offset)) {
|
||||
actual_field_ids.insert(field_id);
|
||||
}
|
||||
}
|
||||
|
||||
if (actual_field_ids.size() != num_of_fields) {
|
||||
write_offset = 0;
|
||||
WriteOptFieldsIvfMeta(local_chunk_manager,
|
||||
local_data_path,
|
||||
actual_field_ids.size(),
|
||||
write_offset);
|
||||
if (actual_field_ids.empty()) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
return local_data_path;
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
|
||||
const uint32_t num_of_fields = fields_map.size();
|
||||
|
@ -904,17 +606,4 @@ DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
|
|||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<bin1>(
|
||||
std::vector<std::string> remote_files);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<float>(
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<float16>(
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<bin1>(
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
|
||||
} // namespace milvus::storage
|
||||
|
|
|
@@ -25,8 +25,6 @@
#include "storage/IndexData.h"
#include "storage/FileManager.h"
#include "storage/ChunkManager.h"
#include "storage/space.h"

#include "common/Consts.h"

namespace milvus::storage {
@@ -35,9 +33,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
public:
explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext);

explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext,
std::shared_ptr<milvus_storage::Space> space);

virtual ~DiskFileManagerImpl();

virtual bool
@@ -77,19 +72,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
void
CacheIndexToDisk(const std::vector<std::string>& remote_files);

void
CacheIndexToDisk();

uint64_t
CacheBatchIndexFilesToDisk(const std::vector<std::string>& remote_files,
const std::string& local_file_name,
uint64_t local_file_init_offfset);

uint64_t
CacheBatchIndexFilesToDiskV2(const std::vector<std::string>& remote_files,
const std::string& local_file_name,
uint64_t local_file_init_offfset);

void
AddBatchIndexFiles(const std::string& local_file_name,
const std::vector<int64_t>& local_file_offsets,
@@ -100,27 +82,12 @@ class DiskFileManagerImpl : public FileManagerImpl {
std::string
CacheRawDataToDisk(std::vector<std::string> remote_files);

template <typename DataType>
std::string
CacheRawDataToDisk(std::shared_ptr<milvus_storage::Space> space);

std::string
CacheOptFieldToDisk(OptFieldT& fields_map);

std::string
CacheOptFieldToDisk(std::shared_ptr<milvus_storage::Space> space,
OptFieldT& fields_map);

virtual bool
AddFileUsingSpace(const std::string& local_file_name,
const std::vector<int64_t>& local_file_offsets,
const std::vector<std::string>& remote_files,
const std::vector<int64_t>& remote_file_sizes);

std::string
GetRemoteIndexPrefix() const {
return space_ != nullptr ? GetRemoteIndexObjectPrefixV2()
: GetRemoteIndexObjectPrefix();
return GetRemoteIndexObjectPrefix();
}

private:
@@ -141,8 +108,6 @@ class DiskFileManagerImpl : public FileManagerImpl {

// remote file path
std::map<std::string, int64_t> remote_paths_to_size_;

std::shared_ptr<milvus_storage::Space> space_;
};

using DiskANNFileManagerImplPtr = std::shared_ptr<DiskFileManagerImpl>;

@@ -25,7 +25,6 @@
#include "log/Log.h"
#include "storage/ChunkManager.h"
#include "storage/Types.h"
#include "storage/space.h"

namespace milvus::storage {

@@ -40,15 +39,6 @@ struct FileManagerContext {
chunkManagerPtr(chunkManagerPtr) {
}

FileManagerContext(const FieldDataMeta& fieldDataMeta,
const IndexMeta& indexMeta,
const ChunkManagerPtr& chunkManagerPtr,
std::shared_ptr<milvus_storage::Space> space)
: fieldDataMeta(fieldDataMeta),
indexMeta(indexMeta),
chunkManagerPtr(chunkManagerPtr),
space_(space) {
}
bool
Valid() const {
return chunkManagerPtr != nullptr;
@@ -57,7 +47,6 @@ struct FileManagerContext {
FieldDataMeta fieldDataMeta;
IndexMeta indexMeta;
ChunkManagerPtr chunkManagerPtr;
std::shared_ptr<milvus_storage::Space> space_;
};

#define FILEMANAGER_TRY try {

@@ -26,15 +26,6 @@

namespace milvus::storage {

MemFileManagerImpl::MemFileManagerImpl(
const FileManagerContext& fileManagerContext,
std::shared_ptr<milvus_storage::Space> space)
: FileManagerImpl(fileManagerContext.fieldDataMeta,
fileManagerContext.indexMeta),
space_(space) {
rcm_ = fileManagerContext.chunkManagerPtr;
}

MemFileManagerImpl::MemFileManagerImpl(
const FileManagerContext& fileManagerContext)
: FileManagerImpl(fileManagerContext.fieldDataMeta,
@@ -91,50 +82,6 @@ MemFileManagerImpl::AddFile(const BinarySet& binary_set) {
return true;
}

bool
MemFileManagerImpl::AddFileV2(const BinarySet& binary_set) {
std::vector<const uint8_t*> data_slices;
std::vector<int64_t> slice_sizes;
std::vector<std::string> slice_names;

auto AddBatchIndexFiles = [&]() {
auto res = PutIndexData(space_,
data_slices,
slice_sizes,
slice_names,
field_meta_,
index_meta_);
for (auto& [file, size] : res) {
remote_paths_to_size_[file] = size;
}
};

auto remotePrefix = GetRemoteIndexObjectPrefixV2();
int64_t batch_size = 0;
for (auto iter = binary_set.binary_map_.begin();
iter != binary_set.binary_map_.end();
iter++) {
if (batch_size >= DEFAULT_FIELD_MAX_MEMORY_LIMIT) {
AddBatchIndexFiles();
data_slices.clear();
slice_sizes.clear();
slice_names.clear();
batch_size = 0;
}

data_slices.emplace_back(iter->second->data.get());
slice_sizes.emplace_back(iter->second->size);
slice_names.emplace_back(remotePrefix + "/" + iter->first);
batch_size += iter->second->size;
}

if (data_slices.size() > 0) {
AddBatchIndexFiles();
}

return true;
}

bool
MemFileManagerImpl::LoadFile(const std::string& filename) noexcept {
return true;

@@ -25,7 +25,6 @@
#include "storage/IndexData.h"
#include "storage/FileManager.h"
#include "storage/ChunkManager.h"
#include "storage/space.h"

namespace milvus::storage {

@@ -33,9 +32,6 @@ class MemFileManagerImpl : public FileManagerImpl {
public:
explicit MemFileManagerImpl(const FileManagerContext& fileManagerContext);

MemFileManagerImpl(const FileManagerContext& fileManagerContext,
std::shared_ptr<milvus_storage::Space> space);

virtual bool
LoadFile(const std::string& filename) noexcept;

@@ -63,14 +59,6 @@ class MemFileManagerImpl : public FileManagerImpl {
bool
AddFile(const BinarySet& binary_set);

bool
AddFileV2(const BinarySet& binary_set);

std::shared_ptr<milvus_storage::Space>
space() const {
return space_;
}

std::map<std::string, int64_t>
GetRemotePathsToFileSize() const {
return remote_paths_to_size_;
@@ -79,7 +67,6 @@ class MemFileManagerImpl : public FileManagerImpl {
private:
// remote file path
std::map<std::string, int64_t> remote_paths_to_size_;
std::shared_ptr<milvus_storage::Space> space_;
};

using MemFileManagerImplPtr = std::shared_ptr<MemFileManagerImpl>;

@@ -516,22 +516,6 @@ DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager,
return DeserializeFileData(buf, fileSize);
}

std::unique_ptr<DataCodec>
DownloadAndDecodeRemoteFileV2(std::shared_ptr<milvus_storage::Space> space,
const std::string& file) {
auto fileSize = space->GetBlobByteSize(file);
if (!fileSize.ok()) {
PanicInfo(FileReadFailed, fileSize.status().ToString());
}
auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[fileSize.value()]);
auto status = space->ReadBlob(file, buf.get());
if (!status.ok()) {
PanicInfo(FileReadFailed, status.ToString());
}

return DeserializeFileData(buf, fileSize.value());
}

std::pair<std::string, size_t>
EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
uint8_t* buf,
@@ -551,27 +535,6 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
return std::make_pair(std::move(object_key), serialized_index_size);
}

std::pair<std::string, size_t>
EncodeAndUploadIndexSlice2(std::shared_ptr<milvus_storage::Space> space,
uint8_t* buf,
int64_t batch_size,
IndexMeta index_meta,
FieldDataMeta field_meta,
std::string object_key) {
// todo: support nullable index
auto field_data = CreateFieldData(DataType::INT8, false);
field_data->FillFieldData(buf, batch_size);
auto indexData = std::make_shared<IndexData>(field_data);
indexData->set_index_meta(index_meta);
indexData->SetFieldDataMeta(field_meta);
auto serialized_index_data = indexData->serialize_to_remote_file();
auto serialized_index_size = serialized_index_data.size();
auto status = space->WriteBlob(
object_key, serialized_index_data.data(), serialized_index_size);
AssertInfo(status.ok(), "write to space error: {}", status.ToString());
return std::make_pair(std::move(object_key), serialized_index_size);
}

std::pair<std::string, size_t>
EncodeAndUploadFieldSlice(ChunkManager* chunk_manager,
void* buf,
@@ -609,36 +572,6 @@ GetObjectData(ChunkManager* remote_chunk_manager,
return futures;
}

std::vector<FieldDataPtr>
GetObjectData(std::shared_ptr<milvus_storage::Space> space,
const std::vector<std::string>& remote_files) {
auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::HIGH);
std::vector<std::future<std::unique_ptr<DataCodec>>> futures;
for (auto& file : remote_files) {
futures.emplace_back(
pool.Submit(DownloadAndDecodeRemoteFileV2, space, file));
}

std::vector<FieldDataPtr> datas;
std::exception_ptr first_exception = nullptr;
for (auto& future : futures) {
try {
auto res = future.get();
datas.emplace_back(res->GetFieldData());
} catch (...) {
if (!first_exception) {
first_exception = std::current_exception();
}
}
}
ReleaseArrowUnused();
if (first_exception) {
std::rethrow_exception(first_exception);
}

return datas;
}

std::map<std::string, int64_t>
PutIndexData(ChunkManager* remote_chunk_manager,
const std::vector<const uint8_t*>& data_slices,
@@ -687,54 +620,6 @@ PutIndexData(ChunkManager* remote_chunk_manager,
return remote_paths_to_size;
}

std::map<std::string, int64_t>
PutIndexData(std::shared_ptr<milvus_storage::Space> space,
const std::vector<const uint8_t*>& data_slices,
const std::vector<int64_t>& slice_sizes,
const std::vector<std::string>& slice_names,
FieldDataMeta& field_meta,
IndexMeta& index_meta) {
auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
std::vector<std::future<std::pair<std::string, size_t>>> futures;
AssertInfo(data_slices.size() == slice_sizes.size(),
"inconsistent data slices size {} with slice sizes {}",
data_slices.size(),
slice_sizes.size());
AssertInfo(data_slices.size() == slice_names.size(),
"inconsistent data slices size {} with slice names size {}",
data_slices.size(),
slice_names.size());

for (int64_t i = 0; i < data_slices.size(); ++i) {
futures.push_back(pool.Submit(EncodeAndUploadIndexSlice2,
space,
const_cast<uint8_t*>(data_slices[i]),
slice_sizes[i],
index_meta,
field_meta,
slice_names[i]));
}

std::map<std::string, int64_t> remote_paths_to_size;
std::exception_ptr first_exception = nullptr;
for (auto& future : futures) {
try {
auto res = future.get();
remote_paths_to_size[res.first] = res.second;
} catch (...) {
if (!first_exception) {
first_exception = std::current_exception();
}
}
}
ReleaseArrowUnused();
if (first_exception) {
std::rethrow_exception(first_exception);
}

return remote_paths_to_size;
}

int64_t
GetTotalNumRowsForFieldDatas(const std::vector<FieldDataPtr>& field_datas) {
int64_t count = 0;

@@ -31,7 +31,6 @@
#include "storage/ChunkManager.h"
#include "storage/DataCodec.h"
#include "storage/Types.h"
#include "storage/space.h"

namespace milvus::storage {

@@ -89,10 +88,6 @@ std::unique_ptr<DataCodec>
DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager,
const std::string& file);

std::unique_ptr<DataCodec>
DownloadAndDecodeRemoteFileV2(std::shared_ptr<milvus_storage::Space> space,
const std::string& file);

std::pair<std::string, size_t>
EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
uint8_t* buf,
@@ -102,13 +97,6 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
std::string object_key);

std::pair<std::string, size_t>
EncodeAndUploadIndexSlice2(std::shared_ptr<milvus_storage::Space> space,
uint8_t* buf,
int64_t batch_size,
IndexMeta index_meta,
FieldDataMeta field_meta,
std::string object_key);
std::pair<std::string, size_t>
EncodeAndUploadFieldSlice(ChunkManager* chunk_manager,
void* buf,
int64_t element_count,
@@ -120,10 +108,6 @@ std::vector<std::future<std::unique_ptr<DataCodec>>>
GetObjectData(ChunkManager* remote_chunk_manager,
const std::vector<std::string>& remote_files);

std::vector<FieldDataPtr>
GetObjectData(std::shared_ptr<milvus_storage::Space> space,
const std::vector<std::string>& remote_files);

std::map<std::string, int64_t>
PutIndexData(ChunkManager* remote_chunk_manager,
const std::vector<const uint8_t*>& data_slices,
@@ -132,13 +116,6 @@ PutIndexData(ChunkManager* remote_chunk_manager,
FieldDataMeta& field_meta,
IndexMeta& index_meta);

std::map<std::string, int64_t>
PutIndexData(std::shared_ptr<milvus_storage::Space> space,
const std::vector<const uint8_t*>& data_slices,
const std::vector<int64_t>& slice_sizes,
const std::vector<std::string>& slice_names,
FieldDataMeta& field_meta,
IndexMeta& index_meta);
int64_t
GetTotalNumRowsForFieldDatas(const std::vector<FieldDataPtr>& field_datas);

@@ -41,8 +41,6 @@ if (USE_OPENDAL)
endif()
add_subdirectory(tantivy)

add_subdirectory(milvus-storage)

if (LINUX)
add_subdirectory(jemalloc)
endif()

@@ -1,48 +0,0 @@
#-------------------------------------------------------------------------------
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------

set( MILVUS_STORAGE_VERSION 9d1ad9c)

message(STATUS "Building milvus-storage-${MILVUS_STORAGE_VERSION} from source")
message(STATUS ${CMAKE_BUILD_TYPE})

# message(FATAL_ERROR ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage.patch)
# set(milvus-storage-patch git apply --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage.patch)
set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} )
FetchContent_Declare(
milvus-storage
GIT_REPOSITORY "https://github.com/milvus-io/milvus-storage.git"
GIT_TAG ${MILVUS_STORAGE_VERSION}
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-src
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-build
SOURCE_SUBDIR cpp
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage_CMakeLists.txt <SOURCE_DIR>/cpp/CMakeLists.txt
DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} )

FetchContent_MakeAvailable(milvus-storage)
# target_compile_features(milvus-storage PUBLIC cxx_std_20)

# FetchContent_GetProperties( milvus-storage )
# if ( NOT milvus-storage_POPULATED )
# FetchContent_Populate( milvus-storage)

# # Adding the following target:
# add_subdirectory( ${milvus-storage_SOURCE_DIR}/cpp
# ${milvus-storage_BINARY_DIR} )
# endif()

# message(FATAL_ERROR ${milvus-storage_SOURCE_DIR} ${milvus-storage_BINARY_DIR})
# get prometheus COMPILE_OPTIONS
# get_property( var DIRECTORY "${milvus-storage_SOURCE_DIR}" PROPERTY COMPILE_OPTIONS )
message( STATUS "milvus-storage src compile options: ${var}" )
# unset(CMAKE_CXX_STANDARD)

@@ -1,34 +0,0 @@
cmake_minimum_required(VERSION 3.20.0)

project(milvus-storage VERSION 0.1.0)

option(WITH_UT "Build the testing tree." ON)
option(WITH_ASAN "Build with address sanitizer." OFF)
option(USE_OPENDAL "Build with opendal." OFF)

if (USE_OPENDAL)
add_compile_definitions(MILVUS_OPENDAL)
endif()

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

find_package(Boost REQUIRED)
find_package(Arrow REQUIRED)
find_package(Protobuf REQUIRED)
find_package(glog REQUIRED)
find_package(AWSSDK REQUIRED)

file(GLOB_RECURSE SRC_FILES src/*.cpp src/*.cc)
message(STATUS "SRC_FILES: ${SRC_FILES}")
add_library(milvus-storage ${SRC_FILES})
target_include_directories(milvus-storage PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/milvus-storage ${CMAKE_CURRENT_SOURCE_DIR}/src)
target_link_libraries(milvus-storage PUBLIC arrow::arrow Boost::boost protobuf::protobuf AWS::aws-sdk-cpp-core glog::glog)
if (USE_OPENDAL)
target_link_libraries(milvus-storage PUBLIC opendal)
endif()

if (WITH_UT)
enable_testing()
add_subdirectory(test)
endif()

@@ -36,9 +36,6 @@
#include "storage/InsertData.h"
#include "storage/ThreadPool.h"
#include "storage/Types.h"
#include "storage/options.h"
#include "storage/schema.h"
#include "storage/space.h"
#include "storage/Util.h"
#include "storage/DiskFileManagerImpl.h"
#include "storage/LocalChunkManagerSingleton.h"
@@ -285,62 +282,6 @@ PrepareInsertData(const int64_t opt_field_data_range) -> std::string {
return path;
}

auto
PrepareInsertDataSpace(const int64_t opt_field_data_range)
-> std::pair<std::string, std::shared_ptr<milvus_storage::Space>> {
std::string path = kOptFieldPath + "space/" + std::to_string(kOptFieldId);
arrow::FieldVector arrow_fields{
arrow::field("pk", arrow::int64()),
arrow::field("ts", arrow::int64()),
arrow::field(kOptFieldName, arrow::int64()),
arrow::field("vec", arrow::fixed_size_binary(1))};
auto arrow_schema = std::make_shared<arrow::Schema>(arrow_fields);
milvus_storage::SchemaOptions schema_options = {
.primary_column = "pk", .version_column = "ts", .vector_column = "vec"};
auto schema =
std::make_shared<milvus_storage::Schema>(arrow_schema, schema_options);
boost::filesystem::remove_all(path);
boost::filesystem::create_directories(path);
EXPECT_TRUE(schema->Validate().ok());
auto opt_space = milvus_storage::Space::Open(
"file://" + boost::filesystem::canonical(path).string(),
milvus_storage::Options{schema});
EXPECT_TRUE(opt_space.has_value());
auto space = std::move(opt_space.value());
const auto data = PrepareRawFieldData<int64_t>(opt_field_data_range);
arrow::Int64Builder pk_builder;
arrow::Int64Builder ts_builder;
arrow::NumericBuilder<arrow::Int64Type> scalar_builder;
arrow::FixedSizeBinaryBuilder vec_builder(arrow::fixed_size_binary(1));
const uint8_t kByteZero = 0;
for (size_t i = 0; i < kEntityCnt; ++i) {
EXPECT_TRUE(pk_builder.Append(i).ok());
EXPECT_TRUE(ts_builder.Append(i).ok());
EXPECT_TRUE(vec_builder.Append(&kByteZero).ok());
}
for (size_t i = 0; i < kEntityCnt; ++i) {
EXPECT_TRUE(scalar_builder.Append(data[i]).ok());
}
std::shared_ptr<arrow::Array> pk_array;
EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
std::shared_ptr<arrow::Array> ts_array;
EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
std::shared_ptr<arrow::Array> scalar_array;
EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok());
std::shared_ptr<arrow::Array> vec_array;
EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
auto batch =
arrow::RecordBatch::Make(arrow_schema,
kEntityCnt,
{pk_array, ts_array, scalar_array, vec_array});
milvus_storage::WriteOption write_opt = {kEntityCnt};
space->Write(*arrow::RecordBatchReader::Make({batch}, arrow_schema)
.ValueOrDie()
.get(),
write_opt);
return {path, std::move(space)};
}

template <DataType DT>
auto
PrepareOptionalField(const std::shared_ptr<DiskFileManagerImpl>& file_manager,
@@ -400,47 +341,24 @@ CheckOptFieldCorrectness(
}
} // namespace

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskFieldEmpty) {
auto file_manager = CreateFileManager(cm_);
{
const auto& [insert_file_space_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
OptFieldT opt_fields;
EXPECT_TRUE(file_manager->CacheOptFieldToDisk(opt_fields).empty());
EXPECT_TRUE(
file_manager->CacheOptFieldToDisk(space, opt_fields).empty());
}

{
auto opt_fileds =
PrepareOptionalField<DataType::INT64>(file_manager, "");
auto res = file_manager->CacheOptFieldToDisk(nullptr, opt_fileds);
EXPECT_TRUE(res.empty());
}
}

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOptFieldMoreThanOne) {
auto file_manager = CreateFileManager(cm_);
const auto insert_file_path =
PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
const auto& [insert_file_space_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
OptFieldT opt_fields =
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
opt_fields[kOptFieldId + 1] = {
kOptFieldName + "second", DataType::INT64, {insert_file_space_path}};
kOptFieldName + "second", DataType::INT64, {insert_file_path}};
EXPECT_THROW(file_manager->CacheOptFieldToDisk(opt_fields), SegcoreError);
EXPECT_THROW(file_manager->CacheOptFieldToDisk(space, opt_fields),
SegcoreError);
}

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
auto file_manager = CreateFileManager(cm_);
const auto& [insert_file_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
const auto insert_file_path =
PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
auto opt_fileds =
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds);
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
ASSERT_FALSE(res.empty());
CheckOptFieldCorrectness(res);
}
@@ -477,12 +395,4 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) {
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
ASSERT_TRUE(res.empty());
}

{
const auto& [insert_file_path, space] = PrepareInsertDataSpace(1);
auto opt_fileds = PrepareOptionalField<DataType::INT64>(
file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds);
ASSERT_TRUE(res.empty());
}
}
}

@@ -32,7 +32,6 @@
#include "index/IndexFactory.h"
#include "common/QueryResult.h"
#include "segcore/Types.h"
#include "storage/options.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "test_utils/storage_test_utils.h"
#include "test_utils/DataGen.h"
@@ -916,261 +915,4 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) {
SearchResult result;
EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
}
#endif

//class IndexTestV2
// : public ::testing::TestWithParam<std::tuple<Param, int64_t, bool>> {
// protected:
// std::shared_ptr<arrow::Schema>
// TestSchema(int vec_size) {
// arrow::FieldVector fields;
// fields.push_back(arrow::field("pk", arrow::int64()));
// fields.push_back(arrow::field("ts", arrow::int64()));
// fields.push_back(
// arrow::field("vec", arrow::fixed_size_binary(vec_size)));
// return std::make_shared<arrow::Schema>(fields);
// }
//
// std::shared_ptr<arrow::RecordBatchReader>
// TestRecords(int vec_size, GeneratedData& dataset) {
// arrow::Int64Builder pk_builder;
// arrow::Int64Builder ts_builder;
// arrow::FixedSizeBinaryBuilder vec_builder(
// arrow::fixed_size_binary(vec_size));
// if (!is_binary) {
// xb_data = dataset.get_col<float>(milvus::FieldId(100));
// auto data = reinterpret_cast<char*>(xb_data.data());
// for (auto i = 0; i < NB; ++i) {
// EXPECT_TRUE(pk_builder.Append(i).ok());
// EXPECT_TRUE(ts_builder.Append(i).ok());
// EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok());
// }
// } else {
// xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
// for (auto i = 0; i < NB; ++i) {
// EXPECT_TRUE(pk_builder.Append(i).ok());
// EXPECT_TRUE(ts_builder.Append(i).ok());
// EXPECT_TRUE(
// vec_builder.Append(xb_bin_data.data() + i * vec_size).ok());
// }
// }
// std::shared_ptr<arrow::Array> pk_array;
// EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
// std::shared_ptr<arrow::Array> ts_array;
// EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
// std::shared_ptr<arrow::Array> vec_array;
// EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
// auto schema = TestSchema(vec_size);
// auto rec_batch = arrow::RecordBatch::Make(
// schema, NB, {pk_array, ts_array, vec_array});
// auto reader =
// arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie();
// return reader;
// }
//
// std::shared_ptr<milvus_storage::Space>
// TestSpace(int vec_size, GeneratedData& dataset) {
// auto arrow_schema = TestSchema(vec_size);
// auto schema_options = std::make_shared<milvus_storage::SchemaOptions>();
// schema_options->primary_column = "pk";
// schema_options->version_column = "ts";
// schema_options->vector_column = "vec";
// auto schema = std::make_shared<milvus_storage::Schema>(arrow_schema,
// schema_options);
// EXPECT_TRUE(schema->Validate().ok());
//
// auto space_res = milvus_storage::Space::Open(
// "file://" + boost::filesystem::canonical(temp_path).string(),
// milvus_storage::Options{schema});
// EXPECT_TRUE(space_res.has_value());
//
// auto space = std::move(space_res.value());
// auto rec = TestRecords(vec_size, dataset);
// auto write_opt = milvus_storage::WriteOption{NB};
// space->Write(rec.get(), &write_opt);
// return std::move(space);
// }
//
// void
// SetUp() override {
// temp_path = boost::filesystem::temp_directory_path() /
// boost::filesystem::unique_path();
// boost::filesystem::create_directory(temp_path);
// storage_config_ = get_default_local_storage_config();
//
// auto param = GetParam();
// index_type = std::get<0>(param).first;
// metric_type = std::get<0>(param).second;
// file_slice_size = std::get<1>(param);
// enable_mmap = index_type != knowhere::IndexEnum::INDEX_DISKANN &&
// std::get<2>(param);
// if (enable_mmap) {
// mmap_file_path = boost::filesystem::temp_directory_path() /
// boost::filesystem::unique_path();
// }
// NB = 3000;
//
// // try to reduce the test time,
// // but the large dataset is needed for the case below.
// auto test_name = std::string(
// testing::UnitTest::GetInstance()->current_test_info()->name());
// if (test_name == "Mmap" &&
// index_type == knowhere::IndexEnum::INDEX_HNSW) {
// NB = 270000;
// }
// build_conf = generate_build_conf(index_type, metric_type);
// load_conf = generate_load_conf(index_type, metric_type, NB);
// search_conf = generate_search_conf(index_type, metric_type);
// range_search_conf = generate_range_search_conf(index_type, metric_type);
//
// std::map<knowhere::MetricType, bool> is_binary_map = {
// {knowhere::IndexEnum::INDEX_FAISS_IDMAP, false},
// {knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false},
// {knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, false},
// {knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, false},
// {knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true},
// {knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, true},
// {knowhere::IndexEnum::INDEX_HNSW, false},
// {knowhere::IndexEnum::INDEX_DISKANN, false},
// };
//
// is_binary = is_binary_map[index_type];
// int vec_size;
// if (is_binary) {
// vec_size = DIM / 8;
// vec_field_data_type = milvus::DataType::VECTOR_BINARY;
// } else {
// vec_size = DIM * 4;
// vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
// }
//
// auto dataset = GenDataset(NB, metric_type, is_binary);
// space = TestSpace(vec_size, dataset);
//
// if (!is_binary) {
// xb_data = dataset.get_col<float>(milvus::FieldId(100));
// xq_dataset = knowhere::GenDataSet(
// NQ, DIM, xb_data.data() + DIM * query_offset);
// } else {
// xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
// xq_dataset = knowhere::GenDataSet(
// NQ, DIM, xb_bin_data.data() + DIM * query_offset);
// }
// }
//
// void
// TearDown() override {
// boost::filesystem::remove_all(temp_path);
// if (enable_mmap) {
// boost::filesystem::remove_all(mmap_file_path);
// }
// }
//
// protected:
// std::string index_type, metric_type;
// bool is_binary;
// milvus::Config build_conf;
// milvus::Config load_conf;
// milvus::Config search_conf;
// milvus::Config range_search_conf;
// milvus::DataType vec_field_data_type;
// knowhere::DataSetPtr xb_dataset;
// FixedVector<float> xb_data;
// FixedVector<uint8_t> xb_bin_data;
// knowhere::DataSetPtr xq_dataset;
// int64_t query_offset = 100;
// int64_t NB = 3000;
// StorageConfig storage_config_;
//
// boost::filesystem::path temp_path;
// std::shared_ptr<milvus_storage::Space> space;
// int64_t file_slice_size = DEFAULT_INDEX_FILE_SLICE_SIZE;
// bool enable_mmap;
// boost::filesystem::path mmap_file_path;
//};
//
//INSTANTIATE_TEST_SUITE_P(
// IndexTypeParameters,
// IndexTestV2,
// testing::Combine(
// ::testing::Values(
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP,
// knowhere::metric::L2),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ,
// knowhere::metric::L2),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
// knowhere::metric::L2),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
// knowhere::metric::L2),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
// knowhere::metric::JACCARD),
// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
// knowhere::metric::JACCARD),
//#ifdef BUILD_DISK_ANN
// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2),
//#endif
// std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2)),
// testing::Values(DEFAULT_INDEX_FILE_SLICE_SIZE, 5000L),
// testing::Bool()));
//
//TEST_P(IndexTestV2, BuildAndQuery) {
// FILE_SLICE_SIZE = file_slice_size;
// milvus::index::CreateIndexInfo create_index_info;
// create_index_info.index_type = index_type;
// create_index_info.metric_type = metric_type;
// create_index_info.field_type = vec_field_data_type;
// create_index_info.field_name = "vec";
// create_index_info.dim = DIM;
// create_index_info.index_engine_version =
// knowhere::Version::GetCurrentVersion().VersionNumber();
// index::IndexBasePtr index;
//
// milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
// milvus::storage::IndexMeta index_meta{.segment_id = 3,
// .field_id = 100,
// .build_id = 1000,
// .index_version = 1,
// .field_name = "vec",
// .field_type = vec_field_data_type,
// .dim = DIM};
// auto chunk_manager = milvus::storage::CreateChunkManager(storage_config_);
// milvus::storage::FileManagerContext file_manager_context(
// field_data_meta, index_meta, chunk_manager, space);
// index = milvus::index::IndexFactory::GetInstance().CreateIndex(
// create_index_info, file_manager_context, space);
//
// auto build_conf = generate_build_conf(index_type, metric_type);
// index->BuildV2(build_conf);
// milvus::index::IndexBasePtr new_index;
// milvus::index::VectorIndex* vec_index = nullptr;
//
// auto binary_set = index->UploadV2();
// index.reset();
//
// new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
// create_index_info, file_manager_context, space);
// vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
//
// load_conf = generate_load_conf(index_type, metric_type, 0);
// if (enable_mmap) {
// load_conf[kMmapFilepath] = mmap_file_path.string();
// }
// ASSERT_NO_THROW(vec_index->LoadV2(load_conf));
// EXPECT_EQ(vec_index->Count(), NB);
// EXPECT_EQ(vec_index->GetDim(), DIM);
//
// milvus::SearchInfo search_info;
// search_info.topk_ = K;
// search_info.metric_type_ = metric_type;
// search_info.search_params_ = search_conf;
// auto result = vec_index->Query(xq_dataset, search_info, nullptr);
// EXPECT_EQ(result->total_nq_, NQ);
// EXPECT_EQ(result->unity_topK_, K);
// EXPECT_EQ(result->distances_.size(), NQ * K);
// EXPECT_EQ(result->seg_offsets_.size(), NQ * K);
// if (!is_binary) {
// EXPECT_EQ(result->seg_offsets_[0], query_offset);
// }
// search_info.search_params_ = range_search_conf;
// vec_index->Query(xq_dataset, search_info, nullptr);
//}
#endif

@@ -301,31 +301,6 @@ TestRecords(int vec_size, GeneratedData& dataset, std::vector<T>& scalars) {
return reader;
}

template <typename T>
std::shared_ptr<milvus_storage::Space>
TestSpace(boost::filesystem::path& temp_path,
int vec_size,
GeneratedData& dataset,
std::vector<T>& scalars) {
auto arrow_schema = TestSchema<T>(vec_size);
milvus_storage::SchemaOptions schema_options{
.primary_column = "pk", .version_column = "ts", .vector_column = "vec"};
auto schema =
std::make_shared<milvus_storage::Schema>(arrow_schema, schema_options);
EXPECT_TRUE(schema->Validate().ok());

auto space_res = milvus_storage::Space::Open(
"file://" + boost::filesystem::canonical(temp_path).string(),
milvus_storage::Options{schema});
EXPECT_TRUE(space_res.has_value());

auto space = std::move(space_res.value());
auto rec = TestRecords<T>(vec_size, dataset, scalars);
auto write_opt = milvus_storage::WriteOption{nb};
space->Write(*rec, write_opt);
return std::move(space);
}

template <>
struct TypedScalarIndexTestV2<int8_t>::Helper {
using C = arrow::Int8Type;

@@ -349,116 +349,5 @@ TEST_F(StringIndexMarisaTest, BaseIndexCodec) {
}
}
}

using milvus::segcore::GeneratedData;
class StringIndexMarisaTestV2 : public StringIndexBaseTest {
std::shared_ptr<arrow::Schema>
TestSchema(int vec_size) {
arrow::FieldVector fields;
fields.push_back(arrow::field("pk", arrow::int64()));
fields.push_back(arrow::field("ts", arrow::int64()));
fields.push_back(arrow::field("scalar", arrow::utf8()));
fields.push_back(
arrow::field("vec", arrow::fixed_size_binary(vec_size)));
return std::make_shared<arrow::Schema>(fields);
}

std::shared_ptr<arrow::RecordBatchReader>
TestRecords(int vec_size,
GeneratedData& dataset,
std::vector<std::string>& scalars) {
arrow::Int64Builder pk_builder;
arrow::Int64Builder ts_builder;
arrow::StringBuilder scalar_builder;
arrow::FixedSizeBinaryBuilder vec_builder(
arrow::fixed_size_binary(vec_size));
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
auto data = reinterpret_cast<char*>(xb_data.data());
for (auto i = 0; i < nb; ++i) {
EXPECT_TRUE(pk_builder.Append(i).ok());
EXPECT_TRUE(ts_builder.Append(i).ok());
EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok());
}
for (auto& v : scalars) {
EXPECT_TRUE(scalar_builder.Append(v).ok());
}
std::shared_ptr<arrow::Array> pk_array;
EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
std::shared_ptr<arrow::Array> ts_array;
EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
std::shared_ptr<arrow::Array> scalar_array;
EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok());
std::shared_ptr<arrow::Array> vec_array;
EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
auto schema = TestSchema(vec_size);
auto rec_batch = arrow::RecordBatch::Make(
schema, nb, {pk_array, ts_array, scalar_array, vec_array});
auto reader =
arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie();
return reader;
}

std::shared_ptr<milvus_storage::Space>
TestSpace(int vec_size,
GeneratedData& dataset,
std::vector<std::string>& scalars) {
auto arrow_schema = TestSchema(vec_size);
milvus_storage::SchemaOptions schema_options{.primary_column = "pk",
.version_column = "ts",
.vector_column = "vec"};
auto schema = std::make_shared<milvus_storage::Schema>(arrow_schema,
schema_options);
EXPECT_TRUE(schema->Validate().ok());

auto space_res = milvus_storage::Space::Open(
"file://" + boost::filesystem::canonical(temp_path).string(),
milvus_storage::Options{schema});
EXPECT_TRUE(space_res.has_value());

auto space = std::move(space_res.value());
auto rec = TestRecords(vec_size, dataset, scalars);
auto write_opt = milvus_storage::WriteOption{nb};
space->Write(*rec, write_opt);
return std::move(space);
}
void
SetUp() override {
StringIndexBaseTest::SetUp();
temp_path = boost::filesystem::temp_directory_path() /
boost::filesystem::unique_path();
boost::filesystem::create_directory(temp_path);

auto vec_size = DIM * 4;
auto vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
auto dataset = ::GenDataset(nb, knowhere::metric::L2, false);

space = TestSpace(vec_size, dataset, strs);
}
void
TearDown() override {
boost::filesystem::remove_all(temp_path);
}

protected:
boost::filesystem::path temp_path;
std::shared_ptr<milvus_storage::Space> space;
};

TEST_F(StringIndexMarisaTestV2, Base) {
auto storage_config = get_default_local_storage_config();
auto chunk_manager = milvus::storage::CreateChunkManager(storage_config);
milvus::storage::FileManagerContext file_manager_context(
{}, {.field_name = "scalar"}, chunk_manager, space);
auto index =
milvus::index::CreateStringIndexMarisa(file_manager_context, space);
index->BuildV2();
index->UploadV2();

auto new_index =
milvus::index::CreateStringIndexMarisa(file_manager_context, space);
new_index->LoadV2();
ASSERT_EQ(strs.size(), index->Count());
}

} // namespace index
} // namespace milvus

@@ -543,10 +543,6 @@ func (s *Server) SaveBinlogPaths(ctx context.Context, req *datapb.SaveBinlogPath
UpdateCheckPointOperator(req.GetSegmentID(), req.GetCheckPoints()),
)

if Params.CommonCfg.EnableStorageV2.GetAsBool() {
operators = append(operators, UpdateStorageVersionOperator(req.GetSegmentID(), req.GetStorageVersion()))
}

// Update segment info in memory and meta.
if err := s.meta.UpdateSegmentsInfo(operators...); err != nil {
log.Error("save binlog and checkpoints failed", zap.Error(err))
@@ -882,18 +878,6 @@ func (s *Server) GetRecoveryInfoV2(ctx context.Context, req *datapb.GetRecoveryI
continue
}

if Params.CommonCfg.EnableStorageV2.GetAsBool() {
segmentInfos = append(segmentInfos, &datapb.SegmentInfo{
ID: segment.ID,
PartitionID: segment.PartitionID,
CollectionID: segment.CollectionID,
InsertChannel: segment.InsertChannel,
NumOfRows: segment.NumOfRows,
Level: segment.GetLevel(),
})
continue
}

binlogs := segment.GetBinlogs()
if len(binlogs) == 0 && segment.GetLevel() != datapb.SegmentLevel_L0 {
continue

@@ -25,10 +25,8 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/types"
itypeutil "github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/indexparams"
@@ -201,68 +199,27 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule
}
}

if Params.CommonCfg.EnableStorageV2.GetAsBool() {
storePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segment.GetID())
if err != nil {
log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err))
it.SetState(indexpb.JobState_JobStateInit, err.Error())
return true
}
indexStorePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue()+"/index", segment.GetID())
if err != nil {
log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err))
it.SetState(indexpb.JobState_JobStateInit, err.Error())
return true
}

it.req = &indexpb.CreateJobRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
BuildID: it.taskID,
IndexVersion: segIndex.IndexVersion + 1,
StorageConfig: storageConfig,
IndexParams: indexParams,
TypeParams: typeParams,
NumRows: segIndex.NumRows,
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
CollectionID: segment.GetCollectionID(),
PartitionID: segment.GetPartitionID(),
SegmentID: segment.GetID(),
FieldID: fieldID,
FieldName: field.GetName(),
FieldType: field.GetDataType(),
StorePath: storePath,
StoreVersion: segment.GetStorageVersion(),
IndexStorePath: indexStorePath,
Dim: int64(dim),
DataIds: binlogIDs,
OptionalScalarFields: optionalFields,
Field: field,
PartitionKeyIsolation: partitionKeyIsolation,
}
} else {
it.req = &indexpb.CreateJobRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
BuildID: it.taskID,
IndexVersion: segIndex.IndexVersion + 1,
StorageConfig: storageConfig,
IndexParams: indexParams,
TypeParams: typeParams,
NumRows: segIndex.NumRows,
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
CollectionID: segment.GetCollectionID(),
PartitionID: segment.GetPartitionID(),
SegmentID: segment.GetID(),
FieldID: fieldID,
FieldName: field.GetName(),
FieldType: field.GetDataType(),
Dim: int64(dim),
DataIds: binlogIDs,
OptionalScalarFields: optionalFields,
Field: field,
PartitionKeyIsolation: partitionKeyIsolation,
}
it.req = &indexpb.CreateJobRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
BuildID: it.taskID,
IndexVersion: segIndex.IndexVersion + 1,
StorageConfig: storageConfig,
IndexParams: indexParams,
TypeParams: typeParams,
NumRows: segIndex.NumRows,
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
CollectionID: segment.GetCollectionID(),
PartitionID: segment.GetPartitionID(),
SegmentID: segment.GetID(),
FieldID: fieldID,
FieldName: field.GetName(),
FieldType: field.GetDataType(),
Dim: int64(dim),
DataIds: binlogIDs,
OptionalScalarFields: optionalFields,
Field: field,
PartitionKeyIsolation: partitionKeyIsolation,
}

log.Ctx(ctx).Info("index task pre check successfully", zap.Int64("taskID", it.GetTaskID()))

@@ -911,15 +911,6 @@ func (s *taskSchedulerSuite) Test_scheduler() {
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
s.scheduler(handler)
})

s.Run("test scheduler with indexBuilderV2", func() {
paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("true")
defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")

s.scheduler(handler)
})
}

func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
@@ -1289,26 +1280,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {

paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("True")
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("False")
err := Params.Save("common.storage.scheme", "fake")
defer Params.Reset("common.storage.scheme")
Params.CommonCfg.EnableStorageV2.SwapTempValue("True")
defer Params.CommonCfg.EnableStorageV2.SwapTempValue("False")
scheduler.Start()

// get collection info failed --> init
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()

// partition key field is nil, get collection info failed --> init
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
},
},
}, nil).Once()
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()

// get collection info success, get dim failed --> init
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
ID: collID,
@@ -1318,38 +1294,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
{FieldID: s.fieldID, Name: "vec"},
},
},
}, nil).Twice()

// peek client success, update version success, get collection info success, get dim success, get storage uri failed --> init
s.NoError(err)
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) {
return &collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{FieldID: 100, Name: "pk", IsPrimaryKey: true, IsPartitionKey: true, DataType: schemapb.DataType_Int64},
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
},
},
}, nil
}).Twice()
s.NoError(err)
}, nil).Once()

// assign failed --> retry
workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once()
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) {
Params.Reset("common.storage.scheme")
return &collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{FieldID: 100, Name: "pk", IsPrimaryKey: true, IsPartitionKey: true, DataType: schemapb.DataType_Int64},
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
},
},
}, nil
}).Once()
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()

// retry --> init

@@ -30,12 +30,12 @@ import (

"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus-storage/go/common/log"
"github.com/milvus-io/milvus/internal/datanode/allocator"
"github.com/milvus-io/milvus/internal/datanode/io"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/tsoutil"

@@ -24,8 +24,6 @@ import (
"go.uber.org/zap"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
@@ -113,54 +111,3 @@ func LoadStats(ctx context.Context, chunkManager storage.ChunkManager, schema *s
log.Info("Successfully load pk stats", zap.Any("time", time.Since(startTs)), zap.Uint("size", size))
return result, nil
}

func LoadStatsV2(storageCache *metacache.StorageV2Cache, segment *datapb.SegmentInfo, schema *schemapb.CollectionSchema) ([]*storage.PkStatistics, error) {
space, err := storageCache.GetOrCreateSpace(segment.ID, syncmgr.SpaceCreatorFunc(segment.ID, schema, storageCache.ArrowSchema()))
if err != nil {
return nil, err
}

getResult := func(stats []*storage.PrimaryKeyStats) []*storage.PkStatistics {
result := make([]*storage.PkStatistics, 0, len(stats))
for _, stat := range stats {
pkStat := &storage.PkStatistics{
PkFilter: stat.BF,
MinPK: stat.MinPk,
MaxPK: stat.MaxPk,
}
result = append(result, pkStat)
}
return result
}

blobs := space.StatisticsBlobs()
deserBlobs := make([]*storage.Blob, 0)
for _, b := range blobs {
if b.Name == storage.CompoundStatsType.LogIdx() {
blobData := make([]byte, b.Size)
_, err = space.ReadBlob(b.Name, blobData)
if err != nil {
return nil, err
}
stats, err := storage.DeserializeStatsList(&storage.Blob{Value: blobData})
if err != nil {
return nil, err
}
return getResult(stats), nil
}
}

for _, b := range blobs {
blobData := make([]byte, b.Size)
_, err = space.ReadBlob(b.Name, blobData)
if err != nil {
return nil, err
}
deserBlobs = append(deserBlobs, &storage.Blob{Value: blobData})
}
stats, err := storage.DeserializeStats(deserBlobs)
if err != nil {
return nil, err
}
return getResult(stats), nil
}

@@ -32,7 +32,6 @@ import (
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
@@ -52,10 +51,6 @@ func NewSyncTask(ctx context.Context,
insertData *storage.InsertData,
deleteData *storage.DeleteData,
) (syncmgr.Task, error) {
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
return nil, merr.WrapErrImportFailed("storage v2 is not supported") // TODO: dyh, resolve storage v2
}

metaCache := metaCaches[vchannel]
if _, ok := metaCache.GetSegmentByID(segmentID); !ok {
metaCache.AddSegment(&datapb.SegmentInfo{

@@ -1,70 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metacache

import (
"sync"

"github.com/apache/arrow/go/v12/arrow"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
"github.com/milvus-io/milvus/internal/util/typeutil"
)

type StorageV2Cache struct {
arrowSchema *arrow.Schema
spaceMu sync.Mutex
spaces map[int64]*milvus_storage.Space
}

func (s *StorageV2Cache) ArrowSchema() *arrow.Schema {
return s.arrowSchema
}

func (s *StorageV2Cache) GetOrCreateSpace(segmentID int64, creator func() (*milvus_storage.Space, error)) (*milvus_storage.Space, error) {
s.spaceMu.Lock()
defer s.spaceMu.Unlock()
space, ok := s.spaces[segmentID]
if ok {
return space, nil
}
space, err := creator()
if err != nil {
return nil, err
}
s.spaces[segmentID] = space
return space, nil
}

// only for unit test
func (s *StorageV2Cache) SetSpace(segmentID int64, space *milvus_storage.Space) {
s.spaceMu.Lock()
defer s.spaceMu.Unlock()
s.spaces[segmentID] = space
}

func NewStorageV2Cache(schema *schemapb.CollectionSchema) (*StorageV2Cache, error) {
arrowSchema, err := typeutil.ConvertToArrowSchema(schema.Fields)
if err != nil {
return nil, err
}
return &StorageV2Cache{
arrowSchema: arrowSchema,
spaces: make(map[int64]*milvus_storage.Space),
}, nil
}

@ -30,7 +30,6 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
|
||||
"github.com/milvus-io/milvus/internal/flushcommon/writebuffer"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/internal/util/flowgraph"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
|
@ -129,12 +128,12 @@ func (dsService *DataSyncService) GetMetaCache() metacache.MetaCache {
|
|||
return dsService.metacache
|
||||
}
|
||||
|
||||
func getMetaCacheWithTickler(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, tickler *util.Tickler, unflushed, flushed []*datapb.SegmentInfo, storageV2Cache *metacache.StorageV2Cache) (metacache.MetaCache, error) {
|
||||
func getMetaCacheWithTickler(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, tickler *util.Tickler, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
|
||||
tickler.SetTotal(int32(len(unflushed) + len(flushed)))
|
||||
return initMetaCache(initCtx, storageV2Cache, params.ChunkManager, info, tickler, unflushed, flushed)
|
||||
return initMetaCache(initCtx, params.ChunkManager, info, tickler, unflushed, flushed)
|
||||
}
|
||||
|
||||
func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2Cache, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ Inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
|
||||
func initMetaCache(initCtx context.Context, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ Inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
|
||||
// tickler will update addSegment progress to watchInfo
|
||||
futures := make([]*conc.Future[any], 0, len(unflushed)+len(flushed))
|
||||
segmentPks := typeutil.NewConcurrentMap[int64, []*storage.PkStatistics]()
|
||||
|
@ -152,11 +151,7 @@ func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2C
|
|||
future := io.GetOrCreateStatsPool().Submit(func() (any, error) {
|
||||
var stats []*storage.PkStatistics
|
||||
var err error
|
||||
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
|
||||
stats, err = compaction.LoadStatsV2(storageV2Cache, segment, info.GetSchema())
|
||||
} else {
|
||||
stats, err = compaction.LoadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetStatslogs())
|
||||
}
|
||||
stats, err = compaction.LoadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetStatslogs())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -190,7 +185,7 @@ func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2C
|
|||
return metacache, nil
|
||||
}
|
||||
|
||||
func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, unflushed, flushed []*datapb.SegmentInfo) (*DataSyncService, error) {
|
||||
func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, unflushed, flushed []*datapb.SegmentInfo) (*DataSyncService, error) {
|
||||
var (
|
||||
channelName = info.GetVchan().GetChannelName()
|
||||
collectionID = info.GetVchan().GetCollectionID()
|
||||
|
@ -204,7 +199,7 @@ func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams,
|
|||
serverID: params.Session.ServerID,
|
||||
}
|
||||
|
||||
err := params.WriteBufferManager.Register(channelName, metacache, storageV2Cache,
|
||||
err := params.WriteBufferManager.Register(channelName, metacache,
|
||||
writebuffer.WithMetaWriter(syncmgr.BrokerMetaWriter(params.Broker, config.serverID)),
|
||||
writebuffer.WithIDAllocator(params.Allocator))
|
||||
if err != nil {
|
||||
|
@ -287,21 +282,13 @@ func NewDataSyncService(initCtx context.Context, pipelineParams *util.PipelinePa
|
|||
return nil, err
|
||||
}
|
||||
|
||||
var storageCache *metacache.StorageV2Cache
|
||||
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
|
||||
storageCache, err = metacache.NewStorageV2Cache(info.Schema)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// init metaCache meta
|
||||
metaCache, err := getMetaCacheWithTickler(initCtx, pipelineParams, info, tickler, unflushedSegmentInfos, flushedSegmentInfos, storageCache)
|
||||
metaCache, err := getMetaCacheWithTickler(initCtx, pipelineParams, info, tickler, unflushedSegmentInfos, flushedSegmentInfos)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return getServiceWithChannel(initCtx, pipelineParams, info, metaCache, storageCache, unflushedSegmentInfos, flushedSegmentInfos)
|
||||
return getServiceWithChannel(initCtx, pipelineParams, info, metaCache, unflushedSegmentInfos, flushedSegmentInfos)
|
||||
}
|
||||
|
||||
func NewDataSyncServiceWithMetaCache(metaCache metacache.MetaCache) *DataSyncService {
|
||||
|
|
|
@ -289,7 +289,7 @@ func TestGetChannelWithTickler(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
metaCache, err := getMetaCacheWithTickler(context.TODO(), pipelineParams, info, util.NewTickler(), unflushed, flushed, nil)
|
||||
metaCache, err := getMetaCacheWithTickler(context.TODO(), pipelineParams, info, util.NewTickler(), unflushed, flushed)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, metaCache)
|
||||
assert.Equal(t, int64(1), metaCache.Collection())
|
||||
|
|
|
@ -20,7 +20,6 @@ import (
|
|||
// MetaWriter is the interface for SyncManager to write segment sync meta.
|
||||
type MetaWriter interface {
|
||||
UpdateSync(context.Context, *SyncTask) error
|
||||
UpdateSyncV2(*SyncTaskV2) error
|
||||
DropChannel(context.Context, string) error
|
||||
}
|
||||
|
||||
|
@ -138,82 +137,6 @@ func (b *brokerMetaWriter) UpdateSync(ctx context.Context, pack *SyncTask) error
|
|||
return nil
|
||||
}
|
||||
|
||||
func (b *brokerMetaWriter) UpdateSyncV2(pack *SyncTaskV2) error {
|
||||
checkPoints := []*datapb.CheckPoint{}
|
||||
|
||||
// only current segment checkpoint info,
|
||||
segment, ok := pack.metacache.GetSegmentByID(pack.segmentID)
|
||||
if !ok {
|
||||
return merr.WrapErrSegmentNotFound(pack.segmentID)
|
||||
}
|
||||
checkPoints = append(checkPoints, &datapb.CheckPoint{
|
||||
SegmentID: pack.segmentID,
|
||||
NumOfRows: segment.FlushedRows() + pack.batchSize,
|
||||
Position: pack.checkpoint,
|
||||
})
|
||||
|
||||
startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing),
|
||||
metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition {
|
||||
return &datapb.SegmentStartPosition{
|
||||
SegmentID: info.SegmentID(),
|
||||
StartPosition: info.StartPosition(),
|
||||
}
|
||||
})
|
||||
log.Info("SaveBinlogPath",
|
||||
zap.Int64("SegmentID", pack.segmentID),
|
||||
zap.Int64("CollectionID", pack.collectionID),
|
||||
zap.Any("startPos", startPos),
|
||||
zap.Any("checkPoints", checkPoints),
|
||||
zap.String("vChannelName", pack.channelName),
|
||||
)
|
||||
|
||||
req := &datapb.SaveBinlogPathsRequest{
|
||||
Base: commonpbutil.NewMsgBase(
|
||||
commonpbutil.WithSourceID(b.serverID),
|
||||
),
|
||||
SegmentID: pack.segmentID,
|
||||
CollectionID: pack.collectionID,
|
||||
|
||||
CheckPoints: checkPoints,
|
||||
StorageVersion: pack.storageVersion,
|
||||
|
||||
StartPositions: startPos,
|
||||
Flushed: pack.isFlush,
|
||||
Dropped: pack.isDrop,
|
||||
Channel: pack.channelName,
|
||||
}
|
||||
err := retry.Do(context.Background(), func() error {
|
||||
err := b.broker.SaveBinlogPaths(context.Background(), req)
|
||||
// Segment not found during stale segment flush. Segment might get compacted already.
|
||||
// Stop retry and still proceed to the end, ignoring this error.
|
||||
if !pack.isFlush && errors.Is(err, merr.ErrSegmentNotFound) {
|
||||
log.Warn("stale segment not found, could be compacted",
|
||||
zap.Int64("segmentID", pack.segmentID))
|
||||
log.Warn("failed to SaveBinlogPaths",
|
||||
zap.Int64("segmentID", pack.segmentID),
|
||||
zap.Error(err))
|
||||
return nil
|
||||
}
|
||||
// meta error, datanode handles a virtual channel does not belong here
|
||||
if errors.IsAny(err, merr.ErrSegmentNotFound, merr.ErrChannelNotFound) {
|
||||
log.Warn("meta error found, skip sync and start to drop virtual channel", zap.String("channel", pack.channelName))
|
||||
return nil
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}, b.opts...)
|
||||
if err != nil {
|
||||
log.Warn("failed to SaveBinlogPaths",
|
||||
zap.Int64("segmentID", pack.segmentID),
|
||||
zap.Error(err))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *brokerMetaWriter) DropChannel(ctx context.Context, channelName string) error {
|
||||
err := retry.Handle(ctx, func() (bool, error) {
|
||||
status, err := b.broker.DropVirtualChannel(context.Background(), &datapb.DropVirtualChannelRequest{
|
||||
|
|
|
@ -67,34 +67,6 @@ func (s *MetaWriterSuite) TestReturnError() {
|
|||
s.Error(err)
|
||||
}
|
||||
|
||||
func (s *MetaWriterSuite) TestNormalSaveV2() {
|
||||
s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil)
|
||||
|
||||
bfs := metacache.NewBloomFilterSet()
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
|
||||
metacache.UpdateNumOfRows(1000)(seg)
|
||||
s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
|
||||
s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
|
||||
task := NewSyncTaskV2()
|
||||
task.WithMetaCache(s.metacache)
|
||||
err := s.writer.UpdateSyncV2(task)
|
||||
s.NoError(err)
|
||||
}
|
||||
|
||||
func (s *MetaWriterSuite) TestReturnErrorV2() {
|
||||
s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(errors.New("mocked"))
|
||||
|
||||
bfs := metacache.NewBloomFilterSet()
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
|
||||
metacache.UpdateNumOfRows(1000)(seg)
|
||||
s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
|
||||
s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
|
||||
task := NewSyncTaskV2()
|
||||
task.WithMetaCache(s.metacache)
|
||||
err := s.writer.UpdateSyncV2(task)
|
||||
s.Error(err)
|
||||
}
|
||||
|
||||
func TestMetaWriter(t *testing.T) {
|
||||
suite.Run(t, new(MetaWriterSuite))
|
||||
}
|
||||
|
|
|
@ -107,48 +107,6 @@ func (_c *MockMetaWriter_UpdateSync_Call) RunAndReturn(run func(context.Context,
|
|||
return _c
|
||||
}
|
||||
|
||||
// UpdateSyncV2 provides a mock function with given fields: _a0
|
||||
func (_m *MockMetaWriter) UpdateSyncV2(_a0 *SyncTaskV2) error {
|
||||
ret := _m.Called(_a0)
|
||||
|
||||
var r0 error
|
||||
if rf, ok := ret.Get(0).(func(*SyncTaskV2) error); ok {
|
||||
r0 = rf(_a0)
|
||||
} else {
|
||||
r0 = ret.Error(0)
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
||||
// MockMetaWriter_UpdateSyncV2_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateSyncV2'
|
||||
type MockMetaWriter_UpdateSyncV2_Call struct {
|
||||
*mock.Call
|
||||
}
|
||||
|
||||
// UpdateSyncV2 is a helper method to define mock.On call
|
||||
// - _a0 *SyncTaskV2
|
||||
func (_e *MockMetaWriter_Expecter) UpdateSyncV2(_a0 interface{}) *MockMetaWriter_UpdateSyncV2_Call {
|
||||
return &MockMetaWriter_UpdateSyncV2_Call{Call: _e.mock.On("UpdateSyncV2", _a0)}
|
||||
}
|
||||
|
||||
func (_c *MockMetaWriter_UpdateSyncV2_Call) Run(run func(_a0 *SyncTaskV2)) *MockMetaWriter_UpdateSyncV2_Call {
|
||||
_c.Call.Run(func(args mock.Arguments) {
|
||||
run(args[0].(*SyncTaskV2))
|
||||
})
|
||||
return _c
|
||||
}
|
||||
|
||||
func (_c *MockMetaWriter_UpdateSyncV2_Call) Return(_a0 error) *MockMetaWriter_UpdateSyncV2_Call {
|
||||
_c.Call.Return(_a0)
|
||||
return _c
|
||||
}
|
||||
|
||||
func (_c *MockMetaWriter_UpdateSyncV2_Call) RunAndReturn(run func(*SyncTaskV2) error) *MockMetaWriter_UpdateSyncV2_Call {
|
||||
_c.Call.Return(run)
|
||||
return _c
|
||||
}
|
||||
|
||||
// NewMockMetaWriter creates a new instance of MockMetaWriter. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
|
||||
// The first argument is typically a *testing.T value.
|
||||
func NewMockMetaWriter(t interface {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Code generated by mockery v2.30.1. DO NOT EDIT.
|
||||
// Code generated by mockery v2.32.4. DO NOT EDIT.
|
||||
|
||||
package syncmgr
|
||||
|
||||
|
|
|
@ -1,256 +0,0 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package syncmgr
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
||||
"github.com/milvus-io/milvus/internal/allocator"
|
||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
iTypeutil "github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/metrics"
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||
"github.com/milvus-io/milvus/pkg/util/timerecord"
|
||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||
)
|
||||
|
||||
type storageV2Serializer struct {
|
||||
*storageV1Serializer
|
||||
|
||||
arrowSchema *arrow.Schema
|
||||
storageV2Cache *metacache.StorageV2Cache
|
||||
inCodec *storage.InsertCodec
|
||||
metacache metacache.MetaCache
|
||||
}
|
||||
|
||||
func NewStorageV2Serializer(
|
||||
storageV2Cache *metacache.StorageV2Cache,
|
||||
allocator allocator.Interface,
|
||||
metacache metacache.MetaCache,
|
||||
metaWriter MetaWriter,
|
||||
) (*storageV2Serializer, error) {
|
||||
v1Serializer, err := NewStorageSerializer(allocator, metacache, metaWriter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &storageV2Serializer{
|
||||
storageV1Serializer: v1Serializer,
|
||||
storageV2Cache: storageV2Cache,
|
||||
arrowSchema: storageV2Cache.ArrowSchema(),
|
||||
metacache: metacache,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *storageV2Serializer) EncodeBuffer(ctx context.Context, pack *SyncPack) (Task, error) {
|
||||
task := NewSyncTaskV2()
|
||||
tr := timerecord.NewTimeRecorder("storage_serializer_v2")
|
||||
metricSegLevel := pack.level.String()
|
||||
|
||||
space, err := s.storageV2Cache.GetOrCreateSpace(pack.segmentID, SpaceCreatorFunc(pack.segmentID, s.schema, s.arrowSchema))
|
||||
if err != nil {
|
||||
log.Warn("failed to get or create space", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
task.space = space
|
||||
if len(pack.insertData) > 0 {
|
||||
insertReader, err := s.serializeInsertData(pack)
|
||||
if err != nil {
|
||||
log.Warn("failed to serialize insert data with storagev2", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
task.reader = insertReader
|
||||
|
||||
singlePKStats, batchStatsBlob, err := s.serializeStatslog(pack)
|
||||
if err != nil {
|
||||
log.Warn("failed to serialized statslog", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
task.statsBlob = batchStatsBlob
|
||||
s.metacache.UpdateSegments(metacache.RollStats(singlePKStats), metacache.WithSegmentIDs(pack.segmentID))
|
||||
}
|
||||
|
||||
if pack.isFlush {
|
||||
if pack.level != datapb.SegmentLevel_L0 {
|
||||
mergedStatsBlob, err := s.serializeMergedPkStats(pack)
|
||||
if err != nil {
|
||||
log.Warn("failed to serialize merged stats log", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
task.mergedStatsBlob = mergedStatsBlob
|
||||
}
|
||||
task.WithFlush()
|
||||
}
|
||||
|
||||
if pack.deltaData != nil {
|
||||
deltaReader, err := s.serializeDeltaData(pack)
|
||||
if err != nil {
|
||||
log.Warn("failed to serialize delta data", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
task.deleteReader = deltaReader
|
||||
}
|
||||
|
||||
if pack.isDrop {
|
||||
task.WithDrop()
|
||||
}
|
||||
|
||||
s.setTaskMeta(task, pack)
|
||||
metrics.DataNodeEncodeBufferLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metricSegLevel).Observe(float64(tr.RecordSpan().Milliseconds()))
|
||||
return task, nil
|
||||
}
|
||||
|
||||
func (s *storageV2Serializer) setTaskMeta(task *SyncTaskV2, pack *SyncPack) {
|
||||
task.WithCollectionID(pack.collectionID).
|
||||
WithPartitionID(pack.partitionID).
|
||||
WithChannelName(pack.channelName).
|
||||
WithSegmentID(pack.segmentID).
|
||||
WithBatchSize(pack.batchSize).
|
||||
WithSchema(s.metacache.Schema()).
|
||||
WithStartPosition(pack.startPosition).
|
||||
WithCheckpoint(pack.checkpoint).
|
||||
WithLevel(pack.level).
|
||||
WithTimeRange(pack.tsFrom, pack.tsTo).
|
||||
WithMetaCache(s.metacache).
|
||||
WithMetaWriter(s.metaWriter).
|
||||
WithFailureCallback(func(err error) {
|
||||
// TODO could change to unsub channel in the future
|
||||
panic(err)
|
||||
})
|
||||
}
|
||||
|
||||
func (s *storageV2Serializer) serializeInsertData(pack *SyncPack) (array.RecordReader, error) {
|
||||
builder := array.NewRecordBuilder(memory.DefaultAllocator, s.arrowSchema)
|
||||
defer builder.Release()
|
||||
|
||||
for _, chunk := range pack.insertData {
|
||||
if err := iTypeutil.BuildRecord(builder, chunk, s.schema.GetFields()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
rec := builder.NewRecord()
|
||||
defer rec.Release()
|
||||
|
||||
itr, err := array.NewRecordReader(s.arrowSchema, []arrow.Record{rec})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
itr.Retain()
|
||||
|
||||
return itr, nil
|
||||
}
|
||||
|
||||
func (s *storageV2Serializer) serializeDeltaData(pack *SyncPack) (array.RecordReader, error) {
|
||||
fields := make([]*schemapb.FieldSchema, 0, 2)
|
||||
tsField := &schemapb.FieldSchema{
|
||||
FieldID: common.TimeStampField,
|
||||
Name: common.TimeStampFieldName,
|
||||
DataType: schemapb.DataType_Int64,
|
||||
}
|
||||
fields = append(fields, s.pkField, tsField)
|
||||
|
||||
deltaArrowSchema, err := iTypeutil.ConvertToArrowSchema(fields)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
builder := array.NewRecordBuilder(memory.DefaultAllocator, deltaArrowSchema)
|
||||
defer builder.Release()
|
||||
|
||||
switch s.pkField.GetDataType() {
|
||||
case schemapb.DataType_Int64:
|
||||
pb := builder.Field(0).(*array.Int64Builder)
|
||||
for _, pk := range pack.deltaData.Pks {
|
||||
pb.Append(pk.GetValue().(int64))
|
||||
}
|
||||
case schemapb.DataType_VarChar:
|
||||
pb := builder.Field(0).(*array.StringBuilder)
|
||||
for _, pk := range pack.deltaData.Pks {
|
||||
pb.Append(pk.GetValue().(string))
|
||||
}
|
||||
default:
|
||||
return nil, merr.WrapErrParameterInvalidMsg("unexpected pk type %v", s.pkField.GetDataType())
|
||||
}
|
||||
|
||||
for _, ts := range pack.deltaData.Tss {
|
||||
builder.Field(1).(*array.Int64Builder).Append(int64(ts))
|
||||
}
|
||||
|
||||
rec := builder.NewRecord()
|
||||
defer rec.Release()
|
||||
|
||||
reader, err := array.NewRecordReader(deltaArrowSchema, []arrow.Record{rec})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reader.Retain()
|
||||
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
func SpaceCreatorFunc(segmentID int64, collSchema *schemapb.CollectionSchema, arrowSchema *arrow.Schema) func() (*milvus_storage.Space, error) {
|
||||
return func() (*milvus_storage.Space, error) {
|
||||
url, err := iTypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segmentID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pkSchema, err := typeutil.GetPrimaryFieldSchema(collSchema)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
vecSchema, err := typeutil.GetVectorFieldSchema(collSchema)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
space, err := milvus_storage.Open(
|
||||
url,
|
||||
options.NewSpaceOptionBuilder().
|
||||
SetSchema(schema.NewSchema(
|
||||
arrowSchema,
|
||||
&schema.SchemaOptions{
|
||||
PrimaryColumn: pkSchema.Name,
|
||||
VectorColumn: vecSchema.Name,
|
||||
VersionColumn: common.TimeStampFieldName,
|
||||
},
|
||||
)).
|
||||
Build(),
|
||||
)
|
||||
return space, err
|
||||
}
|
||||
}
|
|
@ -1,366 +0,0 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package syncmgr
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/samber/lo"
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
||||
"github.com/milvus-io/milvus/internal/allocator"
|
||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
||||
)
|
||||
|
||||
type StorageV2SerializerSuite struct {
|
||||
suite.Suite
|
||||
|
||||
collectionID int64
|
||||
partitionID int64
|
||||
segmentID int64
|
||||
channelName string
|
||||
|
||||
schema *schemapb.CollectionSchema
|
||||
storageCache *metacache.StorageV2Cache
|
||||
mockAllocator *allocator.MockAllocator
|
||||
mockCache *metacache.MockMetaCache
|
||||
mockMetaWriter *MockMetaWriter
|
||||
|
||||
serializer *storageV2Serializer
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) SetupSuite() {
|
||||
paramtable.Get().Init(paramtable.NewBaseTable())
|
||||
|
||||
s.collectionID = rand.Int63n(100) + 1000
|
||||
s.partitionID = rand.Int63n(100) + 2000
|
||||
s.segmentID = rand.Int63n(1000) + 10000
|
||||
s.channelName = fmt.Sprintf("by-dev-rootcoord-dml0_%d_v1", s.collectionID)
|
||||
s.schema = &schemapb.CollectionSchema{
|
||||
Name: "sync_task_test_col",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: common.RowIDField, DataType: schemapb.DataType_Int64, Name: common.RowIDFieldName},
|
||||
{FieldID: common.TimeStampField, DataType: schemapb.DataType_Int64, Name: common.TimeStampFieldName},
|
||||
{
|
||||
FieldID: 100,
|
||||
Name: "pk",
|
||||
DataType: schemapb.DataType_Int64,
|
||||
IsPrimaryKey: true,
|
||||
},
|
||||
{
|
||||
FieldID: 101,
|
||||
Name: "vector",
|
||||
DataType: schemapb.DataType_FloatVector,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: common.DimKey, Value: "128"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
s.mockAllocator = allocator.NewMockAllocator(s.T())
|
||||
s.mockCache = metacache.NewMockMetaCache(s.T())
|
||||
s.mockMetaWriter = NewMockMetaWriter(s.T())
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) SetupTest() {
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.schema)
|
||||
s.Require().NoError(err)
|
||||
s.storageCache = storageCache
|
||||
|
||||
s.mockCache.EXPECT().Collection().Return(s.collectionID)
|
||||
s.mockCache.EXPECT().Schema().Return(s.schema)
|
||||
|
||||
s.serializer, err = NewStorageV2Serializer(storageCache, s.mockAllocator, s.mockCache, s.mockMetaWriter)
|
||||
s.Require().NoError(err)
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) getSpace() *milvus_storage.Space {
|
||||
tmpDir := s.T().TempDir()
|
||||
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
|
||||
SetSchema(schema.NewSchema(s.storageCache.ArrowSchema(), &schema.SchemaOptions{
|
||||
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
|
||||
})).Build())
|
||||
s.Require().NoError(err)
|
||||
return space
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) getBasicPack() *SyncPack {
|
||||
pack := &SyncPack{}
|
||||
|
||||
pack.WithCollectionID(s.collectionID).
|
||||
WithPartitionID(s.partitionID).
|
||||
WithSegmentID(s.segmentID).
|
||||
WithChannelName(s.channelName).
|
||||
WithCheckpoint(&msgpb.MsgPosition{
|
||||
Timestamp: 1000,
|
||||
ChannelName: s.channelName,
|
||||
})
|
||||
|
||||
return pack
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) getEmptyInsertBuffer() *storage.InsertData {
|
||||
buf, err := storage.NewInsertData(s.schema)
|
||||
s.Require().NoError(err)
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) getInsertBuffer() *storage.InsertData {
|
||||
buf := s.getEmptyInsertBuffer()
|
||||
|
||||
// generate data
|
||||
for i := 0; i < 10; i++ {
|
||||
data := make(map[storage.FieldID]any)
|
||||
data[common.RowIDField] = int64(i + 1)
|
||||
data[common.TimeStampField] = int64(i + 1)
|
||||
data[100] = int64(i + 1)
|
||||
vector := lo.RepeatBy(128, func(_ int) float32 {
|
||||
return rand.Float32()
|
||||
})
|
||||
data[101] = vector
|
||||
err := buf.Append(data)
|
||||
s.Require().NoError(err)
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) getDeleteBuffer() *storage.DeleteData {
|
||||
buf := &storage.DeleteData{}
|
||||
for i := 0; i < 10; i++ {
|
||||
pk := storage.NewInt64PrimaryKey(int64(i + 1))
|
||||
ts := tsoutil.ComposeTSByTime(time.Now(), 0)
|
||||
buf.Append(pk, ts)
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) getDeleteBufferZeroTs() *storage.DeleteData {
|
||||
buf := &storage.DeleteData{}
|
||||
for i := 0; i < 10; i++ {
|
||||
pk := storage.NewInt64PrimaryKey(int64(i + 1))
|
||||
buf.Append(pk, 0)
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) getBfs() *metacache.BloomFilterSet {
|
||||
bfs := metacache.NewBloomFilterSet()
|
||||
fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{
|
||||
FieldID: 101,
|
||||
Name: "ID",
|
||||
IsPrimaryKey: true,
|
||||
DataType: schemapb.DataType_Int64,
|
||||
}, 16)
|
||||
s.Require().NoError(err)
|
||||
|
||||
ids := []int64{1, 2, 3, 4, 5, 6, 7}
|
||||
for _, id := range ids {
|
||||
err = fd.AppendRow(id)
|
||||
s.Require().NoError(err)
|
||||
}
|
||||
|
||||
bfs.UpdatePKRange(fd)
|
||||
return bfs
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) TestSerializeInsert() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
s.storageCache.SetSpace(s.segmentID, s.getSpace())
|
||||
|
||||
s.Run("no_data", func() {
|
||||
pack := s.getBasicPack()
|
||||
pack.WithTimeRange(50, 100)
|
||||
pack.WithDrop()
|
||||
|
||||
task, err := s.serializer.EncodeBuffer(ctx, pack)
|
||||
s.NoError(err)
|
||||
taskV1, ok := task.(*SyncTaskV2)
|
||||
s.Require().True(ok)
|
||||
s.Equal(s.collectionID, taskV1.collectionID)
|
||||
s.Equal(s.partitionID, taskV1.partitionID)
|
||||
s.Equal(s.channelName, taskV1.channelName)
|
||||
s.Equal(&msgpb.MsgPosition{
|
||||
Timestamp: 1000,
|
||||
ChannelName: s.channelName,
|
||||
}, taskV1.checkpoint)
|
||||
s.EqualValues(50, taskV1.tsFrom)
|
||||
s.EqualValues(100, taskV1.tsTo)
|
||||
s.True(taskV1.isDrop)
|
||||
})
|
||||
|
||||
s.Run("empty_insert_data", func() {
|
||||
pack := s.getBasicPack()
|
||||
pack.WithTimeRange(50, 100)
|
||||
pack.WithInsertData([]*storage.InsertData{s.getEmptyInsertBuffer()}).WithBatchSize(0)
|
||||
|
||||
_, err := s.serializer.EncodeBuffer(ctx, pack)
|
||||
s.Error(err)
|
||||
})
|
||||
|
||||
s.Run("with_normal_data", func() {
|
||||
pack := s.getBasicPack()
|
||||
pack.WithTimeRange(50, 100)
|
||||
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
|
||||
|
||||
s.mockCache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return().Once()
|
||||
|
||||
task, err := s.serializer.EncodeBuffer(ctx, pack)
|
||||
s.NoError(err)
|
||||
|
||||
taskV2, ok := task.(*SyncTaskV2)
|
||||
s.Require().True(ok)
|
||||
s.Equal(s.collectionID, taskV2.collectionID)
|
||||
s.Equal(s.partitionID, taskV2.partitionID)
|
||||
s.Equal(s.channelName, taskV2.channelName)
|
||||
s.Equal(&msgpb.MsgPosition{
|
||||
Timestamp: 1000,
|
||||
ChannelName: s.channelName,
|
||||
}, taskV2.checkpoint)
|
||||
s.EqualValues(50, taskV2.tsFrom)
|
||||
s.EqualValues(100, taskV2.tsTo)
|
||||
s.NotNil(taskV2.reader)
|
||||
s.NotNil(taskV2.statsBlob)
|
||||
})
|
||||
|
||||
s.Run("with_flush_segment_not_found", func() {
|
||||
pack := s.getBasicPack()
|
||||
pack.WithFlush()
|
||||
|
||||
s.mockCache.EXPECT().GetSegmentByID(s.segmentID).Return(nil, false).Once()
|
||||
_, err := s.serializer.EncodeBuffer(ctx, pack)
|
||||
s.Error(err)
|
||||
})
|
||||
|
||||
s.Run("with_flush", func() {
|
||||
pack := s.getBasicPack()
|
||||
pack.WithTimeRange(50, 100)
|
||||
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
|
||||
pack.WithFlush()
|
||||
|
||||
bfs := s.getBfs()
|
||||
segInfo := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
|
||||
metacache.UpdateNumOfRows(1000)(segInfo)
|
||||
s.mockCache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Run(func(action metacache.SegmentAction, filters ...metacache.SegmentFilter) {
|
||||
action(segInfo)
|
||||
}).Return().Once()
|
||||
s.mockCache.EXPECT().GetSegmentByID(s.segmentID).Return(segInfo, true).Once()
|
||||
|
||||
task, err := s.serializer.EncodeBuffer(ctx, pack)
|
||||
s.NoError(err)
|
||||
|
||||
taskV2, ok := task.(*SyncTaskV2)
|
||||
s.Require().True(ok)
|
||||
s.Equal(s.collectionID, taskV2.collectionID)
|
||||
s.Equal(s.partitionID, taskV2.partitionID)
|
||||
s.Equal(s.channelName, taskV2.channelName)
|
||||
s.Equal(&msgpb.MsgPosition{
|
||||
Timestamp: 1000,
|
||||
ChannelName: s.channelName,
|
||||
}, taskV2.checkpoint)
|
||||
s.EqualValues(50, taskV2.tsFrom)
|
||||
s.EqualValues(100, taskV2.tsTo)
|
||||
s.NotNil(taskV2.mergedStatsBlob)
|
||||
})
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) TestSerializeDelete() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
s.Run("serialize_failed", func() {
|
||||
pkField := s.serializer.pkField
|
||||
s.serializer.pkField = &schemapb.FieldSchema{}
|
||||
defer func() {
|
||||
s.serializer.pkField = pkField
|
||||
}()
|
||||
pack := s.getBasicPack()
|
||||
pack.WithDeleteData(s.getDeleteBufferZeroTs())
|
||||
pack.WithTimeRange(50, 100)
|
||||
|
||||
_, err := s.serializer.EncodeBuffer(ctx, pack)
|
||||
s.Error(err)
|
||||
})
|
||||
|
||||
s.Run("serialize_failed_bad_pk", func() {
|
||||
pkField := s.serializer.pkField
|
||||
s.serializer.pkField = &schemapb.FieldSchema{
|
||||
DataType: schemapb.DataType_Array,
|
||||
}
|
||||
defer func() {
|
||||
s.serializer.pkField = pkField
|
||||
}()
|
||||
pack := s.getBasicPack()
|
||||
pack.WithDeleteData(s.getDeleteBufferZeroTs())
|
||||
pack.WithTimeRange(50, 100)
|
||||
|
||||
_, err := s.serializer.EncodeBuffer(ctx, pack)
|
||||
s.Error(err)
|
||||
})
|
||||
|
||||
s.Run("serialize_normal", func() {
|
||||
pack := s.getBasicPack()
|
||||
pack.WithDeleteData(s.getDeleteBuffer())
|
||||
pack.WithTimeRange(50, 100)
|
||||
|
||||
task, err := s.serializer.EncodeBuffer(ctx, pack)
|
||||
s.NoError(err)
|
||||
|
||||
taskV2, ok := task.(*SyncTaskV2)
|
||||
s.Require().True(ok)
|
||||
s.Equal(s.collectionID, taskV2.collectionID)
|
||||
s.Equal(s.partitionID, taskV2.partitionID)
|
||||
s.Equal(s.channelName, taskV2.channelName)
|
||||
s.Equal(&msgpb.MsgPosition{
|
||||
Timestamp: 1000,
|
||||
ChannelName: s.channelName,
|
||||
}, taskV2.checkpoint)
|
||||
s.EqualValues(50, taskV2.tsFrom)
|
||||
s.EqualValues(100, taskV2.tsTo)
|
||||
s.NotNil(taskV2.deleteReader)
|
||||
})
|
||||
}
|
||||
|
||||
func (s *StorageV2SerializerSuite) TestBadSchema() {
|
||||
mockCache := metacache.NewMockMetaCache(s.T())
|
||||
mockCache.EXPECT().Collection().Return(s.collectionID).Once()
|
||||
mockCache.EXPECT().Schema().Return(&schemapb.CollectionSchema{}).Once()
|
||||
_, err := NewStorageV2Serializer(s.storageCache, s.mockAllocator, mockCache, s.mockMetaWriter)
|
||||
s.Error(err)
|
||||
}
|
||||
|
||||
func TestStorageV2Serializer(t *testing.T) {
|
||||
suite.Run(t, new(StorageV2SerializerSuite))
|
||||
}
|
|
@ -99,7 +99,6 @@ func (mgr *syncManager) SyncData(ctx context.Context, task Task, callbacks ...fu
|
|||
switch t := task.(type) {
|
||||
case *SyncTask:
|
||||
t.WithChunkManager(mgr.chunkManager)
|
||||
case *SyncTaskV2:
|
||||
}
|
||||
|
||||
return mgr.safeSubmitTask(ctx, task, callbacks...)
|
||||
|
|
|
@ -1,235 +0,0 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package syncmgr
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
||||
"github.com/milvus-io/milvus/internal/allocator"
|
||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/retry"
|
||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||
)
|
||||
|
||||
type SyncTaskV2 struct {
|
||||
*SyncTask
|
||||
arrowSchema *arrow.Schema
|
||||
reader array.RecordReader
|
||||
statsBlob *storage.Blob
|
||||
deleteReader array.RecordReader
|
||||
storageVersion int64
|
||||
space *milvus_storage.Space
|
||||
|
||||
failureCallback func(err error)
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) getLogger() *log.MLogger {
|
||||
return log.Ctx(context.Background()).With(
|
||||
zap.Int64("collectionID", t.collectionID),
|
||||
zap.Int64("partitionID", t.partitionID),
|
||||
zap.Int64("segmentID", t.segmentID),
|
||||
zap.String("channel", t.channelName),
|
||||
)
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) handleError(err error) {
|
||||
if t.failureCallback != nil {
|
||||
t.failureCallback(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) Run(ctx context.Context) error {
|
||||
log := t.getLogger()
|
||||
var err error
|
||||
|
||||
_, ok := t.metacache.GetSegmentByID(t.segmentID)
|
||||
if !ok {
|
||||
log.Warn("failed to sync data, segment not found in metacache")
|
||||
t.handleError(err)
|
||||
return merr.WrapErrSegmentNotFound(t.segmentID)
|
||||
}
|
||||
|
||||
if err = t.writeSpace(); err != nil {
|
||||
t.handleError(err)
|
||||
return err
|
||||
}
|
||||
|
||||
if err = t.writeMeta(); err != nil {
|
||||
t.handleError(err)
|
||||
return err
|
||||
}
|
||||
|
||||
actions := []metacache.SegmentAction{metacache.FinishSyncing(t.batchSize)}
|
||||
switch {
|
||||
case t.isDrop:
|
||||
actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Dropped))
|
||||
case t.isFlush:
|
||||
actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Flushed))
|
||||
}
|
||||
|
||||
t.metacache.UpdateSegments(metacache.MergeSegmentAction(actions...), metacache.WithSegmentIDs(t.segmentID))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) writeSpace() error {
|
||||
defer func() {
|
||||
if t.reader != nil {
|
||||
t.reader.Release()
|
||||
}
|
||||
if t.deleteReader != nil {
|
||||
t.deleteReader.Release()
|
||||
}
|
||||
}()
|
||||
|
||||
txn := t.space.NewTransaction()
|
||||
if t.reader != nil {
|
||||
txn.Write(t.reader, &options.DefaultWriteOptions)
|
||||
}
|
||||
if t.deleteReader != nil {
|
||||
txn.Delete(t.deleteReader)
|
||||
}
|
||||
if t.statsBlob != nil {
|
||||
txn.WriteBlob(t.statsBlob.Value, t.statsBlob.Key, false)
|
||||
}
|
||||
|
||||
return txn.Commit()
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) writeMeta() error {
|
||||
t.storageVersion = t.space.GetCurrentVersion()
|
||||
return t.metaWriter.UpdateSyncV2(t)
|
||||
}
|
||||
|
||||
func NewSyncTaskV2() *SyncTaskV2 {
|
||||
return &SyncTaskV2{
|
||||
SyncTask: NewSyncTask(),
|
||||
}
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithChunkManager(cm storage.ChunkManager) *SyncTaskV2 {
|
||||
t.chunkManager = cm
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithAllocator(allocator allocator.Interface) *SyncTaskV2 {
|
||||
t.allocator = allocator
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithStartPosition(start *msgpb.MsgPosition) *SyncTaskV2 {
|
||||
t.startPosition = start
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithCheckpoint(cp *msgpb.MsgPosition) *SyncTaskV2 {
|
||||
t.checkpoint = cp
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithCollectionID(collID int64) *SyncTaskV2 {
|
||||
t.collectionID = collID
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithPartitionID(partID int64) *SyncTaskV2 {
|
||||
t.partitionID = partID
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithSegmentID(segID int64) *SyncTaskV2 {
|
||||
t.segmentID = segID
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithChannelName(chanName string) *SyncTaskV2 {
|
||||
t.channelName = chanName
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithSchema(schema *schemapb.CollectionSchema) *SyncTaskV2 {
|
||||
t.schema = schema
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithTimeRange(from, to typeutil.Timestamp) *SyncTaskV2 {
|
||||
t.tsFrom, t.tsTo = from, to
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithFlush() *SyncTaskV2 {
|
||||
t.isFlush = true
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithDrop() *SyncTaskV2 {
|
||||
t.isDrop = true
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithMetaCache(metacache metacache.MetaCache) *SyncTaskV2 {
|
||||
t.metacache = metacache
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithMetaWriter(metaWriter MetaWriter) *SyncTaskV2 {
|
||||
t.metaWriter = metaWriter
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithWriteRetryOptions(opts ...retry.Option) *SyncTaskV2 {
|
||||
t.writeRetryOpts = opts
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithFailureCallback(callback func(error)) *SyncTaskV2 {
|
||||
t.failureCallback = callback
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithBatchSize(batchSize int64) *SyncTaskV2 {
|
||||
t.batchSize = batchSize
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithSpace(space *milvus_storage.Space) *SyncTaskV2 {
|
||||
t.space = space
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithArrowSchema(arrowSchema *arrow.Schema) *SyncTaskV2 {
|
||||
t.arrowSchema = arrowSchema
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *SyncTaskV2) WithLevel(level datapb.SegmentLevel) *SyncTaskV2 {
|
||||
t.level = level
|
||||
return t
|
||||
}
|
|
@ -1,403 +0,0 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package syncmgr
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||
"github.com/samber/lo"
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
||||
"github.com/milvus-io/milvus/internal/allocator"
|
||||
"github.com/milvus-io/milvus/internal/datanode/broker"
|
||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
||||
)
|
||||
|
||||
type SyncTaskSuiteV2 struct {
|
||||
suite.Suite
|
||||
|
||||
collectionID int64
|
||||
partitionID int64
|
||||
segmentID int64
|
||||
channelName string
|
||||
|
||||
metacache *metacache.MockMetaCache
|
||||
allocator *allocator.MockGIDAllocator
|
||||
schema *schemapb.CollectionSchema
|
||||
arrowSchema *arrow.Schema
|
||||
broker *broker.MockBroker
|
||||
|
||||
space *milvus_storage.Space
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) SetupSuite() {
|
||||
paramtable.Get().Init(paramtable.NewBaseTable())
|
||||
|
||||
s.collectionID = 100
|
||||
s.partitionID = 101
|
||||
s.segmentID = 1001
|
||||
s.channelName = "by-dev-rootcoord-dml_0_100v0"
|
||||
|
||||
s.schema = &schemapb.CollectionSchema{
|
||||
Name: "sync_task_test_col",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64},
|
||||
{FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64},
|
||||
{
|
||||
FieldID: 100,
|
||||
Name: "pk",
|
||||
DataType: schemapb.DataType_Int64,
|
||||
IsPrimaryKey: true,
|
||||
},
|
||||
{
|
||||
FieldID: 101,
|
||||
Name: "vector",
|
||||
DataType: schemapb.DataType_FloatVector,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: common.DimKey, Value: "128"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
arrowSchema, err := typeutil.ConvertToArrowSchema(s.schema.Fields)
|
||||
s.NoError(err)
|
||||
s.arrowSchema = arrowSchema
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) SetupTest() {
|
||||
s.allocator = allocator.NewMockGIDAllocator()
|
||||
s.allocator.AllocF = func(count uint32) (int64, int64, error) {
|
||||
return time.Now().Unix(), int64(count), nil
|
||||
}
|
||||
s.allocator.AllocOneF = func() (allocator.UniqueID, error) {
|
||||
return time.Now().Unix(), nil
|
||||
}
|
||||
|
||||
s.broker = broker.NewMockBroker(s.T())
|
||||
s.metacache = metacache.NewMockMetaCache(s.T())
|
||||
|
||||
tmpDir := s.T().TempDir()
|
||||
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
|
||||
SetSchema(schema.NewSchema(s.arrowSchema, &schema.SchemaOptions{
|
||||
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
|
||||
})).Build())
|
||||
s.Require().NoError(err)
|
||||
s.space = space
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) getEmptyInsertBuffer() *storage.InsertData {
|
||||
buf, err := storage.NewInsertData(s.schema)
|
||||
s.Require().NoError(err)
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) getInsertBuffer() *storage.InsertData {
|
||||
buf := s.getEmptyInsertBuffer()
|
||||
|
||||
// generate data
|
||||
for i := 0; i < 10; i++ {
|
||||
data := make(map[storage.FieldID]any)
|
||||
data[common.RowIDField] = int64(i + 1)
|
||||
data[common.TimeStampField] = int64(i + 1)
|
||||
data[100] = int64(i + 1)
|
||||
vector := lo.RepeatBy(128, func(_ int) float32 {
|
||||
return rand.Float32()
|
||||
})
|
||||
data[101] = vector
|
||||
err := buf.Append(data)
|
||||
s.Require().NoError(err)
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) getDeleteBuffer() *storage.DeleteData {
|
||||
buf := &storage.DeleteData{}
|
||||
for i := 0; i < 10; i++ {
|
||||
pk := storage.NewInt64PrimaryKey(int64(i + 1))
|
||||
ts := tsoutil.ComposeTSByTime(time.Now(), 0)
|
||||
buf.Append(pk, ts)
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) getDeleteBufferZeroTs() *storage.DeleteData {
|
||||
buf := &storage.DeleteData{}
|
||||
for i := 0; i < 10; i++ {
|
||||
pk := storage.NewInt64PrimaryKey(int64(i + 1))
|
||||
buf.Append(pk, 0)
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) getSuiteSyncTask() *SyncTaskV2 {
|
||||
pack := &SyncPack{}
|
||||
|
||||
pack.WithCollectionID(s.collectionID).
|
||||
WithPartitionID(s.partitionID).
|
||||
WithSegmentID(s.segmentID).
|
||||
WithChannelName(s.channelName).
|
||||
WithCheckpoint(&msgpb.MsgPosition{
|
||||
Timestamp: 1000,
|
||||
ChannelName: s.channelName,
|
||||
})
|
||||
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
|
||||
pack.WithDeleteData(s.getDeleteBuffer())
|
||||
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.schema)
|
||||
s.Require().NoError(err)
|
||||
|
||||
s.metacache.EXPECT().Collection().Return(s.collectionID)
|
||||
s.metacache.EXPECT().Schema().Return(s.schema)
|
||||
serializer, err := NewStorageV2Serializer(storageCache, s.allocator, s.metacache, nil)
|
||||
s.Require().NoError(err)
|
||||
task, err := serializer.EncodeBuffer(context.Background(), pack)
|
||||
s.Require().NoError(err)
|
||||
taskV2, ok := task.(*SyncTaskV2)
|
||||
s.Require().True(ok)
|
||||
taskV2.WithMetaCache(s.metacache)
|
||||
|
||||
return taskV2
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) TestRunNormal() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil)
|
||||
bfs := metacache.NewBloomFilterSet()
|
||||
fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{
|
||||
FieldID: 101,
|
||||
Name: "ID",
|
||||
IsPrimaryKey: true,
|
||||
DataType: schemapb.DataType_Int64,
|
||||
}, 16)
|
||||
s.Require().NoError(err)
|
||||
|
||||
ids := []int64{1, 2, 3, 4, 5, 6, 7}
|
||||
for _, id := range ids {
|
||||
err = fd.AppendRow(id)
|
||||
s.Require().NoError(err)
|
||||
}
|
||||
|
||||
bfs.UpdatePKRange(fd)
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
|
||||
metacache.UpdateNumOfRows(1000)(seg)
|
||||
s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
|
||||
s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
|
||||
s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
|
||||
|
||||
s.Run("without_insert_delete", func() {
|
||||
task := s.getSuiteSyncTask()
|
||||
task.WithMetaWriter(BrokerMetaWriter(s.broker, 1))
|
||||
task.WithTimeRange(50, 100)
|
||||
task.WithCheckpoint(&msgpb.MsgPosition{
|
||||
ChannelName: s.channelName,
|
||||
MsgID: []byte{1, 2, 3, 4},
|
||||
Timestamp: 100,
|
||||
})
|
||||
|
||||
err := task.Run(ctx)
|
||||
s.NoError(err)
|
||||
})
|
||||
|
||||
s.Run("with_insert_delete_cp", func() {
|
||||
task := s.getSuiteSyncTask()
|
||||
task.WithTimeRange(50, 100)
|
||||
task.WithMetaWriter(BrokerMetaWriter(s.broker, 1))
|
||||
task.WithCheckpoint(&msgpb.MsgPosition{
|
||||
ChannelName: s.channelName,
|
||||
MsgID: []byte{1, 2, 3, 4},
|
||||
Timestamp: 100,
|
||||
})
|
||||
|
||||
err := task.Run(ctx)
|
||||
s.NoError(err)
|
||||
})
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) TestBuildRecord() {
|
||||
fieldSchemas := []*schemapb.FieldSchema{
|
||||
{FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool},
|
||||
{FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8},
|
||||
{FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16},
|
||||
{FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32},
|
||||
{FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64},
|
||||
{FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float},
|
||||
{FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double},
|
||||
{FieldID: 8, Name: "field7", DataType: schemapb.DataType_String},
|
||||
{FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar},
|
||||
{FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
|
||||
{FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
|
||||
{FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32},
|
||||
{FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON},
|
||||
{FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
|
||||
}
|
||||
|
||||
schema, err := typeutil.ConvertToArrowSchema(fieldSchemas)
|
||||
s.NoError(err)
|
||||
|
||||
b := array.NewRecordBuilder(memory.NewGoAllocator(), schema)
|
||||
defer b.Release()
|
||||
|
||||
data := &storage.InsertData{
|
||||
Data: map[int64]storage.FieldData{
|
||||
1: &storage.BoolFieldData{Data: []bool{true, false}},
|
||||
2: &storage.Int8FieldData{Data: []int8{3, 4}},
|
||||
3: &storage.Int16FieldData{Data: []int16{3, 4}},
|
||||
4: &storage.Int32FieldData{Data: []int32{3, 4}},
|
||||
5: &storage.Int64FieldData{Data: []int64{3, 4}},
|
||||
6: &storage.FloatFieldData{Data: []float32{3, 4}},
|
||||
7: &storage.DoubleFieldData{Data: []float64{3, 4}},
|
||||
8: &storage.StringFieldData{Data: []string{"3", "4"}},
|
||||
9: &storage.StringFieldData{Data: []string{"3", "4"}},
|
||||
10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8},
|
||||
11: &storage.FloatVectorFieldData{
|
||||
Data: []float32{4, 5, 6, 7, 4, 5, 6, 7},
|
||||
Dim: 4,
|
||||
},
|
||||
12: &storage.ArrayFieldData{
|
||||
ElementType: schemapb.DataType_Int32,
|
||||
Data: []*schemapb.ScalarField{
|
||||
{
|
||||
Data: &schemapb.ScalarField_IntData{
|
||||
IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
Data: &schemapb.ScalarField_IntData{
|
||||
IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
13: &storage.JSONFieldData{
|
||||
Data: [][]byte{
|
||||
[]byte(`{"batch":2}`),
|
||||
[]byte(`{"key":"world"}`),
|
||||
},
|
||||
},
|
||||
14: &storage.Float16VectorFieldData{
|
||||
Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
|
||||
Dim: 4,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err = typeutil.BuildRecord(b, data, fieldSchemas)
|
||||
s.NoError(err)
|
||||
s.EqualValues(2, b.NewRecord().NumRows())
|
||||
}
|
||||
|
||||
func (s *SyncTaskSuiteV2) TestBuildRecordNullable() {
	fieldSchemas := []*schemapb.FieldSchema{
		{FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool},
		{FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8},
		{FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16},
		{FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32},
		{FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64},
		{FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float},
		{FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double},
		{FieldID: 8, Name: "field7", DataType: schemapb.DataType_String},
		{FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar},
		{FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
		{FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
		{FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32},
		{FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON},
		{FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
	}

	schema, err := typeutil.ConvertToArrowSchema(fieldSchemas)
	s.NoError(err)

	b := array.NewRecordBuilder(memory.NewGoAllocator(), schema)
	defer b.Release()

	data := &storage.InsertData{
		Data: map[int64]storage.FieldData{
			1:  &storage.BoolFieldData{Data: []bool{true, false}, ValidData: []bool{true, true}},
			2:  &storage.Int8FieldData{Data: []int8{3, 4}, ValidData: []bool{true, true}},
			3:  &storage.Int16FieldData{Data: []int16{3, 4}, ValidData: []bool{true, true}},
			4:  &storage.Int32FieldData{Data: []int32{3, 4}, ValidData: []bool{true, true}},
			5:  &storage.Int64FieldData{Data: []int64{3, 4}, ValidData: []bool{true, true}},
			6:  &storage.FloatFieldData{Data: []float32{3, 4}, ValidData: []bool{true, true}},
			7:  &storage.DoubleFieldData{Data: []float64{3, 4}, ValidData: []bool{true, true}},
			8:  &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
			9:  &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
			10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8},
			11: &storage.FloatVectorFieldData{
				Data: []float32{4, 5, 6, 7, 4, 5, 6, 7},
				Dim:  4,
			},
			12: &storage.ArrayFieldData{
				ElementType: schemapb.DataType_Int32,
				Data: []*schemapb.ScalarField{
					{
						Data: &schemapb.ScalarField_IntData{
							IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}},
						},
					},
					{
						Data: &schemapb.ScalarField_IntData{
							IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}},
						},
					},
				},
				ValidData: []bool{true, true},
			},
			13: &storage.JSONFieldData{
				Data: [][]byte{
					[]byte(`{"batch":2}`),
					[]byte(`{"key":"world"}`),
				},
				ValidData: []bool{true, true},
			},
			14: &storage.Float16VectorFieldData{
				Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
				Dim:  4,
			},
		},
	}

	err = typeutil.BuildRecord(b, data, fieldSchemas)
	s.NoError(err)
	s.EqualValues(2, b.NewRecord().NumRows())
}

func TestSyncTaskV2(t *testing.T) {
	suite.Run(t, new(SyncTaskSuiteV2))
}
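For reference, the removed suite above drives typeutil.ConvertToArrowSchema and typeutil.BuildRecord directly. A minimal, self-contained sketch of that flow (the two-field schema and the insertData variable are illustrative placeholders, not part of the commit):

// Sketch: convert a Milvus field schema to Arrow and build a record from InsertData.
fields := []*schemapb.FieldSchema{
	{FieldID: 1, Name: "pk", DataType: schemapb.DataType_Int64},
	{FieldID: 2, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
}
arrowSchema, err := typeutil.ConvertToArrowSchema(fields)
if err != nil {
	return err
}
builder := array.NewRecordBuilder(memory.NewGoAllocator(), arrowSchema)
defer builder.Release()
// insertData is assumed to be a *storage.InsertData populated like the maps above.
if err := typeutil.BuildRecord(builder, insertData, fields); err != nil {
	return err
}
rec := builder.NewRecord()
defer rec.Release()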
@ -19,8 +19,8 @@ type bfWriteBuffer struct {
	metacache metacache.MetaCache
}

func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
	base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option)
func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
	base, err := newWriteBufferBase(channel, metacache, syncMgr, option)
	if err != nil {
		return nil, err
	}
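The hunk above drops the *metacache.StorageV2Cache parameter from the BF write buffer constructor. A hedged sketch of a caller after this change (the channel name and surrounding variables are placeholders, not part of the commit):

// Sketch: build a bloom-filter-based write buffer without the removed cache argument.
wb, err := NewBFWriteBuffer(
	"by-dev-rootcoord-dml_0v0", // vchannel name (placeholder)
	metaCache,                  // metacache.MetaCache for this channel
	syncMgr,                    // shared syncmgr.SyncManager
	&writeBufferOption{},       // default options
)
if err != nil {
	return err
}
_ = wb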
@ -13,16 +13,11 @@ import (
|
|||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
||||
"github.com/milvus-io/milvus/internal/datanode/broker"
|
||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
||||
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/metrics"
|
||||
"github.com/milvus-io/milvus/pkg/mq/msgstream"
|
||||
|
@ -41,7 +36,6 @@ type BFWriteBufferSuite struct {
|
|||
metacacheInt64 *metacache.MockMetaCache
|
||||
metacacheVarchar *metacache.MockMetaCache
|
||||
broker *broker.MockBroker
|
||||
storageV2Cache *metacache.StorageV2Cache
|
||||
}
|
||||
|
||||
func (s *BFWriteBufferSuite) SetupSuite() {
|
||||
|
@ -89,10 +83,6 @@ func (s *BFWriteBufferSuite) SetupSuite() {
|
|||
},
|
||||
},
|
||||
}
|
||||
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
|
||||
s.Require().NoError(err)
|
||||
s.storageV2Cache = storageCache
|
||||
}
|
||||
|
||||
func (s *BFWriteBufferSuite) composeInsertMsg(segmentID int64, rowCount int, dim int, pkType schemapb.DataType) ([]int64, *msgstream.InsertMsg) {
|
||||
|
@ -201,16 +191,11 @@ func (s *BFWriteBufferSuite) SetupTest() {
|
|||
s.metacacheVarchar.EXPECT().Collection().Return(s.collID).Maybe()
|
||||
|
||||
s.broker = broker.NewMockBroker(s.T())
|
||||
var err error
|
||||
s.storageV2Cache, err = metacache.NewStorageV2Cache(s.collInt64Schema)
|
||||
s.Require().NoError(err)
|
||||
}
|
||||
|
||||
func (s *BFWriteBufferSuite) TestBufferData() {
|
||||
s.Run("normal_run_int64", func() {
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
|
||||
s.Require().NoError(err)
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, storageCache, s.syncMgr, &writeBufferOption{})
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{})
|
||||
s.NoError(err)
|
||||
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
|
||||
|
@ -237,9 +222,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
|
|||
})
|
||||
|
||||
s.Run("normal_run_varchar", func() {
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collVarcharSchema)
|
||||
s.Require().NoError(err)
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, storageCache, s.syncMgr, &writeBufferOption{})
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, s.syncMgr, &writeBufferOption{})
|
||||
s.NoError(err)
|
||||
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
|
||||
|
@ -261,9 +244,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
|
|||
})
|
||||
|
||||
s.Run("int_pk_type_not_match", func() {
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
|
||||
s.Require().NoError(err)
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, storageCache, s.syncMgr, &writeBufferOption{})
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{})
|
||||
s.NoError(err)
|
||||
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
|
||||
|
@ -281,9 +262,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
|
|||
})
|
||||
|
||||
s.Run("varchar_pk_not_match", func() {
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collVarcharSchema)
|
||||
s.Require().NoError(err)
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, storageCache, s.syncMgr, &writeBufferOption{})
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, s.syncMgr, &writeBufferOption{})
|
||||
s.NoError(err)
|
||||
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
|
||||
|
@ -305,7 +284,7 @@ func (s *BFWriteBufferSuite) TestAutoSync() {
|
|||
paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1")
|
||||
|
||||
s.Run("normal_auto_sync", func() {
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, nil, s.syncMgr, &writeBufferOption{
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{
|
||||
syncPolicies: []SyncPolicy{
|
||||
GetFullBufferPolicy(),
|
||||
GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)),
|
||||
|
@ -340,92 +319,11 @@ func (s *BFWriteBufferSuite) TestAutoSync() {
|
|||
})
|
||||
}
|
||||
|
||||
func (s *BFWriteBufferSuite) TestBufferDataWithStorageV2() {
|
||||
params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true")
|
||||
defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
|
||||
params.Params.CommonCfg.StorageScheme.SwapTempValue("file")
|
||||
tmpDir := s.T().TempDir()
|
||||
arrowSchema, err := typeutil.ConvertToArrowSchema(s.collInt64Schema.Fields)
|
||||
s.Require().NoError(err)
|
||||
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
|
||||
SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{
|
||||
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
|
||||
})).Build())
|
||||
s.Require().NoError(err)
|
||||
s.storageV2Cache.SetSpace(1000, space)
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.storageV2Cache, s.syncMgr, &writeBufferOption{})
|
||||
s.NoError(err)
|
||||
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
|
||||
s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
|
||||
s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false)
|
||||
s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return()
|
||||
s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
|
||||
|
||||
pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64)
|
||||
delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) }))
|
||||
|
||||
err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
|
||||
s.NoError(err)
|
||||
}
|
||||
|
||||
func (s *BFWriteBufferSuite) TestAutoSyncWithStorageV2() {
|
||||
params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true")
|
||||
defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
|
||||
paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1")
|
||||
tmpDir := s.T().TempDir()
|
||||
arrowSchema, err := typeutil.ConvertToArrowSchema(s.collInt64Schema.Fields)
|
||||
s.Require().NoError(err)
|
||||
|
||||
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
|
||||
SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{
|
||||
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
|
||||
})).Build())
|
||||
s.Require().NoError(err)
|
||||
s.storageV2Cache.SetSpace(1002, space)
|
||||
|
||||
s.Run("normal_auto_sync", func() {
|
||||
wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.storageV2Cache, s.syncMgr, &writeBufferOption{
|
||||
syncPolicies: []SyncPolicy{
|
||||
GetFullBufferPolicy(),
|
||||
GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)),
|
||||
GetSealedSegmentsPolicy(s.metacacheInt64),
|
||||
},
|
||||
})
|
||||
s.NoError(err)
|
||||
|
||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
|
||||
seg1 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1002}, metacache.NewBloomFilterSet())
|
||||
segCompacted := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
|
||||
|
||||
s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg, segCompacted})
|
||||
s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false).Once()
|
||||
s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(seg, true).Once()
|
||||
s.metacacheInt64.EXPECT().GetSegmentByID(int64(1002)).Return(seg1, true)
|
||||
s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002})
|
||||
s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return()
|
||||
s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
|
||||
s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return()
|
||||
s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything, mock.Anything).Return(nil)
|
||||
|
||||
pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64)
|
||||
delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) }))
|
||||
|
||||
metrics.DataNodeFlowGraphBufferDataSize.Reset()
|
||||
err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
|
||||
s.NoError(err)
|
||||
|
||||
value, err := metrics.DataNodeFlowGraphBufferDataSize.GetMetricWithLabelValues(fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(s.metacacheInt64.Collection()))
|
||||
s.NoError(err)
|
||||
s.MetricsEqual(value, 0)
|
||||
})
|
||||
}
|
||||
|
||||
func (s *BFWriteBufferSuite) TestCreateFailure() {
|
||||
metacache := metacache.NewMockMetaCache(s.T())
|
||||
metacache.EXPECT().Collection().Return(s.collID)
|
||||
metacache.EXPECT().Schema().Return(&schemapb.CollectionSchema{})
|
||||
_, err := NewBFWriteBuffer(s.channelName, metacache, s.storageV2Cache, s.syncMgr, &writeBufferOption{})
|
||||
_, err := NewBFWriteBuffer(s.channelName, metacache, s.syncMgr, &writeBufferOption{})
|
||||
s.Error(err)
|
||||
}
|
||||
|
||||
|
|
|
@ -33,11 +33,11 @@ type l0WriteBuffer struct {
	idAllocator allocator.Interface
}

func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
	if option.idAllocator == nil {
		return nil, merr.WrapErrServiceInternal("id allocator is nil when creating l0 write buffer")
	}
	base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option)
	base, err := newWriteBufferBase(channel, metacache, syncMgr, option)
	if err != nil {
		return nil, err
	}
@ -28,13 +28,12 @@ import (
|
|||
|
||||
type L0WriteBufferSuite struct {
|
||||
testutils.PromMetricsSuite
|
||||
channelName string
|
||||
collID int64
|
||||
collSchema *schemapb.CollectionSchema
|
||||
syncMgr *syncmgr.MockSyncManager
|
||||
metacache *metacache.MockMetaCache
|
||||
allocator *allocator.MockGIDAllocator
|
||||
storageCache *metacache.StorageV2Cache
|
||||
channelName string
|
||||
collID int64
|
||||
collSchema *schemapb.CollectionSchema
|
||||
syncMgr *syncmgr.MockSyncManager
|
||||
metacache *metacache.MockMetaCache
|
||||
allocator *allocator.MockGIDAllocator
|
||||
}
|
||||
|
||||
func (s *L0WriteBufferSuite) SetupSuite() {
|
||||
|
@ -61,10 +60,6 @@ func (s *L0WriteBufferSuite) SetupSuite() {
|
|||
},
|
||||
}
|
||||
s.channelName = "by-dev-rootcoord-dml_0v0"
|
||||
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
|
||||
s.Require().NoError(err)
|
||||
s.storageCache = storageCache
|
||||
}
|
||||
|
||||
func (s *L0WriteBufferSuite) composeInsertMsg(segmentID int64, rowCount int, dim int, pkType schemapb.DataType) ([]int64, *msgstream.InsertMsg) {
|
||||
|
@ -173,7 +168,7 @@ func (s *L0WriteBufferSuite) SetupTest() {
|
|||
|
||||
func (s *L0WriteBufferSuite) TestBufferData() {
|
||||
s.Run("normal_run", func() {
|
||||
wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
|
||||
wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
|
||||
idAllocator: s.allocator,
|
||||
})
|
||||
s.NoError(err)
|
||||
|
@ -202,7 +197,7 @@ func (s *L0WriteBufferSuite) TestBufferData() {
|
|||
})
|
||||
|
||||
s.Run("pk_type_not_match", func() {
|
||||
wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
|
||||
wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
|
||||
idAllocator: s.allocator,
|
||||
})
|
||||
s.NoError(err)
|
||||
|
@ -225,7 +220,7 @@ func (s *L0WriteBufferSuite) TestCreateFailure() {
|
|||
metacache := metacache.NewMockMetaCache(s.T())
|
||||
metacache.EXPECT().Collection().Return(s.collID)
|
||||
metacache.EXPECT().Schema().Return(&schemapb.CollectionSchema{})
|
||||
_, err := NewL0WriteBuffer(s.channelName, metacache, s.storageCache, s.syncMgr, &writeBufferOption{
|
||||
_, err := NewL0WriteBuffer(s.channelName, metacache, s.syncMgr, &writeBufferOption{
|
||||
idAllocator: s.allocator,
|
||||
})
|
||||
s.Error(err)
|
||||
|
|
|
@ -23,7 +23,7 @@ import (
//go:generate mockery --name=BufferManager --structname=MockBufferManager --output=./ --filename=mock_manager.go --with-expecter --inpackage
type BufferManager interface {
	// Register adds a WriteBuffer with provided schema & options.
	Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error
	Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error
	// SealSegments notifies writeBuffer corresponding to provided channel to seal segments.
	// which will cause segment start flush procedure.
	SealSegments(ctx context.Context, channel string, segmentIDs []int64) error
@ -140,7 +140,7 @@ func (m *bufferManager) Stop() {
}

// Register a new WriteBuffer for channel.
func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error {
func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error {
	m.mut.Lock()
	defer m.mut.Unlock()

@ -148,7 +148,7 @@ func (m *bufferManager) Register(channel string, metacache metacache.MetaCache,
	if ok {
		return merr.WrapErrChannelReduplicate(channel)
	}
	buf, err := NewWriteBuffer(channel, metacache, storageV2Cache, m.syncMgr, opts...)
	buf, err := NewWriteBuffer(channel, metacache, m.syncMgr, opts...)
	if err != nil {
		return err
	}
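Registering a channel through the trimmed BufferManager interface now takes only the channel name, the meta cache, and options. A hedged sketch of a caller (the NewManager constructor name and the mocks below are assumptions for illustration):

// Sketch: register a channel with the simplified BufferManager.Register signature.
mgr := NewManager(syncMgr) // assumed constructor for the bufferManager implementation
if err := mgr.Register("by-dev-rootcoord-dml_0v0", metaCache, WithIDAllocator(idAllocator)); err != nil {
	// a duplicate Register on the same channel returns merr.ErrChannelReduplicate
	return err
}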
@ -73,13 +73,10 @@ func (s *ManagerSuite) SetupTest() {
|
|||
func (s *ManagerSuite) TestRegister() {
|
||||
manager := s.manager
|
||||
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
|
||||
s.Require().NoError(err)
|
||||
|
||||
err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
|
||||
err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
|
||||
s.NoError(err)
|
||||
|
||||
err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
|
||||
err = manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
|
||||
s.Error(err)
|
||||
s.ErrorIs(err, merr.ErrChannelReduplicate)
|
||||
}
|
||||
|
@ -183,9 +180,7 @@ func (s *ManagerSuite) TestRemoveChannel() {
|
|||
})
|
||||
|
||||
s.Run("remove_channel", func() {
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
|
||||
s.Require().NoError(err)
|
||||
err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
|
||||
err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
|
||||
s.Require().NoError(err)
|
||||
|
||||
s.NotPanics(func() {
|
||||
|
|
|
@ -1,4 +1,4 @@
// Code generated by mockery v2.30.1. DO NOT EDIT.
// Code generated by mockery v2.32.4. DO NOT EDIT.

package writebuffer

@ -278,20 +278,20 @@ func (_c *MockBufferManager_NotifyCheckpointUpdated_Call) RunAndReturn(run func(
|
|||
return _c
|
||||
}
|
||||
|
||||
// Register provides a mock function with given fields: channel, _a1, storageV2Cache, opts
|
||||
func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error {
|
||||
// Register provides a mock function with given fields: channel, _a1, opts
|
||||
func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption) error {
|
||||
_va := make([]interface{}, len(opts))
|
||||
for _i := range opts {
|
||||
_va[_i] = opts[_i]
|
||||
}
|
||||
var _ca []interface{}
|
||||
_ca = append(_ca, channel, _a1, storageV2Cache)
|
||||
_ca = append(_ca, channel, _a1)
|
||||
_ca = append(_ca, _va...)
|
||||
ret := _m.Called(_ca...)
|
||||
|
||||
var r0 error
|
||||
if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error); ok {
|
||||
r0 = rf(channel, _a1, storageV2Cache, opts...)
|
||||
if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, ...WriteBufferOption) error); ok {
|
||||
r0 = rf(channel, _a1, opts...)
|
||||
} else {
|
||||
r0 = ret.Error(0)
|
||||
}
|
||||
|
@ -307,22 +307,21 @@ type MockBufferManager_Register_Call struct {
|
|||
// Register is a helper method to define mock.On call
|
||||
// - channel string
|
||||
// - _a1 metacache.MetaCache
|
||||
// - storageV2Cache *metacache.StorageV2Cache
|
||||
// - opts ...WriteBufferOption
|
||||
func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, storageV2Cache interface{}, opts ...interface{}) *MockBufferManager_Register_Call {
|
||||
func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, opts ...interface{}) *MockBufferManager_Register_Call {
|
||||
return &MockBufferManager_Register_Call{Call: _e.mock.On("Register",
|
||||
append([]interface{}{channel, _a1, storageV2Cache}, opts...)...)}
|
||||
append([]interface{}{channel, _a1}, opts...)...)}
|
||||
}
|
||||
|
||||
func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call {
|
||||
func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call {
|
||||
_c.Call.Run(func(args mock.Arguments) {
|
||||
variadicArgs := make([]WriteBufferOption, len(args)-3)
|
||||
for i, a := range args[3:] {
|
||||
variadicArgs := make([]WriteBufferOption, len(args)-2)
|
||||
for i, a := range args[2:] {
|
||||
if a != nil {
|
||||
variadicArgs[i] = a.(WriteBufferOption)
|
||||
}
|
||||
}
|
||||
run(args[0].(string), args[1].(metacache.MetaCache), args[2].(*metacache.StorageV2Cache), variadicArgs...)
|
||||
run(args[0].(string), args[1].(metacache.MetaCache), variadicArgs...)
|
||||
})
|
||||
return _c
|
||||
}
|
||||
|
@ -332,7 +331,7 @@ func (_c *MockBufferManager_Register_Call) Return(_a0 error) *MockBufferManager_
|
|||
return _c
|
||||
}
|
||||
|
||||
func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error) *MockBufferManager_Register_Call {
|
||||
func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, ...WriteBufferOption) error) *MockBufferManager_Register_Call {
|
||||
_c.Call.Return(run)
|
||||
return _c
|
||||
}
|
||||
|
|
|
@ -16,7 +16,6 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
||||
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/metrics"
|
||||
|
@ -100,7 +99,7 @@ func (c *checkpointCandidates) GetEarliestWithDefault(def *checkpointCandidate)
	return result
}

func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) {
func NewWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) {
	option := defaultWBOption(metacache)
	for _, opt := range opts {
		opt(option)
@ -108,9 +107,9 @@ func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cach

	switch option.deletePolicy {
	case DeletePolicyBFPkOracle:
		return NewBFWriteBuffer(channel, metacache, storageV2Cache, syncMgr, option)
		return NewBFWriteBuffer(channel, metacache, syncMgr, option)
	case DeletePolicyL0Delta:
		return NewL0WriteBuffer(channel, metacache, storageV2Cache, syncMgr, option)
		return NewL0WriteBuffer(channel, metacache, syncMgr, option)
	default:
		return nil, merr.WrapErrParameterInvalid("valid delete policy config", option.deletePolicy)
	}
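The delete-policy switch keeps both buffer flavors; only the cache argument is gone. A hedged sketch of picking a policy explicitly via the option helpers that appear elsewhere in this diff (channel, metaCache, syncMgr, and idAllocator are placeholders):

// Sketch: select the write buffer flavor through the delete policy option.
wb, err := NewWriteBuffer(channel, metaCache, syncMgr,
	WithDeletePolicy(DeletePolicyL0Delta), // L0 delta segments for deletes
	WithIDAllocator(idAllocator),          // required by the L0 buffer
)
if err != nil {
	return err
}
_ = wb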
@ -140,34 +139,23 @@ type writeBufferBase struct {
	checkpoint     *msgpb.MsgPosition
	flushTimestamp *atomic.Uint64

	storagev2Cache *metacache.StorageV2Cache

	// pre build logger
	logger        *log.MLogger
	cpRatedLogger *log.MLogger
}

func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) {
func newWriteBufferBase(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) {
	flushTs := atomic.NewUint64(nonFlushTS)
	flushTsPolicy := GetFlushTsPolicy(flushTs, metacache)
	option.syncPolicies = append(option.syncPolicies, flushTsPolicy)

	var serializer syncmgr.Serializer
	var err error
	if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
		serializer, err = syncmgr.NewStorageV2Serializer(
			storageV2Cache,
			option.idAllocator,
			metacache,
			option.metaWriter,
		)
	} else {
		serializer, err = syncmgr.NewStorageSerializer(
			option.idAllocator,
			metacache,
			option.metaWriter,
		)
	}
	serializer, err = syncmgr.NewStorageSerializer(
		option.idAllocator,
		metacache,
		option.metaWriter,
	)
	if err != nil {
		return nil, err
	}
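After this hunk the write buffer base always builds the binlog (storage v1) serializer; the EnableStorageV2 branch is gone. A minimal sketch of the remaining wiring, mirroring the added lines:

// Sketch: only the binlog serializer is constructed now; no storage-v2 branch remains.
serializer, err := syncmgr.NewStorageSerializer(
	option.idAllocator, // allocates binlog IDs
	metacache,          // per-channel segment metadata cache
	option.metaWriter,  // persists sync results (binlog meta)
)
if err != nil {
	return nil, err
}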
@ -201,7 +189,6 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2
		syncCheckpoint: newCheckpointCandiates(),
		syncPolicies:   option.syncPolicies,
		flushTimestamp: flushTs,
		storagev2Cache: storageV2Cache,
	}

	wb.logger = log.With(zap.Int64("collectionID", wb.collectionID),
@ -660,8 +647,6 @@ func (wb *writeBufferBase) Close(ctx context.Context, drop bool) {
		switch t := syncTask.(type) {
		case *syncmgr.SyncTask:
			t.WithDrop()
		case *syncmgr.SyncTaskV2:
			t.WithDrop()
		}

		f := wb.syncMgr.SyncData(ctx, syncTask, func(err error) error {
@ -22,13 +22,12 @@ import (
|
|||
|
||||
type WriteBufferSuite struct {
|
||||
suite.Suite
|
||||
collID int64
|
||||
channelName string
|
||||
collSchema *schemapb.CollectionSchema
|
||||
wb *writeBufferBase
|
||||
syncMgr *syncmgr.MockSyncManager
|
||||
metacache *metacache.MockMetaCache
|
||||
storageCache *metacache.StorageV2Cache
|
||||
collID int64
|
||||
channelName string
|
||||
collSchema *schemapb.CollectionSchema
|
||||
wb *writeBufferBase
|
||||
syncMgr *syncmgr.MockSyncManager
|
||||
metacache *metacache.MockMetaCache
|
||||
}
|
||||
|
||||
func (s *WriteBufferSuite) SetupSuite() {
|
||||
|
@ -47,14 +46,12 @@ func (s *WriteBufferSuite) SetupSuite() {
|
|||
}
|
||||
|
||||
func (s *WriteBufferSuite) SetupTest() {
|
||||
storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
|
||||
s.Require().NoError(err)
|
||||
s.storageCache = storageCache
|
||||
s.syncMgr = syncmgr.NewMockSyncManager(s.T())
|
||||
s.metacache = metacache.NewMockMetaCache(s.T())
|
||||
s.metacache.EXPECT().Schema().Return(s.collSchema).Maybe()
|
||||
s.metacache.EXPECT().Collection().Return(s.collID).Maybe()
|
||||
s.wb, err = newWriteBufferBase(s.channelName, s.metacache, storageCache, s.syncMgr, &writeBufferOption{
|
||||
var err error
|
||||
s.wb, err = newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
|
||||
pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
|
||||
return metacache.NewBloomFilterSet()
|
||||
},
|
||||
|
@ -66,7 +63,7 @@ func (s *WriteBufferSuite) TestDefaultOption() {
|
|||
s.Run("default BFPkOracle", func() {
|
||||
paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key, "false")
|
||||
defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key)
|
||||
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr)
|
||||
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr)
|
||||
s.NoError(err)
|
||||
_, ok := wb.(*bfWriteBuffer)
|
||||
s.True(ok)
|
||||
|
@ -75,7 +72,7 @@ func (s *WriteBufferSuite) TestDefaultOption() {
|
|||
s.Run("default L0Delta policy", func() {
|
||||
paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key, "true")
|
||||
defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key)
|
||||
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithIDAllocator(allocator.NewMockGIDAllocator()))
|
||||
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithIDAllocator(allocator.NewMockGIDAllocator()))
|
||||
s.NoError(err)
|
||||
_, ok := wb.(*l0WriteBuffer)
|
||||
s.True(ok)
|
||||
|
@ -83,18 +80,18 @@ func (s *WriteBufferSuite) TestDefaultOption() {
|
|||
}
|
||||
|
||||
func (s *WriteBufferSuite) TestWriteBufferType() {
|
||||
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
|
||||
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
|
||||
s.NoError(err)
|
||||
|
||||
_, ok := wb.(*bfWriteBuffer)
|
||||
s.True(ok)
|
||||
|
||||
wb, err = NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator()))
|
||||
wb, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator()))
|
||||
s.NoError(err)
|
||||
_, ok = wb.(*l0WriteBuffer)
|
||||
s.True(ok)
|
||||
|
||||
_, err = NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(""))
|
||||
_, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(""))
|
||||
s.Error(err)
|
||||
}
|
||||
|
||||
|
@ -114,7 +111,7 @@ func (s *WriteBufferSuite) TestFlushSegments() {
|
|||
s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return()
|
||||
s.metacache.EXPECT().GetSegmentByID(mock.Anything, mock.Anything, mock.Anything).Return(nil, true)
|
||||
|
||||
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
|
||||
wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
|
||||
s.NoError(err)
|
||||
|
||||
err = wb.SealSegments(context.Background(), []int64{segmentID})
|
||||
|
@ -265,7 +262,7 @@ func (s *WriteBufferSuite) TestGetCheckpoint() {
|
|||
}
|
||||
|
||||
func (s *WriteBufferSuite) TestSyncSegmentsError() {
|
||||
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
|
||||
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
|
||||
pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
|
||||
return metacache.NewBloomFilterSet()
|
||||
},
|
||||
|
@ -298,7 +295,7 @@ func (s *WriteBufferSuite) TestSyncSegmentsError() {
|
|||
}
|
||||
|
||||
func (s *WriteBufferSuite) TestEvictBuffer() {
|
||||
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
|
||||
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
|
||||
pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
|
||||
return metacache.NewBloomFilterSet()
|
||||
},
|
||||
|
@ -367,7 +364,7 @@ func (s *WriteBufferSuite) TestEvictBuffer() {
|
|||
}
|
||||
|
||||
func (s *WriteBufferSuite) TestDropPartitions() {
|
||||
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
|
||||
wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
|
||||
pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
|
||||
return metacache.NewBloomFilterSet()
|
||||
},
|
||||
|
|
|
@ -97,12 +97,7 @@ func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest
		metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
		return merr.Status(err), nil
	}
	var task task
	if Params.CommonCfg.EnableStorageV2.GetAsBool() {
		task = newIndexBuildTaskV2(taskCtx, taskCancel, req, i)
	} else {
		task = newIndexBuildTask(taskCtx, taskCancel, req, cm, i)
	}
	task := newIndexBuildTask(taskCtx, taskCancel, req, cm, i)
	ret := merr.Success()
	if err := i.sched.TaskQueue.Enqueue(task); err != nil {
		log.Warn("IndexNode failed to schedule",
@ -327,12 +322,7 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req
		metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
		return merr.Status(err), nil
	}
	var task task
	if Params.CommonCfg.EnableStorageV2.GetAsBool() {
		task = newIndexBuildTaskV2(taskCtx, taskCancel, indexRequest, i)
	} else {
		task = newIndexBuildTask(taskCtx, taskCancel, indexRequest, cm, i)
	}
	task := newIndexBuildTask(taskCtx, taskCancel, indexRequest, cm, i)
	ret := merr.Success()
	if err := i.sched.TaskQueue.Enqueue(task); err != nil {
		log.Warn("IndexNode failed to schedule",
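Both CreateJob entry points now schedule the plain v1 index build task unconditionally. A condensed, hedged sketch of the surviving flow (error handling shortened; names follow the diff):

// Sketch: IndexNode always enqueues the storage-v1 build task now.
task := newIndexBuildTask(taskCtx, taskCancel, req, cm, i)
if err := i.sched.TaskQueue.Enqueue(task); err != nil {
	log.Warn("IndexNode failed to schedule", zap.Error(err))
	return merr.Status(err), nil
}
return merr.Success(), nil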
@ -43,187 +43,6 @@ import (
|
|||
"github.com/milvus-io/milvus/pkg/util/timerecord"
|
||||
)
|
||||
|
||||
type indexBuildTaskV2 struct {
|
||||
*indexBuildTask
|
||||
}
|
||||
|
||||
func newIndexBuildTaskV2(ctx context.Context,
|
||||
cancel context.CancelFunc,
|
||||
req *indexpb.CreateJobRequest,
|
||||
node *IndexNode,
|
||||
) *indexBuildTaskV2 {
|
||||
t := &indexBuildTaskV2{
|
||||
indexBuildTask: &indexBuildTask{
|
||||
ident: fmt.Sprintf("%s/%d", req.GetClusterID(), req.GetBuildID()),
|
||||
cancel: cancel,
|
||||
ctx: ctx,
|
||||
req: req,
|
||||
tr: timerecord.NewTimeRecorder(fmt.Sprintf("IndexBuildID: %d, ClusterID: %s", req.GetBuildID(), req.GetClusterID())),
|
||||
node: node,
|
||||
},
|
||||
}
|
||||
|
||||
t.parseParams()
|
||||
return t
|
||||
}
|
||||
|
||||
func (it *indexBuildTaskV2) parseParams() {
|
||||
// fill field for requests before v2.5.0
|
||||
if it.req.GetField() == nil || it.req.GetField().GetDataType() == schemapb.DataType_None {
|
||||
it.req.Field = &schemapb.FieldSchema{
|
||||
FieldID: it.req.GetFieldID(),
|
||||
Name: it.req.GetFieldName(),
|
||||
DataType: it.req.GetFieldType(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (it *indexBuildTaskV2) Execute(ctx context.Context) error {
|
||||
log := log.Ctx(ctx).With(zap.String("clusterID", it.req.GetClusterID()), zap.Int64("buildID", it.req.GetBuildID()),
|
||||
zap.Int64("collection", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()),
|
||||
zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion()))
|
||||
|
||||
indexType := it.newIndexParams[common.IndexTypeKey]
|
||||
if indexType == indexparamcheck.IndexDISKANN {
|
||||
// check index node support disk index
|
||||
if !Params.IndexNodeCfg.EnableDisk.GetAsBool() {
|
||||
log.Warn("IndexNode don't support build disk index",
|
||||
zap.String("index type", it.newIndexParams[common.IndexTypeKey]),
|
||||
zap.Bool("enable disk", Params.IndexNodeCfg.EnableDisk.GetAsBool()))
|
||||
return merr.WrapErrIndexNotSupported("disk index")
|
||||
}
|
||||
|
||||
// check load size and size of field data
|
||||
localUsedSize, err := indexcgowrapper.GetLocalUsedSize(paramtable.Get().LocalStorageCfg.Path.GetValue())
|
||||
if err != nil {
|
||||
log.Warn("IndexNode get local used size failed")
|
||||
return err
|
||||
}
|
||||
fieldDataSize, err := estimateFieldDataSize(it.req.GetDim(), it.req.GetNumRows(), it.req.GetField().GetDataType())
|
||||
if err != nil {
|
||||
log.Warn("IndexNode get local used size failed")
|
||||
return err
|
||||
}
|
||||
usedLocalSizeWhenBuild := int64(float64(fieldDataSize)*diskUsageRatio) + localUsedSize
|
||||
maxUsedLocalSize := int64(Params.IndexNodeCfg.DiskCapacityLimit.GetAsFloat() * Params.IndexNodeCfg.MaxDiskUsagePercentage.GetAsFloat())
|
||||
|
||||
if usedLocalSizeWhenBuild > maxUsedLocalSize {
|
||||
log.Warn("IndexNode don't has enough disk size to build disk ann index",
|
||||
zap.Int64("usedLocalSizeWhenBuild", usedLocalSizeWhenBuild),
|
||||
zap.Int64("maxUsedLocalSize", maxUsedLocalSize))
|
||||
return merr.WrapErrServiceDiskLimitExceeded(float32(usedLocalSizeWhenBuild), float32(maxUsedLocalSize))
|
||||
}
|
||||
|
||||
err = indexparams.SetDiskIndexBuildParams(it.newIndexParams, int64(fieldDataSize))
|
||||
if err != nil {
|
||||
log.Warn("failed to fill disk index params", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
storageConfig := &indexcgopb.StorageConfig{
|
||||
Address: it.req.GetStorageConfig().GetAddress(),
|
||||
AccessKeyID: it.req.GetStorageConfig().GetAccessKeyID(),
|
||||
SecretAccessKey: it.req.GetStorageConfig().GetSecretAccessKey(),
|
||||
UseSSL: it.req.GetStorageConfig().GetUseSSL(),
|
||||
BucketName: it.req.GetStorageConfig().GetBucketName(),
|
||||
RootPath: it.req.GetStorageConfig().GetRootPath(),
|
||||
UseIAM: it.req.GetStorageConfig().GetUseIAM(),
|
||||
IAMEndpoint: it.req.GetStorageConfig().GetIAMEndpoint(),
|
||||
StorageType: it.req.GetStorageConfig().GetStorageType(),
|
||||
UseVirtualHost: it.req.GetStorageConfig().GetUseVirtualHost(),
|
||||
Region: it.req.GetStorageConfig().GetRegion(),
|
||||
CloudProvider: it.req.GetStorageConfig().GetCloudProvider(),
|
||||
RequestTimeoutMs: it.req.GetStorageConfig().GetRequestTimeoutMs(),
|
||||
SslCACert: it.req.GetStorageConfig().GetSslCACert(),
|
||||
}
|
||||
|
||||
optFields := make([]*indexcgopb.OptionalFieldInfo, 0, len(it.req.GetOptionalScalarFields()))
|
||||
for _, optField := range it.req.GetOptionalScalarFields() {
|
||||
optFields = append(optFields, &indexcgopb.OptionalFieldInfo{
|
||||
FieldID: optField.GetFieldID(),
|
||||
FieldName: optField.GetFieldName(),
|
||||
FieldType: optField.GetFieldType(),
|
||||
DataPaths: optField.GetDataPaths(),
|
||||
})
|
||||
}
|
||||
|
||||
buildIndexParams := &indexcgopb.BuildIndexInfo{
|
||||
ClusterID: it.req.GetClusterID(),
|
||||
BuildID: it.req.GetBuildID(),
|
||||
CollectionID: it.req.GetCollectionID(),
|
||||
PartitionID: it.req.GetPartitionID(),
|
||||
SegmentID: it.req.GetSegmentID(),
|
||||
IndexVersion: it.req.GetIndexVersion(),
|
||||
CurrentIndexVersion: it.req.GetCurrentIndexVersion(),
|
||||
NumRows: it.req.GetNumRows(),
|
||||
Dim: it.req.GetDim(),
|
||||
IndexFilePrefix: it.req.GetIndexFilePrefix(),
|
||||
InsertFiles: it.req.GetDataPaths(),
|
||||
FieldSchema: it.req.GetField(),
|
||||
StorageConfig: storageConfig,
|
||||
IndexParams: mapToKVPairs(it.newIndexParams),
|
||||
TypeParams: mapToKVPairs(it.newTypeParams),
|
||||
StorePath: it.req.GetStorePath(),
|
||||
StoreVersion: it.req.GetStoreVersion(),
|
||||
IndexStorePath: it.req.GetIndexStorePath(),
|
||||
OptFields: optFields,
|
||||
PartitionKeyIsolation: it.req.GetPartitionKeyIsolation(),
|
||||
}
|
||||
|
||||
var err error
|
||||
it.index, err = indexcgowrapper.CreateIndexV2(ctx, buildIndexParams)
|
||||
if err != nil {
|
||||
if it.index != nil && it.index.CleanLocalData() != nil {
|
||||
log.Warn("failed to clean cached data on disk after build index failed")
|
||||
}
|
||||
log.Warn("failed to build index", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
buildIndexLatency := it.tr.RecordSpan()
|
||||
metrics.IndexNodeKnowhereBuildIndexLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(buildIndexLatency.Milliseconds()))
|
||||
|
||||
log.Info("Successfully build index")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (it *indexBuildTaskV2) PostExecute(ctx context.Context) error {
|
||||
log := log.Ctx(ctx).With(zap.String("clusterID", it.req.GetClusterID()), zap.Int64("buildID", it.req.GetBuildID()),
|
||||
zap.Int64("collection", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()),
|
||||
zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion()))
|
||||
|
||||
gcIndex := func() {
|
||||
if err := it.index.Delete(); err != nil {
|
||||
log.Warn("IndexNode indexBuildTask Execute CIndexDelete failed", zap.Error(err))
|
||||
}
|
||||
}
|
||||
version, err := it.index.UpLoadV2()
|
||||
if err != nil {
|
||||
log.Warn("failed to upload index", zap.Error(err))
|
||||
gcIndex()
|
||||
return err
|
||||
}
|
||||
|
||||
encodeIndexFileDur := it.tr.Record("index serialize and upload done")
|
||||
metrics.IndexNodeEncodeIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(encodeIndexFileDur.Seconds())
|
||||
|
||||
// early release index for gc, and we can ensure that Delete is idempotent.
|
||||
gcIndex()
|
||||
|
||||
// use serialized size before encoding
|
||||
var serializedSize uint64
|
||||
saveFileKeys := make([]string, 0)
|
||||
|
||||
it.node.storeIndexFilesAndStatisticV2(it.req.GetClusterID(), it.req.GetBuildID(), saveFileKeys, serializedSize, it.req.GetCurrentIndexVersion(), version)
|
||||
log.Debug("save index files done", zap.Strings("IndexFiles", saveFileKeys))
|
||||
saveIndexFileDur := it.tr.RecordSpan()
|
||||
metrics.IndexNodeSaveIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(saveIndexFileDur.Seconds())
|
||||
it.tr.Elapse("index building all done")
|
||||
log.Info("Successfully save index files")
|
||||
return nil
|
||||
}
|
||||
|
||||
// IndexBuildTask is used to record the information of the index tasks.
|
||||
type indexBuildTask struct {
|
||||
ident string
|
||||
|
|
|
@ -20,21 +20,14 @@ import (
|
|||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
||||
"github.com/milvus-io/milvus/internal/proto/etcdpb"
|
||||
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/internal/util/dependency"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/util/metautil"
|
||||
"github.com/milvus-io/milvus/pkg/util/metric"
|
||||
|
@ -139,105 +132,6 @@ func TestIndexBuildTask(t *testing.T) {
|
|||
suite.Run(t, new(IndexBuildTaskSuite))
|
||||
}
|
||||
|
||||
type IndexBuildTaskV2Suite struct {
|
||||
suite.Suite
|
||||
schema *schemapb.CollectionSchema
|
||||
arrowSchema *arrow.Schema
|
||||
space *milvus_storage.Space
|
||||
}
|
||||
|
||||
func (suite *IndexBuildTaskV2Suite) SetupSuite() {
|
||||
paramtable.Init()
|
||||
}
|
||||
|
||||
func (suite *IndexBuildTaskV2Suite) SetupTest() {
|
||||
suite.schema = &schemapb.CollectionSchema{
|
||||
Name: "test",
|
||||
Description: "test",
|
||||
AutoID: false,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: 1, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
|
||||
{FieldID: 2, Name: "ts", DataType: schemapb.DataType_Int64},
|
||||
{FieldID: 3, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}},
|
||||
},
|
||||
}
|
||||
|
||||
var err error
|
||||
suite.arrowSchema, err = typeutil.ConvertToArrowSchema(suite.schema.Fields)
|
||||
suite.NoError(err)
|
||||
|
||||
tmpDir := suite.T().TempDir()
|
||||
opt := options.NewSpaceOptionBuilder().
|
||||
SetSchema(schema.NewSchema(
|
||||
suite.arrowSchema,
|
||||
&schema.SchemaOptions{
|
||||
PrimaryColumn: "pk",
|
||||
VectorColumn: "vec",
|
||||
VersionColumn: "ts",
|
||||
})).
|
||||
Build()
|
||||
suite.space, err = milvus_storage.Open("file://"+tmpDir, opt)
|
||||
suite.NoError(err)
|
||||
|
||||
b := array.NewRecordBuilder(memory.DefaultAllocator, suite.arrowSchema)
|
||||
defer b.Release()
|
||||
b.Field(0).(*array.Int64Builder).AppendValues([]int64{1}, nil)
|
||||
b.Field(1).(*array.Int64Builder).AppendValues([]int64{1}, nil)
|
||||
fb := b.Field(2).(*array.FixedSizeBinaryBuilder)
|
||||
fb.Reserve(1)
|
||||
fb.Append([]byte{1, 2, 3, 4})
|
||||
|
||||
rec := b.NewRecord()
|
||||
defer rec.Release()
|
||||
reader, err := array.NewRecordReader(suite.arrowSchema, []arrow.Record{rec})
|
||||
suite.NoError(err)
|
||||
err = suite.space.Write(reader, &options.DefaultWriteOptions)
|
||||
suite.NoError(err)
|
||||
}
|
||||
|
||||
func (suite *IndexBuildTaskV2Suite) TestBuildIndex() {
|
||||
req := &indexpb.CreateJobRequest{
|
||||
BuildID: 1,
|
||||
IndexVersion: 1,
|
||||
IndexID: 0,
|
||||
IndexName: "",
|
||||
IndexParams: []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}, {Key: common.DimKey, Value: "1"}},
|
||||
TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}},
|
||||
NumRows: 10,
|
||||
StorageConfig: &indexpb.StorageConfig{
|
||||
RootPath: "/tmp/milvus/data",
|
||||
StorageType: "local",
|
||||
},
|
||||
CollectionID: 1,
|
||||
PartitionID: 1,
|
||||
SegmentID: 1,
|
||||
FieldID: 3,
|
||||
FieldName: "vec",
|
||||
FieldType: schemapb.DataType_FloatVector,
|
||||
StorePath: "file://" + suite.space.Path(),
|
||||
StoreVersion: suite.space.GetCurrentVersion(),
|
||||
IndexStorePath: "file://" + suite.space.Path(),
|
||||
Dim: 4,
|
||||
OptionalScalarFields: []*indexpb.OptionalFieldInfo{
|
||||
{FieldID: 1, FieldName: "pk", FieldType: 5, DataIds: []int64{0}},
|
||||
},
|
||||
}
|
||||
|
||||
task := newIndexBuildTaskV2(context.Background(), nil, req, NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)))
|
||||
|
||||
var err error
|
||||
err = task.PreExecute(context.Background())
|
||||
suite.NoError(err)
|
||||
err = task.Execute(context.Background())
|
||||
suite.NoError(err)
|
||||
err = task.PostExecute(context.Background())
|
||||
suite.NoError(err)
|
||||
}
|
||||
|
||||
func TestIndexBuildTaskV2Suite(t *testing.T) {
|
||||
suite.Run(t, new(IndexBuildTaskV2Suite))
|
||||
}
|
||||
|
||||
type AnalyzeTaskSuite struct {
|
||||
suite.Suite
|
||||
schema *schemapb.CollectionSchema
|
||||
|
|
|
@ -222,13 +222,9 @@ func (li *LoadIndexInfo) appendIndexData(ctx context.Context, indexKeys []string

	var status C.CStatus
	GetLoadPool().Submit(func() (any, error) {
		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
			status = C.AppendIndexV3(li.cLoadIndexInfo)
		} else {
			traceCtx := ParseCTraceContext(ctx)
			status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
			runtime.KeepAlive(traceCtx)
		}
		traceCtx := ParseCTraceContext(ctx)
		status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
		runtime.KeepAlive(traceCtx)
		return nil, nil
	}).Await()

@ -265,13 +261,9 @@ func (li *LoadIndexInfo) finish(ctx context.Context, info *cgopb.LoadIndexInfo)
	}

	_, _ = GetLoadPool().Submit(func() (any, error) {
		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
			status = C.AppendIndexV3(li.cLoadIndexInfo)
		} else {
			traceCtx := ParseCTraceContext(ctx)
			status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
			runtime.KeepAlive(traceCtx)
		}
		traceCtx := ParseCTraceContext(ctx)
		status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
		runtime.KeepAlive(traceCtx)
		return nil, nil
	}).Await()

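With the AppendIndexV3 branch removed, querynode index loading always goes through the traced AppendIndexV2 cgo call. A minimal sketch of the remaining submit body, mirroring the hunks above:

// Sketch: single, traced cgo path for appending index data.
GetLoadPool().Submit(func() (any, error) {
	traceCtx := ParseCTraceContext(ctx)                       // carry the Go trace into the C side
	status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo) // storage-v1 append only
	runtime.KeepAlive(traceCtx)                               // keep traceCtx alive across the cgo call
	return nil, nil
}).Await()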
@ -29,12 +29,10 @@ import "C"
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"runtime"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/cockroachdb/errors"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.uber.org/atomic"
|
||||
|
@ -44,8 +42,6 @@ import (
|
|||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
||||
"github.com/milvus-io/milvus/internal/proto/cgopb"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
|
@ -55,7 +51,6 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/querynodev2/segments/state"
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/internal/util/cgo"
|
||||
typeutil_internal "github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/metrics"
|
||||
|
@ -259,7 +254,6 @@ type LocalSegment struct {
	lastDeltaTimestamp *atomic.Uint64
	fields             *typeutil.ConcurrentMap[int64, *FieldInfo]
	fieldIndexes       *typeutil.ConcurrentMap[int64, *IndexedFieldInfo]
	space              *milvus_storage.Space
}

func NewSegment(ctx context.Context,
@ -336,76 +330,6 @@ func NewSegment(ctx context.Context,
|
|||
return segment, nil
|
||||
}
|
||||
|
||||
func NewSegmentV2(
|
||||
ctx context.Context,
|
||||
collection *Collection,
|
||||
segmentType SegmentType,
|
||||
version int64,
|
||||
loadInfo *querypb.SegmentLoadInfo,
|
||||
) (Segment, error) {
|
||||
/*
|
||||
CSegmentInterface
|
||||
NewSegment(CCollection collection, uint64_t segment_id, SegmentType seg_type);
|
||||
*/
|
||||
if loadInfo.GetLevel() == datapb.SegmentLevel_L0 {
|
||||
return NewL0Segment(collection, segmentType, version, loadInfo)
|
||||
}
|
||||
base, err := newBaseSegment(collection, segmentType, version, loadInfo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var segmentPtr C.CSegmentInterface
|
||||
var status C.CStatus
|
||||
var locker *state.LoadStateLock
|
||||
switch segmentType {
|
||||
case SegmentTypeSealed:
|
||||
status = C.NewSegment(collection.collectionPtr, C.Sealed, C.int64_t(loadInfo.GetSegmentID()), &segmentPtr)
|
||||
locker = state.NewLoadStateLock(state.LoadStateOnlyMeta)
|
||||
case SegmentTypeGrowing:
|
||||
status = C.NewSegment(collection.collectionPtr, C.Growing, C.int64_t(loadInfo.GetSegmentID()), &segmentPtr)
|
||||
locker = state.NewLoadStateLock(state.LoadStateDataLoaded)
|
||||
default:
|
||||
return nil, fmt.Errorf("illegal segment type %d when create segment %d", segmentType, loadInfo.GetSegmentID())
|
||||
}
|
||||
|
||||
if err := HandleCStatus(ctx, &status, "NewSegmentFailed"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Info("create segment",
|
||||
zap.Int64("collectionID", loadInfo.GetCollectionID()),
|
||||
zap.Int64("partitionID", loadInfo.GetPartitionID()),
|
||||
zap.Int64("segmentID", loadInfo.GetSegmentID()),
|
||||
zap.String("segmentType", segmentType.String()))
|
||||
|
||||
url, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), loadInfo.GetSegmentID())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
space, err := milvus_storage.Open(url, options.NewSpaceOptionBuilder().SetVersion(loadInfo.GetStorageVersion()).Build())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
segment := &LocalSegment{
|
||||
baseSegment: base,
|
||||
ptrLock: locker,
|
||||
ptr: segmentPtr,
|
||||
lastDeltaTimestamp: atomic.NewUint64(0),
|
||||
fields: typeutil.NewConcurrentMap[int64, *FieldInfo](),
|
||||
fieldIndexes: typeutil.NewConcurrentMap[int64, *IndexedFieldInfo](),
|
||||
space: space,
|
||||
memSize: atomic.NewInt64(-1),
|
||||
rowNum: atomic.NewInt64(-1),
|
||||
insertCount: atomic.NewInt64(0),
|
||||
}
|
||||
|
||||
if err := segment.initializeSegment(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return segment, nil
|
||||
}
|
||||
|
||||
func (s *LocalSegment) initializeSegment() error {
|
||||
loadInfo := s.loadInfo.Load()
|
||||
indexedFieldInfos, fieldBinlogs := separateIndexAndBinlog(loadInfo)
|
||||
|
@ -932,18 +856,7 @@ func (s *LocalSegment) LoadMultiFieldData(ctx context.Context) error {

	var status C.CStatus
	GetLoadPool().Submit(func() (any, error) {
		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
			uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
			if err != nil {
				return nil, err
			}

			loadFieldDataInfo.appendURI(uri)
			loadFieldDataInfo.appendStorageVersion(s.space.GetCurrentVersion())
			status = C.LoadFieldDataV2(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
		} else {
			status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
		}
		status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
		return nil, nil
	}).Await()
	if err := HandleCStatus(ctx, &status, "LoadMultiFieldData failed",
@ -1019,18 +932,7 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun
	var status C.CStatus
	GetLoadPool().Submit(func() (any, error) {
		log.Info("submitted loadFieldData task to load pool")
		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
			uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
			if err != nil {
				return nil, err
			}

			loadFieldDataInfo.appendURI(uri)
			loadFieldDataInfo.appendStorageVersion(s.space.GetCurrentVersion())
			status = C.LoadFieldDataV2(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
		} else {
			status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
		}
		status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
		return nil, nil
	}).Await()
	if err := HandleCStatus(ctx, &status, "LoadFieldData failed",
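Field loading on the querynode likewise collapses to the single binlog-based cgo call; there is no longer a storage-v2 URI or version to attach. A minimal sketch of the surviving path:

// Sketch: load field data for a segment via the v1 cgo path only.
var status C.CStatus
GetLoadPool().Submit(func() (any, error) {
	status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
	return nil, nil
}).Await()
if err := HandleCStatus(ctx, &status, "LoadFieldData failed"); err != nil {
	return err
}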
@ -1046,95 +948,6 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun
    return nil
}

func (s *LocalSegment) LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error {
    deleteReader, err := s.space.ScanDelete()
    if err != nil {
        return err
    }
    if !deleteReader.Schema().HasField(common.TimeStampFieldName) {
        return fmt.Errorf("can not read timestamp field in space")
    }
    pkFieldSchema, err := typeutil.GetPrimaryFieldSchema(schema)
    if err != nil {
        return err
    }
    ids := &schemapb.IDs{}
    var pkint64s []int64
    var pkstrings []string
    var tss []int64
    for deleteReader.Next() {
        rec := deleteReader.Record()
        indices := rec.Schema().FieldIndices(common.TimeStampFieldName)
        tss = append(tss, rec.Column(indices[0]).(*array.Int64).Int64Values()...)
        indices = rec.Schema().FieldIndices(pkFieldSchema.Name)
        switch pkFieldSchema.DataType {
        case schemapb.DataType_Int64:
            pkint64s = append(pkint64s, rec.Column(indices[0]).(*array.Int64).Int64Values()...)
        case schemapb.DataType_VarChar:
            columnData := rec.Column(indices[0]).(*array.String)
            for i := 0; i < columnData.Len(); i++ {
                pkstrings = append(pkstrings, columnData.Value(i))
            }
        default:
            return fmt.Errorf("unknown data type %v", pkFieldSchema.DataType)
        }
    }
    if err := deleteReader.Err(); err != nil && err != io.EOF {
        return err
    }

    switch pkFieldSchema.DataType {
    case schemapb.DataType_Int64:
        ids.IdField = &schemapb.IDs_IntId{
            IntId: &schemapb.LongArray{
                Data: pkint64s,
            },
        }
    case schemapb.DataType_VarChar:
        ids.IdField = &schemapb.IDs_StrId{
            StrId: &schemapb.StringArray{
                Data: pkstrings,
            },
        }
    default:
        return fmt.Errorf("unknown data type %v", pkFieldSchema.DataType)
    }

    idsBlob, err := proto.Marshal(ids)
    if err != nil {
        return err
    }

    if len(tss) == 0 {
        return nil
    }

    loadInfo := C.CLoadDeletedRecordInfo{
        timestamps:        unsafe.Pointer(&tss[0]),
        primary_keys:      (*C.uint8_t)(unsafe.Pointer(&idsBlob[0])),
        primary_keys_size: C.uint64_t(len(idsBlob)),
        row_count:         C.int64_t(len(tss)),
    }
    /*
        CStatus
        LoadDeletedRecord(CSegmentInterface c_segment, CLoadDeletedRecordInfo deleted_record_info)
    */
    var status C.CStatus
    GetDynamicPool().Submit(func() (any, error) {
        status = C.LoadDeletedRecord(s.ptr, loadInfo)
        return nil, nil
    }).Await()

    if err := HandleCStatus(ctx, &status, "LoadDeletedRecord failed"); err != nil {
        return err
    }

    log.Info("load deleted record done",
        zap.Int("rowNum", len(tss)),
        zap.String("segmentType", s.Type().String()))
    return nil
}
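The removed LoadDeltaData2 converts the primary keys scanned from the V2 delete log into a schemapb.IDs message before handing them to segcore. A minimal standalone sketch of that conversion follows; buildIDs is a hypothetical helper name, not part of the codebase.

package main

import (
    "fmt"

    "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

// buildIDs mirrors the switch in the removed LoadDeltaData2: int64 primary keys
// become an IntId array, varchar primary keys become a StrId array.
func buildIDs(pkType schemapb.DataType, int64PKs []int64, strPKs []string) (*schemapb.IDs, error) {
    ids := &schemapb.IDs{}
    switch pkType {
    case schemapb.DataType_Int64:
        ids.IdField = &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: int64PKs}}
    case schemapb.DataType_VarChar:
        ids.IdField = &schemapb.IDs_StrId{StrId: &schemapb.StringArray{Data: strPKs}}
    default:
        return nil, fmt.Errorf("unknown data type %v", pkType)
    }
    return ids, nil
}

func main() {
    // Example: two int64 primary keys marked as deleted.
    ids, err := buildIDs(schemapb.DataType_Int64, []int64{1, 2}, nil)
    fmt.Println(ids, err)
}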
func (s *LocalSegment) AddFieldDataInfo(ctx context.Context, rowCount int64, fields []*datapb.FieldBinlog) error {
    if !s.ptrLock.RLockIf(state.IsNotReleased) {
        return merr.WrapErrSegmentNotLoaded(s.ID(), "segment released")
@ -1331,13 +1144,6 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn
        IndexStoreVersion: indexInfo.GetIndexStoreVersion(),
    }

    if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
        uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
        if err != nil {
            return err
        }
        indexInfoProto.Uri = uri
    }
    newLoadIndexInfoSpan := tr.RecordSpan()

    // 2.

@ -20,7 +20,6 @@ import (
    "context"

    "github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
    "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
    "github.com/milvus-io/milvus/internal/proto/datapb"
    "github.com/milvus-io/milvus/internal/proto/querypb"
    "github.com/milvus-io/milvus/internal/proto/segcorepb"

@ -79,7 +78,6 @@ type Segment interface {
    Insert(ctx context.Context, rowIDs []int64, timestamps []typeutil.Timestamp, record *segcorepb.InsertRecord) error
    Delete(ctx context.Context, primaryKeys []storage.PrimaryKey, timestamps []typeutil.Timestamp) error
    LoadDeltaData(ctx context.Context, deltaData *storage.DeleteData) error
    LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error // storageV2
    LastDeltaTimestamp() uint64
    Release(ctx context.Context, opts ...releaseOption)

@ -23,7 +23,6 @@ import (
    "github.com/samber/lo"
    "go.uber.org/zap"

    "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
    "github.com/milvus-io/milvus/internal/proto/datapb"
    "github.com/milvus-io/milvus/internal/proto/querypb"
    "github.com/milvus-io/milvus/internal/proto/segcorepb"

@ -161,10 +160,6 @@ func (s *L0Segment) LoadDeltaData(ctx context.Context, deltaData *storage.Delete
    return nil
}

func (s *L0Segment) LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error {
    return merr.WrapErrServiceInternal("not implemented")
}

func (s *L0Segment) DeleteRecords() ([]storage.PrimaryKey, []uint64) {
    s.dataGuard.RLock()
    defer s.dataGuard.RUnlock()

@ -27,7 +27,6 @@ import "C"
import (
    "context"
    "fmt"
    "io"
    "path"
    "runtime/debug"
    "strconv"

@ -43,14 +42,11 @@ import (

    "github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
    "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
    milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
    "github.com/milvus-io/milvus-storage/go/storage/options"
    "github.com/milvus-io/milvus/internal/proto/datapb"
    "github.com/milvus-io/milvus/internal/proto/querypb"
    "github.com/milvus-io/milvus/internal/querycoordv2/params"
    "github.com/milvus-io/milvus/internal/querynodev2/pkoracle"
    "github.com/milvus-io/milvus/internal/storage"
    typeutil_internal "github.com/milvus-io/milvus/internal/util/typeutil"
    "github.com/milvus-io/milvus/pkg/common"
    "github.com/milvus-io/milvus/pkg/log"
    "github.com/milvus-io/milvus/pkg/metrics"
@ -126,406 +122,6 @@ type resourceEstimateFactor struct {
    deltaDataExpansionFactor float64
}

type segmentLoaderV2 struct {
    *segmentLoader
}

func NewLoaderV2(
    manager *Manager,
    cm storage.ChunkManager,
) *segmentLoaderV2 {
    return &segmentLoaderV2{
        segmentLoader: NewLoader(manager, cm),
    }
}

func (loader *segmentLoaderV2) LoadDelta(ctx context.Context, collectionID int64, segment Segment) error {
    collection := loader.manager.Collection.Get(collectionID)
    if collection == nil {
        err := merr.WrapErrCollectionNotFound(collectionID)
        log.Warn("failed to get collection while loading delta", zap.Error(err))
        return err
    }
    return segment.LoadDeltaData2(ctx, collection.Schema())
}

func (loader *segmentLoaderV2) Load(ctx context.Context,
    collectionID int64,
    segmentType SegmentType,
    version int64,
    segments ...*querypb.SegmentLoadInfo,
) ([]Segment, error) {
    log := log.Ctx(ctx).With(
        zap.Int64("collectionID", collectionID),
        zap.String("segmentType", segmentType.String()),
    )

    if len(segments) == 0 {
        log.Info("no segment to load")
        return nil, nil
    }
    // Filter out loaded & loading segments
    infos := loader.prepare(ctx, segmentType, segments...)
    defer loader.unregister(infos...)

    log = log.With(
        zap.Int64s("requestSegments", lo.Map(segments, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })),
        zap.Int64s("preparedSegments", lo.Map(infos, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })),
    )

    // continue to wait other task done
    log.Info("start loading...", zap.Int("segmentNum", len(segments)), zap.Int("afterFilter", len(infos)))

    // Check memory & storage limit
    requestResourceResult, err := loader.requestResource(ctx, infos...)
    if err != nil {
        log.Warn("request resource failed", zap.Error(err))
        return nil, err
    }
    defer loader.freeRequest(requestResourceResult.Resource)

    newSegments := typeutil.NewConcurrentMap[int64, Segment]()
    loaded := typeutil.NewConcurrentMap[int64, Segment]()
    defer func() {
        newSegments.Range(func(_ int64, s Segment) bool {
            s.Release(context.Background())
            return true
        })
        debug.FreeOSMemory()
    }()

    for _, info := range infos {
        loadInfo := info

        collection := loader.manager.Collection.Get(loadInfo.GetCollectionID())
        if collection == nil {
            err := merr.WrapErrCollectionNotFound(loadInfo.GetCollectionID())
            log.Warn("failed to get collection", zap.Error(err))
            return nil, err
        }

        segment, err := NewSegmentV2(ctx, collection, segmentType, version, loadInfo)
        if err != nil {
            log.Warn("load segment failed when create new segment",
                zap.Int64("partitionID", loadInfo.GetPartitionID()),
                zap.Int64("segmentID", loadInfo.GetSegmentID()),
                zap.Error(err),
            )
            return nil, err
        }

        newSegments.Insert(loadInfo.GetSegmentID(), segment)
    }

    loadSegmentFunc := func(idx int) error {
        loadInfo := infos[idx]
        partitionID := loadInfo.PartitionID
        segmentID := loadInfo.SegmentID
        segment, _ := newSegments.Get(segmentID)

        metrics.QueryNodeLoadSegmentConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), "LoadSegment").Inc()
        defer metrics.QueryNodeLoadSegmentConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), "LoadSegment").Dec()
        tr := timerecord.NewTimeRecorder("loadDurationPerSegment")

        var err error
        if loadInfo.GetLevel() == datapb.SegmentLevel_L0 {
            err = loader.LoadDelta(ctx, collectionID, segment)
        } else {
            err = loader.LoadSegment(ctx, segment.(*LocalSegment), loadInfo)
        }
        if err != nil {
            log.Warn("load segment failed when load data into memory",
                zap.Int64("partitionID", partitionID),
                zap.Int64("segmentID", segmentID),
                zap.Error(err),
            )
            return err
        }
        loader.manager.Segment.Put(ctx, segmentType, segment)
        newSegments.GetAndRemove(segmentID)
        loaded.Insert(segmentID, segment)
        log.Info("load segment done", zap.Int64("segmentID", segmentID))
        loader.notifyLoadFinish(loadInfo)

        metrics.QueryNodeLoadSegmentLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(tr.ElapseSpan().Milliseconds()))
        return nil
    }

    // Start to load,
    // Make sure we can always benefit from concurrency, and not spawn too many idle goroutines
    log.Info("start to load segments in parallel",
        zap.Int("segmentNum", len(infos)),
        zap.Int("concurrencyLevel", requestResourceResult.ConcurrencyLevel))
    err = funcutil.ProcessFuncParallel(len(infos),
        requestResourceResult.ConcurrencyLevel, loadSegmentFunc, "loadSegmentFunc")
    if err != nil {
        log.Warn("failed to load some segments", zap.Error(err))
        return nil, err
    }

    // Wait for all segments loaded
    segmentIDs := lo.Map(segments, func(info *querypb.SegmentLoadInfo, _ int) int64 { return info.GetSegmentID() })
    if err := loader.waitSegmentLoadDone(ctx, segmentType, segmentIDs, version); err != nil {
        log.Warn("failed to wait the filtered out segments load done", zap.Error(err))
        return nil, err
    }

    log.Info("all segment load done")
    var result []Segment
    loaded.Range(func(_ int64, s Segment) bool {
        result = append(result, s)
        return true
    })
    return result, nil
}

func (loader *segmentLoaderV2) LoadBloomFilterSet(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) ([]*pkoracle.BloomFilterSet, error) {
    log := log.Ctx(ctx).With(
        zap.Int64("collectionID", collectionID),
        zap.Int64s("segmentIDs", lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) int64 {
            return info.GetSegmentID()
        })),
    )

    segmentNum := len(infos)
    if segmentNum == 0 {
        log.Info("no segment to load")
        return nil, nil
    }

    collection := loader.manager.Collection.Get(collectionID)
    if collection == nil {
        err := merr.WrapErrCollectionNotFound(collectionID)
        log.Warn("failed to get collection while loading segment", zap.Error(err))
        return nil, err
    }

    log.Info("start loading remote...", zap.Int("segmentNum", segmentNum))

    loadedBfs := typeutil.NewConcurrentSet[*pkoracle.BloomFilterSet]()
    // TODO check memory for bf size
    loadRemoteFunc := func(idx int) error {
        loadInfo := infos[idx]
        partitionID := loadInfo.PartitionID
        segmentID := loadInfo.SegmentID
        bfs := pkoracle.NewBloomFilterSet(segmentID, partitionID, commonpb.SegmentState_Sealed)

        log.Info("loading bloom filter for remote...")
        err := loader.loadBloomFilter(ctx, segmentID, bfs, loadInfo.StorageVersion)
        if err != nil {
            log.Warn("load remote segment bloom filter failed",
                zap.Int64("partitionID", partitionID),
                zap.Int64("segmentID", segmentID),
                zap.Error(err),
            )
            return err
        }
        loadedBfs.Insert(bfs)

        return nil
    }

    err := funcutil.ProcessFuncParallel(segmentNum, segmentNum, loadRemoteFunc, "loadRemoteFunc")
    if err != nil {
        // no partial success here
        log.Warn("failed to load remote segment", zap.Error(err))
        return nil, err
    }

    return loadedBfs.Collect(), nil
}

func (loader *segmentLoaderV2) loadBloomFilter(ctx context.Context, segmentID int64, bfs *pkoracle.BloomFilterSet,
    storeVersion int64,
) error {
    log := log.Ctx(ctx).With(
        zap.Int64("segmentID", segmentID),
    )

    startTs := time.Now()

    url, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), segmentID)
    if err != nil {
        return err
    }
    space, err := milvus_storage.Open(url, options.NewSpaceOptionBuilder().SetVersion(storeVersion).Build())
    if err != nil {
        return err
    }

    statsBlobs := space.StatisticsBlobs()
    blobs := []*storage.Blob{}

    for _, statsBlob := range statsBlobs {
        blob := make([]byte, statsBlob.Size)
        _, err := space.ReadBlob(statsBlob.Name, blob)
        if err != nil && err != io.EOF {
            return err
        }

        blobs = append(blobs, &storage.Blob{Value: blob})
    }

    var stats []*storage.PrimaryKeyStats

    stats, err = storage.DeserializeStats(blobs)
    if err != nil {
        log.Warn("failed to deserialize stats", zap.Error(err))
        return err
    }

    var size uint
    for _, stat := range stats {
        pkStat := &storage.PkStatistics{
            PkFilter: stat.BF,
            MinPK:    stat.MinPk,
            MaxPK:    stat.MaxPk,
        }
        size += stat.BF.Cap()
        bfs.AddHistoricalStats(pkStat)
    }
    log.Info("Successfully load pk stats", zap.Duration("time", time.Since(startTs)), zap.Uint("size", size), zap.Int("BFNum", len(stats)))
    return nil
}

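For reference, the removed V2 read path boils down to opening a milvus-storage space at the segment's URI and scanning its statistics blobs. A trimmed, standalone sketch of that access pattern follows; readStatsBlobs and the URI value are illustrative placeholders, while the milvus-storage calls are the same ones used in the removed loadBloomFilter above.

package main

import (
    "fmt"
    "io"

    milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
    "github.com/milvus-io/milvus-storage/go/storage/options"
)

// readStatsBlobs opens the segment's space at a pinned storage version and
// returns the raw serialized stats blobs, without the Milvus-side deserialization.
func readStatsBlobs(uri string, storeVersion int64) ([][]byte, error) {
    space, err := milvus_storage.Open(uri, options.NewSpaceOptionBuilder().SetVersion(storeVersion).Build())
    if err != nil {
        return nil, err
    }

    var out [][]byte
    for _, statsBlob := range space.StatisticsBlobs() {
        buf := make([]byte, statsBlob.Size)
        if _, err := space.ReadBlob(statsBlob.Name, buf); err != nil && err != io.EOF {
            return nil, err
        }
        out = append(out, buf)
    }
    return out, nil
}

func main() {
    // "file:///tmp/milvus-v2/1001" is a made-up URI; the real code derived it via GetStorageURI.
    blobs, err := readStatsBlobs("file:///tmp/milvus-v2/1001", 3)
    fmt.Println(len(blobs), err)
}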
func (loader *segmentLoaderV2) LoadSegment(ctx context.Context,
    seg Segment,
    loadInfo *querypb.SegmentLoadInfo,
) (err error) {
    segment := seg.(*LocalSegment)
    // TODO: we should create a transaction-like api to load segment for segment interface,
    // but not do many things in segment loader.
    stateLockGuard, err := segment.StartLoadData()
    // segment can not do load now.
    if err != nil {
        return err
    }
    defer func() {
        // segment is already loaded.
        // TODO: if stateLockGuard is nil, we should not call LoadSegment anymore.
        // but current Load is not clear enough to do an actual state transition, keep previous logic to avoid introduced bug.
        if stateLockGuard != nil {
            stateLockGuard.Done(err)
        }
    }()

    log := log.Ctx(ctx).With(
        zap.Int64("collectionID", segment.Collection()),
        zap.Int64("partitionID", segment.Partition()),
        zap.String("shard", segment.Shard().VirtualName()),
        zap.Int64("segmentID", segment.ID()),
    )
    log.Info("start loading segment files",
        zap.Int64("rowNum", loadInfo.GetNumOfRows()),
        zap.String("segmentType", segment.Type().String()))

    collection := loader.manager.Collection.Get(segment.Collection())
    if collection == nil {
        err := merr.WrapErrCollectionNotFound(segment.Collection())
        log.Warn("failed to get collection while loading segment", zap.Error(err))
        return err
    }
    // pkField := GetPkField(collection.Schema())

    // TODO(xige-16): Optimize the data loading process and reduce data copying
    // for now, there will be multiple copies in the process of data loading into segCore
    defer debug.FreeOSMemory()

    if segment.Type() == SegmentTypeSealed {
        fieldsMap := typeutil.NewConcurrentMap[int64, *schemapb.FieldSchema]()
        for _, field := range collection.Schema().GetFields() {
            fieldsMap.Insert(field.FieldID, field)
        }
        // fieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo)
        indexedFieldInfos := make(map[int64]*IndexedFieldInfo)
        for _, indexInfo := range loadInfo.IndexInfos {
            if indexInfo.GetIndexStoreVersion() > 0 {
                fieldID := indexInfo.FieldID
                fieldInfo := &IndexedFieldInfo{
                    IndexInfo: indexInfo,
                }
                indexedFieldInfos[fieldID] = fieldInfo
                fieldsMap.Remove(fieldID)
                // fieldID2IndexInfo[fieldID] = indexInfo
            }
        }

        if err := segment.AddFieldDataInfo(ctx, loadInfo.GetNumOfRows(), loadInfo.GetBinlogPaths()); err != nil {
            return err
        }

        log.Info("load fields...",
            zap.Int("fieldNum", fieldsMap.Len()),
            zap.Int64s("indexedFields", lo.Keys(indexedFieldInfos)),
        )

        schemaHelper, err := typeutil.CreateSchemaHelper(collection.Schema())
        if err != nil {
            return err
        }
        tr := timerecord.NewTimeRecorder("segmentLoader.LoadIndex")
        if err := loader.loadFieldsIndex(ctx, schemaHelper, segment, loadInfo.GetNumOfRows(), indexedFieldInfos); err != nil {
            return err
        }
        metrics.QueryNodeLoadIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(tr.ElapseSpan().Milliseconds()))

        if err := loader.loadSealedSegmentFields(ctx, segment, fieldsMap, loadInfo.GetNumOfRows()); err != nil {
            return err
        }
        // https://github.com/milvus-io/milvus/23654
        // legacy entry num = 0
        if err := loader.patchEntryNumber(ctx, segment, loadInfo); err != nil {
            return err
        }
    } else {
        if err := segment.LoadMultiFieldData(ctx); err != nil {
            return err
        }
    }

    // load statslog if it's growing segment
    if segment.segmentType == SegmentTypeGrowing {
        log.Info("loading statslog...")
        // pkStatsBinlogs, logType := loader.filterPKStatsBinlogs(loadInfo.Statslogs, pkField.GetFieldID())
        err := loader.loadBloomFilter(ctx, segment.ID(), segment.bloomFilterSet, loadInfo.StorageVersion)
        if err != nil {
            return err
        }
    }

    log.Info("loading delta...")
    return loader.LoadDelta(ctx, segment.Collection(), segment)
}

func (loader *segmentLoaderV2) LoadLazySegment(ctx context.Context,
    segment Segment,
    loadInfo *querypb.SegmentLoadInfo,
) (err error) {
    return merr.ErrOperationNotSupported
}

func (loader *segmentLoaderV2) loadSealedSegmentFields(ctx context.Context, segment *LocalSegment, fields *typeutil.ConcurrentMap[int64, *schemapb.FieldSchema], rowCount int64) error {
    runningGroup, _ := errgroup.WithContext(ctx)
    fields.Range(func(fieldID int64, field *schemapb.FieldSchema) bool {
        runningGroup.Go(func() error {
            return segment.LoadFieldData(ctx, fieldID, rowCount, nil, false)
        })
        return true
    })

    err := runningGroup.Wait()
    if err != nil {
        return err
    }

    log.Ctx(ctx).Info("load field binlogs done for sealed segment",
        zap.Int64("collection", segment.Collection()),
        zap.Int64("segment", segment.ID()),
        zap.String("segmentType", segment.Type().String()))

    return nil
}

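The removed loadSealedSegmentFields fans field loads out with an errgroup, one goroutine per field, failing fast on the first error. A minimal standalone sketch of that pattern, with fieldIDs and the loadOne callback as placeholder inputs:

package main

import (
    "context"
    "fmt"

    "golang.org/x/sync/errgroup"
)

// loadFieldsConcurrently starts one goroutine per field and returns the first error, if any.
func loadFieldsConcurrently(ctx context.Context, fieldIDs []int64, loadOne func(context.Context, int64) error) error {
    g, ctx := errgroup.WithContext(ctx)
    for _, fieldID := range fieldIDs {
        fieldID := fieldID // capture loop variable for the goroutine
        g.Go(func() error {
            return loadOne(ctx, fieldID)
        })
    }
    return g.Wait()
}

func main() {
    err := loadFieldsConcurrently(context.Background(), []int64{100, 101, 102},
        func(ctx context.Context, fieldID int64) error {
            fmt.Println("loading field", fieldID)
            return nil
        })
    fmt.Println("done, err =", err)
}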
func NewLoader(
    manager *Manager,
    cm storage.ChunkManager,

@ -23,9 +23,6 @@ import (
    "testing"
    "time"

    "github.com/apache/arrow/go/v12/arrow"
    "github.com/apache/arrow/go/v12/arrow/array"
    "github.com/apache/arrow/go/v12/arrow/memory"
    "github.com/cockroachdb/errors"
    "github.com/stretchr/testify/mock"
    "github.com/stretchr/testify/suite"

@ -33,14 +30,10 @@ import (

    "github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
    "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
    milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
    "github.com/milvus-io/milvus-storage/go/storage/options"
    "github.com/milvus-io/milvus-storage/go/storage/schema"
    "github.com/milvus-io/milvus/internal/proto/datapb"
    "github.com/milvus-io/milvus/internal/proto/querypb"
    "github.com/milvus-io/milvus/internal/storage"
    "github.com/milvus-io/milvus/internal/util/initcore"
    "github.com/milvus-io/milvus/internal/util/typeutil"
    "github.com/milvus-io/milvus/pkg/common"
    "github.com/milvus-io/milvus/pkg/util/contextutil"
    "github.com/milvus-io/milvus/pkg/util/funcutil"
@ -911,152 +904,3 @@ func TestSegmentLoader(t *testing.T) {
    suite.Run(t, &SegmentLoaderSuite{})
    suite.Run(t, &SegmentLoaderDetailSuite{})
}

type SegmentLoaderV2Suite struct {
    suite.Suite
    loader *segmentLoaderV2

    // Dependencies
    manager      *Manager
    rootPath     string
    chunkManager storage.ChunkManager

    // Data
    collectionID int64
    partitionID  int64
    segmentID    int64
    schema       *schemapb.CollectionSchema
    segmentNum   int
}

func (suite *SegmentLoaderV2Suite) SetupSuite() {
    paramtable.Init()
    suite.rootPath = suite.T().Name()
    suite.collectionID = rand.Int63()
    suite.partitionID = rand.Int63()
    suite.segmentID = rand.Int63()
    suite.segmentNum = 5
}

func (suite *SegmentLoaderV2Suite) SetupTest() {
    paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("true")
    // Dependencies
    suite.manager = NewManager()
    ctx := context.Background()
    // TODO:: cpp chunk manager not support local chunk manager
    // suite.chunkManager = storage.NewLocalChunkManager(storage.RootPath(
    //  fmt.Sprintf("/tmp/milvus-ut/%d", rand.Int63())))
    chunkManagerFactory := storage.NewTestChunkManagerFactory(paramtable.Get(), suite.rootPath)
    suite.chunkManager, _ = chunkManagerFactory.NewPersistentStorageChunkManager(ctx)
    suite.loader = NewLoaderV2(suite.manager, suite.chunkManager)
    initcore.InitRemoteChunkManager(paramtable.Get())

    // Data
    suite.schema = GenTestCollectionSchema("test", schemapb.DataType_Int64, false)
    indexMeta := GenTestIndexMeta(suite.collectionID, suite.schema)
    loadMeta := &querypb.LoadMetaInfo{
        LoadType:     querypb.LoadType_LoadCollection,
        CollectionID: suite.collectionID,
        PartitionIDs: []int64{suite.partitionID},
    }
    suite.manager.Collection.PutOrRef(suite.collectionID, suite.schema, indexMeta, loadMeta)
}

func (suite *SegmentLoaderV2Suite) TearDownTest() {
    ctx := context.Background()
    for i := 0; i < suite.segmentNum; i++ {
        suite.manager.Segment.Remove(context.Background(), suite.segmentID+int64(i), querypb.DataScope_All)
    }
    suite.chunkManager.RemoveWithPrefix(ctx, suite.rootPath)
    paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
}

func (suite *SegmentLoaderV2Suite) TestLoad() {
    tmpDir := suite.T().TempDir()
    paramtable.Get().CommonCfg.StorageScheme.SwapTempValue("file")
    paramtable.Get().CommonCfg.StoragePathPrefix.SwapTempValue(tmpDir)
    ctx := context.Background()

    msgLength := 4

    arrowSchema, err := typeutil.ConvertToArrowSchema(suite.schema.Fields)
    suite.NoError(err)
    opt := options.NewSpaceOptionBuilder().
        SetSchema(schema.NewSchema(
            arrowSchema,
            &schema.SchemaOptions{
                PrimaryColumn: "int64Field",
                VectorColumn:  "floatVectorField",
                VersionColumn: "Timestamp",
            })).
        Build()
    uri, err := typeutil.GetStorageURI("file", tmpDir, suite.segmentID)
    suite.NoError(err)
    space, err := milvus_storage.Open(uri, opt)
    suite.NoError(err)

    b := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema)
    defer b.Release()
    insertData, err := genInsertData(msgLength, suite.schema)
    suite.NoError(err)

    err = typeutil.BuildRecord(b, insertData, suite.schema.Fields)
    suite.NoError(err)
    rec := b.NewRecord()
    defer rec.Release()
    reader, err := array.NewRecordReader(arrowSchema, []arrow.Record{rec})
    suite.NoError(err)
    err = space.Write(reader, &options.DefaultWriteOptions)
    suite.NoError(err)

    collMeta := genCollectionMeta(suite.collectionID, suite.partitionID, suite.schema)
    inCodec := storage.NewInsertCodecWithSchema(collMeta)
    statsLog, err := inCodec.SerializePkStatsByData(insertData)
    suite.NoError(err)

    err = space.WriteBlob(statsLog.Value, statsLog.Key, false)
    suite.NoError(err)

    dschema := space.Manifest().GetSchema().DeleteSchema()
    dbuilder := array.NewRecordBuilder(memory.DefaultAllocator, dschema)
    defer dbuilder.Release()
    dbuilder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2}, nil)
    dbuilder.Field(1).(*array.Int64Builder).AppendValues([]int64{100, 200}, nil)

    drec := dbuilder.NewRecord()
    defer drec.Release()

    dreader, err := array.NewRecordReader(dschema, []arrow.Record{drec})
    suite.NoError(err)

    err = space.Delete(dreader)
    suite.NoError(err)

    segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, &querypb.SegmentLoadInfo{
        SegmentID:      suite.segmentID,
        PartitionID:    suite.partitionID,
        CollectionID:   suite.collectionID,
        NumOfRows:      int64(msgLength),
        StorageVersion: 3,
        InsertChannel:  fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID),
    })
    suite.NoError(err)

    _, err = suite.loader.LoadBloomFilterSet(ctx, suite.collectionID, 0, &querypb.SegmentLoadInfo{
        SegmentID:      suite.segmentID,
        PartitionID:    suite.partitionID,
        CollectionID:   suite.collectionID,
        NumOfRows:      int64(msgLength),
        StorageVersion: 3,
        InsertChannel:  fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID),
    })
    suite.NoError(err)

    segment := segments[0]
    suite.EqualValues(4, segment.InsertCount())
    suite.Equal(int64(msgLength-2), segment.RowNum())
}

func TestSegmentLoaderV2(t *testing.T) {
    suite.Run(t, &SegmentLoaderV2Suite{})
}
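The removed test drives the V2 delete path by building an Arrow record of (primary key, timestamp) pairs with the arrow/go v12 builders, as shown above. A self-contained sketch of that record construction follows; the "pk" and "ts" column names are placeholders, since the real delete schema comes from space.Manifest().GetSchema().DeleteSchema().

package main

import (
    "fmt"

    "github.com/apache/arrow/go/v12/arrow"
    "github.com/apache/arrow/go/v12/arrow/array"
    "github.com/apache/arrow/go/v12/arrow/memory"
)

func main() {
    // Two int64 columns: deleted primary keys and their delete timestamps.
    deleteSchema := arrow.NewSchema([]arrow.Field{
        {Name: "pk", Type: arrow.PrimitiveTypes.Int64},
        {Name: "ts", Type: arrow.PrimitiveTypes.Int64},
    }, nil)

    b := array.NewRecordBuilder(memory.DefaultAllocator, deleteSchema)
    defer b.Release()
    b.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2}, nil)
    b.Field(1).(*array.Int64Builder).AppendValues([]int64{100, 200}, nil)

    rec := b.NewRecord()
    defer rec.Release()

    reader, err := array.NewRecordReader(deleteSchema, []arrow.Record{rec})
    if err != nil {
        panic(err)
    }
    defer reader.Release()
    fmt.Println("delete record rows:", rec.NumRows()) // 2
}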
@ -348,11 +348,7 @@ func (node *QueryNode) Init() error {
        node.subscribingChannels = typeutil.NewConcurrentSet[string]()
        node.unsubscribingChannels = typeutil.NewConcurrentSet[string]()
        node.manager = segments.NewManager()
        if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
            node.loader = segments.NewLoaderV2(node.manager, node.chunkManager)
        } else {
            node.loader = segments.NewLoader(node.manager, node.chunkManager)
        }
        node.loader = segments.NewLoader(node.manager, node.chunkManager)
        node.manager.SetLoader(node.loader)
        node.dispClient = msgdispatcher.NewClient(node.factory, typeutil.QueryNodeRole, node.GetNodeID())
        // init pipeline manager

@ -25,7 +25,7 @@ import (
    "github.com/stretchr/testify/assert"
    "go.uber.org/zap"

    "github.com/milvus-io/milvus-storage/go/common/log"
    "github.com/milvus-io/milvus/pkg/log"
)

func TestPerformance(t *testing.T) {

@ -24,9 +24,9 @@ import (
    "go.uber.org/zap"

    "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
    "github.com/milvus-io/milvus-storage/go/common/log"
    "github.com/milvus-io/milvus/internal/proto/internalpb"
    "github.com/milvus-io/milvus/internal/storage"
    "github.com/milvus-io/milvus/pkg/log"
    "github.com/milvus-io/milvus/pkg/util/merr"
)

@ -41,7 +41,6 @@ type CodecIndex interface {
    Delete() error
    CleanLocalData() error
    UpLoad() (map[string]int64, error)
    UpLoadV2() (int64, error)
}

var _ CodecIndex = (*CgoIndex)(nil)
@ -127,35 +126,6 @@ func CreateIndex(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo)
    return index, nil
}

func CreateIndexV2(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) {
    buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo)
    if err != nil {
        log.Ctx(ctx).Warn("marshal buildIndexInfo failed",
            zap.String("clusterID", buildIndexInfo.GetClusterID()),
            zap.Int64("buildID", buildIndexInfo.GetBuildID()),
            zap.Error(err))
        return nil, err
    }
    var indexPtr C.CIndex
    status := C.CreateIndexV2(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob)))
    if err := HandleCStatus(&status, "failed to create index"); err != nil {
        return nil, err
    }

    index := &CgoIndex{
        indexPtr: indexPtr,
        close:    false,
    }

    runtime.SetFinalizer(index, func(index *CgoIndex) {
        if index != nil && !index.close {
            log.Error("there is leakage in index object, please check.")
        }
    })

    return index, nil
}

// TODO: this seems to be used only for test. We should mark the method
// name with ForTest, or maybe move to test file.
func (index *CgoIndex) Build(dataset *Dataset) error {
@ -426,34 +396,3 @@ func (index *CgoIndex) UpLoad() (map[string]int64, error) {

    return res, nil
}

func (index *CgoIndex) UpLoadV2() (int64, error) {
    var cBinarySet C.CBinarySet

    status := C.SerializeIndexAndUpLoadV2(index.indexPtr, &cBinarySet)
    defer func() {
        if cBinarySet != nil {
            C.DeleteBinarySet(cBinarySet)
        }
    }()
    if err := HandleCStatus(&status, "failed to serialize index and upload index"); err != nil {
        return -1, err
    }

    buffer, err := GetBinarySetValue(cBinarySet, "index_store_version")
    if err != nil {
        return -1, err
    }
    var version int64

    version = int64(buffer[7])
    version = (version << 8) + int64(buffer[6])
    version = (version << 8) + int64(buffer[5])
    version = (version << 8) + int64(buffer[4])
    version = (version << 8) + int64(buffer[3])
    version = (version << 8) + int64(buffer[2])
    version = (version << 8) + int64(buffer[1])
    version = (version << 8) + int64(buffer[0])

    return version, nil
}
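The removed UpLoadV2 decodes the eight-byte "index_store_version" blob by shifting bytes manually, which is simply a little-endian int64 decode. An equivalent standalone sketch using the standard library (the buffer value here is illustrative):

package main

import (
    "encoding/binary"
    "fmt"
)

func main() {
    // Stand-in for the blob returned by GetBinarySetValue(cBinarySet, "index_store_version").
    buffer := []byte{7, 0, 0, 0, 0, 0, 0, 0}

    // Produces the same result as the byte-by-byte shifts in the removed code.
    version := int64(binary.LittleEndian.Uint64(buffer[:8]))
    fmt.Println(version) // 7
}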