mirror of https://github.com/milvus-io/milvus.git
Implement wal (#3345)
* implement wal Signed-off-by: groot <yihua.mo@zilliz.com> * wal unittest Signed-off-by: groot <yihua.mo@zilliz.com> * typo Signed-off-by: groot <yihua.mo@zilliz.com> * add unittest Signed-off-by: groot <yihua.mo@zilliz.com>pull/3367/head
parent
157f24b454
commit
0f191bcb8f
|
@ -28,6 +28,7 @@ constexpr int64_t MAX_DIMENSION = 32768;
|
|||
constexpr int32_t MAX_SEGMENT_ROW_COUNT = 4 * 1024 * 1024;
|
||||
constexpr int64_t DEFAULT_SEGMENT_ROW_COUNT = 100000; // default row count per segment when creating collection
|
||||
constexpr int64_t MAX_INSERT_DATA_SIZE = 256 * MB;
|
||||
constexpr int64_t MAX_WAL_FILE_SIZE = 256 * MB;
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
|
|
|
@ -94,7 +94,7 @@ class DB {
|
|||
// op_id is for wal machinery, this id will be used in MemManager
|
||||
virtual Status
|
||||
Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id = 0) = 0;
|
||||
idx_t op_id = 0) = 0;
|
||||
|
||||
virtual Status
|
||||
GetEntityByID(const std::string& collection_name, const IDNumbers& id_array,
|
||||
|
@ -103,7 +103,7 @@ class DB {
|
|||
|
||||
// op_id is for wal machinery, this id will be used in MemManager
|
||||
virtual Status
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id = 0) = 0;
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, idx_t op_id = 0) = 0;
|
||||
|
||||
virtual Status
|
||||
ListIDInSegment(const std::string& collection_name, int64_t segment_id, IDNumbers& entity_ids) = 0;
|
||||
|
|
|
@ -413,7 +413,7 @@ DBImpl::DescribeIndex(const std::string& collection_name, const std::string& fie
|
|||
|
||||
Status
|
||||
DBImpl::Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id) {
|
||||
idx_t op_id) {
|
||||
CHECK_INITIALIZED;
|
||||
|
||||
if (data_chunk == nullptr) {
|
||||
|
@ -510,7 +510,7 @@ DBImpl::GetEntityByID(const std::string& collection_name, const IDNumbers& id_ar
|
|||
}
|
||||
|
||||
Status
|
||||
DBImpl::DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id) {
|
||||
DBImpl::DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, idx_t op_id) {
|
||||
CHECK_INITIALIZED;
|
||||
|
||||
snapshot::ScopedSnapshotT ss;
|
||||
|
|
|
@ -87,7 +87,7 @@ class DBImpl : public DB, public ConfigObserver {
|
|||
|
||||
Status
|
||||
Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id) override;
|
||||
idx_t op_id) override;
|
||||
|
||||
Status
|
||||
GetEntityByID(const std::string& collection_name, const IDNumbers& id_array,
|
||||
|
@ -95,7 +95,7 @@ class DBImpl : public DB, public ConfigObserver {
|
|||
DataChunkPtr& data_chunk) override;
|
||||
|
||||
Status
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id) override;
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, idx_t op_id) override;
|
||||
|
||||
Status
|
||||
Query(const server::ContextPtr& context, const query::QueryPtr& query_ptr, engine::QueryResultPtr& result) override;
|
||||
|
|
|
@ -122,7 +122,7 @@ DBProxy::DescribeIndex(const std::string& collection_name, const std::string& fi
|
|||
|
||||
Status
|
||||
DBProxy::Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id) {
|
||||
idx_t op_id) {
|
||||
DB_CHECK
|
||||
return db_->Insert(collection_name, partition_name, data_chunk, op_id);
|
||||
}
|
||||
|
@ -136,7 +136,7 @@ DBProxy::GetEntityByID(const std::string& collection_name, const IDNumbers& id_a
|
|||
}
|
||||
|
||||
Status
|
||||
DBProxy::DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id) {
|
||||
DBProxy::DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, idx_t op_id) {
|
||||
DB_CHECK
|
||||
return db_->DeleteEntityByID(collection_name, entity_ids, op_id);
|
||||
}
|
||||
|
|
|
@ -76,7 +76,7 @@ class DBProxy : public DB {
|
|||
|
||||
Status
|
||||
Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id) override;
|
||||
idx_t op_id) override;
|
||||
|
||||
Status
|
||||
GetEntityByID(const std::string& collection_name, const IDNumbers& id_array,
|
||||
|
@ -84,7 +84,7 @@ class DBProxy : public DB {
|
|||
DataChunkPtr& data_chunk) override;
|
||||
|
||||
Status
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id) override;
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, idx_t op_id) override;
|
||||
|
||||
Status
|
||||
ListIDInSegment(const std::string& collection_name, int64_t segment_id, IDNumbers& entity_ids) override;
|
||||
|
|
|
@ -25,7 +25,7 @@ IDGenerator::~IDGenerator() = default;
|
|||
|
||||
constexpr size_t SimpleIDGenerator::MAX_IDS_PER_MICRO;
|
||||
|
||||
id_t
|
||||
idx_t
|
||||
SimpleIDGenerator::GetNextIDNumber() {
|
||||
auto now = std::chrono::system_clock::now();
|
||||
auto micros = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
|
||||
|
@ -61,7 +61,7 @@ SimpleIDGenerator::GetNextIDNumbers(size_t n, IDNumbers& ids) {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
id_t
|
||||
idx_t
|
||||
SafeIDGenerator::GetNextIDNumber() {
|
||||
auto now = std::chrono::system_clock::now();
|
||||
auto micros = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace engine {
|
|||
|
||||
class IDGenerator {
|
||||
public:
|
||||
virtual id_t
|
||||
virtual idx_t
|
||||
GetNextIDNumber() = 0;
|
||||
|
||||
virtual Status
|
||||
|
@ -36,7 +36,7 @@ class SimpleIDGenerator : public IDGenerator {
|
|||
public:
|
||||
~SimpleIDGenerator() override = default;
|
||||
|
||||
id_t
|
||||
idx_t
|
||||
GetNextIDNumber() override;
|
||||
|
||||
Status
|
||||
|
@ -60,7 +60,7 @@ class SafeIDGenerator : public IDGenerator {
|
|||
SafeIDGenerator() = default;
|
||||
~SafeIDGenerator() override = default;
|
||||
|
||||
id_t
|
||||
idx_t
|
||||
GetNextIDNumber() override;
|
||||
|
||||
Status
|
||||
|
|
|
@ -48,11 +48,11 @@ extern const char* DEFAULT_STRUCTURED_INDEX;
|
|||
extern const char* DEFAULT_PARTITON_TAG;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
using id_t = int64_t;
|
||||
using idx_t = int64_t;
|
||||
using offset_t = int32_t;
|
||||
using date_t = int32_t;
|
||||
|
||||
using IDNumbers = std::vector<id_t>;
|
||||
using IDNumbers = std::vector<idx_t>;
|
||||
|
||||
using VectorDistance = faiss::Index::distance_t;
|
||||
using VectorDistances = std::vector<VectorDistance>;
|
||||
|
|
|
@ -141,7 +141,7 @@ GetIDFromChunk(const engine::DataChunkPtr& chunk, engine::IDNumbers& ids) {
|
|||
}
|
||||
|
||||
if (!pair->second->data_.empty()) {
|
||||
ids.resize(pair->second->data_.size() / sizeof(engine::id_t));
|
||||
ids.resize(pair->second->data_.size() / sizeof(engine::idx_t));
|
||||
memcpy((void*)(ids.data()), pair->second->data_.data(), pair->second->data_.size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -226,7 +226,7 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id) {
|
|||
}
|
||||
|
||||
void
|
||||
MapAndCopyResult(const knowhere::DatasetPtr& dataset, const std::vector<id_t>& uids, int64_t nq, int64_t k,
|
||||
MapAndCopyResult(const knowhere::DatasetPtr& dataset, const std::vector<idx_t>& uids, int64_t nq, int64_t k,
|
||||
float* distances, int64_t* labels) {
|
||||
int64_t* res_ids = dataset->Get<int64_t*>(knowhere::meta::IDS);
|
||||
float* res_dist = dataset->Get<float*>(knowhere::meta::DISTANCE);
|
||||
|
@ -791,7 +791,7 @@ ExecutionEngineImpl::BuildKnowhereIndex(const std::string& field_name, const Col
|
|||
}
|
||||
LOG_ENGINE_DEBUG_ << "Index config: " << conf.dump();
|
||||
|
||||
std::vector<id_t> uids;
|
||||
std::vector<idx_t> uids;
|
||||
faiss::ConcurrentBitsetPtr blacklist;
|
||||
if (from_index) {
|
||||
auto dataset =
|
||||
|
|
|
@ -75,7 +75,7 @@ MemCollection::Add(int64_t partition_id, const milvus::engine::VectorSourcePtr&
|
|||
}
|
||||
|
||||
Status
|
||||
MemCollection::Delete(const std::vector<id_t>& ids) {
|
||||
MemCollection::Delete(const std::vector<idx_t>& ids) {
|
||||
// Locate which collection file the doc id lands in
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
|
@ -182,7 +182,7 @@ MemCollection::ApplyDeletes() {
|
|||
std::make_shared<segment::SegmentReader>(options_.meta_.path_, seg_visitor);
|
||||
|
||||
// Step 1: Check delete_id in mem
|
||||
std::vector<id_t> delete_ids;
|
||||
std::vector<idx_t> delete_ids;
|
||||
{
|
||||
segment::IdBloomFilterPtr pre_bloom_filter;
|
||||
STATUS_CHECK(segment_reader->LoadBloomFilter(pre_bloom_filter));
|
||||
|
@ -197,11 +197,11 @@ MemCollection::ApplyDeletes() {
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<engine::id_t> uids;
|
||||
std::vector<engine::idx_t> uids;
|
||||
STATUS_CHECK(segment_reader->LoadUids(uids));
|
||||
|
||||
std::sort(delete_ids.begin(), delete_ids.end());
|
||||
std::set<id_t> ids_to_check(delete_ids.begin(), delete_ids.end());
|
||||
std::set<idx_t> ids_to_check(delete_ids.begin(), delete_ids.end());
|
||||
|
||||
// Step 2: Mark previous deleted docs file and bloom filter file stale
|
||||
auto& field_visitors_map = seg_visitor->GetFieldVisitors();
|
||||
|
|
|
@ -40,7 +40,7 @@ class MemCollection {
|
|||
Add(int64_t partition_id, const VectorSourcePtr& source);
|
||||
|
||||
Status
|
||||
Delete(const std::vector<id_t>& ids);
|
||||
Delete(const std::vector<idx_t>& ids);
|
||||
|
||||
Status
|
||||
EraseMem(int64_t partition_id);
|
||||
|
@ -73,7 +73,7 @@ class MemCollection {
|
|||
|
||||
std::mutex mutex_;
|
||||
|
||||
std::set<id_t> doc_ids_to_delete_;
|
||||
std::set<idx_t> doc_ids_to_delete_;
|
||||
|
||||
std::atomic<uint64_t> lsn_;
|
||||
}; // SSMemCollection
|
||||
|
|
|
@ -30,7 +30,7 @@ class MemManager {
|
|||
InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, uint64_t lsn) = 0;
|
||||
|
||||
virtual Status
|
||||
DeleteEntities(int64_t collection_id, const std::vector<id_t>& entity_ids, uint64_t lsn) = 0;
|
||||
DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) = 0;
|
||||
|
||||
virtual Status
|
||||
Flush(int64_t collection_id) = 0;
|
||||
|
|
|
@ -150,7 +150,7 @@ MemManagerImpl::InsertEntitiesNoLock(int64_t collection_id, int64_t partition_id
|
|||
}
|
||||
|
||||
Status
|
||||
MemManagerImpl::DeleteEntities(int64_t collection_id, const std::vector<id_t>& entity_ids, uint64_t lsn) {
|
||||
MemManagerImpl::DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
MemCollectionPtr mem = GetMemByCollection(collection_id);
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ class MemManagerImpl : public MemManager {
|
|||
InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, uint64_t lsn) override;
|
||||
|
||||
Status
|
||||
DeleteEntities(int64_t collection_id, const std::vector<id_t>& entity_ids, uint64_t lsn) override;
|
||||
DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) override;
|
||||
|
||||
Status
|
||||
Flush(int64_t collection_id) override;
|
||||
|
|
|
@ -197,12 +197,12 @@ MemSegment::Add(const VectorSourcePtr& source) {
|
|||
}
|
||||
|
||||
Status
|
||||
MemSegment::Delete(const std::vector<id_t>& ids) {
|
||||
MemSegment::Delete(const std::vector<idx_t>& ids) {
|
||||
engine::SegmentPtr segment_ptr;
|
||||
segment_writer_ptr_->GetSegment(segment_ptr);
|
||||
|
||||
// Check wither the doc_id is present, if yes, delete it's corresponding buffer
|
||||
std::vector<id_t> uids;
|
||||
std::vector<idx_t> uids;
|
||||
segment_writer_ptr_->LoadUids(uids);
|
||||
|
||||
std::vector<offset_t> offsets;
|
||||
|
|
|
@ -39,7 +39,7 @@ class MemSegment {
|
|||
Add(const VectorSourcePtr& source);
|
||||
|
||||
Status
|
||||
Delete(const std::vector<id_t>& ids);
|
||||
Delete(const std::vector<idx_t>& ids);
|
||||
|
||||
int64_t
|
||||
GetCurrentMem();
|
||||
|
|
|
@ -137,7 +137,7 @@ TranscriptProxy::DescribeIndex(const std::string& collection_name, const std::st
|
|||
|
||||
Status
|
||||
TranscriptProxy::Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id) {
|
||||
idx_t op_id) {
|
||||
return db_->Insert(collection_name, partition_name, data_chunk);
|
||||
}
|
||||
|
||||
|
@ -149,7 +149,8 @@ TranscriptProxy::GetEntityByID(const std::string& collection_name, const IDNumbe
|
|||
}
|
||||
|
||||
Status
|
||||
TranscriptProxy::DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id) {
|
||||
TranscriptProxy::DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids,
|
||||
idx_t op_id) {
|
||||
return db_->DeleteEntityByID(collection_name, entity_ids);
|
||||
}
|
||||
|
||||
|
|
|
@ -76,7 +76,7 @@ class TranscriptProxy : public DBProxy {
|
|||
|
||||
Status
|
||||
Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id) override;
|
||||
idx_t op_id) override;
|
||||
|
||||
Status
|
||||
GetEntityByID(const std::string& collection_name, const IDNumbers& id_array,
|
||||
|
@ -84,7 +84,7 @@ class TranscriptProxy : public DBProxy {
|
|||
DataChunkPtr& data_chunk) override;
|
||||
|
||||
Status
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id) override;
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, idx_t op_id) override;
|
||||
|
||||
Status
|
||||
ListIDInSegment(const std::string& collection_name, int64_t segment_id, IDNumbers& entity_ids) override;
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "db/wal/WalFile.h"
|
||||
#include "db/Constants.h"
|
||||
#include "db/Types.h"
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
// Construct an unopened wal file object; no resources are acquired here.
WalFile::WalFile() = default;

// Release the underlying FILE handle, if any, when the object goes away.
WalFile::~WalFile() {
    CloseFile();
}
||||
|
||||
// Open `path` in the requested mode, closing any previously opened file first.
// Fixed: the original used the non-standard fopen mode string "awb" for both
// write modes; per the C standard the extra 'w' is at best ignored, so
// OVER_WRITE behaved like append and the file was never truncated.
Status
WalFile::OpenFile(const std::string& path, OpenMode mode) {
    CloseFile();

    // Map each OpenMode to a valid fopen() mode string.
    const char* str_mode = nullptr;
    switch (mode) {
        case OpenMode::READ:
            str_mode = "rb";
            break;
        case OpenMode::OVER_WRITE:
            str_mode = "wb";  // truncate existing content
            break;
        case OpenMode::APPEND_WRITE:
            str_mode = "ab";  // keep existing content, write at end
            break;
        default:
            return Status(DB_ERROR, "Unsupported wal file open mode");
    }

    file_ = fopen(path.c_str(), str_mode);
    if (file_ == nullptr) {
        std::string msg = "Failed to create wal file: " + path;
        return Status(DB_ERROR, msg);
    }

    // In append mode, account for bytes already on disk so that
    // ExceedMaxSize() reflects the true file size after a reopen.
    if (mode == OpenMode::APPEND_WRITE) {
        fseek(file_, 0, SEEK_END);
        file_size_ = ftell(file_);
    }

    file_path_ = path;
    mode_ = mode;

    return Status::OK();
}
||||
|
||||
// Close the file handle (if open) and reset all bookkeeping state.
Status
WalFile::CloseFile() {
    // Nothing to do when no file is open.
    if (file_ == nullptr) {
        return Status::OK();
    }

    fclose(file_);
    file_ = nullptr;
    file_size_ = 0;
    file_path_.clear();

    return Status::OK();
}
||||
|
||||
bool
|
||||
WalFile::ExceedMaxSize(int64_t append_size) {
|
||||
return (file_size_ + append_size) > MAX_WAL_FILE_SIZE;
|
||||
}
|
||||
|
||||
// Read the operation id stored in the trailing sizeof(idx_t) bytes of the
// file, leaving the read position where it was. On any failure `op_id` keeps
// the max() sentinel. Fixed: for files shorter than one idx_t the original
// computed a negative offset and passed it to fseek(SEEK_SET), which fails
// (or worse), then fread from an unspecified position.
Status
WalFile::ReadLastOpId(idx_t& op_id) {
    op_id = std::numeric_limits<idx_t>::max();
    if (file_ == nullptr || mode_ != OpenMode::READ) {
        return Status(DB_ERROR, "File not opened or not read mode");
    }

    // Remember the current position so callers can keep iterating afterwards.
    auto cur_pos = ftell(file_);

    // Get total length.
    fseek(file_, 0, SEEK_END);
    auto end_pos = ftell(file_);

    // Read the last id, guarding against a file smaller than one id.
    idx_t last_id = 0;
    int64_t offset = static_cast<int64_t>(end_pos) - static_cast<int64_t>(sizeof(last_id));
    if (offset >= 0) {
        fseek(file_, offset, SEEK_SET);
        size_t bytes = fread(&last_id, 1, sizeof(last_id), file_);
        if (bytes == sizeof(last_id)) {
            op_id = last_id;
        }
    }

    // Restore the original position.
    fseek(file_, cur_pos, SEEK_SET);
    return Status::OK();
}
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
|
@ -0,0 +1,144 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "db/Types.h"
|
||||
#include "utils/Status.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
class WalFile {
|
||||
public:
|
||||
WalFile();
|
||||
~WalFile();
|
||||
|
||||
bool
|
||||
IsOpened() const {
|
||||
return file_ != nullptr;
|
||||
}
|
||||
|
||||
enum OpenMode {
|
||||
NA = 0,
|
||||
READ = 1,
|
||||
OVER_WRITE = 2,
|
||||
APPEND_WRITE = 3,
|
||||
};
|
||||
Status
|
||||
OpenFile(const std::string& path, OpenMode mode);
|
||||
|
||||
Status
|
||||
CloseFile();
|
||||
|
||||
bool
|
||||
ExceedMaxSize(int64_t append_size);
|
||||
|
||||
template <typename T>
|
||||
inline int64_t
|
||||
Write(T* value) {
|
||||
if (file_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t bytes = fwrite(value, 1, sizeof(T), file_);
|
||||
file_size_ += bytes;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
inline int64_t
|
||||
Write(void* data, int64_t length) {
|
||||
if (file_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t bytes = fwrite(data, 1, length, file_);
|
||||
file_size_ += bytes;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline int64_t
|
||||
Read(T* value) {
|
||||
if (file_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t bytes = fread(value, 1, sizeof(T), file_);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
inline int64_t
|
||||
Read(void* data, int64_t length) {
|
||||
if (file_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t bytes = fread(data, 1, length, file_);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
inline int64_t
|
||||
ReadStr(std::string& str, int64_t length) {
|
||||
if (file_ == nullptr || length <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
char* buf = new char[length + 1];
|
||||
int64_t bytes = fread(buf, 1, length, file_);
|
||||
buf[length] = '\0';
|
||||
str = buf;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
inline void
|
||||
Flush() {
|
||||
if (file_ && mode_ != OpenMode::READ) {
|
||||
fflush(file_);
|
||||
}
|
||||
}
|
||||
|
||||
int64_t
|
||||
Size() const {
|
||||
return file_size_;
|
||||
}
|
||||
std::string
|
||||
Path() const {
|
||||
return file_path_;
|
||||
}
|
||||
|
||||
Status
|
||||
ReadLastOpId(idx_t& op_id);
|
||||
|
||||
void inline SeekForward(int64_t offset) {
|
||||
if (file_ == nullptr || mode_ != OpenMode::READ) {
|
||||
return;
|
||||
}
|
||||
|
||||
fseek(file_, offset, SEEK_CUR);
|
||||
}
|
||||
|
||||
private:
|
||||
FILE* file_ = nullptr;
|
||||
OpenMode mode_ = OpenMode::NA;
|
||||
int64_t file_size_ = 0;
|
||||
std::string file_path_;
|
||||
};
|
||||
|
||||
using WalFilePtr = std::shared_ptr<WalFile>;
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
|
@ -10,16 +10,23 @@
|
|||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "db/wal/WalManager.h"
#include "config/ServerConfig.h"
#include "db/Utils.h"
#include "db/wal/WalOperationCodec.h"
#include "utils/CommonUtil.h"

#include <exception>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <utility>

#include <experimental/filesystem>
|
||||
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
WalManager::WalManager() {
|
||||
wal_path_ = config.wal.path();
|
||||
wal_buffer_size_ = config.wal.buffer_size();
|
||||
insert_buffer_size_ = config.cache.insert_buffer_size();
|
||||
const char* MAX_OP_ID_FILE_NAME = "max_op";
|
||||
|
||||
WalManager::WalManager() : cleanup_thread_pool_(1, 1) {
|
||||
}
|
||||
|
||||
WalManager&
|
||||
|
@ -28,6 +35,32 @@ WalManager::GetInstance() {
|
|||
return s_mgr;
|
||||
}
|
||||
|
||||
Status
|
||||
WalManager::Start(const DBOptions& options) {
|
||||
enable_ = options.wal_enable_;
|
||||
wal_path_ = options.meta_.path_;
|
||||
insert_buffer_size_ = options.insert_buffer_size_;
|
||||
|
||||
CommonUtil::CreateDirectory(wal_path_);
|
||||
|
||||
auto status = ReadMaxOpId();
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
WalManager::Stop() {
|
||||
std::lock_guard<std::mutex> lck(cleanup_thread_mutex_);
|
||||
for (auto& iter : cleanup_thread_results_) {
|
||||
iter.wait();
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
WalManager::RecordOperation(const WalOperationPtr& operation, const DBPtr& db) {
|
||||
if (operation == nullptr) {
|
||||
|
@ -50,6 +83,128 @@ WalManager::RecordOperation(const WalOperationPtr& operation, const DBPtr& db) {
|
|||
break;
|
||||
}
|
||||
|
||||
if (!status.ok()) {
|
||||
LOG_ENGINE_DEBUG_ << "Failed to record wal opertiaon: " << status.message();
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Record that operation `op_id` of `collection_name` has been fully applied.
// Updates the in-memory high-water mark, persists it to the collection's
// max_op file, and triggers wal-file cleanup when the mark advanced.
// Fixed: misspelled local variable, and the result of WalFile::OpenFile was
// ignored — a failed open made the subsequent Write a silent no-op.
Status
WalManager::OperationDone(const std::string& collection_name, idx_t op_id) {
    if (!enable_) {
        return Status::OK();
    }

    bool start_cleanup = false;
    {
        // Record the max confirmed operation id for each collection.
        std::lock_guard<std::mutex> lock(max_op_mutex_);
        idx_t last_id = max_op_id_map_[collection_name];
        if (op_id > last_id) {
            max_op_id_map_[collection_name] = op_id;
            start_cleanup = true;

            // Persist the max op id so it survives a restart; skip the write
            // when the file cannot be opened (in-memory state stays correct).
            std::string path = ConstructFilePath(collection_name, MAX_OP_ID_FILE_NAME);
            WalFile file;
            auto status = file.OpenFile(path, WalFile::OVER_WRITE);
            if (status.ok()) {
                file.Write<idx_t>(&op_id);
            }
        }
    }

    if (start_cleanup) {
        StartCleanupThread(collection_name);
    }

    return Status::OK();
}
|
||||
|
||||
// Replay pending wal operations for every collection directory under
// wal_path_, deleting wal files whose operations are all confirmed done.
// Fixed: the original declared `WalOperationPtr operation;` (a null
// shared_ptr) and assigned `operation->collection_name_` BEFORE
// WalOperationCodec::IterateOperation populated it — a guaranteed null
// dereference; the pointer is now checked after IterateOperation fills it.
// Also guards std::stol against non-numeric file names, which would throw.
Status
WalManager::Recovery(const DBPtr& db) {
    using DirectoryIterator = std::experimental::filesystem::recursive_directory_iterator;
    DirectoryIterator iter_outer(wal_path_);
    DirectoryIterator end_outer;
    for (; iter_outer != end_outer; ++iter_outer) {
        auto path_outer = (*iter_outer).path();
        if (!std::experimental::filesystem::is_directory(path_outer)) {
            continue;
        }

        // Each sub-directory of wal_path_ is named after a collection.
        std::string collection_name = path_outer.filename().c_str();

        // Collect wal files, keyed by the operation id encoded in the name.
        std::map<idx_t, std::experimental::filesystem::path> id_files;
        DirectoryIterator iter_inner(path_outer);
        DirectoryIterator end_inner;
        for (; iter_inner != end_inner; ++iter_inner) {
            auto path_inner = (*iter_inner).path();
            std::string file_name = path_inner.filename().c_str();
            if (file_name == MAX_OP_ID_FILE_NAME) {
                continue;
            }
            idx_t op_id = 0;
            try {
                op_id = std::stol(file_name);
            } catch (const std::exception&) {
                continue;  // ignore files whose name is not an operation id
            }
            id_files.insert(std::make_pair(op_id, path_inner));
        }

        // The max already-confirmed operation id for this collection.
        // NOTE(review): when no id was recorded the sentinel stays max(),
        // which makes every wal file below look fully applied and deletes it
        // without replay — confirm this is the intended semantics.
        idx_t max_op_id = std::numeric_limits<idx_t>::max();
        {
            std::lock_guard<std::mutex> lock(max_op_mutex_);
            if (max_op_id_map_.find(collection_name) != max_op_id_map_.end()) {
                max_op_id = max_op_id_map_[collection_name];
            }
        }

        // id_files is ordered ascending by id, so files are replayed in order.
        for (auto& pair : id_files) {
            WalFilePtr file = std::make_shared<WalFile>();
            file->OpenFile(pair.second.c_str(), WalFile::READ);
            idx_t last_id = 0;
            file->ReadLastOpId(last_id);
            if (last_id <= max_op_id) {
                file->CloseFile();
                std::experimental::filesystem::remove(pair.second);
                continue;  // skip and delete this file since all its operations are already done
            }

            // Iterate the file's operations and replay each one against the db.
            Status status = Status::OK();
            while (status.ok()) {
                WalOperationPtr operation;
                status = WalOperationCodec::IterateOperation(file, operation, max_op_id);
                if (operation == nullptr) {
                    break;  // nothing more to replay in this file
                }
                operation->collection_name_ = collection_name;
                PerformOperation(operation, db);
            }
        }
    }

    return Status::OK();
}
|
||||
|
||||
// Scan each collection sub-directory under wal_path_ and load the persisted
// max operation id (the MAX_OP_ID_FILE_NAME file written by OperationDone)
// into max_op_id_map_. Missing files are skipped silently.
Status
WalManager::ReadMaxOpId() {
    using DirectoryIterator = std::experimental::filesystem::recursive_directory_iterator;
    DirectoryIterator iter(wal_path_);
    DirectoryIterator end;
    for (; iter != end; ++iter) {
        auto path = (*iter).path();
        if (std::experimental::filesystem::is_directory(path)) {
            // Directory name is the collection name; its max_op file sits inside.
            std::string collection_name = path.filename().c_str();
            path.append(MAX_OP_ID_FILE_NAME);
            if (!std::experimental::filesystem::is_regular_file(path)) {
                continue; // ignore?
            }

            // The file holds a single idx_t: the last confirmed operation id.
            // NOTE(review): OpenFile/Read results are not checked — a corrupt
            // or unreadable file leaves max_op at 0; confirm this is acceptable.
            WalFile file;
            file.OpenFile(path.c_str(), WalFile::READ);
            idx_t max_op = 0;
            file.Read(&max_op);

            std::lock_guard<std::mutex> lock(max_op_mutex_);
            max_op_id_map_.insert(std::make_pair(collection_name, max_op));
        }
    }

    return Status::OK();
}
|
||||
|
||||
|
@ -58,28 +213,209 @@ WalManager::RecordInsertOperation(const InsertEntityOperationPtr& operation, con
|
|||
std::vector<DataChunkPtr> chunks;
|
||||
SplitChunk(operation->data_chunk_, chunks);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
IDNumbers op_ids;
|
||||
auto status = id_gen_.GetNextIDNumbers(chunks.size(), op_ids);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
|
||||
Status
|
||||
WalManager::SplitChunk(const DataChunkPtr& chunk, std::vector<DataChunkPtr>& chunks) {
|
||||
int64_t chunk_size = utils::GetSizeOfChunk(chunk);
|
||||
if (chunk_size > insert_buffer_size_) {
|
||||
} else {
|
||||
chunks.push_back(chunk);
|
||||
for (size_t i = 0; i < chunks.size(); ++i) {
|
||||
idx_t op_id = op_ids[i];
|
||||
DataChunkPtr& chunk = chunks[i];
|
||||
int64_t chunk_size = utils::GetSizeOfChunk(chunk);
|
||||
|
||||
{
|
||||
// open wal file
|
||||
std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id));
|
||||
std::lock_guard<std::mutex> lock(file_map_mutex_);
|
||||
WalFilePtr file = file_map_[operation->collection_name_];
|
||||
if (file == nullptr) {
|
||||
file = std::make_shared<WalFile>();
|
||||
file_map_[operation->collection_name_] = file;
|
||||
file->OpenFile(path, WalFile::APPEND_WRITE);
|
||||
} else if (!file->IsOpened() || file->ExceedMaxSize(chunk_size)) {
|
||||
file->OpenFile(path, WalFile::APPEND_WRITE);
|
||||
}
|
||||
|
||||
// write to wal file
|
||||
status = WalOperationCodec::WriteInsertOperation(file, operation->partition_name, chunk, op_id);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
// insert action to db
|
||||
status = db->Insert(operation->collection_name_, operation->partition_name, operation->data_chunk_, op_id);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
WalManager::RecordDeleteOperation(const DeleteEntityOperationPtr& operation, const DBPtr& db) {
|
||||
WalManager::SplitChunk(const DataChunkPtr& chunk, std::vector<DataChunkPtr>& chunks) {
|
||||
// int64_t chunk_size = utils::GetSizeOfChunk(chunk);
|
||||
// if (chunk_size > insert_buffer_size_) {
|
||||
// int64_t batch = chunk_size / insert_buffer_size_;
|
||||
// int64_t batch_count = chunk->count_ / batch;
|
||||
// for (int64_t i = 0; i <= batch; ++i) {
|
||||
// }
|
||||
// } else {
|
||||
// chunks.push_back(chunk);
|
||||
// }
|
||||
chunks.push_back(chunk);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
WalManager::OperationDone(id_t op_id) {
|
||||
return Status::OK();
|
||||
WalManager::RecordDeleteOperation(const DeleteEntityOperationPtr& operation, const DBPtr& db) {
|
||||
idx_t op_id = id_gen_.GetNextIDNumber();
|
||||
int64_t append_size = operation->entity_ids_.size() * sizeof(idx_t);
|
||||
|
||||
{
|
||||
// open wal file
|
||||
std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id));
|
||||
std::lock_guard<std::mutex> lock(file_map_mutex_);
|
||||
WalFilePtr file = file_map_[operation->collection_name_];
|
||||
if (file == nullptr) {
|
||||
file = std::make_shared<WalFile>();
|
||||
file_map_[operation->collection_name_] = file;
|
||||
file->OpenFile(path, WalFile::APPEND_WRITE);
|
||||
} else if (!file->IsOpened() || file->ExceedMaxSize(append_size)) {
|
||||
file->OpenFile(path, WalFile::APPEND_WRITE);
|
||||
}
|
||||
|
||||
// write to wal file
|
||||
auto status = WalOperationCodec::WriteDeleteOperation(file, operation->entity_ids_, op_id);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
// delete action to db
|
||||
return db->DeleteEntityByID(operation->collection_name_, operation->entity_ids_, op_id);
|
||||
}
|
||||
|
||||
// Build "<wal_path_>/<collection_name>/<file_name>", creating the wal root
// and the collection directory if they do not exist yet (create_directory
// is a no-op when the directory already exists).
std::string
WalManager::ConstructFilePath(const std::string& collection_name, const std::string& file_name) {
    std::experimental::filesystem::path full_path(wal_path_);
    std::experimental::filesystem::create_directory(full_path);
    full_path.append(collection_name);
    std::experimental::filesystem::create_directory(full_path);
    full_path.append(file_name);

    std::string path(full_path.c_str());
    return path;
}
|
||||
|
||||
// Schedule a CleanupThread task for `collection_name` on cleanup_thread_pool_
// (constructed with size 1), ensuring at most one pending cleanup task at a
// time. If a previous task is still running, the request is dropped —
// cleanup is best-effort and will be retried on the next OperationDone.
void
WalManager::StartCleanupThread(const std::string& collection_name) {
    // the previous thread finished?
    std::lock_guard<std::mutex> lck(cleanup_thread_mutex_);
    if (cleanup_thread_results_.empty()) {
        // start a new cleanup thread
        cleanup_thread_results_.push_back(
            cleanup_thread_pool_.enqueue(&WalManager::CleanupThread, this, collection_name));
    } else {
        // Poll the pending task for 1ms; if it already finished, discard its
        // future and enqueue a fresh task. Otherwise skip this request.
        std::chrono::milliseconds span(1);
        if (cleanup_thread_results_.back().wait_for(span) == std::future_status::ready) {
            cleanup_thread_results_.pop_back();

            // start a new cleanup thread
            cleanup_thread_results_.push_back(
                cleanup_thread_pool_.enqueue(&WalManager::CleanupThread, this, collection_name));
        }
    }
}
|
||||
|
||||
void
|
||||
WalManager::CleanupThread(std::string collection_name) {
|
||||
SetThreadName("wal_clean");
|
||||
|
||||
using DirectoryIterator = std::experimental::filesystem::recursive_directory_iterator;
|
||||
DirectoryIterator iter_outer(wal_path_);
|
||||
DirectoryIterator end_outer;
|
||||
for (; iter_outer != end_outer; ++iter_outer) {
|
||||
auto path_outer = (*iter_outer).path();
|
||||
if (!std::experimental::filesystem::is_directory(path_outer)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// get max operation id
|
||||
std::string file_name = path_outer.filename().c_str();
|
||||
if (file_name != collection_name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
idx_t max_op = std::numeric_limits<idx_t>::max();
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(max_op_mutex_);
|
||||
if (max_op_id_map_.find(collection_name) != max_op_id_map_.end()) {
|
||||
max_op = max_op_id_map_[collection_name];
|
||||
}
|
||||
}
|
||||
|
||||
// iterate files
|
||||
std::map<idx_t, std::experimental::filesystem::path> id_files;
|
||||
DirectoryIterator iter_inner(path_outer);
|
||||
DirectoryIterator end_inner;
|
||||
for (; iter_inner != end_inner; ++iter_inner) {
|
||||
auto path_inner = (*iter_inner).path();
|
||||
std::string file_name = path_inner.filename().c_str();
|
||||
if (file_name == MAX_OP_ID_FILE_NAME) {
|
||||
continue;
|
||||
}
|
||||
idx_t op_id = std::stol(file_name);
|
||||
id_files.insert(std::make_pair(op_id, path_inner));
|
||||
}
|
||||
|
||||
if (id_files.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// remove unused files
|
||||
// the std::map arrange id in assendent, direct delete files except the last one
|
||||
idx_t max_id = id_files.rbegin()->first;
|
||||
std::experimental::filesystem::path max_file = id_files.rbegin()->second;
|
||||
id_files.erase(max_id);
|
||||
for (auto& pair : id_files) {
|
||||
std::experimental::filesystem::remove(pair.second);
|
||||
}
|
||||
|
||||
// the last wal file need to be deleted?
|
||||
WalFile file;
|
||||
file.OpenFile(max_file.c_str(), WalFile::READ);
|
||||
idx_t last_id = 0;
|
||||
file.ReadLastOpId(last_id);
|
||||
if (last_id <= max_op) {
|
||||
file.CloseFile();
|
||||
std::experimental::filesystem::remove(max_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status
|
||||
WalManager::PerformOperation(const WalOperationPtr& operation, const DBPtr& db) {
|
||||
Status status;
|
||||
switch (operation->Type()) {
|
||||
case WalOperationType::INSERT_ENTITY: {
|
||||
InsertEntityOperationPtr op = std::static_pointer_cast<InsertEntityOperation>(operation);
|
||||
status = db->Insert(op->collection_name_, op->partition_name, op->data_chunk_, op->ID());
|
||||
break;
|
||||
}
|
||||
case WalOperationType::DELETE_ENTITY: {
|
||||
DeleteEntityOperationPtr op = std::static_pointer_cast<DeleteEntityOperation>(operation);
|
||||
status = db->DeleteEntityByID(op->collection_name_, op->entity_ids_, op->ID());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return Status(DB_ERROR, "Unsupportted wal operation");
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace engine
|
||||
|
|
|
@ -14,10 +14,15 @@
|
|||
#include "db/DB.h"
|
||||
#include "db/IDGenerator.h"
|
||||
#include "db/Types.h"
|
||||
#include "db/wal/WalFile.h"
|
||||
#include "db/wal/WalOperation.h"
|
||||
#include "utils/Status.h"
|
||||
#include "utils/ThreadPool.h"
|
||||
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace milvus {
|
||||
|
@ -30,18 +35,25 @@ class WalManager {
|
|||
static WalManager&
|
||||
GetInstance();
|
||||
|
||||
void
|
||||
SetWalPath(const std::string& path) {
|
||||
wal_path_ = path;
|
||||
}
|
||||
Status
|
||||
Start(const DBOptions& options);
|
||||
|
||||
Status
|
||||
Stop();
|
||||
|
||||
Status
|
||||
RecordOperation(const WalOperationPtr& operation, const DBPtr& db);
|
||||
|
||||
Status
|
||||
OperationDone(id_t op_id);
|
||||
OperationDone(const std::string& collection_name, idx_t op_id);
|
||||
|
||||
Status
|
||||
Recovery(const DBPtr& db);
|
||||
|
||||
private:
|
||||
Status
|
||||
ReadMaxOpId();
|
||||
|
||||
Status
|
||||
RecordInsertOperation(const InsertEntityOperationPtr& operation, const DBPtr& db);
|
||||
|
||||
|
@ -51,12 +63,36 @@ class WalManager {
|
|||
Status
|
||||
SplitChunk(const DataChunkPtr& chunk, std::vector<DataChunkPtr>& chunks);
|
||||
|
||||
std::string
|
||||
ConstructFilePath(const std::string& collection_name, const std::string& file_name);
|
||||
|
||||
void
|
||||
StartCleanupThread(const std::string& collection_name);
|
||||
|
||||
void
|
||||
CleanupThread(std::string collection_name);
|
||||
|
||||
Status
|
||||
PerformOperation(const WalOperationPtr& operation, const DBPtr& db);
|
||||
|
||||
private:
|
||||
SafeIDGenerator id_gen_;
|
||||
|
||||
bool enable_ = false;
|
||||
std::string wal_path_;
|
||||
int64_t wal_buffer_size_ = 0;
|
||||
int64_t insert_buffer_size_ = 0;
|
||||
|
||||
using WalFileMap = std::unordered_map<std::string, WalFilePtr>;
|
||||
WalFileMap file_map_; // mapping collection name to file
|
||||
std::mutex file_map_mutex_;
|
||||
|
||||
using MaxOpIdMap = std::unordered_map<std::string, id_t>;
|
||||
MaxOpIdMap max_op_id_map_; // mapping collection name to max operation id
|
||||
std::mutex max_op_mutex_;
|
||||
|
||||
ThreadPool cleanup_thread_pool_;
|
||||
std::mutex cleanup_thread_mutex_;
|
||||
std::list<std::future<void>> cleanup_thread_results_;
|
||||
};
|
||||
|
||||
} // namespace engine
|
||||
|
|
|
@ -32,10 +32,10 @@ class WalOperation {
|
|||
explicit WalOperation(WalOperationType type);
|
||||
|
||||
void
|
||||
SetID(id_t id) {
|
||||
SetID(idx_t id) {
|
||||
id_ = id;
|
||||
}
|
||||
id_t
|
||||
idx_t
|
||||
ID() const {
|
||||
return id_;
|
||||
}
|
||||
|
@ -46,8 +46,11 @@ class WalOperation {
|
|||
}
|
||||
|
||||
protected:
|
||||
id_t id_ = 0;
|
||||
idx_t id_ = 0;
|
||||
WalOperationType type_ = WalOperationType::INVALID;
|
||||
|
||||
public:
|
||||
std::string collection_name_;
|
||||
};
|
||||
|
||||
using WalOperationPtr = std::shared_ptr<WalOperation>;
|
||||
|
@ -58,7 +61,6 @@ class InsertEntityOperation : public WalOperation {
|
|||
InsertEntityOperation();
|
||||
|
||||
public:
|
||||
std::string collection_name_;
|
||||
std::string partition_name;
|
||||
DataChunkPtr data_chunk_;
|
||||
};
|
||||
|
@ -71,8 +73,7 @@ class DeleteEntityOperation : public WalOperation {
|
|||
DeleteEntityOperation();
|
||||
|
||||
public:
|
||||
std::string collection_name_;
|
||||
engine::IDNumbers entity_ids_;
|
||||
IDNumbers entity_ids_;
|
||||
};
|
||||
|
||||
using DeleteEntityOperationPtr = std::shared_ptr<DeleteEntityOperation>;
|
||||
|
|
|
@ -10,17 +10,269 @@
|
|||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "db/wal/WalOperationCodec.h"
|
||||
#include "utils/Log.h"
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
Status
|
||||
WalOperationCodec::SerializeOperation(const std::string& path, const InsertEntityOperationPtr& operation) {
|
||||
WalOperationCodec::WriteInsertOperation(const WalFilePtr& file, const std::string& partition_name,
|
||||
const DataChunkPtr& chunk, idx_t op_id) {
|
||||
if (file == nullptr || !file->IsOpened() || chunk == nullptr) {
|
||||
return Status(DB_ERROR, "Invalid input for write insert operation");
|
||||
}
|
||||
|
||||
try {
|
||||
// calculate total bytes, it must equal to total_bytes
|
||||
int64_t calculate_total_bytes = 0;
|
||||
calculate_total_bytes += sizeof(int32_t); // operation type
|
||||
calculate_total_bytes += sizeof(idx_t); // operation id
|
||||
calculate_total_bytes += sizeof(int64_t); // calculated total bytes
|
||||
calculate_total_bytes += sizeof(int32_t); // partition name length
|
||||
calculate_total_bytes += partition_name.size(); // partition name
|
||||
calculate_total_bytes += sizeof(int32_t); // fixed field count
|
||||
for (auto& pair : chunk->fixed_fields_) {
|
||||
calculate_total_bytes += sizeof(int32_t); // field name length
|
||||
calculate_total_bytes += pair.first.size(); // field name
|
||||
|
||||
calculate_total_bytes += sizeof(int64_t); // data size
|
||||
calculate_total_bytes += pair.second->data_.size(); // data
|
||||
}
|
||||
calculate_total_bytes += sizeof(idx_t); // operation id again
|
||||
|
||||
int64_t total_bytes = 0;
|
||||
// write operation type
|
||||
int32_t type = WalOperationType::INSERT_ENTITY;
|
||||
total_bytes += file->Write<int32_t>(&type);
|
||||
|
||||
// write operation id
|
||||
total_bytes += file->Write<idx_t>(&op_id);
|
||||
|
||||
// write calculated total bytes
|
||||
total_bytes += file->Write<int64_t>(&calculate_total_bytes);
|
||||
|
||||
// write partition name
|
||||
int32_t part_name_length = partition_name.size();
|
||||
total_bytes += file->Write<int32_t>(&part_name_length);
|
||||
if (part_name_length > 0) {
|
||||
total_bytes += file->Write((void*)partition_name.data(), part_name_length);
|
||||
}
|
||||
|
||||
// write fixed data
|
||||
int32_t field_count = chunk->fixed_fields_.size();
|
||||
total_bytes += file->Write<int32_t>(&field_count);
|
||||
for (auto& pair : chunk->fixed_fields_) {
|
||||
if (pair.second == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int32_t field_name_length = pair.first.size();
|
||||
total_bytes += file->Write<int32_t>(&field_name_length);
|
||||
total_bytes += file->Write((void*)pair.first.data(), field_name_length);
|
||||
|
||||
int64_t data_size = pair.second->data_.size();
|
||||
total_bytes += file->Write<int64_t>(&data_size);
|
||||
total_bytes += file->Write((void*)pair.second->data_.data(), data_size);
|
||||
}
|
||||
|
||||
// TODO: write variable data
|
||||
|
||||
// write operation id again
|
||||
// Note: makesure operation id is written at end, so that wal cleanup thread know which file can be deleted
|
||||
total_bytes += file->Write<idx_t>(&op_id);
|
||||
|
||||
// flush to system buffer
|
||||
file->Flush();
|
||||
|
||||
if (total_bytes != calculate_total_bytes) {
|
||||
LOG_ENGINE_ERROR_ << "wal serialize(insert) bytes " << total_bytes << " not equal "
|
||||
<< calculate_total_bytes;
|
||||
} else {
|
||||
LOG_ENGINE_DEBUG_ << "Wal serialize(insert) " << total_bytes << " bytes";
|
||||
}
|
||||
} catch (std::exception& ex) {
|
||||
std::string msg = "Failed to write insert operation, reason: " + std::string(ex.what());
|
||||
return Status(DB_ERROR, msg);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
WalOperationCodec::SerializeOperation(const std::string& path, const DeleteEntityOperationPtr& operation) {
|
||||
WalOperationCodec::WriteDeleteOperation(const WalFilePtr& file, const IDNumbers& entity_ids, idx_t op_id) {
|
||||
if (file == nullptr || !file->IsOpened() || entity_ids.empty()) {
|
||||
return Status(DB_ERROR, "Invalid input for write delete operation");
|
||||
}
|
||||
|
||||
try {
|
||||
// calculate total bytes, it must equal to total_bytes
|
||||
int64_t calculate_total_bytes = 0;
|
||||
calculate_total_bytes += sizeof(int32_t); // operation type
|
||||
calculate_total_bytes += sizeof(idx_t); // operation id
|
||||
calculate_total_bytes += sizeof(int64_t); // calculated total bytes
|
||||
calculate_total_bytes += sizeof(int64_t); // id count
|
||||
calculate_total_bytes += entity_ids.size() * sizeof(idx_t); // ids
|
||||
calculate_total_bytes += sizeof(idx_t); // operation id again
|
||||
|
||||
int64_t total_bytes = 0;
|
||||
// write operation type
|
||||
int32_t type = WalOperationType::DELETE_ENTITY;
|
||||
total_bytes += file->Write<int32_t>(&type);
|
||||
|
||||
// write operation id
|
||||
total_bytes += file->Write<idx_t>(&op_id);
|
||||
|
||||
// write calculated total bytes
|
||||
total_bytes += file->Write<int64_t>(&calculate_total_bytes);
|
||||
|
||||
// write entity ids
|
||||
int64_t id_count = entity_ids.size();
|
||||
total_bytes += file->Write<int64_t>(&id_count);
|
||||
|
||||
total_bytes += file->Write((void*)entity_ids.data(), id_count * sizeof(idx_t));
|
||||
|
||||
// write operation id again
|
||||
// Note: makesure operation id is written at end, so that wal cleanup thread know which file can be deleted
|
||||
total_bytes += file->Write<idx_t>(&op_id);
|
||||
|
||||
// flush to system buffer
|
||||
file->Flush();
|
||||
|
||||
if (total_bytes != calculate_total_bytes) {
|
||||
LOG_ENGINE_ERROR_ << "wal serialize(delete) bytes " << total_bytes << " not equal "
|
||||
<< calculate_total_bytes;
|
||||
} else {
|
||||
LOG_ENGINE_DEBUG_ << "Wal serialize(delete) " << total_bytes << " bytes";
|
||||
}
|
||||
} catch (std::exception& ex) {
|
||||
std::string msg = "Failed to write insert operation, reason: " + std::string(ex.what());
|
||||
return Status(DB_ERROR, msg);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
WalOperationCodec::IterateOperation(const WalFilePtr& file, WalOperationPtr& operation, idx_t from_op_id) {
|
||||
if (file == nullptr || !file->IsOpened()) {
|
||||
return Status(DB_ERROR, "Invalid input iterate wal operation");
|
||||
}
|
||||
|
||||
// read operation type
|
||||
int32_t type = WalOperationType::INVALID;
|
||||
int64_t read_bytes = file->Read<int32_t>(&type);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
// read operation id
|
||||
idx_t op_id = 0;
|
||||
read_bytes = file->Read<idx_t>(&op_id);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
// read total bytes
|
||||
int64_t total_bytes = 0;
|
||||
read_bytes = file->Read<int64_t>(&total_bytes);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
// if the operation id is less/equal than from_op_id, skip this operation
|
||||
if (op_id <= from_op_id) {
|
||||
int64_t offset = total_bytes - sizeof(int32_t) - sizeof(idx_t) - sizeof(int64_t);
|
||||
file->SeekForward(offset);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
if (type == WalOperationType::INSERT_ENTITY) {
|
||||
// read partition name
|
||||
int32_t part_name_length = 0;
|
||||
read_bytes = file->Read<int32_t>(&part_name_length);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
std::string partition_name;
|
||||
read_bytes = file->ReadStr(partition_name, part_name_length);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
// read fixed data
|
||||
int32_t field_count = 0;
|
||||
read_bytes = file->Read<int32_t>(&field_count);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
DataChunkPtr chunk = std::make_shared<DataChunk>();
|
||||
for (int32_t i = 0; i < field_count; i++) {
|
||||
int32_t field_name_length = 0;
|
||||
read_bytes = file->Read<int32_t>(&field_name_length);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
// field name
|
||||
std::string field_name;
|
||||
read_bytes = file->ReadStr(field_name, field_name_length);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
// binary data
|
||||
int64_t data_size = 0;
|
||||
read_bytes = file->Read<int64_t>(&data_size);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
BinaryDataPtr data = std::make_shared<BinaryData>();
|
||||
data->data_.resize(data_size);
|
||||
read_bytes = file->Read(data->data_.data(), data_size);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
chunk->fixed_fields_.insert(std::make_pair(field_name, data));
|
||||
}
|
||||
|
||||
InsertEntityOperationPtr insert_op = std::make_shared<InsertEntityOperation>();
|
||||
insert_op->partition_name = partition_name;
|
||||
insert_op->data_chunk_ = chunk;
|
||||
operation = insert_op;
|
||||
} else if (type == WalOperationType::DELETE_ENTITY) {
|
||||
// read entity ids
|
||||
int64_t id_count = 0;
|
||||
read_bytes = file->Read<int64_t>(&id_count);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
IDNumbers ids;
|
||||
ids.resize(id_count);
|
||||
read_bytes = file->Read(ids.data(), id_count * sizeof(idx_t));
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
DeleteEntityOperationPtr delete_op = std::make_shared<DeleteEntityOperation>();
|
||||
delete_op->entity_ids_.swap(ids);
|
||||
operation = delete_op;
|
||||
}
|
||||
|
||||
read_bytes = file->Read<idx_t>(&op_id);
|
||||
if (read_bytes <= 0) {
|
||||
return Status(DB_ERROR, "End of file");
|
||||
}
|
||||
|
||||
operation->SetID(op_id);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include <string>
|
||||
|
||||
#include "db/wal/WalFile.h"
|
||||
#include "db/wal/WalOperation.h"
|
||||
#include "utils/Status.h"
|
||||
|
||||
|
@ -22,10 +23,14 @@ namespace engine {
|
|||
class WalOperationCodec {
|
||||
public:
|
||||
static Status
|
||||
SerializeOperation(const std::string& path, const InsertEntityOperationPtr& operation);
|
||||
WriteInsertOperation(const WalFilePtr& file, const std::string& partition_name, const DataChunkPtr& chunk,
|
||||
idx_t op_id);
|
||||
|
||||
static Status
|
||||
SerializeOperation(const std::string& path, const DeleteEntityOperationPtr& operation);
|
||||
WriteDeleteOperation(const WalFilePtr& file, const IDNumbers& entity_ids, idx_t op_id);
|
||||
|
||||
static Status
|
||||
IterateOperation(const WalFilePtr& file, WalOperationPtr& operation, idx_t from_op_id);
|
||||
};
|
||||
|
||||
} // namespace engine
|
||||
|
|
|
@ -25,9 +25,36 @@ WalProxy::WalProxy(const DBPtr& db, const DBOptions& options) : DBProxy(db, opti
|
|||
}
|
||||
}
|
||||
|
||||
Status
|
||||
WalProxy::Start() {
|
||||
// let service start
|
||||
auto status = db_->Start();
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
|
||||
if (options_.wal_enable_) {
|
||||
WalManager::GetInstance().Start(options_);
|
||||
WalManager::GetInstance().Recovery(db_);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
Status
|
||||
WalProxy::Stop() {
|
||||
auto status = db_->Stop();
|
||||
|
||||
if (options_.wal_enable_) {
|
||||
WalManager::GetInstance().Stop();
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
Status
|
||||
WalProxy::Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id) {
|
||||
idx_t op_id) {
|
||||
// write operation into disk
|
||||
InsertEntityOperationPtr op = std::make_shared<InsertEntityOperation>();
|
||||
op->collection_name_ = collection_name;
|
||||
|
@ -38,7 +65,7 @@ WalProxy::Insert(const std::string& collection_name, const std::string& partitio
|
|||
}
|
||||
|
||||
Status
|
||||
WalProxy::DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id) {
|
||||
WalProxy::DeleteEntityByID(const std::string& collection_name, const IDNumbers& entity_ids, idx_t op_id) {
|
||||
// write operation into disk
|
||||
DeleteEntityOperationPtr op = std::make_shared<DeleteEntityOperation>();
|
||||
op->collection_name_ = collection_name;
|
||||
|
@ -47,17 +74,5 @@ WalProxy::DeleteEntityByID(const std::string& collection_name, const engine::IDN
|
|||
return WalManager::GetInstance().RecordOperation(op, db_);
|
||||
}
|
||||
|
||||
Status
|
||||
WalProxy::Flush(const std::string& collection_name) {
|
||||
auto status = db_->Flush(collection_name);
|
||||
return status;
|
||||
}
|
||||
|
||||
Status
|
||||
WalProxy::Flush() {
|
||||
auto status = db_->Flush();
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace engine
|
||||
} // namespace milvus
|
||||
|
|
|
@ -24,18 +24,18 @@ class WalProxy : public DBProxy {
|
|||
public:
|
||||
WalProxy(const DBPtr& db, const DBOptions& options);
|
||||
|
||||
Status
|
||||
Start() override;
|
||||
|
||||
Status
|
||||
Stop() override;
|
||||
|
||||
Status
|
||||
Insert(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk,
|
||||
id_t op_id) override;
|
||||
idx_t op_id) override;
|
||||
|
||||
Status
|
||||
DeleteEntityByID(const std::string& collection_name, const engine::IDNumbers& entity_ids, id_t op_id) override;
|
||||
|
||||
Status
|
||||
Flush(const std::string& collection_name) override;
|
||||
|
||||
Status
|
||||
Flush() override;
|
||||
DeleteEntityByID(const std::string& collection_name, const IDNumbers& entity_ids, idx_t op_id) override;
|
||||
|
||||
private:
|
||||
};
|
||||
|
|
|
@ -41,14 +41,14 @@ IdBloomFilter::GetBloomFilter() {
|
|||
}
|
||||
|
||||
bool
|
||||
IdBloomFilter::Check(id_t uid) {
|
||||
IdBloomFilter::Check(engine::idx_t uid) {
|
||||
std::string s = std::to_string(uid);
|
||||
const std::lock_guard<std::mutex> lock(mutex_);
|
||||
return scaling_bloom_check(bloom_filter_, s.c_str(), s.size());
|
||||
}
|
||||
|
||||
Status
|
||||
IdBloomFilter::Add(id_t uid) {
|
||||
IdBloomFilter::Add(engine::idx_t uid) {
|
||||
std::string s = std::to_string(uid);
|
||||
const std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (scaling_bloom_add(bloom_filter_, s.c_str(), s.size(), uid) == -1) {
|
||||
|
@ -60,7 +60,7 @@ IdBloomFilter::Add(id_t uid) {
|
|||
}
|
||||
|
||||
Status
|
||||
IdBloomFilter::Remove(id_t uid) {
|
||||
IdBloomFilter::Remove(engine::idx_t uid) {
|
||||
std::string s = std::to_string(uid);
|
||||
const std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (scaling_bloom_remove(bloom_filter_, s.c_str(), s.size(), uid) == -1) {
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#include "cache/DataObj.h"
|
||||
#include "dablooms/dablooms.h"
|
||||
#include "db/Types.h"
|
||||
#include "utils/Status.h"
|
||||
|
||||
namespace milvus {
|
||||
|
@ -37,13 +38,13 @@ class IdBloomFilter : public cache::DataObj {
|
|||
GetBloomFilter();
|
||||
|
||||
bool
|
||||
Check(id_t uid);
|
||||
Check(engine::idx_t uid);
|
||||
|
||||
Status
|
||||
Add(id_t uid);
|
||||
Add(engine::idx_t uid);
|
||||
|
||||
Status
|
||||
Remove(id_t uid);
|
||||
Remove(engine::idx_t uid);
|
||||
|
||||
int64_t
|
||||
Size() override;
|
||||
|
|
|
@ -218,7 +218,7 @@ SegmentReader::LoadFieldsEntities(const std::vector<std::string>& fields_name, c
|
|||
}
|
||||
|
||||
Status
|
||||
SegmentReader::LoadUids(std::vector<engine::id_t>& uids) {
|
||||
SegmentReader::LoadUids(std::vector<engine::idx_t>& uids) {
|
||||
engine::BinaryDataPtr raw;
|
||||
auto status = LoadField(engine::FIELD_UID, raw);
|
||||
if (!status.ok()) {
|
||||
|
@ -230,14 +230,14 @@ SegmentReader::LoadUids(std::vector<engine::id_t>& uids) {
|
|||
return Status(DB_ERROR, "Failed to load id field");
|
||||
}
|
||||
|
||||
if (raw->data_.size() % sizeof(engine::id_t) != 0) {
|
||||
if (raw->data_.size() % sizeof(engine::idx_t) != 0) {
|
||||
std::string err_msg = "Failed to load uids: illegal file size";
|
||||
LOG_ENGINE_ERROR_ << err_msg;
|
||||
return Status(DB_ERROR, err_msg);
|
||||
}
|
||||
|
||||
uids.clear();
|
||||
uids.resize(raw->data_.size() / sizeof(engine::id_t));
|
||||
uids.resize(raw->data_.size() / sizeof(engine::idx_t));
|
||||
memcpy(uids.data(), raw->data_.data(), raw->data_.size());
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -50,7 +50,7 @@ class SegmentReader {
|
|||
engine::DataChunkPtr& data_chunk);
|
||||
|
||||
Status
|
||||
LoadUids(std::vector<engine::id_t>& uids);
|
||||
LoadUids(std::vector<engine::idx_t>& uids);
|
||||
|
||||
Status
|
||||
LoadVectorIndex(const std::string& field_name, knowhere::VecIndexPtr& index_ptr, bool flat = false);
|
||||
|
|
|
@ -359,7 +359,7 @@ SegmentWriter::RowCount() {
|
|||
}
|
||||
|
||||
Status
|
||||
SegmentWriter::LoadUids(std::vector<engine::id_t>& uids) {
|
||||
SegmentWriter::LoadUids(std::vector<engine::idx_t>& uids) {
|
||||
engine::BinaryDataPtr raw;
|
||||
auto status = segment_ptr_->GetFixedFieldData(engine::FIELD_UID, raw);
|
||||
if (!status.ok()) {
|
||||
|
@ -371,14 +371,14 @@ SegmentWriter::LoadUids(std::vector<engine::id_t>& uids) {
|
|||
return Status(DB_ERROR, "Invalid id field");
|
||||
}
|
||||
|
||||
if (raw->data_.size() % sizeof(engine::id_t) != 0) {
|
||||
if (raw->data_.size() % sizeof(engine::idx_t) != 0) {
|
||||
std::string err_msg = "Failed to load uids: illegal file size";
|
||||
LOG_ENGINE_ERROR_ << err_msg;
|
||||
return Status(DB_ERROR, err_msg);
|
||||
}
|
||||
|
||||
uids.clear();
|
||||
uids.resize(raw->data_.size() / sizeof(engine::id_t));
|
||||
uids.resize(raw->data_.size() / sizeof(engine::idx_t));
|
||||
memcpy(uids.data(), raw->data_.data(), raw->data_.size());
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -61,7 +61,7 @@ class SegmentWriter {
|
|||
RowCount();
|
||||
|
||||
Status
|
||||
LoadUids(std::vector<engine::id_t>& uids);
|
||||
LoadUids(std::vector<engine::idx_t>& uids);
|
||||
|
||||
Status
|
||||
SetVectorIndex(const std::string& field_name, const knowhere::VecIndexPtr& index);
|
||||
|
|
|
@ -17,6 +17,7 @@ set( TEST_FILES ${CMAKE_CURRENT_SOURCE_DIR}/utils.cpp
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/test_db.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_meta.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_ss_event.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_wal.cpp
|
||||
)
|
||||
|
||||
add_executable( test_db
|
||||
|
|
|
@ -776,7 +776,7 @@ TEST_F(DBTest, CompactTest) {
|
|||
if (delete_count < 0) {
|
||||
return;
|
||||
}
|
||||
std::vector<milvus::engine::id_t> delete_ids;
|
||||
std::vector<milvus::engine::idx_t> delete_ids;
|
||||
for (auto i = from; i < to; ++i) {
|
||||
delete_ids.push_back(batch_entity_ids[i]);
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ TEST_F(SegmentTest, SegmentTest) {
|
|||
break;
|
||||
}
|
||||
|
||||
std::vector<milvus::engine::id_t> raw_uids = {123};
|
||||
std::vector<milvus::engine::idx_t> raw_uids = {123};
|
||||
std::vector<uint8_t> raw_vectors = {1, 2, 3, 4};
|
||||
|
||||
{
|
||||
|
|
|
@ -0,0 +1,283 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include <fiu-control.h>
|
||||
#include <fiu/fiu-local.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "db/DBProxy.h"
|
||||
#include "db/utils.h"
|
||||
#include "db/wal/WalManager.h"
|
||||
#include "db/wal/WalFile.h"
|
||||
#include "db/wal/WalOperationCodec.h"
|
||||
#include "db/wal/WalProxy.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using DBProxy = milvus::engine::DBProxy;
|
||||
using WalFile = milvus::engine::WalFile;
|
||||
using WalManager = milvus::engine::WalManager;
|
||||
using WalOperation = milvus::engine::WalOperation;
|
||||
using WalOperationPtr = milvus::engine::WalOperationPtr;
|
||||
using WalOperationType = milvus::engine::WalOperationType;
|
||||
using WalOperationCodec = milvus::engine::WalOperationCodec;
|
||||
using WalProxy = milvus::engine::WalProxy;
|
||||
|
||||
void CreateChunk(DataChunkPtr& chunk, int64_t row_count, int64_t& chunk_size) {
|
||||
chunk = std::make_shared<DataChunk>();
|
||||
chunk->count_ = row_count;
|
||||
chunk_size = 0;
|
||||
{
|
||||
// int32 type field
|
||||
std::string field_name = "f1";
|
||||
auto bin = std::make_shared<BinaryData>();
|
||||
bin->data_.resize(chunk->count_ * sizeof(int32_t));
|
||||
int32_t* p = (int32_t*)(bin->data_.data());
|
||||
for (int64_t i = 0; i < chunk->count_; ++i) {
|
||||
p[i] = i;
|
||||
}
|
||||
chunk->fixed_fields_.insert(std::make_pair(field_name, bin));
|
||||
chunk_size += chunk->count_ * sizeof(int32_t);
|
||||
}
|
||||
{
|
||||
// vector type field
|
||||
int64_t dimension = 128;
|
||||
std::string field_name = "f2";
|
||||
auto bin = std::make_shared<BinaryData>();
|
||||
bin->data_.resize(chunk->count_ * sizeof(float) * dimension);
|
||||
float* p = (float*)(bin->data_.data());
|
||||
for (int64_t i = 0; i < chunk->count_; ++i) {
|
||||
for (int64_t j = 0; j < dimension; ++j) {
|
||||
p[i * dimension + j] = i * j / 100.0;
|
||||
}
|
||||
}
|
||||
chunk->fixed_fields_.insert(std::make_pair(field_name, bin));
|
||||
chunk_size += chunk->count_ * sizeof(float) * dimension;
|
||||
}
|
||||
}
|
||||
|
||||
class DummyDB : public DBProxy {
|
||||
public:
|
||||
Status
|
||||
Insert(const std::string& collection_name,
|
||||
const std::string& partition_name,
|
||||
DataChunkPtr& data_chunk,
|
||||
idx_t op_id) override {
|
||||
WalManager::GetInstance().OperationDone(collection_name, op_id);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
DeleteEntityByID(const std::string& collection_name,
|
||||
const IDNumbers& entity_ids,
|
||||
idx_t op_id) override {
|
||||
WalManager::GetInstance().OperationDone(collection_name, op_id);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST_F(WalTest, WalFileTest) {
|
||||
std::string path = "/tmp/milvus_wal/test_file";
|
||||
idx_t last_id = 12345;
|
||||
|
||||
{
|
||||
WalFile file;
|
||||
ASSERT_FALSE(file.IsOpened());
|
||||
ASSERT_EQ(file.Size(), 0);
|
||||
|
||||
int64_t k = 0;
|
||||
int64_t bytes = file.Write<int64_t>(&k);
|
||||
ASSERT_EQ(bytes, 0);
|
||||
|
||||
bytes = file.Read<int64_t>(&k);
|
||||
ASSERT_EQ(bytes, 0);
|
||||
|
||||
auto status = file.CloseFile();
|
||||
ASSERT_TRUE(status.ok());
|
||||
}
|
||||
|
||||
{
|
||||
WalFile file;
|
||||
auto status = file.OpenFile(path, WalFile::APPEND_WRITE);
|
||||
ASSERT_TRUE(status.ok());
|
||||
ASSERT_TRUE(file.IsOpened());
|
||||
|
||||
int64_t max_size = milvus::engine::MAX_WAL_FILE_SIZE;
|
||||
ASSERT_FALSE(file.ExceedMaxSize(max_size));
|
||||
|
||||
int64_t total_bytes = 0;
|
||||
int8_t len = path.size();
|
||||
int64_t bytes = file.Write<int8_t>(&len);
|
||||
ASSERT_EQ(bytes, sizeof(int8_t));
|
||||
total_bytes += bytes;
|
||||
|
||||
ASSERT_TRUE(file.ExceedMaxSize(max_size));
|
||||
|
||||
bytes = file.Write(path.data(), len);
|
||||
ASSERT_EQ(bytes, len);
|
||||
total_bytes += bytes;
|
||||
|
||||
bytes = file.Write<idx_t>(&last_id);
|
||||
ASSERT_EQ(bytes, sizeof(last_id));
|
||||
total_bytes += bytes;
|
||||
|
||||
int64_t file_size = file.Size();
|
||||
ASSERT_EQ(total_bytes, file_size);
|
||||
|
||||
std::string file_path = file.Path();
|
||||
ASSERT_EQ(file_path, path);
|
||||
|
||||
file.Flush();
|
||||
file.CloseFile();
|
||||
ASSERT_FALSE(file.IsOpened());
|
||||
}
|
||||
|
||||
{
|
||||
WalFile file;
|
||||
auto status = file.OpenFile(path, WalFile::READ);
|
||||
ASSERT_TRUE(status.ok());
|
||||
|
||||
int8_t len = 0;
|
||||
int64_t bytes = file.Read<int8_t>(&len);
|
||||
ASSERT_EQ(bytes, sizeof(int8_t));
|
||||
|
||||
std::string str;
|
||||
bytes = file.ReadStr(str, len);
|
||||
ASSERT_EQ(bytes, len);
|
||||
ASSERT_EQ(str, path);
|
||||
|
||||
idx_t id_read = 0;
|
||||
bytes = file.Read<int64_t>(&id_read);
|
||||
ASSERT_EQ(bytes, sizeof(id_read));
|
||||
ASSERT_EQ(id_read, last_id);
|
||||
|
||||
idx_t op_id = 0;
|
||||
status = file.ReadLastOpId(op_id);
|
||||
ASSERT_TRUE(status.ok());
|
||||
ASSERT_EQ(op_id, last_id);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(WalTest, WalFileCodecTest) {
    std::string path = "/tmp/milvus_wal/test_file";
    auto file = std::make_shared<WalFile>();

    // Record id/type of every operation appended so the final replay
    // pass can validate them in order.
    IDNumbers written_ids;
    std::vector<WalOperationType> written_types;

    // append an insert operation, then verify the last-op id on re-read
    {
        auto status = file->OpenFile(path, WalFile::APPEND_WRITE);
        ASSERT_TRUE(status.ok());

        DataChunkPtr chunk;
        int64_t chunk_size = 0;
        CreateChunk(chunk, 1000, chunk_size);

        std::string partition_name = "p1";
        idx_t insert_op_id = 100;
        written_ids.push_back(insert_op_id);
        written_types.push_back(WalOperationType::INSERT_ENTITY);
        WalOperationCodec::WriteInsertOperation(file, partition_name, chunk, insert_op_id);

        // the encoded record is at least as large as the raw chunk
        ASSERT_GE(file->Size(), chunk_size);

        file->CloseFile();

        WalFile reader;
        reader.OpenFile(path, WalFile::READ);
        idx_t last_id = 0;
        reader.ReadLastOpId(last_id);
        ASSERT_EQ(last_id, insert_op_id);
    }

    // append a delete operation, then verify the last-op id on re-read
    {
        auto status = file->OpenFile(path, WalFile::APPEND_WRITE);
        ASSERT_TRUE(status.ok());

        auto size_before = file->Size();

        IDNumbers entity_ids = {1, 2, 3};
        idx_t delete_op_id = 200;
        written_ids.push_back(delete_op_id);
        written_types.push_back(WalOperationType::DELETE_ENTITY);
        WalOperationCodec::WriteDeleteOperation(file, entity_ids, delete_op_id);

        // the file must grow by at least the encoded id array
        ASSERT_GE(file->Size() - size_before, entity_ids.size() * sizeof(idx_t));

        file->CloseFile();

        WalFile reader;
        reader.OpenFile(path, WalFile::READ);
        idx_t last_id = 0;
        reader.ReadLastOpId(last_id);
        ASSERT_EQ(last_id, delete_op_id);
    }

    // replay the file and check every recorded operation comes back in order
    {
        auto status = file->OpenFile(path, WalFile::READ);
        ASSERT_TRUE(status.ok());

        Status iter_status;
        size_t op_index = 0;
        while (iter_status.ok()) {
            WalOperationPtr operation;
            iter_status = WalOperationCodec::IterateOperation(file, operation, 0);
            if (operation == nullptr) {
                continue;
            }

            ASSERT_EQ(operation->ID(), written_ids[op_index]);
            ASSERT_EQ(operation->Type(), written_types[op_index]);
            ++op_index;
        }
        ASSERT_EQ(op_index, written_ids.size());
    }
}
|
||||
|
||||
TEST_F(WalTest, WalProxyTest) {
    const std::string collection_name = "col_1";
    const std::string partition_name = "part_1";

    // Pump 1000 operations through the proxy — in total more than 400MB of
    // data — mixing one delete in for every nine inserts.
    for (int64_t round = 1; round <= 1000; ++round) {
        const idx_t op_id = round;
        if (round % 10 == 0) {
            IDNumbers entity_ids = {1, 2, 3};
            auto status = db_->DeleteEntityByID(collection_name, entity_ids, op_id);
            ASSERT_TRUE(status.ok());
        } else {
            DataChunkPtr chunk;
            int64_t chunk_size = 0;
            CreateChunk(chunk, 1000, chunk_size);

            auto status = db_->Insert(collection_name, partition_name, chunk, op_id);
            ASSERT_TRUE(status.ok());
        }
    }
}
|
||||
|
||||
// Placeholder for WalManager start/recovery coverage. The manager needs
// DBOptions and a DB instance, which this standalone TEST (not a fixture)
// does not have yet — the calls below are kept as a sketch of the intent.
TEST(WalManagerTest, WalManagerTest) {
    // WalManager::GetInstance().Start(options_);
    // WalManager::GetInstance().Recovery(db_);
}
|
|
@ -38,6 +38,7 @@
|
|||
#include "db/meta/backend/MockEngine.h"
|
||||
#include "db/meta/backend/MySqlEngine.h"
|
||||
#include "db/meta/backend/SqliteEngine.h"
|
||||
#include "db/wal/WalProxy.h"
|
||||
#include "scheduler/ResourceFactory.h"
|
||||
#include "scheduler/SchedInst.h"
|
||||
#include "utils/CommonUtil.h"
|
||||
|
@ -289,6 +290,7 @@ SchedulerTest::TearDown() {
|
|||
BaseTest::TearDown();
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
EventTest::SetUp() {
|
||||
auto uri = "mock://:@:/";
|
||||
|
@ -300,6 +302,30 @@ void
|
|||
EventTest::TearDown() {
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Build the DBOptions used by this fixture: wal enabled, metadata stored
// under a /tmp working directory, backed by the mock meta engine.
milvus::engine::DBOptions
WalTest::GetOptions() {
    milvus::engine::DBOptions opt;
    opt.wal_enable_ = true;
    opt.meta_.path_ = "/tmp/milvus_wal";
    opt.meta_.backend_uri_ = "mock://:@:/";
    return opt;
}
|
||||
|
||||
void
|
||||
WalTest::SetUp() {
|
||||
milvus::engine::DBPtr db = std::make_shared<milvus::engine::DBProxy>(nullptr, GetOptions());
|
||||
db_ = std::make_shared<milvus::engine::WalProxy>(db, GetOptions());
|
||||
db_->Start();
|
||||
}
|
||||
|
||||
void
|
||||
WalTest::TearDown() {
|
||||
db_->Stop();
|
||||
db_ = nullptr;
|
||||
std::experimental::filesystem::remove_all(GetOptions().meta_.path_);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main(int argc, char **argv) {
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <string>
|
||||
|
||||
#include "db/DB.h"
|
||||
#include "db/DBFactory.h"
|
||||
#include "db/meta/MetaAdapter.h"
|
||||
#include "db/snapshot/CompoundOperations.h"
|
||||
#include "db/snapshot/Context.h"
|
||||
|
@ -82,13 +83,18 @@ using IterateSegmentFileHandler = milvus::engine::snapshot::IterateHandler<Segme
|
|||
using PartitionIterator = milvus::engine::snapshot::PartitionIterator;
|
||||
using SegmentIterator = milvus::engine::snapshot::SegmentIterator;
|
||||
using SegmentFileIterator = milvus::engine::snapshot::SegmentFileIterator;
|
||||
using DB = milvus::engine::DB;
|
||||
using Status = milvus::Status;
|
||||
using Store = milvus::engine::snapshot::Store;
|
||||
|
||||
using StorePtr = milvus::engine::snapshot::Store::Ptr;
|
||||
using MetaAdapterPtr = milvus::engine::meta::MetaAdapterPtr;
|
||||
|
||||
using DB = milvus::engine::DB;
|
||||
using Status = milvus::Status;
|
||||
using idx_t = milvus::engine::idx_t;
|
||||
using IDNumbers = milvus::engine::IDNumbers;
|
||||
using DataChunk = milvus::engine::DataChunk;
|
||||
using DataChunkPtr = milvus::engine::DataChunkPtr;
|
||||
using BinaryData = milvus::engine::BinaryData;
|
||||
|
||||
inline int
|
||||
RandomInt(int start, int end) {
|
||||
std::random_device dev;
|
||||
|
@ -391,6 +397,7 @@ class SchedulerTest : public BaseTest {
|
|||
TearDown() override;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
class EventTest : public BaseTest {
|
||||
protected:
|
||||
StorePtr store_;
|
||||
|
@ -401,3 +408,17 @@ class EventTest : public BaseTest {
|
|||
void
|
||||
TearDown() override;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Fixture for wal unit tests: SetUp wraps a DBProxy with a WalProxy and
// starts it; TearDown stops the stack and removes the wal directory.
class WalTest : public ::testing::Test {
 protected:
    // Object under test: a WalProxy chained in front of a DBProxy.
    std::shared_ptr<DB> db_;

    // Returns DBOptions with wal enabled and a /tmp working directory.
    milvus::engine::DBOptions
    GetOptions();

    void
    SetUp() override;
    void
    TearDown() override;
};
|
||||
|
|
Loading…
Reference in New Issue