prepare change memmanager for wal (#3376)

Signed-off-by: groot <yihua.mo@zilliz.com>
pull/3380/head
groot 2020-08-21 11:14:29 +08:00 committed by GitHub
parent 15ee27f359
commit 3d40a3886f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 80 additions and 93 deletions

View File

@ -206,7 +206,8 @@ DBImpl::DropCollection(const std::string& collection_name) {
auto& snapshots = snapshot::Snapshots::GetInstance(); auto& snapshots = snapshot::Snapshots::GetInstance();
STATUS_CHECK(snapshots.GetSnapshot(ss, collection_name)); STATUS_CHECK(snapshots.GetSnapshot(ss, collection_name));
mem_mgr_->EraseMem(ss->GetCollectionId()); // not allow insert // erase insert buffer of this collection
mem_mgr_->EraseMem(ss->GetCollectionId());
return snapshots.DropCollection(ss->GetCollectionId(), std::numeric_limits<snapshot::LSN_TYPE>::max()); return snapshots.DropCollection(ss->GetCollectionId(), std::numeric_limits<snapshot::LSN_TYPE>::max());
} }
@ -291,8 +292,11 @@ DBImpl::DropPartition(const std::string& collection_name, const std::string& par
snapshot::ScopedSnapshotT ss; snapshot::ScopedSnapshotT ss;
STATUS_CHECK(snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_name)); STATUS_CHECK(snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_name));
// SS TODO: Is below step needed? Or How to implement it? // erase insert buffer of this partition
/* mem_mgr_->EraseMem(partition_name); */ auto partition = ss->GetPartition(partition_name);
if (partition != nullptr) {
mem_mgr_->EraseMem(ss->GetCollectionId(), partition->GetID());
}
snapshot::PartitionContext context; snapshot::PartitionContext context;
context.name = partition_name; context.name = partition_name;

View File

@ -106,7 +106,7 @@ MemCollection::EraseMem(int64_t partition_id) {
} }
Status Status
MemCollection::Serialize(uint64_t wal_lsn) { MemCollection::Serialize() {
TimeRecorder recorder("MemCollection::Serialize collection " + std::to_string(collection_id_)); TimeRecorder recorder("MemCollection::Serialize collection " + std::to_string(collection_id_));
if (!doc_ids_to_delete_.empty()) { if (!doc_ids_to_delete_.empty()) {
@ -132,7 +132,7 @@ MemCollection::Serialize(uint64_t wal_lsn) {
for (auto& partition_segments : mem_segments_) { for (auto& partition_segments : mem_segments_) {
MemSegmentList& segments = partition_segments.second; MemSegmentList& segments = partition_segments.second;
for (auto& segment : segments) { for (auto& segment : segments) {
auto status = segment->Serialize(wal_lsn); auto status = segment->Serialize();
if (!status.ok()) { if (!status.ok()) {
return status; return status;
} }
@ -171,7 +171,6 @@ MemCollection::ApplyDeletes() {
STATUS_CHECK(snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_id_)); STATUS_CHECK(snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_id_));
snapshot::OperationContext context; snapshot::OperationContext context;
context.lsn = lsn_;
auto segments_op = std::make_shared<snapshot::CompoundSegmentsOperation>(context, ss); auto segments_op = std::make_shared<snapshot::CompoundSegmentsOperation>(context, ss);
int64_t segment_iterated = 0; int64_t segment_iterated = 0;
@ -308,15 +307,5 @@ MemCollection::ApplyDeletes() {
return segments_op->Push(); return segments_op->Push();
} }
// Returns the current value of the collection's atomic LSN member.
uint64_t
MemCollection::GetLSN() {
    return lsn_.load();
}
// Overwrites the collection's atomic LSN member with `lsn`.
void
MemCollection::SetLSN(uint64_t lsn) {
    lsn_.store(lsn);
}
} // namespace engine } // namespace engine
} // namespace milvus } // namespace milvus

View File

@ -46,7 +46,7 @@ class MemCollection {
EraseMem(int64_t partition_id); EraseMem(int64_t partition_id);
Status Status
Serialize(uint64_t wal_lsn); Serialize();
int64_t int64_t
GetCollectionId() const; GetCollectionId() const;
@ -54,12 +54,6 @@ class MemCollection {
size_t size_t
GetCurrentMem(); GetCurrentMem();
uint64_t
GetLSN();
void
SetLSN(uint64_t lsn);
private: private:
Status Status
ApplyDeletes(); ApplyDeletes();
@ -74,9 +68,7 @@ class MemCollection {
std::mutex mutex_; std::mutex mutex_;
std::set<idx_t> doc_ids_to_delete_; std::set<idx_t> doc_ids_to_delete_;
};
std::atomic<uint64_t> lsn_;
}; // SSMemCollection
using MemCollectionPtr = std::shared_ptr<MemCollection>; using MemCollectionPtr = std::shared_ptr<MemCollection>;

View File

@ -27,10 +27,10 @@ namespace engine {
class MemManager { class MemManager {
public: public:
virtual Status virtual Status
InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, uint64_t lsn) = 0; InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, idx_t op_id) = 0;
virtual Status virtual Status
DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) = 0; DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, idx_t op_id) = 0;
virtual Status virtual Status
Flush(int64_t collection_id) = 0; Flush(int64_t collection_id) = 0;
@ -52,7 +52,7 @@ class MemManager {
virtual size_t virtual size_t
GetCurrentMem() = 0; GetCurrentMem() = 0;
}; // MemManagerAbstract };
using MemManagerPtr = std::shared_ptr<MemManager>; using MemManagerPtr = std::shared_ptr<MemManager>;

View File

@ -36,15 +36,15 @@ MemManagerImpl::GetMemByCollection(int64_t collection_id) {
} }
Status Status
MemManagerImpl::InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, uint64_t lsn) { MemManagerImpl::InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, idx_t op_id) {
auto status = ValidateChunk(collection_id, chunk); auto status = ValidateChunk(collection_id, chunk);
if (!status.ok()) { if (!status.ok()) {
return status; return status;
} }
VectorSourcePtr source = std::make_shared<VectorSource>(chunk); VectorSourcePtr source = std::make_shared<VectorSource>(chunk, op_id);
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
return InsertEntitiesNoLock(collection_id, partition_id, source, lsn); return InsertEntitiesNoLock(collection_id, partition_id, source);
} }
Status Status
@ -141,21 +141,18 @@ MemManagerImpl::ValidateChunk(int64_t collection_id, const DataChunkPtr& chunk)
Status Status
MemManagerImpl::InsertEntitiesNoLock(int64_t collection_id, int64_t partition_id, MemManagerImpl::InsertEntitiesNoLock(int64_t collection_id, int64_t partition_id,
const milvus::engine::VectorSourcePtr& source, uint64_t lsn) { const milvus::engine::VectorSourcePtr& source) {
MemCollectionPtr mem = GetMemByCollection(collection_id); MemCollectionPtr mem = GetMemByCollection(collection_id);
mem->SetLSN(lsn);
auto status = mem->Add(partition_id, source); auto status = mem->Add(partition_id, source);
return status; return status;
} }
Status Status
MemManagerImpl::DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) { MemManagerImpl::DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, idx_t op_id) {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
MemCollectionPtr mem = GetMemByCollection(collection_id); MemCollectionPtr mem = GetMemByCollection(collection_id);
mem->SetLSN(lsn);
auto status = mem->Delete(entity_ids); auto status = mem->Delete(entity_ids);
if (!status.ok()) { if (!status.ok()) {
return status; return status;
@ -188,10 +185,9 @@ MemManagerImpl::InternalFlush(std::set<int64_t>& collection_ids) {
} }
std::unique_lock<std::mutex> lock(serialization_mtx_); std::unique_lock<std::mutex> lock(serialization_mtx_);
auto max_lsn = GetMaxLSN(temp_immutable_list);
for (auto& mem : temp_immutable_list) { for (auto& mem : temp_immutable_list) {
LOG_ENGINE_DEBUG_ << "Flushing collection: " << mem->GetCollectionId(); LOG_ENGINE_DEBUG_ << "Flushing collection: " << mem->GetCollectionId();
auto status = mem->Serialize(max_lsn); auto status = mem->Serialize();
if (!status.ok()) { if (!status.ok()) {
LOG_ENGINE_ERROR_ << "Flush collection " << mem->GetCollectionId() << " failed"; LOG_ENGINE_ERROR_ << "Flush collection " << mem->GetCollectionId() << " failed";
return status; return status;
@ -294,17 +290,5 @@ MemManagerImpl::GetCurrentMem() {
return GetCurrentMutableMem() + GetCurrentImmutableMem(); return GetCurrentMutableMem() + GetCurrentImmutableMem();
} }
// Returns the largest LSN reported by any collection in `collections`,
// or 0 when the list is empty.
uint64_t
MemManagerImpl::GetMaxLSN(const MemList& collections) {
    uint64_t max_lsn = 0;
    for (const auto& collection : collections) {
        // Read the LSN exactly once per collection. The previous code compared
        // a second GetLSN() call but stored the first one; since the LSN is an
        // atomic that SetLSN() can update, the two reads could disagree.
        const uint64_t cur_lsn = collection->GetLSN();
        if (cur_lsn > max_lsn) {
            max_lsn = cur_lsn;
        }
    }
    return max_lsn;
}
} // namespace engine } // namespace engine
} // namespace milvus } // namespace milvus

View File

@ -39,10 +39,10 @@ class MemManagerImpl : public MemManager {
~MemManagerImpl() = default; ~MemManagerImpl() = default;
Status Status
InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, uint64_t lsn) override; InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, idx_t op_id) override;
Status Status
DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) override; DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, idx_t op_id) override;
Status Status
Flush(int64_t collection_id) override; Flush(int64_t collection_id) override;
@ -73,7 +73,7 @@ class MemManagerImpl : public MemManager {
ValidateChunk(int64_t collection_id, const DataChunkPtr& chunk); ValidateChunk(int64_t collection_id, const DataChunkPtr& chunk);
Status Status
InsertEntitiesNoLock(int64_t collection_id, int64_t partition_id, const VectorSourcePtr& source, uint64_t lsn); InsertEntitiesNoLock(int64_t collection_id, int64_t partition_id, const VectorSourcePtr& source);
Status Status
ToImmutable(); ToImmutable();
@ -81,9 +81,6 @@ class MemManagerImpl : public MemManager {
Status Status
ToImmutable(int64_t collection_id); ToImmutable(int64_t collection_id);
uint64_t
GetMaxLSN(const MemList& collections);
Status Status
InternalFlush(std::set<int64_t>& collection_ids); InternalFlush(std::set<int64_t>& collection_ids);
@ -94,7 +91,7 @@ class MemManagerImpl : public MemManager {
DBOptions options_; DBOptions options_;
std::mutex mutex_; std::mutex mutex_;
std::mutex serialization_mtx_; std::mutex serialization_mtx_;
}; // NewMemManager };
} // namespace engine } // namespace engine
} // namespace milvus } // namespace milvus

View File

@ -242,7 +242,7 @@ MemSegment::IsFull() {
} }
Status Status
MemSegment::Serialize(uint64_t wal_lsn) { MemSegment::Serialize() {
int64_t size = GetCurrentMem(); int64_t size = GetCurrentMem();
server::CollectSerializeMetrics metrics(size); server::CollectSerializeMetrics metrics(size);
@ -260,7 +260,7 @@ MemSegment::Serialize(uint64_t wal_lsn) {
STATUS_CHECK(operation_->CommitRowCount(segment_writer_ptr_->RowCount())); STATUS_CHECK(operation_->CommitRowCount(segment_writer_ptr_->RowCount()));
STATUS_CHECK(operation_->Push()); STATUS_CHECK(operation_->Push());
LOG_ENGINE_DEBUG_ << "New segment " << segment_->GetID() << " serialized, lsn = " << wal_lsn; LOG_ENGINE_DEBUG_ << "New segment " << segment_->GetID() << " serialized";
return Status::OK(); return Status::OK();
} }

View File

@ -51,7 +51,7 @@ class MemSegment {
IsFull(); IsFull();
Status Status
Serialize(uint64_t wal_lsn); Serialize();
int64_t int64_t
GetSegmentId() const; GetSegmentId() const;
@ -69,9 +69,8 @@ class MemSegment {
DBOptions options_; DBOptions options_;
int64_t current_mem_; int64_t current_mem_;
// ExecutionEnginePtr execution_engine_;
segment::SegmentWriterPtr segment_writer_ptr_; segment::SegmentWriterPtr segment_writer_ptr_;
}; // SSMemTableFile };
using MemSegmentPtr = std::shared_ptr<MemSegment>; using MemSegmentPtr = std::shared_ptr<MemSegment>;

View File

@ -21,7 +21,7 @@
namespace milvus { namespace milvus {
namespace engine { namespace engine {
VectorSource::VectorSource(const DataChunkPtr& chunk) : chunk_(chunk) { VectorSource::VectorSource(const DataChunkPtr& chunk, idx_t op_id) : chunk_(chunk), op_id_(op_id) {
} }
Status Status

View File

@ -25,11 +25,9 @@
namespace milvus { namespace milvus {
namespace engine { namespace engine {
// TODO(zhiru): this class needs to be refactored once attributes are added
class VectorSource { class VectorSource {
public: public:
explicit VectorSource(const DataChunkPtr& chunk); explicit VectorSource(const DataChunkPtr& chunk, idx_t op_id);
Status Status
Add(const segment::SegmentWriterPtr& segment_writer_ptr, const int64_t& num_attrs_to_add, int64_t& num_attrs_added); Add(const segment::SegmentWriterPtr& segment_writer_ptr, const int64_t& num_attrs_to_add, int64_t& num_attrs_added);
@ -37,11 +35,16 @@ class VectorSource {
bool bool
AllAdded(); AllAdded();
// Accessor for the operation id stored in op_id_.
idx_t
OperationID() const {
    return this->op_id_;
}
private: private:
DataChunkPtr chunk_; DataChunkPtr chunk_;
idx_t op_id_ = 0;
int64_t current_num_added_ = 0; int64_t current_num_added_ = 0;
}; // SSVectorSource };
using VectorSourcePtr = std::shared_ptr<VectorSource>; using VectorSourcePtr = std::shared_ptr<VectorSource>;

View File

@ -11,6 +11,9 @@
#include "db/wal/WalManager.h" #include "db/wal/WalManager.h"
#include "db/Utils.h" #include "db/Utils.h"
#include "db/snapshot/ResourceHelper.h"
#include "db/snapshot/ResourceTypes.h"
#include "db/snapshot/Snapshots.h"
#include "db/wal/WalOperationCodec.h" #include "db/wal/WalOperationCodec.h"
#include "utils/CommonUtil.h" #include "utils/CommonUtil.h"
@ -67,13 +70,15 @@ Status
WalManager::DropCollection(const std::string& collection_name) { WalManager::DropCollection(const std::string& collection_name) {
// write a placeholder file 'del' under collection folder, let cleanup thread remove this folder // write a placeholder file 'del' under collection folder, let cleanup thread remove this folder
std::string path = ConstructFilePath(collection_name, WAL_DEL_FILE_NAME); std::string path = ConstructFilePath(collection_name, WAL_DEL_FILE_NAME);
WalFile file; if (!path.empty()) {
file.OpenFile(path, WalFile::OVER_WRITE); WalFile file;
bool del = true; file.OpenFile(path, WalFile::OVER_WRITE);
file.Write<bool>(&del); bool del = true;
file.Write<bool>(&del);
AddCleanupTask(collection_name); AddCleanupTask(collection_name);
StartCleanupThread(); StartCleanupThread();
}
return Status::OK(); return Status::OK();
} }
@ -124,9 +129,11 @@ WalManager::OperationDone(const std::string& collection_name, idx_t op_id) {
// write max op id to disk // write max op id to disk
std::string path = ConstructFilePath(collection_name, WAL_MAX_OP_FILE_NAME); std::string path = ConstructFilePath(collection_name, WAL_MAX_OP_FILE_NAME);
WalFile file; if (!path.empty()) {
file.OpenFile(path, WalFile::OVER_WRITE); WalFile file;
file.Write<idx_t>(&op_id); file.OpenFile(path, WalFile::OVER_WRITE);
file.Write<idx_t>(&op_id);
}
} }
} }
@ -256,9 +263,9 @@ WalManager::RecordInsertOperation(const InsertEntityOperationPtr& operation, con
DataChunkPtr& chunk = chunks[i]; DataChunkPtr& chunk = chunks[i];
int64_t chunk_size = utils::GetSizeOfChunk(chunk); int64_t chunk_size = utils::GetSizeOfChunk(chunk);
{ // open wal file
// open wal file std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id));
std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id)); if (!path.empty()) {
std::lock_guard<std::mutex> lock(file_map_mutex_); std::lock_guard<std::mutex> lock(file_map_mutex_);
WalFilePtr file = file_map_[operation->collection_name_]; WalFilePtr file = file_map_[operation->collection_name_];
if (file == nullptr) { if (file == nullptr) {
@ -309,9 +316,9 @@ WalManager::RecordDeleteOperation(const DeleteEntityOperationPtr& operation, con
idx_t op_id = id_gen_.GetNextIDNumber(); idx_t op_id = id_gen_.GetNextIDNumber();
int64_t append_size = operation->entity_ids_.size() * sizeof(idx_t); int64_t append_size = operation->entity_ids_.size() * sizeof(idx_t);
{ // open wal file
// open wal file std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id));
std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id)); if (!path.empty()) {
std::lock_guard<std::mutex> lock(file_map_mutex_); std::lock_guard<std::mutex> lock(file_map_mutex_);
WalFilePtr file = file_map_[operation->collection_name_]; WalFilePtr file = file_map_[operation->collection_name_];
if (file == nullptr) { if (file == nullptr) {
@ -339,17 +346,29 @@ WalManager::RecordDeleteOperation(const DeleteEntityOperationPtr& operation, con
std::string std::string
WalManager::ConstructFilePath(const std::string& collection_name, const std::string& file_name) { WalManager::ConstructFilePath(const std::string& collection_name, const std::string& file_name) {
std::experimental::filesystem::path full_path(wal_path_); // use snapshot to construct wal path
std::experimental::filesystem::create_directory(full_path); // typically, the wal file path is like: /xxx/xxx/wal/C_1/xxxxxxxxxx
full_path.append(collection_name); // if the snapshot does not work, use collection name to construct path
std::experimental::filesystem::create_directory(full_path); snapshot::ScopedSnapshotT ss;
auto status = snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_name);
if (status.ok() && ss->GetCollection() != nullptr) {
std::string col_path = snapshot::GetResPath<snapshot::Collection>(wal_path_, ss->GetCollection());
if (!file_name.empty()) { std::experimental::filesystem::path full_path(col_path);
std::experimental::filesystem::create_directory(full_path);
full_path.append(file_name); full_path.append(file_name);
}
std::string path(full_path.c_str()); std::string path(full_path.c_str());
return path; return path;
} else {
std::experimental::filesystem::path full_path(wal_path_);
full_path.append(collection_name);
std::experimental::filesystem::create_directory(full_path);
full_path.append(file_name);
std::string path(full_path.c_str());
return path;
}
} }
void void