prepare change memmanager for wal (#3376)

Signed-off-by: groot <yihua.mo@zilliz.com>
pull/3380/head
groot 2020-08-21 11:14:29 +08:00 committed by GitHub
parent 15ee27f359
commit 3d40a3886f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 80 additions and 93 deletions

View File

@ -206,7 +206,8 @@ DBImpl::DropCollection(const std::string& collection_name) {
auto& snapshots = snapshot::Snapshots::GetInstance();
STATUS_CHECK(snapshots.GetSnapshot(ss, collection_name));
mem_mgr_->EraseMem(ss->GetCollectionId()); // not allow insert
// erase insert buffer of this collection
mem_mgr_->EraseMem(ss->GetCollectionId());
return snapshots.DropCollection(ss->GetCollectionId(), std::numeric_limits<snapshot::LSN_TYPE>::max());
}
@ -291,8 +292,11 @@ DBImpl::DropPartition(const std::string& collection_name, const std::string& par
snapshot::ScopedSnapshotT ss;
STATUS_CHECK(snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_name));
// SS TODO: Is below step needed? Or How to implement it?
/* mem_mgr_->EraseMem(partition_name); */
// erase insert buffer of this partition
auto partition = ss->GetPartition(partition_name);
if (partition != nullptr) {
mem_mgr_->EraseMem(ss->GetCollectionId(), partition->GetID());
}
snapshot::PartitionContext context;
context.name = partition_name;

View File

@ -106,7 +106,7 @@ MemCollection::EraseMem(int64_t partition_id) {
}
Status
MemCollection::Serialize(uint64_t wal_lsn) {
MemCollection::Serialize() {
TimeRecorder recorder("MemCollection::Serialize collection " + std::to_string(collection_id_));
if (!doc_ids_to_delete_.empty()) {
@ -132,7 +132,7 @@ MemCollection::Serialize(uint64_t wal_lsn) {
for (auto& partition_segments : mem_segments_) {
MemSegmentList& segments = partition_segments.second;
for (auto& segment : segments) {
auto status = segment->Serialize(wal_lsn);
auto status = segment->Serialize();
if (!status.ok()) {
return status;
}
@ -171,7 +171,6 @@ MemCollection::ApplyDeletes() {
STATUS_CHECK(snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_id_));
snapshot::OperationContext context;
context.lsn = lsn_;
auto segments_op = std::make_shared<snapshot::CompoundSegmentsOperation>(context, ss);
int64_t segment_iterated = 0;
@ -308,15 +307,5 @@ MemCollection::ApplyDeletes() {
return segments_op->Push();
}
uint64_t
MemCollection::GetLSN() {
return lsn_;
}
void
MemCollection::SetLSN(uint64_t lsn) {
lsn_ = lsn;
}
} // namespace engine
} // namespace milvus

View File

@ -46,7 +46,7 @@ class MemCollection {
EraseMem(int64_t partition_id);
Status
Serialize(uint64_t wal_lsn);
Serialize();
int64_t
GetCollectionId() const;
@ -54,12 +54,6 @@ class MemCollection {
size_t
GetCurrentMem();
uint64_t
GetLSN();
void
SetLSN(uint64_t lsn);
private:
Status
ApplyDeletes();
@ -74,9 +68,7 @@ class MemCollection {
std::mutex mutex_;
std::set<idx_t> doc_ids_to_delete_;
std::atomic<uint64_t> lsn_;
}; // SSMemCollection
};
using MemCollectionPtr = std::shared_ptr<MemCollection>;

View File

@ -27,10 +27,10 @@ namespace engine {
class MemManager {
public:
virtual Status
InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, uint64_t lsn) = 0;
InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, idx_t op_id) = 0;
virtual Status
DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) = 0;
DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, idx_t op_id) = 0;
virtual Status
Flush(int64_t collection_id) = 0;
@ -52,7 +52,7 @@ class MemManager {
virtual size_t
GetCurrentMem() = 0;
}; // MemManagerAbstract
};
using MemManagerPtr = std::shared_ptr<MemManager>;

View File

@ -36,15 +36,15 @@ MemManagerImpl::GetMemByCollection(int64_t collection_id) {
}
Status
MemManagerImpl::InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, uint64_t lsn) {
MemManagerImpl::InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, idx_t op_id) {
auto status = ValidateChunk(collection_id, chunk);
if (!status.ok()) {
return status;
}
VectorSourcePtr source = std::make_shared<VectorSource>(chunk);
VectorSourcePtr source = std::make_shared<VectorSource>(chunk, op_id);
std::unique_lock<std::mutex> lock(mutex_);
return InsertEntitiesNoLock(collection_id, partition_id, source, lsn);
return InsertEntitiesNoLock(collection_id, partition_id, source);
}
Status
@ -141,21 +141,18 @@ MemManagerImpl::ValidateChunk(int64_t collection_id, const DataChunkPtr& chunk)
Status
MemManagerImpl::InsertEntitiesNoLock(int64_t collection_id, int64_t partition_id,
const milvus::engine::VectorSourcePtr& source, uint64_t lsn) {
const milvus::engine::VectorSourcePtr& source) {
MemCollectionPtr mem = GetMemByCollection(collection_id);
mem->SetLSN(lsn);
auto status = mem->Add(partition_id, source);
return status;
}
Status
MemManagerImpl::DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) {
MemManagerImpl::DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, idx_t op_id) {
std::unique_lock<std::mutex> lock(mutex_);
MemCollectionPtr mem = GetMemByCollection(collection_id);
mem->SetLSN(lsn);
auto status = mem->Delete(entity_ids);
if (!status.ok()) {
return status;
@ -188,10 +185,9 @@ MemManagerImpl::InternalFlush(std::set<int64_t>& collection_ids) {
}
std::unique_lock<std::mutex> lock(serialization_mtx_);
auto max_lsn = GetMaxLSN(temp_immutable_list);
for (auto& mem : temp_immutable_list) {
LOG_ENGINE_DEBUG_ << "Flushing collection: " << mem->GetCollectionId();
auto status = mem->Serialize(max_lsn);
auto status = mem->Serialize();
if (!status.ok()) {
LOG_ENGINE_ERROR_ << "Flush collection " << mem->GetCollectionId() << " failed";
return status;
@ -294,17 +290,5 @@ MemManagerImpl::GetCurrentMem() {
return GetCurrentMutableMem() + GetCurrentImmutableMem();
}
uint64_t
MemManagerImpl::GetMaxLSN(const MemList& collections) {
uint64_t max_lsn = 0;
for (auto& collection : collections) {
auto cur_lsn = collection->GetLSN();
if (collection->GetLSN() > max_lsn) {
max_lsn = cur_lsn;
}
}
return max_lsn;
}
} // namespace engine
} // namespace milvus

View File

@ -39,10 +39,10 @@ class MemManagerImpl : public MemManager {
~MemManagerImpl() = default;
Status
InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, uint64_t lsn) override;
InsertEntities(int64_t collection_id, int64_t partition_id, const DataChunkPtr& chunk, idx_t op_id) override;
Status
DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, uint64_t lsn) override;
DeleteEntities(int64_t collection_id, const std::vector<idx_t>& entity_ids, idx_t op_id) override;
Status
Flush(int64_t collection_id) override;
@ -73,7 +73,7 @@ class MemManagerImpl : public MemManager {
ValidateChunk(int64_t collection_id, const DataChunkPtr& chunk);
Status
InsertEntitiesNoLock(int64_t collection_id, int64_t partition_id, const VectorSourcePtr& source, uint64_t lsn);
InsertEntitiesNoLock(int64_t collection_id, int64_t partition_id, const VectorSourcePtr& source);
Status
ToImmutable();
@ -81,9 +81,6 @@ class MemManagerImpl : public MemManager {
Status
ToImmutable(int64_t collection_id);
uint64_t
GetMaxLSN(const MemList& collections);
Status
InternalFlush(std::set<int64_t>& collection_ids);
@ -94,7 +91,7 @@ class MemManagerImpl : public MemManager {
DBOptions options_;
std::mutex mutex_;
std::mutex serialization_mtx_;
}; // NewMemManager
};
} // namespace engine
} // namespace milvus

View File

@ -242,7 +242,7 @@ MemSegment::IsFull() {
}
Status
MemSegment::Serialize(uint64_t wal_lsn) {
MemSegment::Serialize() {
int64_t size = GetCurrentMem();
server::CollectSerializeMetrics metrics(size);
@ -260,7 +260,7 @@ MemSegment::Serialize(uint64_t wal_lsn) {
STATUS_CHECK(operation_->CommitRowCount(segment_writer_ptr_->RowCount()));
STATUS_CHECK(operation_->Push());
LOG_ENGINE_DEBUG_ << "New segment " << segment_->GetID() << " serialized, lsn = " << wal_lsn;
LOG_ENGINE_DEBUG_ << "New segment " << segment_->GetID() << " serialized";
return Status::OK();
}

View File

@ -51,7 +51,7 @@ class MemSegment {
IsFull();
Status
Serialize(uint64_t wal_lsn);
Serialize();
int64_t
GetSegmentId() const;
@ -69,9 +69,8 @@ class MemSegment {
DBOptions options_;
int64_t current_mem_;
// ExecutionEnginePtr execution_engine_;
segment::SegmentWriterPtr segment_writer_ptr_;
}; // SSMemTableFile
};
using MemSegmentPtr = std::shared_ptr<MemSegment>;

View File

@ -21,7 +21,7 @@
namespace milvus {
namespace engine {
VectorSource::VectorSource(const DataChunkPtr& chunk) : chunk_(chunk) {
VectorSource::VectorSource(const DataChunkPtr& chunk, idx_t op_id) : chunk_(chunk), op_id_(op_id) {
}
Status

View File

@ -25,11 +25,9 @@
namespace milvus {
namespace engine {
// TODO(zhiru): this class needs to be refactored once attributes are added
class VectorSource {
public:
explicit VectorSource(const DataChunkPtr& chunk);
explicit VectorSource(const DataChunkPtr& chunk, idx_t op_id);
Status
Add(const segment::SegmentWriterPtr& segment_writer_ptr, const int64_t& num_attrs_to_add, int64_t& num_attrs_added);
@ -37,11 +35,16 @@ class VectorSource {
bool
AllAdded();
idx_t
OperationID() const {
return op_id_;
}
private:
DataChunkPtr chunk_;
idx_t op_id_ = 0;
int64_t current_num_added_ = 0;
}; // SSVectorSource
};
using VectorSourcePtr = std::shared_ptr<VectorSource>;

View File

@ -11,6 +11,9 @@
#include "db/wal/WalManager.h"
#include "db/Utils.h"
#include "db/snapshot/ResourceHelper.h"
#include "db/snapshot/ResourceTypes.h"
#include "db/snapshot/Snapshots.h"
#include "db/wal/WalOperationCodec.h"
#include "utils/CommonUtil.h"
@ -67,13 +70,15 @@ Status
WalManager::DropCollection(const std::string& collection_name) {
// write a placeholder file 'del' under collection folder, let cleanup thread remove this folder
std::string path = ConstructFilePath(collection_name, WAL_DEL_FILE_NAME);
WalFile file;
file.OpenFile(path, WalFile::OVER_WRITE);
bool del = true;
file.Write<bool>(&del);
if (!path.empty()) {
WalFile file;
file.OpenFile(path, WalFile::OVER_WRITE);
bool del = true;
file.Write<bool>(&del);
AddCleanupTask(collection_name);
StartCleanupThread();
AddCleanupTask(collection_name);
StartCleanupThread();
}
return Status::OK();
}
@ -124,9 +129,11 @@ WalManager::OperationDone(const std::string& collection_name, idx_t op_id) {
// write max op id to disk
std::string path = ConstructFilePath(collection_name, WAL_MAX_OP_FILE_NAME);
WalFile file;
file.OpenFile(path, WalFile::OVER_WRITE);
file.Write<idx_t>(&op_id);
if (!path.empty()) {
WalFile file;
file.OpenFile(path, WalFile::OVER_WRITE);
file.Write<idx_t>(&op_id);
}
}
}
@ -256,9 +263,9 @@ WalManager::RecordInsertOperation(const InsertEntityOperationPtr& operation, con
DataChunkPtr& chunk = chunks[i];
int64_t chunk_size = utils::GetSizeOfChunk(chunk);
{
// open wal file
std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id));
// open wal file
std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id));
if (!path.empty()) {
std::lock_guard<std::mutex> lock(file_map_mutex_);
WalFilePtr file = file_map_[operation->collection_name_];
if (file == nullptr) {
@ -309,9 +316,9 @@ WalManager::RecordDeleteOperation(const DeleteEntityOperationPtr& operation, con
idx_t op_id = id_gen_.GetNextIDNumber();
int64_t append_size = operation->entity_ids_.size() * sizeof(idx_t);
{
// open wal file
std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id));
// open wal file
std::string path = ConstructFilePath(operation->collection_name_, std::to_string(op_id));
if (!path.empty()) {
std::lock_guard<std::mutex> lock(file_map_mutex_);
WalFilePtr file = file_map_[operation->collection_name_];
if (file == nullptr) {
@ -339,17 +346,29 @@ WalManager::RecordDeleteOperation(const DeleteEntityOperationPtr& operation, con
std::string
WalManager::ConstructFilePath(const std::string& collection_name, const std::string& file_name) {
std::experimental::filesystem::path full_path(wal_path_);
std::experimental::filesystem::create_directory(full_path);
full_path.append(collection_name);
std::experimental::filesystem::create_directory(full_path);
// use snapshot to construct wal path
// typically, the wal file path is like: /xxx/xxx/wal/C_1/xxxxxxxxxx
// if the snapshot not work, use collection name to construct path
snapshot::ScopedSnapshotT ss;
auto status = snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_name);
if (status.ok() && ss->GetCollection() != nullptr) {
std::string col_path = snapshot::GetResPath<snapshot::Collection>(wal_path_, ss->GetCollection());
if (!file_name.empty()) {
std::experimental::filesystem::path full_path(col_path);
std::experimental::filesystem::create_directory(full_path);
full_path.append(file_name);
}
std::string path(full_path.c_str());
return path;
std::string path(full_path.c_str());
return path;
} else {
std::experimental::filesystem::path full_path(wal_path_);
full_path.append(collection_name);
std::experimental::filesystem::create_directory(full_path);
full_path.append(file_name);
std::string path(full_path.c_str());
return path;
}
}
void

View File

@ -436,4 +436,4 @@ TEST_F(WalTest, WalManagerTest) {
WalManager::GetInstance().Recovery(db_2);
ASSERT_EQ(db_2->InsertCount(), insert_count);
ASSERT_EQ(db_2->DeleteCount(), delete_count);
}
}