mirror of https://github.com/milvus-io/milvus.git
MS-412 Fix gpu cache logical error
Former-commit-id: 6bd2a056feee54393fa4bc16b1b233f54dac0500pull/191/head
parent
4023e110c0
commit
662320c07d
|
@ -5,6 +5,8 @@ Please mark all change in change log and use the ticket from JIRA.
|
|||
# Milvus 0.4.0 (2019-07-28)
|
||||
|
||||
## Bug
|
||||
- MS-411 - Fix metric unittest linking error
|
||||
- MS-412 - Fix GPU cache logic error
|
||||
|
||||
## Improvement
|
||||
- MS-327 - Clean code for milvus
|
||||
|
@ -80,7 +82,6 @@ Please mark all change in change log and use the ticket from JIRA.
|
|||
- MS-330 - Stability test failed caused by server core dumped
|
||||
- MS-347 - Build index hangs again
|
||||
- MS-382 - Fix MySQLMetaImpl::CleanUpFilesWithTTL unknown-column bug
|
||||
- MS-411 - Fix metric unittest linking error
|
||||
|
||||
## Improvement
|
||||
- MS-156 - Add unittest for merge result functions
|
||||
|
|
|
@ -65,21 +65,21 @@ resource_config:
|
|||
memory: 64
|
||||
device_id: 0
|
||||
enable_loader: true
|
||||
enable_executor: true
|
||||
enable_executor: false
|
||||
|
||||
gtx1060:
|
||||
type: GPU
|
||||
memory: 6
|
||||
device_id: 0
|
||||
enable_loader: false
|
||||
enable_executor: false
|
||||
enable_loader: true
|
||||
enable_executor: true
|
||||
|
||||
gtx1660:
|
||||
type: GPU
|
||||
memory: 6
|
||||
device_id: 1
|
||||
enable_loader: false
|
||||
enable_executor: false
|
||||
enable_loader: true
|
||||
enable_executor: true
|
||||
|
||||
# connection list, length: 0~N
|
||||
# format: -${resource_name}===${resource_name}
|
||||
|
|
|
@ -139,9 +139,11 @@ Status ExecutionEngineImpl::Load(bool to_cache) {
|
|||
}
|
||||
|
||||
Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) {
|
||||
index_ = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_);
|
||||
bool already_in_cache = (index_ != nullptr);
|
||||
if (!index_) {
|
||||
auto index = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_);
|
||||
bool already_in_cache = (index != nullptr);
|
||||
if (already_in_cache) {
|
||||
index_ = index;
|
||||
} else {
|
||||
try {
|
||||
index_ = index_->CopyToGpu(device_id);
|
||||
ENGINE_LOG_DEBUG << "CPU to GPU" << device_id;
|
||||
|
@ -161,9 +163,11 @@ Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) {
|
|||
}
|
||||
|
||||
Status ExecutionEngineImpl::CopyToCpu() {
|
||||
index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_);
|
||||
bool already_in_cache = (index_ != nullptr);
|
||||
if (!index_) {
|
||||
auto index = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_);
|
||||
bool already_in_cache = (index != nullptr);
|
||||
if (already_in_cache) {
|
||||
index_ = index;
|
||||
} else {
|
||||
try {
|
||||
index_ = index_->CopyToCpu();
|
||||
ENGINE_LOG_DEBUG << "GPU to CPU";
|
||||
|
@ -175,7 +179,7 @@ Status ExecutionEngineImpl::CopyToCpu() {
|
|||
}
|
||||
}
|
||||
|
||||
if(!already_in_cache) {
|
||||
if (!already_in_cache) {
|
||||
Cache();
|
||||
}
|
||||
return Status::OK();
|
||||
|
@ -276,7 +280,7 @@ Status ExecutionEngineImpl::Init() {
|
|||
using namespace zilliz::milvus::server;
|
||||
ServerConfig &config = ServerConfig::GetInstance();
|
||||
ConfigNode server_config = config.GetConfig(CONFIG_SERVER);
|
||||
gpu_num_ = server_config.GetInt32Value("gpu_index", 0);
|
||||
gpu_num_ = server_config.GetInt32Value("gpu_index", 0);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
******************************************************************************/
|
||||
|
||||
#include <list>
|
||||
#include <random>
|
||||
#include "Action.h"
|
||||
|
||||
|
||||
|
@ -38,6 +39,22 @@ push_task_round_robin(TaskTable &self_task_table, std::list<ResourcePtr> &neighb
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
push_task_randomly(TaskTable &self_task_table, std::vector<ResourcePtr> &neighbours) {
|
||||
std::random_device rd;
|
||||
std::mt19937 mt(rd());
|
||||
std::uniform_int_distribution<uint64_t> dist(0, neighbours.size() - 1);
|
||||
CacheMgr cache;
|
||||
|
||||
auto indexes = PickToMove(self_task_table, cache, self_task_table.Size());
|
||||
for (auto index : indexes) {
|
||||
if (self_task_table.Move(index)) {
|
||||
auto task = self_task_table.Get(index)->task;
|
||||
neighbours[dist(mt)]->task_table().Put(task);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Action::PushTaskToNeighbour(const ResourceWPtr &res) {
|
||||
auto self = res.lock();
|
||||
|
@ -60,18 +77,21 @@ Action::PushTaskToNeighbourHasExecutor(const ResourceWPtr &res) {
|
|||
auto self = res.lock();
|
||||
if (not self) return;
|
||||
|
||||
std::list<ResourcePtr> neighbours;
|
||||
std::list<ResourcePtr> l_neighbours;
|
||||
std::vector<ResourcePtr> v_neighbours;
|
||||
for (auto &neighbour_node : self->GetNeighbours()) {
|
||||
auto node = neighbour_node.neighbour_node.lock();
|
||||
if (not node) continue;
|
||||
|
||||
auto resource = std::static_pointer_cast<Resource>(node);
|
||||
if (resource->HasExecutor()) {
|
||||
neighbours.emplace_back(resource);
|
||||
l_neighbours.push_back(resource);
|
||||
v_neighbours.push_back(resource);
|
||||
}
|
||||
}
|
||||
|
||||
push_task_round_robin(self->task_table(), neighbours);
|
||||
// push_task_round_robin(self->task_table(), l_neighbours);
|
||||
push_task_randomly(self->task_table(), v_neighbours);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue