From 2f4207acb2a321548431dd21c47954e64fefb63d Mon Sep 17 00:00:00 2001 From: yukun Date: Fri, 18 Sep 2020 12:04:29 +0800 Subject: [PATCH] Improve MergeTopkToResultSet performance (#3788) * Fix WebServer README.md Signed-off-by: fishpenguin * Add changelog Signed-off-by: fishpenguin * Fix cpplint Signed-off-by: fishpenguin * Fix Readme.md Signed-off-by: fishpenguin * Fix MergeTopkToResultSet performance problem Signed-off-by: fishpenguin Signed-off-by: shengjun.li --- core/src/scheduler/task/SearchTask.cpp | 32 ++++++++++++++++---------- core/src/scheduler/task/SearchTask.h | 3 ++- core/src/server/web_impl/README.md | 14 +++++------ 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 69a5a3386b..22372be885 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -140,7 +140,13 @@ SearchTask::OnExecute() { } SearchTask::MergeTopkToResultSet(context.query_result_->result_ids_, context.query_result_->result_distances_, spec_k, nq, topk, - ascending_reduce_, search_job->query_result()); + ascending_reduce_, search_job->query_result()->result_ids_, + search_job->query_result()->result_distances_); + + LOG_ENGINE_DEBUG_ << "Merged result: " + << "nq = " << nq << ", topk = " << topk + << ", len of ids = " << context.query_result_->result_ids_.size() + << ", len of distance = " << context.query_result_->result_distances_.size(); } rc.RecordSection("reduce topk done"); @@ -155,17 +161,19 @@ SearchTask::OnExecute() { void SearchTask::MergeTopkToResultSet(const engine::ResultIds& src_ids, const engine::ResultDistances& src_distances, - size_t src_k, size_t nq, size_t topk, bool ascending, engine::QueryResultPtr& result) { + size_t src_k, size_t nq, size_t topk, bool ascending, engine::ResultIds& tar_ids, + engine::ResultDistances& tar_distances) { if (src_ids.empty()) { LOG_ENGINE_DEBUG_ << LogOut("[%s][%d] Search result is empty.", "search", 0); return; } - size_t tar_k = result->result_ids_.size() / nq; + size_t tar_k = tar_ids.size() / nq; size_t buf_k = std::min(topk, src_k + tar_k); engine::ResultIds buf_ids(nq * buf_k, -1); engine::ResultDistances buf_distances(nq * buf_k, 0.0); + for (uint64_t i = 0; i < nq; i++) { size_t buf_k_j = 0, src_k_j = 0, tar_k_j = 0; size_t buf_idx, src_idx, tar_idx; @@ -179,15 +187,15 @@ SearchTask::MergeTopkToResultSet(const engine::ResultIds& src_ids, const engine: tar_idx = tar_k_multi_i + tar_k_j; buf_idx = buf_k_multi_i + buf_k_j; - if ((result->result_ids_[tar_idx] == -1) || // initialized value - (ascending && src_distances[src_idx] < result->result_distances_[tar_idx]) || - (!ascending && src_distances[src_idx] > result->result_distances_[tar_idx])) { + if ((tar_ids[tar_idx] == -1) || // initialized value + (ascending && src_distances[src_idx] < tar_distances[tar_idx]) || + (!ascending && src_distances[src_idx] > tar_distances[tar_idx])) { buf_ids[buf_idx] = src_ids[src_idx]; buf_distances[buf_idx] = src_distances[src_idx]; src_k_j++; } else { - buf_ids[buf_idx] = result->result_ids_[tar_idx]; - buf_distances[buf_idx] = result->result_distances_[tar_idx]; + buf_ids[buf_idx] = tar_ids[tar_idx]; + buf_distances[buf_idx] = tar_distances[tar_idx]; tar_k_j++; } buf_k_j++; @@ -207,16 +215,16 @@ SearchTask::MergeTopkToResultSet(const engine::ResultIds& src_ids, const engine: while (buf_k_j < buf_k && tar_k_j < tar_k) { buf_idx = buf_k_multi_i + buf_k_j; tar_idx = tar_k_multi_i + tar_k_j; - buf_ids[buf_idx] = result->result_ids_[tar_idx]; - buf_distances[buf_idx] = result->result_distances_[tar_idx]; + buf_ids[buf_idx] = tar_ids[tar_idx]; + buf_distances[buf_idx] = tar_distances[tar_idx]; tar_k_j++; buf_k_j++; } } } } - result->result_ids_.swap(buf_ids); - result->result_distances_.swap(buf_distances); + tar_ids.swap(buf_ids); + tar_distances.swap(buf_distances); } int64_t diff --git a/core/src/scheduler/task/SearchTask.h b/core/src/scheduler/task/SearchTask.h index bb846e231a..a4b211d8d6 100644 --- a/core/src/scheduler/task/SearchTask.h +++ b/core/src/scheduler/task/SearchTask.h @@ -47,7 +47,8 @@ class SearchTask : public Task { static void MergeTopkToResultSet(const engine::ResultIds& src_ids, const engine::ResultDistances& src_distances, size_t src_k, - size_t nq, size_t topk, bool ascending, engine::QueryResultPtr& result); + size_t nq, size_t topk, bool ascending, engine::ResultIds& tar_ids, + engine::ResultDistances& tar_distances); int64_t nq(); diff --git a/core/src/server/web_impl/README.md b/core/src/server/web_impl/README.md index d55fda0416..bc17f54bcf 100644 --- a/core/src/server/web_impl/README.md +++ b/core/src/server/web_impl/README.md @@ -18,9 +18,9 @@ - [`/collections/{collection_name}` (GET)](#collectionscollection_name-get) - [`/collections/{collection_name}` (DELETE)](#collectionscollection_name-delete) - [`/collections/{collection_name}` (OPTIONS)](#collectionscollection_name-options) - - [`/collections/{collection_name}/fields/{field_name}/indexes` (POST)](#collectionscollection_nameindexes-post) - - [`/collections/{collection_name}/fields/{field_name}/indexes` (DELETE)](#collectionscollection_nameindexes-delete) - - [`/collections/{collection_name}/fields/{field_name}/indexes` (OPTIONS)](#collectionscollection_nameindexes-options) + - [`/collections/{collection_name}/fields/{field_name}/indexes` (POST)](#collectionscollection_namefieldsfield_nameindexes-post) + - [`/collections/{collection_name}/fields/{field_name}/indexes` (DELETE)](#collectionscollection_namefieldsfield_nameindexes-delete) + - [`/collections/{collection_name}/fields/{field_name}/indexes` (OPTIONS)](#collectionscollection_namefieldsfield_nameindexes-options) - [`/collections/{collection_name}/partitions` (GET)](#collectionscollection_namepartitions-get) - [`/collections/{collection_name}/partitions` (POST)](#collectionscollection_namepartitions-post) - [`/collections/{collection_name}/partitions` (OPTIONS)](#collectionscollection_namepartitions-options) @@ -29,10 +29,10 @@ - [`/collections/{collection_name}/entities` (PUT)](#collectionscollection_nameentities-put) - [`/collections/{collection_name}/entities` (POST)](#collectionscollection_nameentities-post) - [`/collections/{collection_name}/entities` (DELETE)](#collectionscollection_nameentities-delete) - - [`/collections/{collection_name}/entities` (GET)](#collectionscollection_namevectorsidentitiy_id-get) + - [`/collections/{collection_name}/entities` (GET)](#collectionscollection_nameentities_id-get) - [`/collections/{collection_name}/entities` (OPTIONS)](#collectionscollection_nameentities-options) - [`/system/{msg}` (GET)](#systemmsg-get) - - [`system/{op}` (PUT)](#systemop-put) + - [`/system/{op}` (PUT)](#systemop-put) - [Error Codes](#error-codes) @@ -730,7 +730,7 @@ $ curl -X DELETE "http://127.0.0.1:19121/collections/test_collection/indexes" -H If the deletion is successful, no message will be returned. -### `/collections/{collection_name}/indexes` (OPTIONS) +### `/collections/{collection_name}/fields/{field_name}/indexes` (OPTIONS) Use this API for Cross-Origin Resource Sharing (CORS). @@ -738,7 +738,7 @@ Use this API for Cross-Origin Resource Sharing (CORS). | Request Component | Value | | ----------------- | ---------------------------------------- | -| Name | `/collections/{collection_name}/fields/{field_name}/indexes/{index_name}` | +| Name | `/collections/{collection_name}/fields/{field_name}/indexes` | | Header | N/A | | Body | N/A | | Method | OPTIONS |