From afe69c46b5a7311a6f14e5aee5a25e33c997373c Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sun, 20 Oct 2019 18:02:49 +0800 Subject: [PATCH 1/4] Update faiss package Former-commit-id: c019b47b819397ebf533b47d6718190ee2dc5410 --- core/src/index/cmake/ThirdPartyPackagesCore.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index ee1d88ee32..99f52dc284 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -243,7 +243,8 @@ if(CUSTOMIZATION) # set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0 # set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0 # set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1 - set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 + # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 + set(FAISS_MD5 "87fdd86351ffcaf3f80dc26ade63c44b") # commit-id 841a156e67e8e22cd8088e1b58c00afbf2efc30b branch-0.2.1 endif() else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz") From 2256f607094e6a7c4f3b619d05ac20a2a7ddeb31 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sun, 20 Oct 2019 18:55:05 +0800 Subject: [PATCH 2/4] Add cost print and IVF stats Former-commit-id: 93d721bc680fb74e488cb18498a200d785f9d3c4 --- .../knowhere/knowhere/index/vector_index/IndexIVF.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 0c4856f2b6..be52039deb 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -216,7 +217,15 @@ IVF::GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const C void IVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { auto params = GenParams(cfg); + stdclock::time_point before = stdclock::now(); faiss::ivflib::search_with_parameters(index_.get(), n, (float*)data, k, distances, labels, params.get()); + stdclock::time_point after = stdclock::now(); + double search_cost = (std::chrono::duration(after - before)).count(); + KNOWHERE_LOG_DEBUG << "IVF search cost: " << search_cost + << ", quantization cost: " << faiss::indexIVF_stats.quantization_time + << ", data search cost: " << faiss::indexIVF_stats.search_time; + faiss::indexIVF_stats.quantization_time = 0; + faiss::indexIVF_stats.search_time = 0; } VectorIndexPtr From 56e21a4f5cc6dcfa9e07595cc675ab78ecc2948e Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 21 Oct 2019 11:39:15 +0800 Subject: [PATCH 3/4] fix build error Former-commit-id: c186ad9be4f183cbaa70f7dbaacf1f3e0a1df617 --- .../index/knowhere/knowhere/index/vector_index/IndexIVF.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index be52039deb..fba2e11e2e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -31,11 +31,14 @@ #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" +#include "knowhere/common/Log.h" #include "knowhere/index/vector_index/IndexGPUIVF.h" #include "knowhere/index/vector_index/IndexIVF.h" namespace knowhere { +using stdclock = std::chrono::high_resolution_clock; + IndexModelPtr IVF::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); From 18108119b2ace6f5f3175e5bb613976d2ff334f6 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 21 Oct 2019 16:46:33 +0800 Subject: [PATCH 4/4] #59 Topk result is incorrect for small dataset Former-commit-id: 4961584a889ad12cc41ff2b342eec8fa6e1eb427 --- CHANGELOG.md | 1 + core/src/scheduler/task/SearchTask.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa2116a63b..47ab1cb94e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#39 - Create SQ8H index hang if using github server version - \#30 - Some troubleshoot messages in Milvus do not provide enough information - \#48 - Config unittest failed +- \#59 - Topk result is incorrect for small dataset ## Improvement - MS-552 - Add and change the easylogging library diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index b7a1e211d2..2836d41dd4 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -253,7 +253,7 @@ XSearchTask::MergeTopkToResultSet(const std::vector& input_ids, const s if (result[i].empty()) { result_buf.resize(input_k, scheduler::IdDistPair(-1, 0.0)); - uint64_t input_k_multi_i = input_k * i; + uint64_t input_k_multi_i = topk * i; for (auto k = 0; k < input_k; ++k) { uint64_t idx = input_k_multi_i + k; auto& result_buf_item = result_buf[k]; @@ -266,7 +266,7 @@ XSearchTask::MergeTopkToResultSet(const std::vector& input_ids, const s result_buf.resize(output_k, scheduler::IdDistPair(-1, 0.0)); size_t buf_k = 0, src_k = 0, tar_k = 0; uint64_t src_idx; - uint64_t input_k_multi_i = input_k * i; + uint64_t input_k_multi_i = topk * i; while (buf_k < output_k && src_k < input_k && tar_k < tar_size) { src_idx = input_k_multi_i + src_k; auto& result_buf_item = result_buf[buf_k];