diff --git a/CHANGELOG.md b/CHANGELOG.md index eb4fba56c3..cfcea678dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#310 - Add Q&A for 'protocol https not supported or disable in libcurl' issue - \#322 - Add option to enable / disable prometheus - \#358 - Add more information in build.sh and install.md +- \#255 - Add ivfsq8 test report detailed version ## Task diff --git a/core/src/index/CMakeLists.txt b/core/src/index/CMakeLists.txt index 4b5c1b1de3..53453d53aa 100644 --- a/core/src/index/CMakeLists.txt +++ b/core/src/index/CMakeLists.txt @@ -88,14 +88,14 @@ endif () include(ThirdPartyPackagesCore) if (CMAKE_BUILD_TYPE STREQUAL "Release") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -DELPP_THREAD_SAFE -fopenmp") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -DELPP_THREAD_SAFE -fopenmp -mavx -mf16c -msse4 -mpopcnt") if (KNOWHERE_GPU_VERSION) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O3") endif () else () - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -fPIC -DELPP_THREAD_SAFE -fopenmp") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -fPIC -DELPP_THREAD_SAFE -fopenmp -mavx -mf16c -msse4 -mpopcnt") if (KNOWHERE_GPU_VERSION) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O0 -g") + set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O3 -g") endif () endif () diff --git a/core/src/index/knowhere/knowhere/common/Config.h b/core/src/index/knowhere/knowhere/common/Config.h index 6191ecb771..48c2de8b1b 100644 --- a/core/src/index/knowhere/knowhere/common/Config.h +++ b/core/src/index/knowhere/knowhere/common/Config.h @@ -18,6 +18,8 @@ #pragma once #include <memory> +#include <sstream> +#include "Log.h" namespace knowhere { @@ -50,6 +52,18 @@ struct Cfg { CheckValid() { return true; } + + void + Dump() { + KNOWHERE_LOG_DEBUG << DumpImpl().str(); + } + + virtual std::stringstream + DumpImpl() { + std::stringstream ss; + ss << "dim: " << d << ", metric: " << int(metric_type) << ", gpuid: " << gpu_id << ", k: " << k; + return ss; + } }; using Config = std::shared_ptr<Cfg>; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp b/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp index 20f3388174..6e9ee55658 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp @@ -34,4 +34,26 @@ GetMetricType(METRICTYPE& type) { KNOWHERE_THROW_MSG("Metric type is invalid"); } +std::stringstream +IVFCfg::DumpImpl() { + auto ss = Cfg::DumpImpl(); + ss << ", nlist: " << nlist << ", nprobe: " << nprobe; + return ss; +} + +std::stringstream +IVFSQCfg::DumpImpl() { + auto ss = IVFCfg::DumpImpl(); + ss << ", nbits: " << nbits; + return ss; +} + +std::stringstream +NSGCfg::DumpImpl() { + auto ss = IVFCfg::DumpImpl(); + ss << ", knng: " << knng << ", search_length: " << search_length << ", out_degree: " << out_degree + << ", candidate: " << candidate_pool_size; + return ss; +} + } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h b/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h index b2854abef8..b931790b04 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h @@ -53,6 +53,9 @@ struct IVFCfg : public Cfg { IVFCfg() = default; + std::stringstream + DumpImpl() override; + bool
CheckValid() override { return true; @@ -69,6 +72,9 @@ struct IVFSQCfg : public IVFCfg { : IVFCfg(dim, k, gpu_id, nlist, nprobe, type), nbits(nbits) { } + std::stringstream + DumpImpl() override; + IVFSQCfg() = default; bool @@ -119,6 +125,9 @@ struct NSGCfg : public IVFCfg { NSGCfg() = default; + std::stringstream + DumpImpl() override; + bool CheckValid() override { return true; diff --git a/core/src/wrapper/ConfAdapter.cpp b/core/src/wrapper/ConfAdapter.cpp index 461745f1fd..0ab38d3394 100644 --- a/core/src/wrapper/ConfAdapter.cpp +++ b/core/src/wrapper/ConfAdapter.cpp @@ -47,7 +47,8 @@ ConfAdapter::Match(const TempMetaConf& metaconf) { auto conf = std::make_shared(); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; - conf->gpu_id = conf->gpu_id; + conf->gpu_id = metaconf.gpu_id; + conf->k = metaconf.k; MatchBase(conf); return conf; } @@ -65,7 +66,7 @@ IVFConfAdapter::Match(const TempMetaConf& metaconf) { conf->nlist = MatchNlist(metaconf.size, metaconf.nlist); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; - conf->gpu_id = conf->gpu_id; + conf->gpu_id = metaconf.gpu_id; MatchBase(conf); return conf; } @@ -114,7 +115,7 @@ IVFSQConfAdapter::Match(const TempMetaConf& metaconf) { conf->nlist = MatchNlist(metaconf.size, metaconf.nlist); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; - conf->gpu_id = conf->gpu_id; + conf->gpu_id = metaconf.gpu_id; conf->nbits = 8; MatchBase(conf); return conf; @@ -126,7 +127,7 @@ IVFPQConfAdapter::Match(const TempMetaConf& metaconf) { conf->nlist = MatchNlist(metaconf.size, metaconf.nlist); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; - conf->gpu_id = conf->gpu_id; + conf->gpu_id = metaconf.gpu_id; conf->nbits = 8; if (!(conf->d % 4)) @@ -175,21 +176,17 @@ NSGConfAdapter::Match(const TempMetaConf& metaconf) { conf->nlist = MatchNlist(metaconf.size, metaconf.nlist); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; - conf->gpu_id = conf->gpu_id; + conf->gpu_id = metaconf.gpu_id; + conf->k = metaconf.k; - double factor = metaconf.size / TYPICAL_COUNT; auto scale_factor = round(metaconf.dim / 128.0); scale_factor = scale_factor >= 4 ? 4 : scale_factor; - conf->nprobe = conf->nlist > 10000 ? 
conf->nlist * 0.02 : conf->nlist * 0.1; - conf->knng = (100 + 100 * scale_factor) * factor; - conf->search_length = (40 + 5 * scale_factor) * factor; - conf->out_degree = (50 + 5 * scale_factor) * factor; - conf->candidate_pool_size = (200 + 100 * scale_factor) * factor; + conf->nprobe = int64_t(conf->nlist * 0.01); + conf->knng = 40 + 10 * scale_factor; // the size of knng + conf->search_length = 40 + 5 * scale_factor; + conf->out_degree = 50 + 5 * scale_factor; + conf->candidate_pool_size = 200 + 100 * scale_factor; MatchBase(conf); - - // WRAPPER_LOG_DEBUG << "nlist: " << conf->nlist - // << ", gpu_id: " << conf->gpu_id << ", d: " << conf->d - // << ", nprobe: " << conf->nprobe << ", knng: " << conf->knng; return conf; } diff --git a/core/src/wrapper/ConfAdapter.h b/core/src/wrapper/ConfAdapter.h index 5ec3d52486..85637a4969 100644 --- a/core/src/wrapper/ConfAdapter.h +++ b/core/src/wrapper/ConfAdapter.h @@ -46,9 +46,6 @@ class ConfAdapter { virtual knowhere::Config MatchSearch(const TempMetaConf& metaconf, const IndexType& type); - // virtual void - // Dump(){} - protected: static void MatchBase(knowhere::Config conf); diff --git a/core/unittest/wrapper/test_wrapper.cpp b/core/unittest/wrapper/test_wrapper.cpp index 025601a1cd..a07fafc7b8 100644 --- a/core/unittest/wrapper/test_wrapper.cpp +++ b/core/unittest/wrapper/test_wrapper.cpp @@ -56,10 +56,6 @@ class KnowhereWrapperTest index_ = GetVecIndexFactory(index_type); conf = ParamGenerator::GetInstance().GenBuild(index_type, tempconf); searchconf = ParamGenerator::GetInstance().GenSearchConf(index_type, tempconf); - -// conf->k = k; -// conf->d = dim; -// conf->gpu_id = DEVICEID; } void TearDown() override { @@ -97,6 +93,7 @@ INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, TEST_P(KnowhereWrapperTest, BASE_TEST) { EXPECT_EQ(index_->GetType(), index_type); + // conf->Dump(); auto elems = nq * k; std::vector res_ids(elems); @@ -191,3 +188,119 @@ TEST(whatever, test_config) { auto pq_conf = std::make_shared(); pq_conf->Match(conf); } + +// #include "knowhere/index/vector_index/IndexIDMAP.h" +// #include "src/wrapper/VecImpl.h" +// #include "src/index/unittest/utils.h" +// The two case below prove NSG is concern with data distribution +// Further work: 1. Use right basedata and pass it by milvus +// a. batch size is 100000 [Pass] +// b. transfer all at once [Pass] +// 2. 
Use SIFT1M in test and check time cost [] +// TEST_P(KnowhereWrapperTest, nsgwithidmap) { +// auto idmap = GetVecIndexFactory(milvus::engine::IndexType::FAISS_IDMAP); +// auto ori_xb = xb; +// auto ori_ids = ids; +// std::vector temp_xb; +// std::vector temp_ids; +// nb = 50000; +// for (int i = 0; i < 20; ++i) { +// GenData(dim, nb, nq, xb, xq, ids, k, gt_ids, gt_dis); +// assert(xb.size() == nb*dim); +// //#define IDMAP +// #ifdef IDMAP +// temp_xb.insert(temp_xb.end(), xb.data(), xb.data() + nb*dim); +// temp_ids.insert(temp_ids.end(), ori_ids.data()+nb*i, ori_ids.data() + nb*(i+1)); +// if (i == 0) { +// idmap->BuildAll(nb, temp_xb.data(), temp_ids.data(), conf); +// } else { +// idmap->Add(nb, temp_xb.data(), temp_ids.data()); +// } +// temp_xb.clear(); +// temp_ids.clear(); +// #else +// temp_xb.insert(temp_xb.end(), xb.data(), xb.data() + nb*dim); +// temp_ids.insert(temp_ids.end(), ori_ids.data()+nb*i, ori_ids.data() + nb*(i+1)); +// #endif +// } + +// #ifdef IDMAP +// auto idmap_idx = std::dynamic_pointer_cast(idmap); +// auto x = idmap_idx->Count(); +// index_->BuildAll(idmap_idx->Count(), idmap_idx->GetRawVectors(), idmap_idx->GetRawIds(), conf); +// #else +// assert(temp_xb.size() == 1000000*128); +// index_->BuildAll(1000000, temp_xb.data(), ori_ids.data(), conf); +// #endif +// } + +// TEST_P(KnowhereWrapperTest, nsgwithsidmap) { +// auto idmap = GetVecIndexFactory(milvus::engine::IndexType::FAISS_IDMAP); +// auto ori_xb = xb; +// std::vector temp_xb; +// std::vector temp_ids; +// nb = 50000; +// for (int i = 0; i < 20; ++i) { +// #define IDMAP +// #ifdef IDMAP +// temp_xb.insert(temp_xb.end(), ori_xb.data()+nb*dim*i, ori_xb.data() + nb*dim*(i+1)); +// temp_ids.insert(temp_ids.end(), ids.data()+nb*i, ids.data() + nb*(i+1)); +// if (i == 0) { +// idmap->BuildAll(nb, temp_xb.data(), temp_ids.data(), conf); +// } else { +// idmap->Add(nb, temp_xb.data(), temp_ids.data()); +// } +// temp_xb.clear(); +// temp_ids.clear(); +// #else +// temp_xb.insert(temp_xb.end(), ori_xb.data()+nb*dim*i, ori_xb.data() + nb*dim*(i+1)); +// temp_ids.insert(temp_ids.end(), ids.data()+nb*i, ids.data() + nb*(i+1)); +// #endif +// } + +// #ifdef IDMAP +// auto idmap_idx = std::dynamic_pointer_cast(idmap); +// auto x = idmap_idx->Count(); +// index_->BuildAll(idmap_idx->Count(), idmap_idx->GetRawVectors(), idmap_idx->GetRawIds(), conf); +// #else +// index_->BuildAll(1000000, temp_xb.data(), temp_ids.data(), conf); +// #endif + +// // The code use to store raw base data +// FileIOWriter writer("/tmp/newraw"); +// ori_xb.shrink_to_fit(); +// std::cout << "size" << ori_xb.size(); +// writer(static_cast(ori_xb.data()), ori_xb.size()* sizeof(float)); +// std::cout << "Finish!" 
<< std::endl; +// } + +// void load_data(char* filename, float*& data, unsigned& num, +// unsigned& dim) { // load data with sift10K pattern +// std::ifstream in(filename, std::ios::binary); +// if (!in.is_open()) { +// std::cout << "open file error" << std::endl; +// exit(-1); +// } +// in.read((char*)&dim, 4); +// in.seekg(0, std::ios::end); +// std::ios::pos_type ss = in.tellg(); +// size_t fsize = (size_t)ss; +// num = (unsigned)(fsize / (dim + 1) / 4); +// data = new float[(size_t)num * (size_t)dim]; + +// in.seekg(0, std::ios::beg); +// for (size_t i = 0; i < num; i++) { +// in.seekg(4, std::ios::cur); +// in.read((char*)(data + i * dim), dim * 4); +// } +// in.close(); +// } + +// TEST_P(KnowhereWrapperTest, Sift1M) { +// float* data = nullptr; +// unsigned points_num, dim; +// load_data("/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_base.fvecs", data, points_num, +// dim); std::cout << points_num << " " << dim << std::endl; + +// index_->BuildAll(points_num, data, ids.data(), conf); +// } diff --git a/core/unittest/wrapper/utils.cpp b/core/unittest/wrapper/utils.cpp index 00a1db2c88..96b9e643f5 100644 --- a/core/unittest/wrapper/utils.cpp +++ b/core/unittest/wrapper/utils.cpp @@ -117,6 +117,11 @@ void DataGenBase::GenData(const int& dim, const int& nb, const int& nq, std::vector& xb, std::vector& xq, std::vector& ids, const int& k, std::vector& gt_ids, std::vector& gt_dis) { + xb.clear(); + xq.clear(); + ids.clear(); + gt_ids.clear(); + gt_dis.clear(); xb.resize(nb * dim); xq.resize(nq * dim); ids.resize(nb); diff --git a/core/unittest/wrapper/utils.h b/core/unittest/wrapper/utils.h index 1eaa7ae4ec..05dc92f2d6 100644 --- a/core/unittest/wrapper/utils.h +++ b/core/unittest/wrapper/utils.h @@ -25,7 +25,6 @@ #include #include - #include "wrapper/VecIndex.h" #include "wrapper/utils.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h" @@ -90,17 +89,20 @@ class ParamGenerator { return instance; } - knowhere::Config GenSearchConf(const milvus::engine::IndexType& type, const milvus::engine::TempMetaConf& conf) { + knowhere::Config + GenSearchConf(const milvus::engine::IndexType& type, const milvus::engine::TempMetaConf& conf) { auto adapter = milvus::engine::AdapterMgr::GetInstance().GetAdapter(type); return adapter->MatchSearch(conf, type); } - knowhere::Config GenBuild(const milvus::engine::IndexType& type, const milvus::engine::TempMetaConf& conf) { + knowhere::Config + GenBuild(const milvus::engine::IndexType& type, const milvus::engine::TempMetaConf& conf) { auto adapter = milvus::engine::AdapterMgr::GetInstance().GetAdapter(type); return adapter->Match(conf); } - knowhere::Config Gen(const milvus::engine::IndexType& type) { + knowhere::Config + Gen(const milvus::engine::IndexType& type) { switch (type) { case milvus::engine::IndexType::FAISS_IDMAP: { auto tempconf = std::make_shared(); diff --git a/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md b/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md new file mode 100644 index 0000000000..4d6840430e --- /dev/null +++ b/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md @@ -0,0 +1,210 @@ +# milvus_ivfsq8_test_report_detailed_version + +## Summary + +This document contains the test reports of IVF_SQ8 index on Milvus single server. + + + +## Test objectives + +The time cost and recall when searching with different parameters. 
+ + + +## Test method + +### Hardware/Software requirements + +Operating System: CentOS Linux release 7.6.1810 (Core) + +CPU: Intel(R) Xeon(R) CPU E5-2678 v3 @ 2.50GHz + +GPU0: GeForce GTX 1080 + +GPU1: GeForce GTX 1080 + +Memory: 503GB + +Docker version: 18.09 + +Nvidia Driver version: 430.34 + +Milvus version: 0.5.3 + +SDK interface: Python 3.6.8 + +Pymilvus version: 0.2.5 + + + +### Data model + +The data used in the tests are: + +- Data source: sift1b +- Data type: hdf5 + +For details on this dataset, you can check http://corpus-texmex.irisa.fr/. + + + +### Measures + +- Query Elapsed Time: Time cost (in seconds) to run a query. Variables that affect Query Elapsed Time: + + - nq (Number of queried vectors) + + > Note: In the query test of query elapsed time, we will test the following parameters with different values: + > + > nq - grouped by: [1, 5, 10, 200, 400, 600, 800, 1000] + +- Recall: The fraction of the relevant instances that are actually retrieved. Variables that affect Recall: + + - nq (Number of queried vectors) + - topk (Top k result of a query) + + > Note: In the query test of recall, we will test the following parameters with different values: + > + > nq - grouped by: [1, 5, 10, 200, 400, 600, 800, 1000], + > + > topk - grouped by: [1, 10, 100] + + + +## Test reports + +### Test environment + +Dataset: sift1b - 1,000,000,000 vectors, 128-dimension + +Table Attributes + +- nlist: 16384 +- metric_type: L2 + +Query configuration + +- nprobe: 32 + +Milvus configuration + +- cpu_cache_capacity: 150 +- gpu_cache_capacity: 6 +- use_blas_threshold: 1100 + +You can check the definition of Milvus configuration at https://milvus.io/docs/en/reference/milvus_config/. + +Test method + +Test the query elapsed time and recall with different parameter values, changing only one parameter at a time. + +- Whether to restart Milvus after each query: No + + + +### Performance test + +#### Data query + +**Test result** + +Query Elapsed Time + +topk: 100 + +search_resources: gpu0, gpu1 + +| nq/topk | topk=100 | +| :-----: | :------: | +| nq=1 | 15.57 | +| nq=10 | 15.80 | +| nq=200 | 15.72 | +| nq=400 | 15.94 | +| nq=600 | 16.58 | +| nq=800 | 16.71 | +| nq=1000 | 16.91 | + +When nq is 1000, the average query time per 128-dimension vector is around 17 ms in GPU Mode. + + + +topk: 100 + +search_resources: cpu, gpu0 + +| nq/topk | topk=100 | +| :-----: | :------: | +| nq=1 | 1.12 | +| nq=10 | 2.89 | +| nq=200 | 8.10 | +| nq=400 | 12.36 | +| nq=600 | 17.81 | +| nq=800 | 23.24 | +| nq=1000 | 27.41 | + +When nq is 1000, the average query time per 128-dimension vector is around 27 ms in CPU Mode. + + + +**Conclusion** + +The query elapsed time in CPU Mode increases quickly with nq, while in GPU Mode it increases much more slowly. When nq is small, CPU Mode consumes less time than GPU Mode. However, as nq becomes larger, GPU Mode shows its advantage against CPU Mode. + +The query elapsed time in GPU Mode consists of two parts: (1) index CPU-to-GPU copy time; (2) nprobe buckets search time. When nq is smaller than 500, the index CPU-to-GPU copy time cannot be amortized efficiently, so CPU Mode is a better choice; when nq is larger than 500, GPU Mode is the better choice. + +Compared with the CPU, the GPU has many more cores and stronger computing capability. When nq is large, the GPU's computing advantage is exploited more fully.
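
As a back-of-the-envelope illustration of this amortization argument, the sketch below fits simple linear models by eye to the two tables above; the constants are approximations of the measured data, not additional measurements.

```python
# Rough linear models of the measured elapsed times (illustrative only).
GPU_FIXED_COST_S = 15.5    # one-off index CPU-to-GPU copy / setup cost
GPU_PER_QUERY_S = 0.0013   # incremental cost per queried vector in GPU Mode
CPU_PER_QUERY_S = 0.028    # CPU Mode grows almost linearly with nq

def gpu_mode_estimate(nq: int) -> float:
    return GPU_FIXED_COST_S + GPU_PER_QUERY_S * nq

def cpu_mode_estimate(nq: int) -> float:
    return CPU_PER_QUERY_S * nq

for nq in (1, 200, 400, 600, 800, 1000):
    print(f"nq={nq:4d}  gpu ~{gpu_mode_estimate(nq):5.2f}s  cpu ~{cpu_mode_estimate(nq):5.2f}s")

# The two estimates cross at roughly nq = 580, consistent with the tables
# (CPU Mode is still faster at nq=400, GPU Mode is faster at nq=600) and
# with the ~500 threshold quoted above.
```
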
+ + + +### Recall test + +**Test result** + +topk = 1 : recall - recall@1 + +topk = 10 : recall - recall@10 + +topk = 100 : recall - recall@100 + +We use the ground_truth in sift1b dataset to calculate the recall of query results. + + + +Recall of GPU Mode + +search_resources: gpu0, gpu1 + +| nq/topk | topk=1 | topk=10 | topk=100 | +| :-----: | :----: | :-----: | :------: | +| nq=1 | 1.000 | 0.800 | 0.790 | +| nq=5 | 0.800 | 0.820 | 0.908 | +| nq=10 | 0.900 | 0.910 | 0.939 | +| nq=200 | 0.955 | 0.941 | 0.929 | +| nq=400 | 0.958 | 0.944 | 0.932 | +| nq=600 | 0.952 | 0.946 | 0.934 | +| nq=800 | 0.941 | 0.943 | 0.930 | +| nq=1000 | 0.938 | 0.942 | 0.930 | + + + +Recall of CPU Mode + +search_resources: cpu, gpu0 + +| nq/topk | topk=1 | topk=10 | topk=100 | +| :-----: | :----: | :-----: | :------: | +| nq=1 | 1.000 | 0.800 | 0.790 | +| nq=5 | 0.800 | 0.820 | 0.908 | +| nq=10 | 0.900 | 0.910 | 0.939 | +| nq=200 | 0.955 | 0.941 | 0.929 | +| nq=400 | 0.958 | 0.944 | 0.932 | +| nq=600 | 0.952 | 0.946 | 0.934 | +| nq=800 | 0.941 | 0.943 | 0.930 | +| nq=1000 | 0.938 | 0.942 | 0.930 | + + + +**Conclusion** + +As nq increases, the recall gradually stabilizes to over 93%. \ No newline at end of file diff --git a/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md b/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md new file mode 100644 index 0000000000..4b5e18f291 --- /dev/null +++ b/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md @@ -0,0 +1,211 @@ +# milvus_ivfsq8_test_report_detailed_version_cn + +## 概述 + +本文描述了ivfsq8索引在milvus单机部署方式下的测试报告。 + + + +## 测试目标 + +参数不同情况下的查询时间和召回率。 + + + +## 测试方法 + +### 软硬件环境 + +操作系统: CentOS Linux release 7.6.1810 (Core) + +CPU: Intel(R) Xeon(R) CPU E5-2678 v3 @ 2.50GHz + +GPU0: GeForce GTX 1080 + +GPU1: GeForce GTX 1080 + +内存: 503GB + +Docker版本: 18.09 + +Nvidia Driver版本: 430.34 + +Milvus版本: 0.5.3 + +SDK接口: Python 3.6.8 + +Pymilvus版本: 0.2.5 + + + +### 数据模型 + +本测试中用到的主要数据: + +- 数据来源: sift1b +- 数据类型: hdf5 + +关于该数据集的详细信息请参考 : http://corpus-texmex.irisa.fr/ 。 + + + +### 测试指标 + +- Query Elapsed Time: 数据库查询所有向量的时间(以秒计)。影响Query Elapsed Time的变量: + + - nq (被查询向量的数量) + + > 备注:在向量查询测试中,我们会测试下面参数不同的取值来观察结果: + > + > 被查询向量的数量nq将按照 [1, 5, 10, 200, 400, 600, 800, 1000]的数量分组。 + +- Recall: 实际返回的正确结果占总数之比 . 
影响Recall的变量: + + - nq (被查询向量的数量) + - topk (单条查询中最相似的K个结果) + + > 备注:在向量准确性测试中,我们会测试下面参数不同的取值来观察结果: + > + > 被查询向量的数量nq将按照 [1, 5, 10, 200, 400, 600, 800, 1000]的数量分组, + > + > 单条查询中最相似的K个结果topk将按照[1, 10, 100]的数量分组。 + + + +## 测试报告 + +### 测试环境 + +数据集: sift1b-1,000,000,000向量, 128维 + +表格属性: + +- nlist: 16384 +- metric_type: L2 + +查询设置: + +- nprobe: 32 + +Milvus设置: + +- cpu_cache_capacity: 150 +- gpu_cache_capacity: 6 +- use_blas_threshold: 1100 + +你可以在 https://milvus.io/docs/en/reference/milvus_config/上查询Milvus设置的详细定义。 + +测试方法 + +通过一次仅改变一个参数的值,测试查询向量时间和召回率。 + +- 查询后是否重启Milvus:否 + + + +### 性能测试 + +#### 数据查询 + +测试结果 + +Query Elapsed Time + +topk: 100 + +search_resources: gpu0, gpu1 + +| nq/topk | topk=100 | +| :-----: | :------: | +| nq=1 | 15.57 | +| nq=10 | 15.80 | +| nq=200 | 15.72 | +| nq=400 | 15.94 | +| nq=600 | 16.58 | +| nq=800 | 16.71 | +| nq=1000 | 16.91 | + +当nq为1000时,在GPU模式下查询一条128维向量需要耗时约17毫秒。 + + + +topk: 100 + +search_resources: cpu, gpu0 + +| nq/topk | topk=100 | +| :-----: | :------: | +| nq=1 | 1.12 | +| nq=10 | 2.89 | +| nq=200 | 8.10 | +| nq=400 | 12.36 | +| nq=600 | 17.81 | +| nq=800 | 23.24 | +| nq=1000 | 27.41 | + +当nq为1000时,在CPU模式下查询一条128维向量需要耗时约27毫秒。 + + + +**总结** + +在CPU模式下查询耗时随nq的增长快速增大,而在GPU模式下查询耗时的增大则缓慢许多。当nq较小时,CPU模式比GPU模式耗时更少。但当nq足够大时,GPU模式则更具有优势。 + +在GPU模式下的查询耗时由两部分组成:(1)索引从CPU到GPU的拷贝时间;(2)所有分桶的查询时间。当nq小于500时,索引从CPU到GPU的拷贝时间无法被有效均摊,此时CPU模式是一个更优的选择;当nq大于500时,选择GPU模式更合理。 + +和CPU相比,GPU具有更多的核数和更强的算力。当nq较大时,GPU在计算上的优势能被更好地体现。 + + + +### 召回率测试 + +**测试结果** + +topk = 1 : recall - recall@1 + +topk = 10 : recall - recall@10 + +topk = 100 : recall - recall@100 + +我们利用sift1b数据集中的ground_truth来计算查询结果的召回率。 + + + +Recall of GPU Mode + +search_resources: gpu0, gpu1 + +| nq/topk | topk=1 | topk=10 | topk=100 | +| :-----: | :----: | :-----: | :------: | +| nq=1 | 1.000 | 0.800 | 0.790 | +| nq=5 | 0.800 | 0.820 | 0.908 | +| nq=10 | 0.900 | 0.910 | 0.939 | +| nq=200 | 0.955 | 0.941 | 0.929 | +| nq=400 | 0.958 | 0.944 | 0.932 | +| nq=600 | 0.952 | 0.946 | 0.934 | +| nq=800 | 0.941 | 0.943 | 0.930 | +| nq=1000 | 0.938 | 0.942 | 0.930 | + + + +Recall of CPU Mode + +search_resources: cpu, gpu0 + +| nq/topk | topk=1 | topk=10 | topk=100 | +| :-----: | :----: | :-----: | :------: | +| nq=1 | 1.000 | 0.800 | 0.790 | +| nq=5 | 0.800 | 0.820 | 0.908 | +| nq=10 | 0.900 | 0.910 | 0.939 | +| nq=200 | 0.955 | 0.941 | 0.929 | +| nq=400 | 0.958 | 0.944 | 0.932 | +| nq=600 | 0.952 | 0.946 | 0.934 | +| nq=800 | 0.941 | 0.943 | 0.930 | +| nq=1000 | 0.938 | 0.942 | 0.930 | + + + +**总结** + +随着nq的增大,召回率逐渐稳定至93%以上。 + diff --git a/shards/mishards/__init__.py b/shards/mishards/__init__.py index a3c55c4ae3..55594220d3 100644 --- a/shards/mishards/__init__.py +++ b/shards/mishards/__init__.py @@ -11,7 +11,9 @@ grpc_server = Server() def create_app(testing_config=None): config = testing_config if testing_config else settings.DefaultConfig - db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO) + db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO, pool_size=config.SQL_POOL_SIZE, + pool_recycle=config.SQL_POOL_RECYCLE, pool_timeout=config.SQL_POOL_TIMEOUT, + pool_pre_ping=config.SQL_POOL_PRE_PING, max_overflow=config.SQL_MAX_OVERFLOW) from mishards.connections import ConnectionMgr connect_mgr = ConnectionMgr() diff --git a/shards/mishards/db_base.py b/shards/mishards/db_base.py index 5f2eee9ba1..e55c330352 100644 --- a/shards/mishards/db_base.py +++ b/shards/mishards/db_base.py @@ -23,15 +23,12 @@ class DB: uri and self.init_db(uri, echo) self.session_factory =
scoped_session(sessionmaker(class_=LocalSession, db=self)) - def init_db(self, uri, echo=False): + def init_db(self, uri, echo=False, pool_size=100, pool_recycle=5, pool_timeout=30, pool_pre_ping=True, max_overflow=0): url = make_url(uri) if url.get_backend_name() == 'sqlite': self.engine = create_engine(url) else: - self.engine = create_engine(uri, pool_size=100, pool_recycle=5, pool_timeout=30, - pool_pre_ping=True, - echo=echo, - max_overflow=0) + self.engine = create_engine(uri, pool_size=pool_size, pool_recycle=pool_recycle, pool_timeout=pool_timeout, pool_pre_ping=pool_pre_ping, echo=echo, max_overflow=max_overflow) self.uri = uri self.url = url diff --git a/shards/mishards/settings.py b/shards/mishards/settings.py index 8d7361dddc..832f1639ea 100644 --- a/shards/mishards/settings.py +++ b/shards/mishards/settings.py @@ -50,10 +50,15 @@ class TracingConfig: } } class DefaultConfig: SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') SQL_ECHO = env.bool('SQL_ECHO', False) + SQL_POOL_SIZE = env.int('pool_size', 100) + SQL_POOL_RECYCLE = env.int('pool_recycle', 5) + SQL_POOL_TIMEOUT = env.int('pool_timeout', 30) + SQL_POOL_PRE_PING = env.bool('pool_pre_ping', True) + SQL_MAX_OVERFLOW = env.int('max_overflow', 0) TRACER_PLUGIN_PATH = env.str('TRACER_PLUGIN_PATH', '') TRACER_CLASS_NAME = env.str('TRACER_CLASS_NAME', '') ROUTER_PLUGIN_PATH = env.str('ROUTER_PLUGIN_PATH', '') @@ -65,5 +71,10 @@ class DefaultConfig: class TestingConfig(DefaultConfig): SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI', '') SQL_ECHO = env.bool('SQL_TEST_ECHO', False) + SQL_POOL_SIZE = env.int('pool_size', 100) + SQL_POOL_RECYCLE = env.int('pool_recycle', 5) + SQL_POOL_TIMEOUT = env.int('pool_timeout', 30) + SQL_POOL_PRE_PING = env.bool('pool_pre_ping', True) + SQL_MAX_OVERFLOW = env.int('max_overflow', 0) TRACER_CLASS_NAME = env.str('TRACER_CLASS_TEST_NAME', '') ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_TEST_NAME', 'FileBasedHashRingRouter')
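
For reference, a minimal sketch of how the new pool settings are meant to flow from environment variables into SQLAlchemy. It is illustrative only: it reuses the env keys and defaults added to `DefaultConfig` above and the `environs`/SQLAlchemy packages mishards already depends on, but the `make_engine` helper is not part of mishards.

```python
# Illustrative wiring of the new SQL_* settings; mirrors the init_db() branch above.
from environs import Env
from sqlalchemy import create_engine
from sqlalchemy.engine.url import make_url

env = Env()

# Same env keys and defaults as DefaultConfig in settings.py
POOL_KWARGS = dict(
    pool_size=env.int('pool_size', 100),
    pool_recycle=env.int('pool_recycle', 5),
    pool_timeout=env.int('pool_timeout', 30),
    pool_pre_ping=env.bool('pool_pre_ping', True),
    max_overflow=env.int('max_overflow', 0),
)

def make_engine(uri, echo=False):
    url = make_url(uri)
    if url.get_backend_name() == 'sqlite':
        # sqlite's default pool class does not accept these arguments
        return create_engine(url)
    return create_engine(uri, echo=echo, **POOL_KWARGS)

# e.g. make_engine('mysql+pymysql://user:password@host/milvus') in a real deployment
engine = make_engine('sqlite://')
```
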