From 6e7fb831bd8ae5a1a17b1da5a56fabff2006d44c Mon Sep 17 00:00:00 2001 From: op-hunter Date: Thu, 2 Apr 2020 09:47:45 +0800 Subject: [PATCH] add annoy params validate check (#1829) * fix bug & solve compile warning from annoylib Signed-off-by: cmli * fix lint error Signed-off-by: cmli Co-authored-by: cmli --- CHANGELOG.md | 1 + .../knowhere/index/vector_index/ConfAdapter.cpp | 4 ++-- core/src/index/thirdparty/annoy/src/annoylib.h | 12 ++++++------ core/src/utils/ValidationUtil.cpp | 16 ++++++++++++++++ 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe90c27729..8fdacd4c32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,6 +119,7 @@ Please mark all change in change log and use the issue from GitHub - \#1598 Server down during mixed operations - \#1601 External link bug in HTTP doc - \#1609 Refine Compact function +- \#1808 Building index params check for Annoy ## Feature - \#216 Add CLI to get server info diff --git a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp index 0f69887715..36288d26fc 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp @@ -299,9 +299,9 @@ BinIVFConfAdapter::CheckTrain(Config& oricfg, const IndexMode mode) { bool ANNOYConfAdapter::CheckTrain(Config& oricfg, const IndexMode mode) { - static int64_t MIN_NTREES = 0; + static int64_t MIN_NTREES = 1; // too large of n_trees takes much time, if there is real requirement, change this threshold. - static int64_t MAX_NTREES = 16384; + static int64_t MAX_NTREES = 1024; CheckIntByRange(knowhere::IndexParams::n_trees, MIN_NTREES, MAX_NTREES); diff --git a/core/src/index/thirdparty/annoy/src/annoylib.h b/core/src/index/thirdparty/annoy/src/annoylib.h index eebfa78d62..feec29cb9a 100644 --- a/core/src/index/thirdparty/annoy/src/annoylib.h +++ b/core/src/index/thirdparty/annoy/src/annoylib.h @@ -967,7 +967,7 @@ public: memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s); _n_nodes += _roots.size(); - if (_verbose) showUpdate("has %d nodes\n", _n_nodes); + if (_verbose) showUpdate("has %ld nodes\n", _n_nodes); if (_on_disk) { _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * _n_nodes); @@ -1105,7 +1105,7 @@ public: _loaded = true; _built = true; _n_items = m; - if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m); + if (_verbose) showUpdate("found %lu roots with degree %ld\n", _roots.size(), m); return true; } @@ -1144,7 +1144,7 @@ public: _loaded = true; _built = true; _n_items = m; - if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m); + if (_verbose) showUpdate("found %lu roots with degree %ld\n", _roots.size(), m); return true; } @@ -1215,7 +1215,7 @@ protected: } _nodes_size = new_nodes_size; - if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes); + if (_verbose) showUpdate("Reallocating to %ld nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes); } } @@ -1266,7 +1266,7 @@ protected: bool side = D::side(m, n->v, _f, _random); children_indices[side].push_back(j); } else { - showUpdate("No node for index %d?\n", j); + showUpdate("No node for index %ld?\n", j); } } @@ -1317,7 +1317,7 @@ protected: std::priority_queue > q; if (search_k <= 0) { - search_k = n * _roots.size(); + search_k = std::max(n * _roots.size(), (size_t )_n_items * 5 / 100); } for (size_t i = 0; i < _roots.size(); i++) { diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index f86f688fc2..35a938069f 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -265,6 +266,13 @@ ValidationUtil::ValidateIndexParams(const milvus::json& index_params, } break; } + case (int32_t)engine::EngineType::ANNOY: { + auto status = CheckParameterRange(index_params, knowhere::IndexParams::n_trees, 1, 1024); + if (!status.ok()) { + return status; + } + break; + } } return Status::OK(); } @@ -302,6 +310,14 @@ ValidationUtil::ValidateSearchParams(const milvus::json& search_params, } break; } + case (int32_t)engine::EngineType::ANNOY: { + auto status = CheckParameterRange(search_params, knowhere::IndexParams::search_k, + std::numeric_limits::min(), std::numeric_limits::max()); + if (!status.ok()) { + return status; + } + break; + } } return Status::OK(); }