mirror of https://github.com/milvus-io/milvus.git
* #1663 PQ index parameter m validation Signed-off-by: groot <yihua.mo@zilliz.com>
* typo Signed-off-by: groot <yihua.mo@zilliz.com>
* fix unittest failure Signed-off-by: groot <yihua.mo@zilliz.com>
* fix unittest failure Signed-off-by: groot <yihua.mo@zilliz.com>
pull/1690/head^2
parent
400446351c
commit
55ecfd5930
|
@ -10,6 +10,7 @@ Please mark all change in change log and use the issue from GitHub
|
|||
- \#1635 Vectors can be returned by searching after vectors deleted if `cache_insert_data` set true
|
||||
- \#1648 The cache cannot be used all when the vector type is binary
|
||||
- \#1651 Check validity of dimension when collection metric type is binary one
|
||||
- \#1663 PQ index parameter 'm' validation
|
||||
|
||||
## Feature
|
||||
- \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure
|
||||
|
@ -21,7 +22,7 @@ Please mark all change in change log and use the issue from GitHub
|
|||
- \#1548 Move store/Directory to storage/Operation and add FSHandler
|
||||
- \#1619 Improve compact performance
|
||||
- \#1649 Fix Milvus crash on old CPU
|
||||
- \#1653 IndexFlat performance improvement for NQ < thread_number
|
||||
- \#1653 IndexFlat performance improvement for NQ less than thread_number
|
||||
|
||||
## Task
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "db/engine/ExecutionEngine.h"
|
||||
#include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
#include "utils/StringHelpFunctions.h"
|
||||
#include "wrapper/ConfAdapter.h"
|
||||
|
||||
#include <arpa/inet.h>
|
||||
|
||||
|
@ -44,7 +45,9 @@ CheckParameterRange(const milvus::json& json_params, const std::string& param_na
|
|||
bool min_close = true, bool max_closed = true) {
|
||||
if (json_params.find(param_name) == json_params.end()) {
|
||||
std::string msg = "Parameter list must contain: ";
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg + param_name);
|
||||
msg += param_name;
|
||||
SERVER_LOG_ERROR << msg;
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg);
|
||||
}
|
||||
|
||||
try {
|
||||
|
@ -60,7 +63,9 @@ CheckParameterRange(const milvus::json& json_params, const std::string& param_na
|
|||
}
|
||||
} catch (std::exception& e) {
|
||||
std::string msg = "Invalid " + param_name + ": ";
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg + e.what());
|
||||
msg += e.what();
|
||||
SERVER_LOG_ERROR << msg;
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
@ -70,7 +75,9 @@ Status
|
|||
CheckParameterExistence(const milvus::json& json_params, const std::string& param_name) {
|
||||
if (json_params.find(param_name) == json_params.end()) {
|
||||
std::string msg = "Parameter list must contain: ";
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg + param_name);
|
||||
msg += param_name;
|
||||
SERVER_LOG_ERROR << msg;
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg);
|
||||
}
|
||||
|
||||
try {
|
||||
|
@ -82,7 +89,9 @@ CheckParameterExistence(const milvus::json& json_params, const std::string& para
|
|||
}
|
||||
} catch (std::exception& e) {
|
||||
std::string msg = "Invalid " + param_name + ": ";
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg + e.what());
|
||||
msg += e.what();
|
||||
SERVER_LOG_ERROR << msg;
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
@ -201,6 +210,31 @@ ValidationUtil::ValidateIndexParams(const milvus::json& index_params, const engi
|
|||
return status;
|
||||
}
|
||||
|
||||
// special check for 'm' parameter
|
||||
std::vector<int64_t> resset;
|
||||
milvus::engine::IVFPQConfAdapter::GetValidMList(table_schema.dimension_, resset);
|
||||
int64_t m_value = index_params[index_params, knowhere::IndexParams::m];
|
||||
if (resset.empty()) {
|
||||
std::string msg = "Invalid table dimension, unable to get reasonable values for 'm'";
|
||||
SERVER_LOG_ERROR << msg;
|
||||
return Status(SERVER_INVALID_TABLE_DIMENSION, msg);
|
||||
}
|
||||
|
||||
auto iter = std::find(std::begin(resset), std::end(resset), m_value);
|
||||
if (iter == std::end(resset)) {
|
||||
std::string msg =
|
||||
"Invalid " + std::string(knowhere::IndexParams::m) + ", must be one of the following values: ";
|
||||
for (size_t i = 0; i < resset.size(); i++) {
|
||||
if (i != 0) {
|
||||
msg += ",";
|
||||
}
|
||||
msg += std::to_string(resset[i]);
|
||||
}
|
||||
|
||||
SERVER_LOG_ERROR << msg;
|
||||
return Status(SERVER_INVALID_ARGUMENT, msg);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case (int32_t)engine::EngineType::NSG_MIX: {
|
||||
|
|
|
@ -182,6 +182,18 @@ IVFPQConfAdapter::CheckTrain(milvus::json& oricfg) {
|
|||
// static int64_t MAX_POINTS_PER_CENTROID = 256;
|
||||
// CheckIntByRange(knowhere::meta::ROWS, MIN_POINTS_PER_CENTROID * nlist, MAX_POINTS_PER_CENTROID * nlist);
|
||||
|
||||
std::vector<int64_t> resset;
|
||||
int64_t dimension = oricfg[knowhere::meta::DIM].get<int64_t>();
|
||||
IVFPQConfAdapter::GetValidMList(dimension, resset);
|
||||
|
||||
CheckIntByValues(knowhere::IndexParams::m, resset);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
IVFPQConfAdapter::GetValidMList(int64_t dimension, std::vector<int64_t>& resset) {
|
||||
resset.clear();
|
||||
/*
|
||||
* Faiss 1.6
|
||||
* Only 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32 dims per sub-quantizer are currently supported with
|
||||
|
@ -189,19 +201,16 @@ IVFPQConfAdapter::CheckTrain(milvus::json& oricfg) {
|
|||
*/
|
||||
static std::vector<int64_t> support_dim_per_subquantizer{32, 28, 24, 20, 16, 12, 10, 8, 6, 4, 3, 2, 1};
|
||||
static std::vector<int64_t> support_subquantizer{96, 64, 56, 48, 40, 32, 28, 24, 20, 16, 12, 8, 4, 3, 2, 1};
|
||||
std::vector<int64_t> resset;
|
||||
|
||||
for (const auto& dimperquantizer : support_dim_per_subquantizer) {
|
||||
if (!(oricfg[knowhere::meta::DIM].get<int64_t>() % dimperquantizer)) {
|
||||
auto subquantzier_num = oricfg[knowhere::meta::DIM].get<int64_t>() / dimperquantizer;
|
||||
if (!(dimension % dimperquantizer)) {
|
||||
auto subquantzier_num = dimension / dimperquantizer;
|
||||
auto finder = std::find(support_subquantizer.begin(), support_subquantizer.end(), subquantzier_num);
|
||||
if (finder != support_subquantizer.end()) {
|
||||
resset.push_back(subquantzier_num);
|
||||
}
|
||||
}
|
||||
}
|
||||
CheckIntByValues(knowhere::IndexParams::m, resset);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "VecIndex.h"
|
||||
#include "utils/Json.h"
|
||||
|
@ -56,6 +57,9 @@ class IVFPQConfAdapter : public IVFConfAdapter {
|
|||
public:
|
||||
bool
|
||||
CheckTrain(milvus::json& oricfg) override;
|
||||
|
||||
static void
|
||||
GetValidMList(int64_t dimension, std::vector<int64_t>& resset);
|
||||
};
|
||||
|
||||
class NSGConfAdapter : public IVFConfAdapter {
|
||||
|
|
|
@ -551,6 +551,29 @@ TEST(ValidationUtilTest, VALIDATE_INDEX_PARAMS_TEST) {
|
|||
table_schema,
|
||||
(int32_t)milvus::engine::EngineType::NSG_MIX);
|
||||
ASSERT_TRUE(status.ok());
|
||||
|
||||
// special check for PQ 'm'
|
||||
json_params = {{"nlist", 32}, {"m", 4}};
|
||||
status =
|
||||
milvus::server::ValidationUtil::ValidateIndexParams(json_params,
|
||||
table_schema,
|
||||
(int32_t)milvus::engine::EngineType::FAISS_PQ);
|
||||
ASSERT_TRUE(status.ok());
|
||||
|
||||
json_params = {{"nlist", 32}, {"m", 3}};
|
||||
status =
|
||||
milvus::server::ValidationUtil::ValidateIndexParams(json_params,
|
||||
table_schema,
|
||||
(int32_t)milvus::engine::EngineType::FAISS_PQ);
|
||||
ASSERT_FALSE(status.ok());
|
||||
|
||||
table_schema.dimension_ = 99;
|
||||
json_params = {{"nlist", 32}, {"m", 4}};
|
||||
status =
|
||||
milvus::server::ValidationUtil::ValidateIndexParams(json_params,
|
||||
table_schema,
|
||||
(int32_t)milvus::engine::EngineType::FAISS_PQ);
|
||||
ASSERT_FALSE(status.ok());
|
||||
}
|
||||
|
||||
TEST(ValidationUtilTest, VALIDATE_SEARCH_PARAMS_TEST) {
|
||||
|
|
|
@ -450,11 +450,12 @@ class TestIndexBase:
|
|||
logging.getLogger().info(get_index)
|
||||
# status, ids = connect.add_vectors(collection, vectors)
|
||||
status = connect.create_index(collection, index_type, index_param)
|
||||
status, result = connect.describe_index(collection)
|
||||
logging.getLogger().info(result)
|
||||
assert result._params == index_param
|
||||
assert result._collection_name == collection
|
||||
assert result._index_type == index_type
|
||||
if status.OK():
|
||||
status, result = connect.describe_index(collection)
|
||||
logging.getLogger().info(result)
|
||||
assert result._params == index_param
|
||||
assert result._collection_name == collection
|
||||
assert result._index_type == index_type
|
||||
|
||||
def test_describe_and_drop_index_multi_collections(self, connect, get_simple_index):
|
||||
'''
|
||||
|
|
|
@ -1153,7 +1153,7 @@ class TestSearchParamsInvalid(object):
|
|||
if index_type in [IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H]:
|
||||
connect.create_index(collection, index_type, {"nlist": 16384})
|
||||
if (index_type == IndexType.IVF_PQ):
|
||||
connect.create_index(collection, index_type, {"nlist": 16384, "m": 10})
|
||||
connect.create_index(collection, index_type, {"nlist": 16384, "m": 16})
|
||||
if(index_type == IndexType.HNSW):
|
||||
connect.create_index(collection, index_type, {"M": 16, "efConstruction": 500})
|
||||
if (index_type == IndexType.RNSG):
|
||||
|
|
Loading…
Reference in New Issue