mirror of https://github.com/milvus-io/milvus.git
Optimize the process of indexing and querying (#4455)

* fix index
  Signed-off-by: shengjun.li <shengjun.li@zilliz.com>
* fix engine
  Signed-off-by: shengjun.li <shengjun.li@zilliz.com>
* fix ut
  Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

branch: pull/4486/head
parent e5d218f405
commit a07526c98e
@@ -8,6 +8,7 @@ Please mark all change in change log and use the issue from GitHub

## Feature

## Improvement
+- \#4454 Optimize the process of indexing and querying

## Task

@@ -73,12 +73,6 @@ enum class DataType {

class ExecutionEngine {
 public:
-    virtual Status
-    AddWithIds(int64_t n, const float* xdata, const int64_t* xids) = 0;
-
-    virtual Status
-    AddWithIds(int64_t n, const uint8_t* xdata, const int64_t* xids) = 0;
-
    virtual size_t
    Count() const = 0;

@@ -325,20 +325,6 @@ ExecutionEngineImpl::HybridUnset() const {
#endif
}

-Status
-ExecutionEngineImpl::AddWithIds(int64_t n, const float* xdata, const int64_t* xids) {
-    auto dataset = knowhere::GenDatasetWithIds(n, index_->Dim(), xdata, xids);
-    index_->Add(dataset, knowhere::Config());
-    return Status::OK();
-}
-
-Status
-ExecutionEngineImpl::AddWithIds(int64_t n, const uint8_t* xdata, const int64_t* xids) {
-    auto dataset = knowhere::GenDatasetWithIds(n, index_->Dim(), xdata, xids);
-    index_->Add(dataset, knowhere::Config());
-    return Status::OK();
-}
-
size_t
ExecutionEngineImpl::Count() const {
    if (index_ == nullptr) {

@@ -723,14 +709,12 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
    std::shared_ptr<std::vector<segment::doc_id_t>> uids;
    faiss::ConcurrentBitsetPtr blacklist;
    if (from_index) {
-        auto dataset =
-            knowhere::GenDatasetWithIds(Count(), Dimension(), from_index->GetRawVectors(), from_index->GetRawIds());
+        auto dataset = knowhere::GenDataset(Count(), Dimension(), from_index->GetRawVectors());
        to_index->BuildAll(dataset, conf);
        uids = from_index->GetUids();
        blacklist = from_index->GetBlacklist();
    } else if (bin_from_index) {
-        auto dataset = knowhere::GenDatasetWithIds(Count(), Dimension(), bin_from_index->GetRawVectors(),
-                                                   bin_from_index->GetRawIds());
+        auto dataset = knowhere::GenDataset(Count(), Dimension(), bin_from_index->GetRawVectors());
        to_index->BuildAll(dataset, conf);
        uids = bin_from_index->GetUids();
        blacklist = bin_from_index->GetBlacklist();

@@ -754,383 +738,16 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
}

void
-MapAndCopyResult(const knowhere::DatasetPtr& dataset, std::shared_ptr<std::vector<milvus::segment::doc_id_t>> uids,
-                 int64_t nq, int64_t k, float* distances, int64_t* labels) {
-    int64_t* res_ids = dataset->Get<int64_t*>(knowhere::meta::IDS);
+CopyResult(const knowhere::DatasetPtr& dataset, int64_t result_len, float* distances, int64_t* labels) {
    float* res_dist = dataset->Get<float*>(knowhere::meta::DISTANCE);

-    memcpy(distances, res_dist, sizeof(float) * nq * k);
-
-    /* map offsets to ids */
-    int64_t num = nq * k;
-    for (int64_t i = 0; i < num; ++i) {
-        int64_t offset = res_ids[i];
-        if (offset != -1) {
-            labels[i] = (*uids)[offset];
-        } else {
-            labels[i] = -1;
-        }
-    }
-
-    free(res_ids);
+    memcpy(distances, res_dist, sizeof(float) * result_len);
    free(res_dist);
+
+    int64_t* res_ids = dataset->Get<int64_t*>(knowhere::meta::IDS);
+    memcpy(labels, res_ids, sizeof(int64_t) * result_len);
+    free(res_ids);
}

#if 0
|
||||
template <typename T>
|
||||
void
|
||||
ProcessRangeQuery(std::vector<T> data, T value, query::CompareOperator type, faiss::ConcurrentBitsetPtr& bitset) {
|
||||
switch (type) {
|
||||
case query::CompareOperator::LT: {
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
if (data[i] >= value) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case query::CompareOperator::LTE: {
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
if (data[i] > value) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case query::CompareOperator::GT: {
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
if (data[i] <= value) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case query::CompareOperator::GTE: {
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
if (data[i] < value) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case query::CompareOperator::EQ: {
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
if (data[i] != value) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case query::CompareOperator::NE: {
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
if (data[i] == value) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::ExecBinaryQuery(milvus::query::GeneralQueryPtr general_query, faiss::ConcurrentBitsetPtr bitset,
|
||||
std::unordered_map<std::string, DataType>& attr_type, uint64_t& nq, uint64_t& topk,
|
||||
std::vector<float>& distances, std::vector<int64_t>& labels) {
|
||||
if (bitset == nullptr) {
|
||||
bitset = std::make_shared<faiss::ConcurrentBitset>(vector_count_);
|
||||
}
|
||||
|
||||
if (general_query->leaf == nullptr) {
|
||||
Status status = Status::OK();
|
||||
if (general_query->bin->left_query != nullptr) {
|
||||
status = ExecBinaryQuery(general_query->bin->left_query, bitset, attr_type, nq, topk, distances, labels);
|
||||
}
|
||||
if (general_query->bin->right_query != nullptr) {
|
||||
status = ExecBinaryQuery(general_query->bin->right_query, bitset, attr_type, nq, topk, distances, labels);
|
||||
}
|
||||
return status;
|
||||
} else {
|
||||
if (general_query->leaf->term_query != nullptr) {
|
||||
// process attrs_data
|
||||
auto field_name = general_query->leaf->term_query->field_name;
|
||||
auto type = attr_type.at(field_name);
|
||||
auto size = attr_size_.at(field_name);
|
||||
switch (type) {
|
||||
case DataType::INT8: {
|
||||
std::vector<int8_t> data;
|
||||
data.resize(size / sizeof(int8_t));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
|
||||
std::vector<int8_t> term_value;
|
||||
auto term_size =
|
||||
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int8_t);
|
||||
term_value.resize(term_size);
|
||||
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
|
||||
term_size * sizeof(int8_t));
|
||||
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
bool value_in_term = false;
|
||||
for (auto query_value : term_value) {
|
||||
if (data[i] == query_value) {
|
||||
value_in_term = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!value_in_term) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
std::vector<int16_t> data;
|
||||
data.resize(size / sizeof(int16_t));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
std::vector<int16_t> term_value;
|
||||
auto term_size =
|
||||
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int16_t);
|
||||
term_value.resize(term_size);
|
||||
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
|
||||
term_size * sizeof(int16_t));
|
||||
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
bool value_in_term = false;
|
||||
for (auto query_value : term_value) {
|
||||
if (data[i] == query_value) {
|
||||
value_in_term = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!value_in_term) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
std::vector<int32_t> data;
|
||||
data.resize(size / sizeof(int32_t));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
|
||||
std::vector<int32_t> term_value;
|
||||
auto term_size =
|
||||
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int32_t);
|
||||
term_value.resize(term_size);
|
||||
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
|
||||
term_size * sizeof(int32_t));
|
||||
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
bool value_in_term = false;
|
||||
for (auto query_value : term_value) {
|
||||
if (data[i] == query_value) {
|
||||
value_in_term = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!value_in_term) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
std::vector<int64_t> data;
|
||||
data.resize(size / sizeof(int64_t));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
|
||||
std::vector<int64_t> term_value;
|
||||
auto term_size =
|
||||
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int64_t);
|
||||
term_value.resize(term_size);
|
||||
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
|
||||
term_size * sizeof(int64_t));
|
||||
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
bool value_in_term = false;
|
||||
for (auto query_value : term_value) {
|
||||
if (data[i] == query_value) {
|
||||
value_in_term = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!value_in_term) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
std::vector<float> data;
|
||||
data.resize(size / sizeof(float));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
|
||||
std::vector<float> term_value;
|
||||
auto term_size =
|
||||
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(float);
|
||||
term_value.resize(term_size);
|
||||
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
|
||||
term_size * sizeof(int64_t));
|
||||
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
bool value_in_term = false;
|
||||
for (auto query_value : term_value) {
|
||||
if (data[i] == query_value) {
|
||||
value_in_term = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!value_in_term) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
std::vector<double> data;
|
||||
data.resize(size / sizeof(double));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
|
||||
std::vector<double> term_value;
|
||||
auto term_size =
|
||||
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(double);
|
||||
term_value.resize(term_size);
|
||||
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
|
||||
term_size * sizeof(double));
|
||||
|
||||
for (uint64_t i = 0; i < data.size(); ++i) {
|
||||
bool value_in_term = false;
|
||||
for (auto query_value : term_value) {
|
||||
if (data[i] == query_value) {
|
||||
value_in_term = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!value_in_term) {
|
||||
if (!bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
if (general_query->leaf->range_query != nullptr) {
|
||||
auto field_name = general_query->leaf->range_query->field_name;
|
||||
auto com_expr = general_query->leaf->range_query->compare_expr;
|
||||
auto type = attr_type.at(field_name);
|
||||
auto size = attr_size_.at(field_name);
|
||||
for (uint64_t j = 0; j < com_expr.size(); ++j) {
|
||||
auto operand = com_expr[j].operand;
|
||||
switch (type) {
|
||||
case DataType::INT8: {
|
||||
std::vector<int8_t> data;
|
||||
data.resize(size / sizeof(int8_t));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
int8_t value = atoi(operand.c_str());
|
||||
ProcessRangeQuery<int8_t>(data, value, com_expr[j].compare_operator, bitset);
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
std::vector<int16_t> data;
|
||||
data.resize(size / sizeof(int16_t));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
int16_t value = atoi(operand.c_str());
|
||||
ProcessRangeQuery<int16_t>(data, value, com_expr[j].compare_operator, bitset);
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
std::vector<int32_t> data;
|
||||
data.resize(size / sizeof(int32_t));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
int32_t value = atoi(operand.c_str());
|
||||
ProcessRangeQuery<int32_t>(data, value, com_expr[j].compare_operator, bitset);
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
std::vector<int64_t> data;
|
||||
data.resize(size / sizeof(int64_t));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
int64_t value = atoi(operand.c_str());
|
||||
ProcessRangeQuery<int64_t>(data, value, com_expr[j].compare_operator, bitset);
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
std::vector<float> data;
|
||||
data.resize(size / sizeof(float));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
std::istringstream iss(operand);
|
||||
double value;
|
||||
iss >> value;
|
||||
ProcessRangeQuery<float>(data, value, com_expr[j].compare_operator, bitset);
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
std::vector<double> data;
|
||||
data.resize(size / sizeof(double));
|
||||
memcpy(data.data(), attr_data_.at(field_name).data(), size);
|
||||
std::istringstream iss(operand);
|
||||
double value;
|
||||
iss >> value;
|
||||
ProcessRangeQuery<double>(data, value, com_expr[j].compare_operator, bitset);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
if (general_query->leaf->vector_query != nullptr) {
|
||||
// Do search
|
||||
faiss::ConcurrentBitsetPtr list;
|
||||
list = index_->GetBlacklist();
|
||||
// Do OR
|
||||
for (int64_t i = 0; i < vector_count_; ++i) {
|
||||
if (list->test(i) || bitset->test(i)) {
|
||||
bitset->set(i);
|
||||
}
|
||||
}
|
||||
index_->SetBlacklist(bitset);
|
||||
auto vector_query = general_query->leaf->vector_query;
|
||||
topk = vector_query->topk;
|
||||
nq = vector_query->query_vector.float_data.size() / dim_;
|
||||
|
||||
distances.resize(nq * topk);
|
||||
labels.resize(nq * topk);
|
||||
|
||||
return Search(nq, vector_query->query_vector.float_data.data(), topk, vector_query->extra_params,
|
||||
distances.data(), labels.data());
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, const milvus::json& extra_params, float* distances,
|
||||
int64_t* labels, bool hybrid) {
|
||||
|
@ -1163,8 +780,8 @@ ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, const milvu
|
|||
|
||||
    LOG_ENGINE_DEBUG_ << LogOut("[%s][%ld] get %ld uids from index %s", "search", 0, index_->GetUids()->size(),
                                location_.c_str());
-    MapAndCopyResult(result, index_->GetUids(), n, k, distances, labels);
-    rc.RecordSection("map uids " + std::to_string(n * k));
+    CopyResult(result, n * k, distances, labels);
+    rc.RecordSection("copy result " + std::to_string(n * k));

if (hybrid) {
|
||||
HybridUnset();
|
||||
|
@ -1204,8 +821,8 @@ ExecutionEngineImpl::Search(int64_t n, const uint8_t* data, int64_t k, const mil
|
|||
|
||||
    LOG_ENGINE_DEBUG_ << LogOut("[%s][%ld] get %ld uids from index %s", "search", 0, index_->GetUids()->size(),
                                location_.c_str());
-    MapAndCopyResult(result, index_->GetUids(), n, k, distances, labels);
-    rc.RecordSection("map uids " + std::to_string(n * k));
+    CopyResult(result, n * k, distances, labels);
+    rc.RecordSection("copy result " + std::to_string(n * k));

if (hybrid) {
|
||||
HybridUnset();
|
||||
|
@ -1214,60 +831,6 @@ ExecutionEngineImpl::Search(int64_t n, const uint8_t* data, int64_t k, const mil
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
#if 0
|
||||
Status
|
||||
ExecutionEngineImpl::GetVectorByID(const int64_t id, float* vector, bool hybrid) {
|
||||
if (index_ == nullptr) {
|
||||
LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to search";
|
||||
return Status(DB_ERROR, "index is null");
|
||||
}
|
||||
|
||||
if (hybrid) {
|
||||
HybridLoad();
|
||||
}
|
||||
|
||||
// Only one id for now
|
||||
std::vector<int64_t> ids{id};
|
||||
auto dataset = knowhere::GenDatasetWithIds(1, index_->Dim(), nullptr, ids.data());
|
||||
auto result = index_->GetVectorById(dataset, knowhere::Config());
|
||||
float* res_vec = (float*)(result->Get<void*>(knowhere::meta::TENSOR));
|
||||
memcpy(vector, res_vec, sizeof(float) * 1 * index_->Dim());
|
||||
|
||||
if (hybrid) {
|
||||
HybridUnset();
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) {
|
||||
if (index_ == nullptr) {
|
||||
LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to search";
|
||||
return Status(DB_ERROR, "index is null");
|
||||
}
|
||||
|
||||
LOG_ENGINE_DEBUG_ << "Get binary vector by id: " << id;
|
||||
|
||||
if (hybrid) {
|
||||
HybridLoad();
|
||||
}
|
||||
|
||||
// Only one id for now
|
||||
std::vector<int64_t> ids{id};
|
||||
auto dataset = knowhere::GenDatasetWithIds(1, index_->Dim(), nullptr, ids.data());
|
||||
auto result = index_->GetVectorById(dataset, knowhere::Config());
|
||||
uint8_t* res_vec = (uint8_t*)(result->Get<void*>(knowhere::meta::TENSOR));
|
||||
memcpy(vector, res_vec, sizeof(uint8_t) * 1 * index_->Dim());
|
||||
|
||||
if (hybrid) {
|
||||
HybridUnset();
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::Cache() {
|
||||
auto cpu_cache_mgr = milvus::cache::CpuCacheMgr::GetInstance();
|
||||
|
@ -1275,6 +838,7 @@ ExecutionEngineImpl::Cache() {
|
|||
cpu_cache_mgr->InsertItem(location_, obj);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::FpgaCache() {
|
||||
#ifdef MILVUS_FPGA_VERSION
|
||||
|
@ -1284,6 +848,7 @@ ExecutionEngineImpl::FpgaCache() {
|
|||
#endif
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// TODO(linxj): remove.
|
||||
Status
|
||||
ExecutionEngineImpl::Init() {
|
||||
|
|
|
@ -33,12 +33,6 @@ class ExecutionEngineImpl : public ExecutionEngine {
|
|||
ExecutionEngineImpl(knowhere::VecIndexPtr index, const std::string& location, EngineType index_type,
|
||||
MetricType metric_type, const milvus::json& index_params);
|
||||
|
||||
Status
|
||||
AddWithIds(int64_t n, const float* xdata, const int64_t* xids) override;
|
||||
|
||||
Status
|
||||
AddWithIds(int64_t n, const uint8_t* xdata, const int64_t* xids) override;
|
||||
|
||||
size_t
|
||||
Count() const override;
|
||||
|
||||
|
|
|
@ -89,7 +89,7 @@ IndexAnnoy::BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
return;
|
||||
}
|
||||
|
||||
GETTENSORWITHIDS(dataset_ptr)
|
||||
GETTENSOR(dataset_ptr)
|
||||
|
||||
metric_type_ = config[Metric::TYPE];
|
||||
if (metric_type_ == Metric::L2) {
|
||||
|
@ -101,7 +101,7 @@ IndexAnnoy::BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
}
|
||||
|
||||
for (int i = 0; i < rows; ++i) {
|
||||
index_->add_item(p_ids[i], (const float*)p_data + dim * i);
|
||||
index_->add_item(i, (const float*)p_data + dim * i);
|
||||
}
|
||||
|
||||
index_->build(config[IndexParams::n_trees].get<int64_t>());
|
||||
|
@ -129,11 +129,14 @@ IndexAnnoy::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
distances.reserve(k);
|
||||
index_->get_nns_by_vector((const float*)p_data + i * dim, k, search_k, &result, &distances, blacklist);
|
||||
|
||||
int64_t result_num = result.size();
|
||||
size_t result_num = result.size();
|
||||
auto local_p_id = p_id + k * i;
|
||||
auto local_p_dist = p_dist + k * i;
|
||||
memcpy(local_p_id, result.data(), result_num * sizeof(int64_t));
|
||||
memcpy(local_p_dist, distances.data(), result_num * sizeof(float));
|
||||
|
||||
MapOffsetToUid(local_p_id, result_num);
|
||||
|
||||
for (; result_num < k; result_num++) {
|
||||
local_p_id[result_num] = -1;
|
||||
local_p_dist[result_num] = 1.0 / 0.0;
|
||||
|
|
|
@ -43,11 +43,6 @@ class IndexAnnoy : public VecIndex {
|
|||
KNOWHERE_THROW_MSG("Annoy not support build item dynamically, please invoke BuildAll interface.");
|
||||
}
|
||||
|
||||
void
|
||||
Add(const DatasetPtr& dataset_ptr, const Config& config) override {
|
||||
KNOWHERE_THROW_MSG("Annoy not support add item dynamically, please invoke BuildAll interface.");
|
||||
}
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override {
|
||||
KNOWHERE_THROW_MSG("Incremental index is not supported");
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
|
||||
#include <faiss/IndexBinaryFlat.h>
|
||||
#include <faiss/MetaIndexes.h>
|
||||
#include <faiss/index_factory.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
|
@ -77,48 +76,6 @@ BinaryIDMAP::Dim() {
|
|||
return index_->d;
|
||||
}
|
||||
|
||||
void
|
||||
BinaryIDMAP::Add(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_) {
|
||||
KNOWHERE_THROW_MSG("index not initialize");
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lk(mutex_);
|
||||
GETTENSORWITHIDS(dataset_ptr)
|
||||
|
||||
index_->add_with_ids(rows, (uint8_t*)p_data, p_ids);
|
||||
}
|
||||
|
||||
void
|
||||
BinaryIDMAP::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
const char* desc = "BFlat";
|
||||
int64_t dim = config[meta::DIM].get<int64_t>();
|
||||
faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
|
||||
auto index = faiss::index_binary_factory(dim, desc, metric_type);
|
||||
index_.reset(index);
|
||||
}
|
||||
|
||||
const uint8_t*
|
||||
BinaryIDMAP::GetRawVectors() {
|
||||
try {
|
||||
auto file_index = dynamic_cast<faiss::IndexBinaryIDMap*>(index_.get());
|
||||
auto flat_index = dynamic_cast<faiss::IndexBinaryFlat*>(file_index->index);
|
||||
return flat_index->xb.data();
|
||||
} catch (std::exception& e) {
|
||||
KNOWHERE_THROW_MSG(e.what());
|
||||
}
|
||||
}
|
||||
|
||||
const int64_t*
|
||||
BinaryIDMAP::GetRawIds() {
|
||||
try {
|
||||
auto file_index = dynamic_cast<faiss::IndexBinaryIDMap*>(index_.get());
|
||||
return file_index->id_map.data();
|
||||
} catch (std::exception& e) {
|
||||
KNOWHERE_THROW_MSG(e.what());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BinaryIDMAP::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_) {
|
||||
|
@ -128,34 +85,48 @@ BinaryIDMAP::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config)
|
|||
    std::lock_guard<std::mutex> lk(mutex_);
    GETTENSOR(dataset_ptr)

-    std::vector<int64_t> new_ids(rows);
-    for (int i = 0; i < rows; ++i) {
-        new_ids[i] = i;
-    }
-
-    index_->add_with_ids(rows, (uint8_t*)p_data, new_ids.data());
+    index_->add(rows, (uint8_t*)p_data);
}

+void
+BinaryIDMAP::Train(const DatasetPtr& dataset_ptr, const Config& config) {
+    int64_t dim = config[meta::DIM].get<int64_t>();
+    faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
+    auto index = std::make_shared<faiss::IndexBinaryFlat>(dim, metric_type);
+    index_ = index;
+}
+
const uint8_t*
|
||||
BinaryIDMAP::GetRawVectors() {
|
||||
try {
|
||||
auto flat_index = dynamic_cast<faiss::IndexBinaryFlat*>(index_.get());
|
||||
return flat_index->xb.data();
|
||||
} catch (std::exception& e) {
|
||||
KNOWHERE_THROW_MSG(e.what());
|
||||
}
|
||||
}
|
||||
|
||||
void
BinaryIDMAP::QueryImpl(int64_t n, const uint8_t* data, int64_t k, float* distances, int64_t* labels,
                       const Config& config) {
-    auto flat_index = dynamic_cast<faiss::IndexBinaryIDMap*>(index_.get())->index;
-    auto default_type = flat_index->metric_type;
+    auto default_type = index_->metric_type;
    if (config.contains(Metric::TYPE))
-        flat_index->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
+        index_->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());

    int32_t* i_distances = reinterpret_cast<int32_t*>(distances);
-    flat_index->search(n, (uint8_t*)data, k, i_distances, labels, GetBlacklist());
+    index_->search(n, (uint8_t*)data, k, i_distances, labels, GetBlacklist());

    // if hamming, it need transform int32 to float
-    if (flat_index->metric_type == faiss::METRIC_Hamming) {
+    if (index_->metric_type == faiss::METRIC_Hamming) {
        int64_t num = n * k;
        for (int64_t i = 0; i < num; i++) {
            distances[i] = static_cast<float>(i_distances[i]);
        }
    }

-    flat_index->metric_type = default_type;
+    index_->metric_type = default_type;
+
+    MapOffsetToUid(labels, static_cast<size_t>(n * k));
}
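A side note on the pattern above: the caller passes a float distance buffer, the binary index writes int32 Hamming distances into that same memory, and the loop widens the values to float in place. A small self-contained C++ sketch of that buffer reuse, independent of faiss and with made-up values, mirroring the aliasing trick the index uses:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
    const int64_t n = 1, k = 4;
    // Caller-supplied float buffer; a binary search would write int32 distances here.
    std::vector<float> distances(n * k);
    int32_t* i_distances = reinterpret_cast<int32_t*>(distances.data());
    int32_t fake_result[4] = {0, 3, 7, 12};               // pretend Hamming output
    std::memcpy(i_distances, fake_result, sizeof(fake_result));

    // Widen in place: each slot is read as int32 and rewritten as float.
    for (int64_t i = 0; i < n * k; ++i) {
        distances[i] = static_cast<float>(i_distances[i]);
    }
    for (float d : distances) std::printf("%.1f ", d);    // prints 0.0 3.0 7.0 12.0
    std::printf("\n");
}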
|
||||
|
||||
} // namespace knowhere
|
||||
|
|
|
@ -41,9 +41,6 @@ class BinaryIDMAP : public VecIndex, public FaissBaseBinaryIndex {
|
|||
void
|
||||
Train(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override;
|
||||
|
||||
|
@ -64,9 +61,6 @@ class BinaryIDMAP : public VecIndex, public FaissBaseBinaryIndex {
|
|||
virtual const uint8_t*
|
||||
GetRawVectors();
|
||||
|
||||
virtual const int64_t*
|
||||
GetRawIds();
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
QueryImpl(int64_t n, const uint8_t* data, int64_t k, float* distances, int64_t* labels, const Config& config);
|
||||
|
|
|
@ -73,52 +73,6 @@ BinaryIVF::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
DatasetPtr
|
||||
BinaryIVF::QueryById(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_ || !index_->is_trained) {
|
||||
KNOWHERE_THROW_MSG("index not initialize or trained");
|
||||
}
|
||||
|
||||
auto rows = dataset_ptr->Get<int64_t>(meta::ROWS);
|
||||
auto p_data = dataset_ptr->Get<const int64_t*>(meta::IDS);
|
||||
|
||||
try {
|
||||
int64_t k = config[meta::TOPK].get<int64_t>();
|
||||
auto elems = rows * k;
|
||||
|
||||
size_t p_id_size = sizeof(int64_t) * elems;
|
||||
size_t p_dist_size = sizeof(float) * elems;
|
||||
auto p_id = (int64_t*)malloc(p_id_size);
|
||||
auto p_dist = (float*)malloc(p_dist_size);
|
||||
|
||||
int32_t* pdistances = (int32_t*)p_dist;
|
||||
index_->search_by_id(rows, p_data, k, pdistances, p_id, bitset_);
|
||||
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
if (index_->metric_type == faiss::METRIC_Hamming) {
|
||||
auto pf_dist = (float*)malloc(p_dist_size);
|
||||
int32_t* pi_dist = (int32_t*)p_dist;
|
||||
for (int i = 0; i < elems; i++) {
|
||||
*(pf_dist + i) = (float)(*(pi_dist + i));
|
||||
}
|
||||
ret_ds->Set(meta::IDS, p_id);
|
||||
ret_ds->Set(meta::DISTANCE, pf_dist);
|
||||
free(p_dist);
|
||||
} else {
|
||||
ret_ds->Set(meta::IDS, p_id);
|
||||
ret_ds->Set(meta::DISTANCE, p_dist);
|
||||
}
|
||||
|
||||
return ret_ds;
|
||||
} catch (faiss::FaissException& e) {
|
||||
KNOWHERE_THROW_MSG(e.what());
|
||||
} catch (std::exception& e) {
|
||||
KNOWHERE_THROW_MSG(e.what());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int64_t
|
||||
BinaryIVF::Count() {
|
||||
if (!index_) {
|
||||
|
@ -151,7 +105,7 @@ BinaryIVF::UpdateIndexSize() {
|
|||
|
||||
void
|
||||
BinaryIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
GETTENSORWITHIDS(dataset_ptr)
|
||||
GETTENSOR(dataset_ptr)
|
||||
|
||||
int64_t nlist = config[IndexParams::nlist];
|
||||
faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
|
||||
|
@ -159,7 +113,7 @@ BinaryIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
auto index = std::make_shared<faiss::IndexBinaryIVF>(coarse_quantizer, dim, nlist, metric_type);
|
||||
index->own_fields = true;
|
||||
index->train(rows, static_cast<const uint8_t*>(p_data));
|
||||
index->add_with_ids(rows, static_cast<const uint8_t*>(p_data), p_ids);
|
||||
index->add(rows, static_cast<const uint8_t*>(p_data));
|
||||
index_ = index;
|
||||
}
|
||||
|
||||
|
@ -226,6 +180,8 @@ BinaryIVF::QueryImpl(int64_t n, const uint8_t* data, int64_t k, float* distances
|
|||
distances[i] = static_cast<float>(i_distances[i]);
|
||||
}
|
||||
}
|
||||
|
||||
MapOffsetToUid(labels, static_cast<size_t>(n * k));
|
||||
}
|
||||
|
||||
} // namespace knowhere
|
||||
|
|
|
@ -49,11 +49,6 @@ class BinaryIVF : public VecIndex, public FaissBaseBinaryIndex {
|
|||
void
|
||||
Train(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr& dataset_ptr, const Config& config) override {
|
||||
KNOWHERE_THROW_MSG("not support yet");
|
||||
}
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override {
|
||||
KNOWHERE_THROW_MSG("AddWithoutIds is not supported");
|
||||
|
@ -62,11 +57,6 @@ class BinaryIVF : public VecIndex, public FaissBaseBinaryIndex {
|
|||
DatasetPtr
|
||||
Query(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
|
||||
#if 0
|
||||
DatasetPtr
|
||||
QueryById(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
#endif
|
||||
|
||||
int64_t
|
||||
Count() override;
|
||||
|
||||
|
@ -76,11 +66,6 @@ class BinaryIVF : public VecIndex, public FaissBaseBinaryIndex {
|
|||
void
|
||||
UpdateIndexSize() override;
|
||||
|
||||
#if 0
|
||||
DatasetPtr
|
||||
GetVectorById(const DatasetPtr& dataset_ptr, const Config& config);
|
||||
#endif
|
||||
|
||||
protected:
|
||||
virtual std::shared_ptr<faiss::IVFSearchParameters>
|
||||
GenParams(const Config& config);
|
||||
|
|
|
@ -95,38 +95,20 @@ IndexHNSW::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
}
|
||||
|
||||
void
|
||||
IndexHNSW::Add(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
IndexHNSW::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_) {
|
||||
KNOWHERE_THROW_MSG("index not initialize");
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lk(mutex_);
|
||||
|
||||
GETTENSORWITHIDS(dataset_ptr)
|
||||
GETTENSOR(dataset_ptr)
|
||||
|
||||
// if (normalize) {
|
||||
// std::vector<float> ep_norm_vector(Dim());
|
||||
// normalize_vector((float*)(p_data), ep_norm_vector.data(), Dim());
|
||||
// index_->addPoint((void*)(ep_norm_vector.data()), p_ids[0]);
|
||||
// #pragma omp parallel for
|
||||
// for (int i = 1; i < rows; ++i) {
|
||||
// std::vector<float> norm_vector(Dim());
|
||||
// normalize_vector((float*)(p_data + Dim() * i), norm_vector.data(), Dim());
|
||||
// index_->addPoint((void*)(norm_vector.data()), p_ids[i]);
|
||||
// }
|
||||
// } else {
|
||||
// index_->addPoint((void*)(p_data), p_ids[0]);
|
||||
// #pragma omp parallel for
|
||||
// for (int i = 1; i < rows; ++i) {
|
||||
// index_->addPoint((void*)(p_data + Dim() * i), p_ids[i]);
|
||||
// }
|
||||
// }
|
||||
|
||||
index_->addPoint(p_data, p_ids[0]);
|
||||
index_->addPoint(p_data, 0);
|
||||
#pragma omp parallel for
|
||||
for (int i = 1; i < rows; ++i) {
|
||||
faiss::BuilderSuspend::check_wait();
|
||||
index_->addPoint(((float*)p_data + Dim() * i), p_ids[i]);
|
||||
index_->addPoint(((float*)p_data + Dim() * i), i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -154,13 +136,6 @@ IndexHNSW::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
std::vector<P> ret;
|
||||
const float* single_query = (float*)p_data + i * Dim();
|
||||
|
||||
// if (normalize) {
|
||||
// std::vector<float> norm_vector(Dim());
|
||||
// normalize_vector((float*)(single_query), norm_vector.data(), Dim());
|
||||
// ret = index_->searchKnn((float*)(norm_vector.data()), config[meta::TOPK].get<int64_t>(), compare);
|
||||
// } else {
|
||||
// ret = index_->searchKnn((float*)single_query, config[meta::TOPK].get<int64_t>(), compare);
|
||||
// }
|
||||
ret = index_->searchKnn((float*)single_query, k, compare, blacklist);
|
||||
|
||||
while (ret.size() < k) {
|
||||
|
@ -179,6 +154,7 @@ IndexHNSW::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
std::transform(ret.begin(), ret.end(), std::back_inserter(ids),
|
||||
[](const std::pair<float, int64_t>& e) { return e.second; });
|
||||
|
||||
MapOffsetToUid(ids.data(), ids.size());
|
||||
memcpy(p_dist + i * k, dist.data(), dist_size);
|
||||
memcpy(p_id + i * k, ids.data(), id_size);
|
||||
}
|
||||
|
|
|
@ -38,12 +38,7 @@ class IndexHNSW : public VecIndex {
|
|||
Train(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override {
|
||||
KNOWHERE_THROW_MSG("Incremental index is not supported");
|
||||
}
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override;
|
||||
|
||||
DatasetPtr
|
||||
Query(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
#include <faiss/IndexFlat.h>
|
||||
#include <faiss/MetaIndexes.h>
|
||||
#include <faiss/clone_index.h>
|
||||
#include <faiss/index_factory.h>
|
||||
#include <faiss/index_io.h>
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include <faiss/gpu/GpuCloner.h>
|
||||
|
@ -54,22 +53,10 @@ IDMAP::Load(const BinarySet& binary_set) {
|
|||
|
||||
void
IDMAP::Train(const DatasetPtr& dataset_ptr, const Config& config) {
-    const char* desc = "IDMap,Flat";
    int64_t dim = config[meta::DIM].get<int64_t>();
    faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
-    auto index = faiss::index_factory(dim, desc, metric_type);
-    index_.reset(index);
-}
-
-void
-IDMAP::Add(const DatasetPtr& dataset_ptr, const Config& config) {
-    if (!index_) {
-        KNOWHERE_THROW_MSG("index not initialize");
-    }
-
-    std::lock_guard<std::mutex> lk(mutex_);
-    GETTENSORWITHIDS(dataset_ptr)
-    index_->add_with_ids(rows, (float*)p_data, p_ids);
+    auto index = std::make_shared<faiss::IndexFlat>(dim, metric_type);
+    index_ = index;
}

void
|
||||
|
@@ -79,16 +66,8 @@ IDMAP::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config)
    }

    std::lock_guard<std::mutex> lk(mutex_);
-    auto rows = dataset_ptr->Get<int64_t>(meta::ROWS);
-    auto p_data = dataset_ptr->Get<const void*>(meta::TENSOR);
-
-    // TODO: caiyd need check
-    std::vector<int64_t> new_ids(rows);
-    for (int i = 0; i < rows; ++i) {
-        new_ids[i] = i;
-    }
-
-    index_->add_with_ids(rows, (float*)p_data, new_ids.data());
+    GETTENSOR(dataset_ptr)
+    index_->add(rows, (float*)p_data);
}
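The removed new_ids loop is unnecessary because a plain faiss index numbers vectors 0..ntotal-1 itself as they are added; those implicit offsets are what MapOffsetToUid later rewrites to user IDs. A minimal faiss sketch of that behaviour (assumes faiss is installed; this is illustration, not code from the patch):

#include <cstdio>
#include <cstdint>
#include <vector>
#include <faiss/IndexFlat.h>

int main() {
    const int d = 4, nb = 3, k = 2;
    std::vector<float> xb(nb * d, 0.0f);
    xb[0 * d] = 1.0f; xb[1 * d] = 2.0f; xb[2 * d] = 3.0f;  // make the rows distinct

    faiss::IndexFlatL2 index(d);
    index.add(nb, xb.data());                    // ids are implicitly 0, 1, 2

    std::vector<float> dist(k);
    std::vector<int64_t> labels(k);
    index.search(1, xb.data(), k, dist.data(), labels.data());
    std::printf("nearest offsets: %lld %lld\n", (long long)labels[0], (long long)labels[1]);
}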
|
||||
|
||||
DatasetPtr
|
||||
|
@ -105,7 +84,6 @@ IDMAP::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
auto p_id = (int64_t*)malloc(p_id_size);
|
||||
auto p_dist = (float*)malloc(p_dist_size);
|
||||
|
||||
// QueryImpl(rows, (float*)p_data, k, p_dist, p_id, Config());
|
||||
QueryImpl(rows, (float*)p_data, k, p_dist, p_id, config);
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
ret_ds->Set(meta::IDS, p_id);
|
||||
|
@ -113,35 +91,6 @@ IDMAP::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
return ret_ds;
|
||||
}
|
||||
|
||||
#if 0
|
||||
DatasetPtr
|
||||
IDMAP::QueryById(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_) {
|
||||
KNOWHERE_THROW_MSG("index not initialize");
|
||||
}
|
||||
// GETTENSOR(dataset)
|
||||
auto rows = dataset_ptr->Get<int64_t>(meta::ROWS);
|
||||
auto p_data = dataset_ptr->Get<const int64_t*>(meta::IDS);
|
||||
|
||||
int64_t k = config[meta::TOPK].get<int64_t>();
|
||||
auto elems = rows * k;
|
||||
size_t p_id_size = sizeof(int64_t) * elems;
|
||||
size_t p_dist_size = sizeof(float) * elems;
|
||||
auto p_id = (int64_t*)malloc(p_id_size);
|
||||
auto p_dist = (float*)malloc(p_dist_size);
|
||||
|
||||
// todo: enable search by id (zhiru)
|
||||
// auto blacklist = dataset_ptr->Get<faiss::ConcurrentBitsetPtr>("bitset");
|
||||
// index_->searchById(rows, (float*)p_data, config[meta::TOPK].get<int64_t>(), p_dist, p_id, blacklist);
|
||||
index_->search_by_id(rows, p_data, k, p_dist, p_id, bitset_);
|
||||
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
ret_ds->Set(meta::IDS, p_id);
|
||||
ret_ds->Set(meta::DISTANCE, p_dist);
|
||||
return ret_ds;
|
||||
}
|
||||
#endif
|
||||
|
||||
int64_t
|
||||
IDMAP::Count() {
|
||||
if (!index_) {
|
||||
|
@ -179,54 +128,22 @@ IDMAP::CopyCpuToGpu(const int64_t device_id, const Config& config) {
|
|||
const float*
|
||||
IDMAP::GetRawVectors() {
|
||||
try {
|
||||
auto file_index = dynamic_cast<faiss::IndexIDMap*>(index_.get());
|
||||
auto flat_index = dynamic_cast<faiss::IndexFlat*>(file_index->index);
|
||||
auto flat_index = dynamic_cast<faiss::IndexFlat*>(index_.get());
|
||||
return flat_index->xb.data();
|
||||
} catch (std::exception& e) {
|
||||
KNOWHERE_THROW_MSG(e.what());
|
||||
}
|
||||
}
|
||||
|
||||
const int64_t*
|
||||
IDMAP::GetRawIds() {
|
||||
try {
|
||||
auto file_index = dynamic_cast<faiss::IndexIDMap*>(index_.get());
|
||||
return file_index->id_map.data();
|
||||
} catch (std::exception& e) {
|
||||
KNOWHERE_THROW_MSG(e.what());
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
DatasetPtr
|
||||
IDMAP::GetVectorById(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_) {
|
||||
KNOWHERE_THROW_MSG("index not initialize");
|
||||
}
|
||||
// GETTENSOR(dataset)
|
||||
// auto rows = dataset_ptr->Get<int64_t>(meta::ROWS);
|
||||
auto p_data = dataset_ptr->Get<const int64_t*>(meta::IDS);
|
||||
auto elems = dataset_ptr->Get<int64_t>(meta::DIM);
|
||||
|
||||
size_t p_x_size = sizeof(float) * elems;
|
||||
auto p_x = (float*)malloc(p_x_size);
|
||||
|
||||
index_->get_vector_by_id(1, p_data, p_x, bitset_);
|
||||
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
ret_ds->Set(meta::TENSOR, p_x);
|
||||
return ret_ds;
|
||||
}
|
||||
#endif
|
||||
|
||||
void
IDMAP::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& config) {
-    auto flat_index = dynamic_cast<faiss::IndexIDMap*>(index_.get())->index;
-    auto default_type = flat_index->metric_type;
+    auto default_type = index_->metric_type;
    if (config.contains(Metric::TYPE))
-        flat_index->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
-    flat_index->search(n, (float*)data, k, distances, labels, GetBlacklist());
-    flat_index->metric_type = default_type;
+        index_->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
+    index_->search(n, (float*)data, k, distances, labels, GetBlacklist());
+    index_->metric_type = default_type;
+
+    MapOffsetToUid(labels, static_cast<size_t>(n * k));
}
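QueryImpl temporarily overrides the flat index's metric for a single search and restores it afterwards; with the IndexIDMap wrapper gone it can poke index_ directly. A standalone faiss sketch of the same save/override/restore pattern (assumes faiss; the function name is hypothetical, not project code):

#include <cstdint>
#include <faiss/IndexFlat.h>

// Run one search with a caller-requested metric, then restore the index's default
// so concurrent or later queries keep their configured behaviour.
void SearchWithMetricOverride(faiss::IndexFlat& index, int64_t nq, const float* queries,
                              int64_t k, float* distances, int64_t* labels,
                              faiss::MetricType requested) {
    auto saved = index.metric_type;    // remember the default metric
    index.metric_type = requested;     // e.g. faiss::METRIC_INNER_PRODUCT
    index.search(nq, queries, k, distances, labels);
    index.metric_type = saved;         // restore it
}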
|
||||
|
||||
} // namespace knowhere
|
||||
|
|
|
@ -39,20 +39,12 @@ class IDMAP : public VecIndex, public FaissBaseIndex {
|
|||
void
|
||||
Train(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override;
|
||||
|
||||
DatasetPtr
|
||||
Query(const DatasetPtr&, const Config&) override;
|
||||
|
||||
#if 0
|
||||
DatasetPtr
|
||||
QueryById(const DatasetPtr& dataset, const Config& config) override;
|
||||
#endif
|
||||
|
||||
int64_t
|
||||
Count() override;
|
||||
|
||||
|
@ -64,20 +56,12 @@ class IDMAP : public VecIndex, public FaissBaseIndex {
|
|||
return Count() * Dim() * sizeof(FloatType);
|
||||
}
|
||||
|
||||
#if 0
|
||||
DatasetPtr
|
||||
GetVectorById(const DatasetPtr& dataset, const Config& config) override;
|
||||
#endif
|
||||
|
||||
VecIndexPtr
|
||||
CopyCpuToGpu(const int64_t, const Config&);
|
||||
|
||||
virtual const float*
|
||||
GetRawVectors();
|
||||
|
||||
virtual const int64_t*
|
||||
GetRawIds();
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
QueryImpl(int64_t, const float*, int64_t, float*, int64_t*, const Config&);
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#include <faiss/IndexIVFFlat.h>
|
||||
#include <faiss/IndexIVFPQ.h>
|
||||
#include <faiss/clone_index.h>
|
||||
#include <faiss/index_factory.h>
|
||||
#include <faiss/index_io.h>
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
#include <faiss/gpu/GpuAutoTune.h>
|
||||
|
@ -77,17 +76,6 @@ IVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
index_ = index;
|
||||
}
|
||||
|
||||
void
|
||||
IVF::Add(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_ || !index_->is_trained) {
|
||||
KNOWHERE_THROW_MSG("index not initialize or trained");
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lk(mutex_);
|
||||
GETTENSORWITHIDS(dataset_ptr)
|
||||
index_->add_with_ids(rows, (float*)p_data, p_ids);
|
||||
}
|
||||
|
||||
void
|
||||
IVF::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (!index_ || !index_->is_trained) {
|
||||
|
@ -120,19 +108,6 @@ IVF::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
|
||||
QueryImpl(rows, (float*)p_data, k, p_dist, p_id, config);
|
||||
|
||||
// std::stringstream ss_res_id, ss_res_dist;
|
||||
// for (int i = 0; i < 10; ++i) {
|
||||
// printf("%llu", p_id[i]);
|
||||
// printf("\n");
|
||||
// printf("%.6f", p_dist[i]);
|
||||
// printf("\n");
|
||||
// ss_res_id << p_id[i] << " ";
|
||||
// ss_res_dist << p_dist[i] << " ";
|
||||
// }
|
||||
// std::cout << std::endl << "after search: " << std::endl;
|
||||
// std::cout << ss_res_id.str() << std::endl;
|
||||
// std::cout << ss_res_dist.str() << std::endl << std::endl;
|
||||
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
ret_ds->Set(meta::IDS, p_id);
|
||||
ret_ds->Set(meta::DISTANCE, p_dist);
|
||||
|
@ -339,6 +314,8 @@ IVF::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int64_
|
|||
<< ", data search cost: " << faiss::indexIVF_stats.search_time;
|
||||
faiss::indexIVF_stats.quantization_time = 0;
|
||||
faiss::indexIVF_stats.search_time = 0;
|
||||
|
||||
MapOffsetToUid(labels, static_cast<size_t>(n * k));
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -44,9 +44,6 @@ class IVF : public VecIndex, public FaissBaseIndex {
|
|||
void
|
||||
Train(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override;
|
||||
|
||||
|
|
|
@ -98,6 +98,8 @@ NSG::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
blacklist);
|
||||
}
|
||||
|
||||
MapOffsetToUid(p_id, static_cast<size_t>(elems));
|
||||
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
ret_ds->Set(meta::IDS, p_id);
|
||||
ret_ds->Set(meta::DISTANCE, p_dist);
|
||||
|
@ -139,7 +141,7 @@ NSG::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
b_params.out_degree = config[IndexParams::out_degree];
|
||||
b_params.search_length = config[IndexParams::search_length];
|
||||
|
||||
GETTENSORWITHIDS(dataset_ptr)
|
||||
GETTENSOR(dataset_ptr)
|
||||
|
||||
impl::NsgIndex::Metric_Type metric;
|
||||
auto metric_str = config[Metric::TYPE].get<std::string>();
|
||||
|
@ -153,7 +155,7 @@ NSG::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
|
||||
index_ = std::make_shared<impl::NsgIndex>(dim, rows, metric);
|
||||
index_->SetKnnGraph(knng);
|
||||
index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params);
|
||||
index_->Build(rows, (float*)p_data, nullptr, b_params);
|
||||
}
|
||||
|
||||
int64_t
|
||||
|
|
|
@ -48,11 +48,6 @@ class NSG : public VecIndex {
|
|||
void
|
||||
Train(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr&, const Config&) override {
|
||||
KNOWHERE_THROW_MSG("Incremental index is not supported");
|
||||
}
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override {
|
||||
KNOWHERE_THROW_MSG("Addwithoutids is not supported");
|
||||
|
|
|
@ -195,7 +195,7 @@ CPUSPTAGRNG::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
index_ptr_->SearchIndex(query_results[i]);
|
||||
}
|
||||
|
||||
return ConvertToDataset(query_results);
|
||||
return ConvertToDataset(query_results, uids_);
|
||||
}
|
||||
|
||||
int64_t
|
||||
|
|
|
@ -41,11 +41,6 @@ class CPUSPTAGRNG : public VecIndex {
|
|||
void
|
||||
Train(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr&, const Config&) override {
|
||||
KNOWHERE_THROW_MSG("Incremental index is not supported");
|
||||
}
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override {
|
||||
KNOWHERE_THROW_MSG("Incremental index is not supported");
|
||||
|
|
|
@ -31,34 +31,18 @@ class VecIndex : public Index {
|
|||
virtual void
BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
    Train(dataset_ptr, config);
-   Add(dataset_ptr, config);
+   AddWithoutIds(dataset_ptr, config);
}
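Since BuildAll is just Train followed by AddWithoutIds, the typical caller flow (mirrored by the unit tests further down in this diff) stays a one-liner. A hedged usage sketch against this tree's VecIndex interface; the helper name is hypothetical and the include path is assumed, with dataset and config construction elided:

// #include "knowhere/index/vector_index/VecIndex.h"   (path as in this tree, assumed)
void BuildAndQuery(milvus::knowhere::VecIndexPtr index,
                   const milvus::knowhere::DatasetPtr& base_dataset,
                   const milvus::knowhere::DatasetPtr& query_dataset,
                   const milvus::knowhere::Config& conf) {
    index->BuildAll(base_dataset, conf);              // Train + AddWithoutIds
    auto result = index->Query(query_dataset, conf);
    // result carries meta::IDS already translated back to user IDs by MapOffsetToUid.
}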
|
||||
|
||||
virtual void
|
||||
Train(const DatasetPtr& dataset, const Config& config) = 0;
|
||||
|
||||
virtual void
|
||||
Add(const DatasetPtr& dataset, const Config& config) = 0;
|
||||
|
||||
virtual void
|
||||
AddWithoutIds(const DatasetPtr& dataset, const Config& config) = 0;
|
||||
|
||||
virtual DatasetPtr
|
||||
Query(const DatasetPtr& dataset, const Config& config) = 0;
|
||||
|
||||
#if 0
|
||||
virtual DatasetPtr
|
||||
QueryById(const DatasetPtr& dataset, const Config& config) {
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
// virtual DatasetPtr
|
||||
// QueryByRange(const DatasetPtr&, const Config&) = 0;
|
||||
//
|
||||
// virtual MetricType
|
||||
// metric_type() = 0;
|
||||
|
||||
virtual int64_t
|
||||
Dim() = 0;
|
||||
|
||||
|
@ -75,13 +59,6 @@ class VecIndex : public Index {
|
|||
return index_mode_;
|
||||
}
|
||||
|
||||
#if 0
|
||||
virtual DatasetPtr
|
||||
GetVectorById(const DatasetPtr& dataset, const Config& config) {
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
faiss::ConcurrentBitsetPtr
|
||||
GetBlacklist() {
|
||||
std::unique_lock<std::mutex> lck(mutex_);
|
||||
|
@ -104,6 +81,17 @@ class VecIndex : public Index {
|
|||
uids_ = uids;
|
||||
}
|
||||
|
||||
void
|
||||
MapOffsetToUid(IDType* id, size_t n) {
|
||||
if (uids_) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
if (id[i] >= 0) {
|
||||
id[i] = uids_->at(id[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
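For readers skimming the diff, this helper is the heart of the change: indexes now return raw row offsets from search, and MapOffsetToUid translates them back to user-visible IDs just before results leave the index. A minimal standalone C++ sketch of the same translation; the function name is hypothetical and this is not the project's API:

#include <cstdint>
#include <cstddef>
#include <iostream>
#include <vector>

// Translate search results from row offsets to user IDs in place.
// An offset of -1 means "no result" and is passed through unchanged.
static void MapOffsetToUidSketch(int64_t* ids, size_t n, const std::vector<int64_t>& uids) {
    for (size_t i = 0; i < n; ++i) {
        if (ids[i] >= 0) {
            ids[i] = uids[ids[i]];
        }
    }
}

int main() {
    std::vector<int64_t> uids = {1001, 1002, 1005, 1009};  // the segment's doc ids
    int64_t labels[4] = {2, 0, -1, 3};                     // offsets returned by an index
    MapOffsetToUidSketch(labels, 4, uids);
    for (auto v : labels) std::cout << v << " ";           // prints: 1005 1001 -1 1009
    std::cout << "\n";
}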
|
||||
|
||||
size_t
|
||||
BlacklistSize() {
|
||||
std::unique_lock<std::mutex> lck(mutex_);
|
||||
|
|
|
@ -18,13 +18,15 @@ namespace knowhere {
|
|||
std::shared_ptr<SPTAG::MetadataSet>
|
||||
ConvertToMetadataSet(const DatasetPtr& dataset_ptr) {
|
||||
auto elems = dataset_ptr->Get<int64_t>(meta::ROWS);
|
||||
auto p_data = dataset_ptr->Get<const int64_t*>(meta::IDS);
|
||||
|
||||
auto p_id = (int64_t*)malloc(sizeof(int64_t) * elems);
|
||||
for (int64_t i = 0; i < elems; ++i) p_id[i] = i;
|
||||
|
||||
auto p_offset = (int64_t*)malloc(sizeof(int64_t) * (elems + 1));
|
||||
for (auto i = 0; i <= elems; ++i) p_offset[i] = i * 8;
|
||||
for (int64_t i = 0; i <= elems; ++i) p_offset[i] = i * 8;
|
||||
|
||||
std::shared_ptr<SPTAG::MetadataSet> metaset(
|
||||
new SPTAG::MemMetadataSet(SPTAG::ByteArray((std::uint8_t*)p_data, elems * sizeof(int64_t), false),
|
||||
new SPTAG::MemMetadataSet(SPTAG::ByteArray((std::uint8_t*)p_id, elems * sizeof(int64_t), true),
|
||||
SPTAG::ByteArray((std::uint8_t*)p_offset, elems * sizeof(int64_t), true), elems));
|
||||
|
||||
return metaset;
|
||||
|
@ -54,7 +56,7 @@ ConvertToQueryResult(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
}
|
||||
|
||||
DatasetPtr
|
||||
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
|
||||
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results, std::shared_ptr<std::vector<int64_t>> uid) {
|
||||
auto k = query_results[0].GetResultNum();
|
||||
auto elems = query_results.size() * k;
|
||||
|
||||
|
@ -64,12 +66,18 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
|
|||
auto p_dist = (float*)malloc(p_dist_size);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (auto i = 0; i < query_results.size(); ++i) {
|
||||
for (size_t i = 0; i < query_results.size(); ++i) {
|
||||
auto results = query_results[i].GetResults();
|
||||
auto num_result = query_results[i].GetResultNum();
|
||||
for (auto j = 0; j < num_result; ++j) {
|
||||
// p_id[i * k + j] = results[j].VID;
|
||||
p_id[i * k + j] = *(int64_t*)query_results[i].GetMetadata(j).Data();
|
||||
auto id = *(int64_t*)query_results[i].GetMetadata(j).Data();
|
||||
if (uid != nullptr) {
|
||||
if (id >= 0) {
|
||||
id = uid->at(id);
|
||||
}
|
||||
}
|
||||
p_id[i * k + j] = id;
|
||||
p_dist[i * k + j] = results[j].Dist;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ std::vector<SPTAG::QueryResult>
|
|||
ConvertToQueryResult(const DatasetPtr& dataset_ptr, const Config& config);
|
||||
|
||||
DatasetPtr
|
||||
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results);
|
||||
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results, std::shared_ptr<std::vector<int64_t>> uid);
|
||||
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
||||
|
|
|
@ -18,16 +18,6 @@
|
|||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
DatasetPtr
|
||||
GenDatasetWithIds(const int64_t nb, const int64_t dim, const void* xb, const int64_t* ids) {
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
ret_ds->Set(meta::ROWS, nb);
|
||||
ret_ds->Set(meta::DIM, dim);
|
||||
ret_ds->Set(meta::TENSOR, xb);
|
||||
ret_ds->Set(meta::IDS, ids);
|
||||
return ret_ds;
|
||||
}
|
||||
|
||||
DatasetPtr
|
||||
GenDataset(const int64_t nb, const int64_t dim, const void* xb) {
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
|
|
|
@ -23,15 +23,6 @@ namespace knowhere {
|
|||
int64_t rows = dataset_ptr->Get<int64_t>(meta::ROWS); \
|
||||
const void* p_data = dataset_ptr->Get<const void*>(meta::TENSOR);
|
||||
|
||||
#define GETTENSORWITHIDS(dataset_ptr) \
|
||||
int64_t dim = dataset_ptr->Get<int64_t>(meta::DIM); \
|
||||
int64_t rows = dataset_ptr->Get<int64_t>(meta::ROWS); \
|
||||
const void* p_data = dataset_ptr->Get<const void*>(meta::TENSOR); \
|
||||
const int64_t* p_ids = dataset_ptr->Get<const int64_t*>(meta::IDS);
|
||||
|
||||
extern DatasetPtr
|
||||
GenDatasetWithIds(const int64_t nb, const int64_t dim, const void* xb, const int64_t* ids);
|
||||
|
||||
extern DatasetPtr
|
||||
GenDataset(const int64_t nb, const int64_t dim, const void* xb);
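After this change, GenDataset is the only constructor callers need for raw vectors; IDs are no longer attached to the dataset at all, since offset i is implicitly ID i until the index maps it back through SetUids/MapOffsetToUid. A rough sketch of building such a dataset in this tree; the include path is an assumption and other details may differ:

// Sketch only; assumes the knowhere adapter header from this repository.
#include <vector>
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"

milvus::knowhere::DatasetPtr
MakeBaseDataset(int64_t nb, int64_t dim, const std::vector<float>& base) {
    // No per-row IDs any more: row offsets serve as IDs inside the index.
    return milvus::knowhere::GenDataset(nb, dim, base.data());
}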
|
||||
|
||||
|
|
|
@ -98,21 +98,17 @@ GPUIDMAP::GetRawVectors() {
|
|||
KNOWHERE_THROW_MSG("Not support");
|
||||
}
|
||||
|
||||
const int64_t*
|
||||
GPUIDMAP::GetRawIds() {
|
||||
KNOWHERE_THROW_MSG("Not support");
|
||||
}
|
||||
|
||||
void
|
||||
GPUIDMAP::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& config) {
|
||||
ResScope rs(res_, gpu_id_);
|
||||
|
||||
auto flat_index = dynamic_cast<faiss::IndexIDMap*>(index_.get())->index;
|
||||
auto default_type = flat_index->metric_type;
|
||||
auto default_type = index_->metric_type;
|
||||
if (config.contains(Metric::TYPE))
|
||||
flat_index->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
|
||||
flat_index->search(n, (float*)data, k, distances, labels, GetBlacklist());
|
||||
flat_index->metric_type = default_type;
|
||||
index_->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
|
||||
index_->search(n, (float*)data, k, distances, labels, GetBlacklist());
|
||||
index_->metric_type = default_type;
|
||||
|
||||
MapOffsetToUid(labels, static_cast<size_t>(n * k));
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -39,9 +39,6 @@ class GPUIDMAP : public IDMAP, public GPUIndex {
|
|||
const float*
|
||||
GetRawVectors() override;
|
||||
|
||||
const int64_t*
|
||||
GetRawIds() override;
|
||||
|
||||
void
|
||||
GenGraph(const float*, const int64_t, GraphType&, const Config&);
|
||||
|
||||
|
|
|
@ -53,10 +53,10 @@ GPUIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
|
|||
}
|
||||
|
||||
void
|
||||
GPUIVF::Add(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
GPUIVF::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (auto spt = res_.lock()) {
|
||||
ResScope rs(res_, gpu_id_);
|
||||
IVF::Add(dataset_ptr, config);
|
||||
IVF::AddWithoutIds(dataset_ptr, config);
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("Add IVF can't get gpu resource");
|
||||
}
|
||||
|
@ -152,6 +152,8 @@ GPUIVF::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int
|
|||
device_index->search(search_size, (float*)data + i * dim, k, distances + i * k, labels + i * k,
|
||||
GetBlacklist());
|
||||
}
|
||||
|
||||
MapOffsetToUid(labels, static_cast<size_t>(n * k));
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("Not a GpuIndexIVF type.");
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ class GPUIVF : public IVF, public GPUIndex {
|
|||
Train(const DatasetPtr&, const Config&) override;
|
||||
|
||||
void
|
||||
Add(const DatasetPtr&, const Config&) override;
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override;
|
||||
|
||||
VecIndexPtr
|
||||
CopyGpuToCpu(const Config&) override;
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
#include <faiss/IndexIVFPQ.h>
|
||||
#include <faiss/gpu/GpuCloner.h>
|
||||
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
||||
#include <faiss/index_factory.h>
|
||||
|
||||
#include "knowhere/common/Exception.h"
|
||||
#include "knowhere/index/vector_index/IndexIVFPQ.h"
|
||||
|
|
|
@ -47,12 +47,18 @@ NsgIndex::~NsgIndex() {
|
|||
}
|
||||
|
||||
void
|
||||
NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) {
|
||||
NsgIndex::Build(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) {
|
||||
ntotal = nb;
|
||||
ori_data_ = new float[ntotal * dimension];
|
||||
ids_ = new int64_t[ntotal];
|
||||
memcpy((void*)ori_data_, (void*)data, sizeof(float) * ntotal * dimension);
|
||||
memcpy((void*)ids_, (void*)ids, sizeof(int64_t) * ntotal);
|
||||
if (ids == nullptr) {
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
ids_[i] = i;
|
||||
}
|
||||
} else {
|
||||
memcpy((void*)ids_, (void*)ids, sizeof(int64_t) * ntotal);
|
||||
}
|
||||
|
||||
search_length = parameters.search_length;
|
||||
out_degree = parameters.out_degree;
|
||||
|
|
|
@ -80,7 +80,7 @@ class NsgIndex {
|
|||
SetKnnGraph(Graph& knng);
|
||||
|
||||
virtual void
|
||||
Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters);
|
||||
Build(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters);
|
||||
|
||||
void
|
||||
Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, int64_t* ids,
|
||||
|
|
|
@ -55,7 +55,6 @@ TEST_P(AnnoyTest, annoy_basic) {
|
|||
ASSERT_ANY_THROW(index_->Train(base_dataset, conf));
|
||||
ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
|
||||
ASSERT_ANY_THROW(index_->Serialize(conf));
|
||||
ASSERT_ANY_THROW(index_->Add(base_dataset, conf));
|
||||
ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, conf));
|
||||
ASSERT_ANY_THROW(index_->Count());
|
||||
ASSERT_ANY_THROW(index_->Dim());
|
||||
|
|
|
@@ -53,16 +53,14 @@ TEST_P(BinaryIDMAPTest, binaryidmap_basic) {
    {
        ASSERT_ANY_THROW(index_->Serialize());
        ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
        ASSERT_ANY_THROW(index_->Add(nullptr, conf));
        ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
    }

    index_->Train(base_dataset, conf);
    index_->Add(base_dataset, conf);
    index_->AddWithoutIds(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dim(), dim);
    ASSERT_TRUE(index_->GetRawVectors() != nullptr);
    ASSERT_TRUE(index_->GetRawIds() != nullptr);
    auto result = index_->Query(query_dataset, conf);
    AssertAnns(result, nq, k);
    // PrintResult(result, nq, k);

@@ -64,7 +64,6 @@ TEST_P(BinaryIVFTest, binaryivf_basic) {
    {
        ASSERT_ANY_THROW(index_->Serialize());
        ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
        ASSERT_ANY_THROW(index_->Add(nullptr, conf));
        ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
    }

@@ -54,7 +54,7 @@ TEST_F(SingleIndexTest, IVFSQHybrid) {

    fiu_init(0);
    index_->Train(base_dataset, conf);
    index_->Add(base_dataset, conf);
    index_->AddWithoutIds(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dim(), dim);

@@ -73,7 +73,7 @@ TEST_F(GPURESTEST, copyandsearch) {

    auto conf = ParamGenerator::GetInstance().Gen(index_type_);
    index_->Train(base_dataset, conf);
    index_->Add(base_dataset, conf);
    index_->AddWithoutIds(base_dataset, conf);
    auto result = index_->Query(query_dataset, conf);
    AssertAnns(result, nq, k);

@@ -128,7 +128,7 @@ TEST_F(GPURESTEST, trainandsearch) {

    auto conf = ParamGenerator::GetInstance().Gen(index_type_);
    index_->Train(base_dataset, conf);
    index_->Add(base_dataset, conf);
    index_->AddWithoutIds(base_dataset, conf);
    index_->SetIndexSize(nb * dim * sizeof(float));
    auto cpu_idx = milvus::knowhere::cloner::CopyGpuToCpu(index_, milvus::knowhere::Config());
    milvus::knowhere::IVFPtr ivf_idx = std::dynamic_pointer_cast<milvus::knowhere::IVF>(cpu_idx);

@@ -140,7 +140,7 @@ TEST_F(GPURESTEST, trainandsearch) {
    auto train_stage = [&] {
        for (int i = 0; i < train_count; ++i) {
            index_->Train(base_dataset, conf);
            index_->Add(base_dataset, conf);
            index_->AddWithoutIds(base_dataset, conf);
        }
    };
    auto search_stage = [&](milvus::knowhere::VecIndexPtr& search_idx) {

@@ -51,14 +51,13 @@ TEST_P(HNSWTest, HNSW_basic) {
    {
        ASSERT_ANY_THROW(index_->Serialize());
        ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
        ASSERT_ANY_THROW(index_->Add(nullptr, conf));
        ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
        ASSERT_ANY_THROW(index_->Count());
        ASSERT_ANY_THROW(index_->Dim());
    }

    index_->Train(base_dataset, conf);
    index_->Add(base_dataset, conf);
    index_->AddWithoutIds(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dim(), dim);

@@ -70,7 +69,7 @@ TEST_P(HNSWTest, HNSW_delete) {
    assert(!xb.empty());

    index_->Train(base_dataset, conf);
    index_->Add(base_dataset, conf);
    index_->AddWithoutIds(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dim(), dim);

@@ -120,7 +119,7 @@ TEST_P(HNSWTest, HNSW_serialize) {

    {
        index_->Train(base_dataset, conf);
        index_->Add(base_dataset, conf);
        index_->AddWithoutIds(base_dataset, conf);
        auto binaryset = index_->Serialize();
        auto bin = binaryset.GetByName("HNSW");

@@ -74,16 +74,14 @@ TEST_P(IDMAPTest, idmap_basic) {
    {
        ASSERT_ANY_THROW(index_->Serialize());
        ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
        ASSERT_ANY_THROW(index_->Add(nullptr, conf));
        ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
    }

    index_->Train(base_dataset, conf);
    index_->Add(base_dataset, conf);
    index_->AddWithoutIds(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dim(), dim);
    ASSERT_TRUE(index_->GetRawVectors() != nullptr);
    ASSERT_TRUE(index_->GetRawIds() != nullptr);
    auto result = index_->Query(query_dataset, conf);
    AssertAnns(result, nq, k);
    // PrintResult(result, nq, k);

@@ -144,7 +142,7 @@ TEST_P(IDMAPTest, idmap_serialize) {
    {
        // serialize index
        index_->Train(base_dataset, conf);
        index_->Add(base_dataset, milvus::knowhere::Config());
        index_->AddWithoutIds(base_dataset, milvus::knowhere::Config());

        if (index_mode_ == milvus::knowhere::IndexMode::MODE_GPU) {
#ifdef MILVUS_GPU_VERSION

@@ -187,11 +185,10 @@ TEST_P(IDMAPTest, idmap_copy) {
                           {milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}};

    index_->Train(base_dataset, conf);
    index_->Add(base_dataset, conf);
    index_->AddWithoutIds(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dim(), dim);
    ASSERT_TRUE(index_->GetRawVectors() != nullptr);
    ASSERT_TRUE(index_->GetRawIds() != nullptr);
    auto result = index_->Query(query_dataset, conf);
    AssertAnns(result, nq, k);
    // PrintResult(result, nq, k);

@@ -211,8 +208,6 @@ TEST_P(IDMAPTest, idmap_copy) {
        AssertAnns(clone_result, nq, k);
        ASSERT_THROW({ std::static_pointer_cast<milvus::knowhere::GPUIDMAP>(clone_index)->GetRawVectors(); },
                     milvus::knowhere::KnowhereException);
        ASSERT_THROW({ std::static_pointer_cast<milvus::knowhere::GPUIDMAP>(clone_index)->GetRawIds(); },
                     milvus::knowhere::KnowhereException);

        fiu_init(0);
        fiu_enable("GPUIDMP.SerializeImpl.throw_exception", 1, nullptr, 0);

@@ -233,7 +228,6 @@ TEST_P(IDMAPTest, idmap_copy) {
        auto host_result = host_index->Query(query_dataset, conf);
        AssertAnns(host_result, nq, k);
        ASSERT_TRUE(std::static_pointer_cast<milvus::knowhere::IDMAP>(host_index)->GetRawVectors() != nullptr);
        ASSERT_TRUE(std::static_pointer_cast<milvus::knowhere::IDMAP>(host_index)->GetRawIds() != nullptr);

        // gpu to gpu
        auto device_index = milvus::knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, conf);

@@ -98,7 +98,6 @@ TEST_P(IVFTest, ivf_basic_cpu) {
    }

    // null faiss index
    ASSERT_ANY_THROW(index_->Add(base_dataset, conf_));
    ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, conf_));

    index_->Train(base_dataset, conf_);

@@ -158,7 +157,6 @@ TEST_P(IVFTest, ivf_basic_gpu) {
    }

    // null faiss index
    ASSERT_ANY_THROW(index_->Add(base_dataset, conf_));
    ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, conf_));

    index_->BuildAll(base_dataset, conf_);

@@ -197,7 +195,7 @@ TEST_P(IVFTest, ivf_serialize) {
    {
        // serialize index
        index_->Train(base_dataset, conf_);
        index_->Add(base_dataset, conf_);
        index_->AddWithoutIds(base_dataset, conf_);
        auto binaryset = index_->Serialize();
        auto bin = binaryset.GetByName("IVF");

@@ -223,7 +221,7 @@ TEST_P(IVFTest, clone_test) {
    assert(!xb.empty());

    index_->Train(base_dataset, conf_);
    index_->Add(base_dataset, conf_);
    index_->AddWithoutIds(base_dataset, conf_);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dim(), dim);

@@ -310,7 +308,7 @@ TEST_P(IVFTest, gpu_seal_test) {
    ASSERT_ANY_THROW(index_->Seal());

    index_->Train(base_dataset, conf_);
    index_->Add(base_dataset, conf_);
    index_->AddWithoutIds(base_dataset, conf_);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dim(), dim);

@@ -82,7 +82,6 @@ TEST_F(NSGInterfaceTest, basic_test) {
    {
        ASSERT_ANY_THROW(index_->Serialize());
        ASSERT_ANY_THROW(index_->Query(query_dataset, search_conf));
        ASSERT_ANY_THROW(index_->Add(base_dataset, search_conf));
        ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, search_conf));
    }

@@ -61,10 +61,7 @@ TEST_P(SPTAGTest, sptag_basic) {
    assert(!xb.empty());

    // null faiss index
    {
        ASSERT_ANY_THROW(index_->Add(nullptr, conf));
        ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
    }
    ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));

    index_->BuildAll(base_dataset, conf);
    // index_->Add(base_dataset, conf);

@@ -44,7 +44,7 @@ DataGen::Generate(const int dim, const int nb, const int nq, const bool is_binar
        assert(xb.size() == (size_t)dim * nb);
        assert(xq.size() == (size_t)dim * nq);

        base_dataset = milvus::knowhere::GenDatasetWithIds(nb, dim, xb.data(), ids.data());
        base_dataset = milvus::knowhere::GenDataset(nb, dim, xb.data());
        query_dataset = milvus::knowhere::GenDataset(nq, dim, xq.data());
    } else {
        int64_t dim_x = dim / 8;

@@ -52,12 +52,12 @@ DataGen::Generate(const int dim, const int nb, const int nq, const bool is_binar
        assert(xb_bin.size() == (size_t)dim_x * nb);
        assert(xq_bin.size() == (size_t)dim_x * nq);

        base_dataset = milvus::knowhere::GenDatasetWithIds(nb, dim, xb_bin.data(), ids.data());
        base_dataset = milvus::knowhere::GenDataset(nb, dim, xb_bin.data());
        query_dataset = milvus::knowhere::GenDataset(nq, dim, xq_bin.data());
    }

    id_dataset = milvus::knowhere::GenDatasetWithIds(nq, dim, nullptr, ids.data());
    xid_dataset = milvus::knowhere::GenDatasetWithIds(nq, dim, nullptr, xids.data());
    id_dataset = milvus::knowhere::GenDataset(nq, dim, nullptr);
    xid_dataset = milvus::knowhere::GenDataset(nq, dim, nullptr);
}

void

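Both DataGen hunks switch the test fixtures from GenDatasetWithIds to the id-free GenDataset(rows, dim, data). A minimal sketch of the new call shape; the vector names and fill code are illustrative:

std::vector<float> xb(nb * dim);   // base vectors, populated by the fixture
std::vector<float> xq(nq * dim);   // query vectors
auto base_dataset  = milvus::knowhere::GenDataset(nb, dim, xb.data());
auto query_dataset = milvus::knowhere::GenDataset(nq, dim, xq.data());
// User ids are no longer baked into the dataset; they are attached to the
// index afterwards via SetUids (see the CreateExecEngine hunk below).
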
@@ -18,6 +18,7 @@
#include "db/engine/EngineFactory.h"
#include "db/engine/ExecutionEngineImpl.h"
#include "db/utils.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include <fiu-local.h>
#include <fiu-control.h>

@@ -47,8 +48,11 @@ CreateExecEngine(const milvus::json& json_params, milvus::engine::MetricType met
        }
    }

    auto status = engine_ptr->AddWithIds((int64_t)ids->size(), data.data(), ids->data());
    (std::static_pointer_cast<milvus::engine::ExecutionEngineImpl>(engine_ptr))->index_->SetUids(ids);
    auto engine_impl = (std::static_pointer_cast<milvus::engine::ExecutionEngineImpl>(engine_ptr));

    auto dataset = milvus::knowhere::GenDataset(ROW_COUNT, DIMENSION, data.data());
    engine_impl->index_->AddWithoutIds(dataset, milvus::knowhere::Config());
    engine_impl->index_->SetUids(ids);
    return engine_ptr;
}

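The helper above shows the pattern the commit settles on across index types: vectors are inserted without per-vector ids, and the user ids are attached once through SetUids so search results can be mapped back from internal offsets. A hedged end-to-end sketch; index_, base_dataset, query_dataset, conf, and uids are placeholder names following the tests above, and the exact result handling may differ per index:

index_->Train(base_dataset, conf);                  // learn the index structure
index_->AddWithoutIds(base_dataset, conf);          // insert vectors; the index only tracks offsets
index_->SetUids(uids);                              // attach user ids once, out of band
auto result = index_->Query(query_dataset, conf);   // returned labels map offsets back to uids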