Optimize the process of indexing and querying (#4455)

* fix index

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* fix engine

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* fix ut

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>
pull/4486/head
shengjun.li 2020-12-18 17:53:39 +08:00 committed by GitHub
parent e5d218f405
commit a07526c98e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 146 additions and 886 deletions

View File

@ -8,6 +8,7 @@ Please mark all change in change log and use the issue from GitHub
## Feature
## Improvement
- \#4454 Optimize the process of indexing and querying
## Task

View File

@ -73,12 +73,6 @@ enum class DataType {
class ExecutionEngine {
public:
virtual Status
AddWithIds(int64_t n, const float* xdata, const int64_t* xids) = 0;
virtual Status
AddWithIds(int64_t n, const uint8_t* xdata, const int64_t* xids) = 0;
virtual size_t
Count() const = 0;

View File

@ -325,20 +325,6 @@ ExecutionEngineImpl::HybridUnset() const {
#endif
}
// Appends float vectors together with their ids to the wrapped index.
//
// n     - row count handed to knowhere::GenDatasetWithIds
// xdata - vector data; the per-vector dimension is taken from index_->Dim()
// xids  - ids handed to the dataset alongside the vectors
//
// Returns a DB_ERROR status when no index is attached, Status::OK() otherwise.
Status
ExecutionEngineImpl::AddWithIds(int64_t n, const float* xdata, const int64_t* xids) {
    // index_->Dim() below would dereference a null pointer; report the problem instead of crashing.
    if (index_ == nullptr) {
        LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to add vectors";
        return Status(DB_ERROR, "index is null");
    }

    auto dataset = knowhere::GenDatasetWithIds(n, index_->Dim(), xdata, xids);
    index_->Add(dataset, knowhere::Config());
    return Status::OK();
}
// Appends uint8_t (binary) vectors together with their ids to the wrapped index.
//
// n     - row count handed to knowhere::GenDatasetWithIds
// xdata - vector data; the dimension is taken from index_->Dim()
// xids  - ids handed to the dataset alongside the vectors
//
// Returns a DB_ERROR status when no index is attached, Status::OK() otherwise.
Status
ExecutionEngineImpl::AddWithIds(int64_t n, const uint8_t* xdata, const int64_t* xids) {
    // index_->Dim() below would dereference a null pointer; report the problem instead of crashing.
    if (index_ == nullptr) {
        LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to add vectors";
        return Status(DB_ERROR, "index is null");
    }

    auto dataset = knowhere::GenDatasetWithIds(n, index_->Dim(), xdata, xids);
    index_->Add(dataset, knowhere::Config());
    return Status::OK();
}
size_t
ExecutionEngineImpl::Count() const {
if (index_ == nullptr) {
@ -723,14 +709,12 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
std::shared_ptr<std::vector<segment::doc_id_t>> uids;
faiss::ConcurrentBitsetPtr blacklist;
if (from_index) {
auto dataset =
knowhere::GenDatasetWithIds(Count(), Dimension(), from_index->GetRawVectors(), from_index->GetRawIds());
auto dataset = knowhere::GenDataset(Count(), Dimension(), from_index->GetRawVectors());
to_index->BuildAll(dataset, conf);
uids = from_index->GetUids();
blacklist = from_index->GetBlacklist();
} else if (bin_from_index) {
auto dataset = knowhere::GenDatasetWithIds(Count(), Dimension(), bin_from_index->GetRawVectors(),
bin_from_index->GetRawIds());
auto dataset = knowhere::GenDataset(Count(), Dimension(), bin_from_index->GetRawVectors());
to_index->BuildAll(dataset, conf);
uids = bin_from_index->GetUids();
blacklist = bin_from_index->GetBlacklist();
@ -754,383 +738,16 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
}
void
MapAndCopyResult(const knowhere::DatasetPtr& dataset, std::shared_ptr<std::vector<milvus::segment::doc_id_t>> uids,
int64_t nq, int64_t k, float* distances, int64_t* labels) {
int64_t* res_ids = dataset->Get<int64_t*>(knowhere::meta::IDS);
CopyResult(const knowhere::DatasetPtr& dataset, int64_t result_len, float* distances, int64_t* labels) {
float* res_dist = dataset->Get<float*>(knowhere::meta::DISTANCE);
memcpy(distances, res_dist, sizeof(float) * nq * k);
/* map offsets to ids */
int64_t num = nq * k;
for (int64_t i = 0; i < num; ++i) {
int64_t offset = res_ids[i];
if (offset != -1) {
labels[i] = (*uids)[offset];
} else {
labels[i] = -1;
}
}
free(res_ids);
memcpy(distances, res_dist, sizeof(float) * result_len);
free(res_dist);
int64_t* res_ids = dataset->Get<int64_t*>(knowhere::meta::IDS);
memcpy(labels, res_ids, sizeof(int64_t) * result_len);
free(res_ids);
}
#if 0
template <typename T>
void
ProcessRangeQuery(std::vector<T> data, T value, query::CompareOperator type, faiss::ConcurrentBitsetPtr& bitset) {
switch (type) {
case query::CompareOperator::LT: {
for (uint64_t i = 0; i < data.size(); ++i) {
if (data[i] >= value) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case query::CompareOperator::LTE: {
for (uint64_t i = 0; i < data.size(); ++i) {
if (data[i] > value) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case query::CompareOperator::GT: {
for (uint64_t i = 0; i < data.size(); ++i) {
if (data[i] <= value) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case query::CompareOperator::GTE: {
for (uint64_t i = 0; i < data.size(); ++i) {
if (data[i] < value) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case query::CompareOperator::EQ: {
for (uint64_t i = 0; i < data.size(); ++i) {
if (data[i] != value) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
}
case query::CompareOperator::NE: {
for (uint64_t i = 0; i < data.size(); ++i) {
if (data[i] == value) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
}
}
Status
ExecutionEngineImpl::ExecBinaryQuery(milvus::query::GeneralQueryPtr general_query, faiss::ConcurrentBitsetPtr bitset,
std::unordered_map<std::string, DataType>& attr_type, uint64_t& nq, uint64_t& topk,
std::vector<float>& distances, std::vector<int64_t>& labels) {
if (bitset == nullptr) {
bitset = std::make_shared<faiss::ConcurrentBitset>(vector_count_);
}
if (general_query->leaf == nullptr) {
Status status = Status::OK();
if (general_query->bin->left_query != nullptr) {
status = ExecBinaryQuery(general_query->bin->left_query, bitset, attr_type, nq, topk, distances, labels);
}
if (general_query->bin->right_query != nullptr) {
status = ExecBinaryQuery(general_query->bin->right_query, bitset, attr_type, nq, topk, distances, labels);
}
return status;
} else {
if (general_query->leaf->term_query != nullptr) {
// process attrs_data
auto field_name = general_query->leaf->term_query->field_name;
auto type = attr_type.at(field_name);
auto size = attr_size_.at(field_name);
switch (type) {
case DataType::INT8: {
std::vector<int8_t> data;
data.resize(size / sizeof(int8_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<int8_t> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int8_t);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int8_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
}
}
if (!value_in_term) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case DataType::INT16: {
std::vector<int16_t> data;
data.resize(size / sizeof(int16_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<int16_t> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int16_t);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int16_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
}
}
if (!value_in_term) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case DataType::INT32: {
std::vector<int32_t> data;
data.resize(size / sizeof(int32_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<int32_t> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int32_t);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int32_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
}
}
if (!value_in_term) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case DataType::INT64: {
std::vector<int64_t> data;
data.resize(size / sizeof(int64_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<int64_t> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(int64_t);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int64_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
}
}
if (!value_in_term) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case DataType::FLOAT: {
std::vector<float> data;
data.resize(size / sizeof(float));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<float> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(float);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(int64_t));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
}
}
if (!value_in_term) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
case DataType::DOUBLE: {
std::vector<double> data;
data.resize(size / sizeof(double));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::vector<double> term_value;
auto term_size =
general_query->leaf->term_query->field_value.size() * (sizeof(int8_t)) / sizeof(double);
term_value.resize(term_size);
memcpy(term_value.data(), general_query->leaf->term_query->field_value.data(),
term_size * sizeof(double));
for (uint64_t i = 0; i < data.size(); ++i) {
bool value_in_term = false;
for (auto query_value : term_value) {
if (data[i] == query_value) {
value_in_term = true;
break;
}
}
if (!value_in_term) {
if (!bitset->test(i)) {
bitset->set(i);
}
}
}
break;
}
default:
break;
}
return Status::OK();
}
if (general_query->leaf->range_query != nullptr) {
auto field_name = general_query->leaf->range_query->field_name;
auto com_expr = general_query->leaf->range_query->compare_expr;
auto type = attr_type.at(field_name);
auto size = attr_size_.at(field_name);
for (uint64_t j = 0; j < com_expr.size(); ++j) {
auto operand = com_expr[j].operand;
switch (type) {
case DataType::INT8: {
std::vector<int8_t> data;
data.resize(size / sizeof(int8_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
int8_t value = atoi(operand.c_str());
ProcessRangeQuery<int8_t>(data, value, com_expr[j].compare_operator, bitset);
break;
}
case DataType::INT16: {
std::vector<int16_t> data;
data.resize(size / sizeof(int16_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
int16_t value = atoi(operand.c_str());
ProcessRangeQuery<int16_t>(data, value, com_expr[j].compare_operator, bitset);
break;
}
case DataType::INT32: {
std::vector<int32_t> data;
data.resize(size / sizeof(int32_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
int32_t value = atoi(operand.c_str());
ProcessRangeQuery<int32_t>(data, value, com_expr[j].compare_operator, bitset);
break;
}
case DataType::INT64: {
std::vector<int64_t> data;
data.resize(size / sizeof(int64_t));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
int64_t value = atoi(operand.c_str());
ProcessRangeQuery<int64_t>(data, value, com_expr[j].compare_operator, bitset);
break;
}
case DataType::FLOAT: {
std::vector<float> data;
data.resize(size / sizeof(float));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::istringstream iss(operand);
double value;
iss >> value;
ProcessRangeQuery<float>(data, value, com_expr[j].compare_operator, bitset);
break;
}
case DataType::DOUBLE: {
std::vector<double> data;
data.resize(size / sizeof(double));
memcpy(data.data(), attr_data_.at(field_name).data(), size);
std::istringstream iss(operand);
double value;
iss >> value;
ProcessRangeQuery<double>(data, value, com_expr[j].compare_operator, bitset);
break;
}
default:
break;
}
}
return Status::OK();
}
if (general_query->leaf->vector_query != nullptr) {
// Do search
faiss::ConcurrentBitsetPtr list;
list = index_->GetBlacklist();
// Do OR
for (int64_t i = 0; i < vector_count_; ++i) {
if (list->test(i) || bitset->test(i)) {
bitset->set(i);
}
}
index_->SetBlacklist(bitset);
auto vector_query = general_query->leaf->vector_query;
topk = vector_query->topk;
nq = vector_query->query_vector.float_data.size() / dim_;
distances.resize(nq * topk);
labels.resize(nq * topk);
return Search(nq, vector_query->query_vector.float_data.data(), topk, vector_query->extra_params,
distances.data(), labels.data());
}
}
return Status::OK();
}
#endif
Status
ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, const milvus::json& extra_params, float* distances,
int64_t* labels, bool hybrid) {
@ -1163,8 +780,8 @@ ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, const milvu
LOG_ENGINE_DEBUG_ << LogOut("[%s][%ld] get %ld uids from index %s", "search", 0, index_->GetUids()->size(),
location_.c_str());
MapAndCopyResult(result, index_->GetUids(), n, k, distances, labels);
rc.RecordSection("map uids " + std::to_string(n * k));
CopyResult(result, n * k, distances, labels);
rc.RecordSection("copy result " + std::to_string(n * k));
if (hybrid) {
HybridUnset();
@ -1204,8 +821,8 @@ ExecutionEngineImpl::Search(int64_t n, const uint8_t* data, int64_t k, const mil
LOG_ENGINE_DEBUG_ << LogOut("[%s][%ld] get %ld uids from index %s", "search", 0, index_->GetUids()->size(),
location_.c_str());
MapAndCopyResult(result, index_->GetUids(), n, k, distances, labels);
rc.RecordSection("map uids " + std::to_string(n * k));
CopyResult(result, n * k, distances, labels);
rc.RecordSection("copy result " + std::to_string(n * k));
if (hybrid) {
HybridUnset();
@ -1214,60 +831,6 @@ ExecutionEngineImpl::Search(int64_t n, const uint8_t* data, int64_t k, const mil
return Status::OK();
}
#if 0
// Fetches the float vector stored under `id` and copies index_->Dim() floats into `vector`.
//
// id     - id of the vector to fetch (only a single id is supported for now)
// vector - destination buffer; must have room for index_->Dim() floats
// hybrid - when true the call is wrapped in HybridLoad() / HybridUnset()
//
// Returns DB_ERROR when no index is attached or the index returned no data, Status::OK() otherwise.
Status
ExecutionEngineImpl::GetVectorByID(const int64_t id, float* vector, bool hybrid) {
    if (index_ == nullptr) {
        LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to search";
        return Status(DB_ERROR, "index is null");
    }

    if (hybrid) {
        HybridLoad();
    }

    // Only one id for now
    std::vector<int64_t> ids{id};
    auto dataset = knowhere::GenDatasetWithIds(1, index_->Dim(), nullptr, ids.data());
    auto result = index_->GetVectorById(dataset, knowhere::Config());

    auto res_vec = static_cast<float*>(result->Get<void*>(knowhere::meta::TENSOR));
    const bool found = (res_vec != nullptr);
    if (found) {
        memcpy(vector, res_vec, sizeof(float) * 1 * index_->Dim());
        // The TENSOR buffer is malloc'ed by the index (see IDMAP::GetVectorById) and nothing else
        // releases it, so it was leaked on every call.
        // NOTE(review): assumes every index type allocates TENSOR with malloc - confirm.
        free(res_vec);
    }

    if (hybrid) {
        HybridUnset();
    }

    return found ? Status::OK() : Status(DB_ERROR, "failed to get vector by id");
}
// Fetches the binary vector stored under `id` and copies index_->Dim() bytes into `vector`.
//
// id     - id of the vector to fetch (only a single id is supported for now)
// vector - destination buffer; must have room for index_->Dim() bytes
// hybrid - when true the call is wrapped in HybridLoad() / HybridUnset()
//
// Returns DB_ERROR when no index is attached or the index returned no data, Status::OK() otherwise.
Status
ExecutionEngineImpl::GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) {
    if (index_ == nullptr) {
        LOG_ENGINE_ERROR_ << "ExecutionEngineImpl: index is null, failed to search";
        return Status(DB_ERROR, "index is null");
    }

    LOG_ENGINE_DEBUG_ << "Get binary vector by id: " << id;

    if (hybrid) {
        HybridLoad();
    }

    // Only one id for now
    std::vector<int64_t> ids{id};
    auto dataset = knowhere::GenDatasetWithIds(1, index_->Dim(), nullptr, ids.data());
    auto result = index_->GetVectorById(dataset, knowhere::Config());

    auto res_vec = static_cast<uint8_t*>(result->Get<void*>(knowhere::meta::TENSOR));
    const bool found = (res_vec != nullptr);
    if (found) {
        // NOTE(review): for binary indexes Dim() is presumably the dimension in bits, which would make
        // the payload Dim() / 8 bytes - confirm against the producer of TENSOR before changing the size.
        memcpy(vector, res_vec, sizeof(uint8_t) * 1 * index_->Dim());
        // The TENSOR buffer was never released, leaking it on every call.
        // NOTE(review): assumes the index allocates TENSOR with malloc - confirm.
        free(res_vec);
    }

    if (hybrid) {
        HybridUnset();
    }

    return found ? Status::OK() : Status(DB_ERROR, "failed to get vector by id");
}
#endif
Status
ExecutionEngineImpl::Cache() {
auto cpu_cache_mgr = milvus::cache::CpuCacheMgr::GetInstance();
@ -1275,6 +838,7 @@ ExecutionEngineImpl::Cache() {
cpu_cache_mgr->InsertItem(location_, obj);
return Status::OK();
}
Status
ExecutionEngineImpl::FpgaCache() {
#ifdef MILVUS_FPGA_VERSION
@ -1284,6 +848,7 @@ ExecutionEngineImpl::FpgaCache() {
#endif
return Status::OK();
}
// TODO(linxj): remove.
Status
ExecutionEngineImpl::Init() {

View File

@ -33,12 +33,6 @@ class ExecutionEngineImpl : public ExecutionEngine {
ExecutionEngineImpl(knowhere::VecIndexPtr index, const std::string& location, EngineType index_type,
MetricType metric_type, const milvus::json& index_params);
Status
AddWithIds(int64_t n, const float* xdata, const int64_t* xids) override;
Status
AddWithIds(int64_t n, const uint8_t* xdata, const int64_t* xids) override;
size_t
Count() const override;

View File

@ -89,7 +89,7 @@ IndexAnnoy::BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
return;
}
GETTENSORWITHIDS(dataset_ptr)
GETTENSOR(dataset_ptr)
metric_type_ = config[Metric::TYPE];
if (metric_type_ == Metric::L2) {
@ -101,7 +101,7 @@ IndexAnnoy::BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
}
for (int i = 0; i < rows; ++i) {
index_->add_item(p_ids[i], (const float*)p_data + dim * i);
index_->add_item(i, (const float*)p_data + dim * i);
}
index_->build(config[IndexParams::n_trees].get<int64_t>());
@ -129,11 +129,14 @@ IndexAnnoy::Query(const DatasetPtr& dataset_ptr, const Config& config) {
distances.reserve(k);
index_->get_nns_by_vector((const float*)p_data + i * dim, k, search_k, &result, &distances, blacklist);
int64_t result_num = result.size();
size_t result_num = result.size();
auto local_p_id = p_id + k * i;
auto local_p_dist = p_dist + k * i;
memcpy(local_p_id, result.data(), result_num * sizeof(int64_t));
memcpy(local_p_dist, distances.data(), result_num * sizeof(float));
MapOffsetToUid(local_p_id, result_num);
for (; result_num < k; result_num++) {
local_p_id[result_num] = -1;
local_p_dist[result_num] = 1.0 / 0.0;

View File

@ -43,11 +43,6 @@ class IndexAnnoy : public VecIndex {
KNOWHERE_THROW_MSG("Annoy not support build item dynamically, please invoke BuildAll interface.");
}
// Always throws via KNOWHERE_THROW_MSG: adding items with ids is rejected for Annoy, and the
// message points callers to BuildAll instead. Both arguments are ignored.
void
Add(const DatasetPtr& dataset_ptr, const Config& config) override {
KNOWHERE_THROW_MSG("Annoy not support add item dynamically, please invoke BuildAll interface.");
}
void
AddWithoutIds(const DatasetPtr&, const Config&) override {
KNOWHERE_THROW_MSG("Incremental index is not supported");

View File

@ -13,7 +13,6 @@
#include <faiss/IndexBinaryFlat.h>
#include <faiss/MetaIndexes.h>
#include <faiss/index_factory.h>
#include <string>
@ -77,48 +76,6 @@ BinaryIDMAP::Dim() {
return index_->d;
}
void
BinaryIDMAP::Add(const DatasetPtr& dataset_ptr, const Config& config) {
if (!index_) {
KNOWHERE_THROW_MSG("index not initialize");
}
std::lock_guard<std::mutex> lk(mutex_);
GETTENSORWITHIDS(dataset_ptr)
index_->add_with_ids(rows, (uint8_t*)p_data, p_ids);
}
void
BinaryIDMAP::Train(const DatasetPtr& dataset_ptr, const Config& config) {
const char* desc = "BFlat";
int64_t dim = config[meta::DIM].get<int64_t>();
faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
auto index = faiss::index_binary_factory(dim, desc, metric_type);
index_.reset(index);
}
// Returns a pointer to the raw vector storage (xb) of the flat index wrapped by the IDMap index.
// Throws (KNOWHERE_THROW_MSG) when index_ is not an IndexBinaryIDMap over an IndexBinaryFlat.
const uint8_t*
BinaryIDMAP::GetRawVectors() {
    // dynamic_cast on pointers yields nullptr on mismatch instead of throwing, so the former
    // try/catch could never fire and a failed cast was dereferenced. Check the results explicitly.
    auto file_index = dynamic_cast<faiss::IndexBinaryIDMap*>(index_.get());
    if (file_index == nullptr) {
        KNOWHERE_THROW_MSG("index is not a faiss::IndexBinaryIDMap");
    }

    auto flat_index = dynamic_cast<faiss::IndexBinaryFlat*>(file_index->index);
    if (flat_index == nullptr) {
        KNOWHERE_THROW_MSG("wrapped index is not a faiss::IndexBinaryFlat");
    }

    return flat_index->xb.data();
}
// Returns a pointer to the id_map array of the IDMap index.
// Throws (KNOWHERE_THROW_MSG) when index_ is not a faiss::IndexBinaryIDMap.
const int64_t*
BinaryIDMAP::GetRawIds() {
    // dynamic_cast on a pointer returns nullptr on mismatch; it never throws, so the former
    // try/catch was dead code and a failed cast was dereferenced.
    auto file_index = dynamic_cast<faiss::IndexBinaryIDMap*>(index_.get());
    if (file_index == nullptr) {
        KNOWHERE_THROW_MSG("index is not a faiss::IndexBinaryIDMap");
    }

    return file_index->id_map.data();
}
void
BinaryIDMAP::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
if (!index_) {
@ -128,34 +85,48 @@ BinaryIDMAP::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config)
std::lock_guard<std::mutex> lk(mutex_);
GETTENSOR(dataset_ptr)
std::vector<int64_t> new_ids(rows);
for (int i = 0; i < rows; ++i) {
new_ids[i] = i;
}
index_->add(rows, (uint8_t*)p_data);
}
index_->add_with_ids(rows, (uint8_t*)p_data, new_ids.data());
void
BinaryIDMAP::Train(const DatasetPtr& dataset_ptr, const Config& config) {
int64_t dim = config[meta::DIM].get<int64_t>();
faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
auto index = std::make_shared<faiss::IndexBinaryFlat>(dim, metric_type);
index_ = index;
}
// Returns a pointer to the raw vector storage (xb) of the flat binary index.
// Throws (KNOWHERE_THROW_MSG) when index_ is not a faiss::IndexBinaryFlat.
const uint8_t*
BinaryIDMAP::GetRawVectors() {
    // dynamic_cast on a pointer returns nullptr on mismatch; it never throws, so the former
    // try/catch was dead code and a failed cast was dereferenced.
    auto flat_index = dynamic_cast<faiss::IndexBinaryFlat*>(index_.get());
    if (flat_index == nullptr) {
        KNOWHERE_THROW_MSG("index is not a faiss::IndexBinaryFlat");
    }

    return flat_index->xb.data();
}
void
BinaryIDMAP::QueryImpl(int64_t n, const uint8_t* data, int64_t k, float* distances, int64_t* labels,
const Config& config) {
auto flat_index = dynamic_cast<faiss::IndexBinaryIDMap*>(index_.get())->index;
auto default_type = flat_index->metric_type;
auto default_type = index_->metric_type;
if (config.contains(Metric::TYPE))
flat_index->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
index_->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
int32_t* i_distances = reinterpret_cast<int32_t*>(distances);
flat_index->search(n, (uint8_t*)data, k, i_distances, labels, GetBlacklist());
index_->search(n, (uint8_t*)data, k, i_distances, labels, GetBlacklist());
// if hamming, it need transform int32 to float
if (flat_index->metric_type == faiss::METRIC_Hamming) {
if (index_->metric_type == faiss::METRIC_Hamming) {
int64_t num = n * k;
for (int64_t i = 0; i < num; i++) {
distances[i] = static_cast<float>(i_distances[i]);
}
}
flat_index->metric_type = default_type;
index_->metric_type = default_type;
MapOffsetToUid(labels, static_cast<size_t>(n * k));
}
} // namespace knowhere

View File

@ -41,9 +41,6 @@ class BinaryIDMAP : public VecIndex, public FaissBaseBinaryIndex {
void
Train(const DatasetPtr&, const Config&) override;
void
Add(const DatasetPtr&, const Config&) override;
void
AddWithoutIds(const DatasetPtr&, const Config&) override;
@ -64,9 +61,6 @@ class BinaryIDMAP : public VecIndex, public FaissBaseBinaryIndex {
virtual const uint8_t*
GetRawVectors();
virtual const int64_t*
GetRawIds();
protected:
virtual void
QueryImpl(int64_t n, const uint8_t* data, int64_t k, float* distances, int64_t* labels, const Config& config);

View File

@ -73,52 +73,6 @@ BinaryIVF::Query(const DatasetPtr& dataset_ptr, const Config& config) {
}
}
#if 0
// Searches the index using stored vectors, referenced by id, as the queries.
//
// dataset_ptr - provides meta::ROWS (number of ids) and meta::IDS (the ids)
// config      - provides meta::TOPK
//
// Returns a Dataset holding malloc'ed meta::IDS and meta::DISTANCE arrays of ROWS * TOPK entries.
// For the Hamming metric the integer distances produced by faiss are converted to float.
// Throws (KNOWHERE_THROW_MSG) when the index is missing or untrained, or when the search fails.
DatasetPtr
BinaryIVF::QueryById(const DatasetPtr& dataset_ptr, const Config& config) {
    if (!index_ || !index_->is_trained) {
        KNOWHERE_THROW_MSG("index not initialize or trained");
    }

    auto rows = dataset_ptr->Get<int64_t>(meta::ROWS);
    auto p_data = dataset_ptr->Get<const int64_t*>(meta::IDS);

    // Declared outside the try block so the handlers can release them; previously both buffers
    // leaked whenever search_by_id threw.
    int64_t* p_id = nullptr;
    float* p_dist = nullptr;

    try {
        int64_t k = config[meta::TOPK].get<int64_t>();
        auto elems = rows * k;

        p_id = static_cast<int64_t*>(malloc(sizeof(int64_t) * elems));
        p_dist = static_cast<float*>(malloc(sizeof(float) * elems));
        if (elems > 0 && (p_id == nullptr || p_dist == nullptr)) {
            free(p_id);
            free(p_dist);
            p_id = nullptr;
            p_dist = nullptr;
            KNOWHERE_THROW_MSG("failed to allocate result buffers");
        }

        // faiss writes int32 distances for binary indexes; the float buffer is reused as storage.
        int32_t* pdistances = reinterpret_cast<int32_t*>(p_dist);
        index_->search_by_id(rows, p_data, k, pdistances, p_id, bitset_);

        if (index_->metric_type == faiss::METRIC_Hamming) {
            // Convert in place (same approach as QueryImpl) instead of allocating a second buffer.
            for (int64_t i = 0; i < elems; i++) {
                p_dist[i] = static_cast<float>(pdistances[i]);
            }
        }

        auto ret_ds = std::make_shared<Dataset>();
        ret_ds->Set(meta::IDS, p_id);
        ret_ds->Set(meta::DISTANCE, p_dist);
        return ret_ds;
    } catch (faiss::FaissException& e) {
        free(p_id);
        free(p_dist);
        KNOWHERE_THROW_MSG(e.what());
    } catch (std::exception& e) {
        free(p_id);
        free(p_dist);
        KNOWHERE_THROW_MSG(e.what());
    }
}
#endif
int64_t
BinaryIVF::Count() {
if (!index_) {
@ -151,7 +105,7 @@ BinaryIVF::UpdateIndexSize() {
void
BinaryIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
GETTENSORWITHIDS(dataset_ptr)
GETTENSOR(dataset_ptr)
int64_t nlist = config[IndexParams::nlist];
faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
@ -159,7 +113,7 @@ BinaryIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
auto index = std::make_shared<faiss::IndexBinaryIVF>(coarse_quantizer, dim, nlist, metric_type);
index->own_fields = true;
index->train(rows, static_cast<const uint8_t*>(p_data));
index->add_with_ids(rows, static_cast<const uint8_t*>(p_data), p_ids);
index->add(rows, static_cast<const uint8_t*>(p_data));
index_ = index;
}
@ -226,6 +180,8 @@ BinaryIVF::QueryImpl(int64_t n, const uint8_t* data, int64_t k, float* distances
distances[i] = static_cast<float>(i_distances[i]);
}
}
MapOffsetToUid(labels, static_cast<size_t>(n * k));
}
} // namespace knowhere

View File

@ -49,11 +49,6 @@ class BinaryIVF : public VecIndex, public FaissBaseBinaryIndex {
void
Train(const DatasetPtr& dataset_ptr, const Config& config) override;
// Always throws via KNOWHERE_THROW_MSG ("not support yet"); both arguments are ignored.
void
Add(const DatasetPtr& dataset_ptr, const Config& config) override {
KNOWHERE_THROW_MSG("not support yet");
}
void
AddWithoutIds(const DatasetPtr&, const Config&) override {
KNOWHERE_THROW_MSG("AddWithoutIds is not supported");
@ -62,11 +57,6 @@ class BinaryIVF : public VecIndex, public FaissBaseBinaryIndex {
DatasetPtr
Query(const DatasetPtr& dataset_ptr, const Config& config) override;
#if 0
DatasetPtr
QueryById(const DatasetPtr& dataset_ptr, const Config& config) override;
#endif
int64_t
Count() override;
@ -76,11 +66,6 @@ class BinaryIVF : public VecIndex, public FaissBaseBinaryIndex {
void
UpdateIndexSize() override;
#if 0
DatasetPtr
GetVectorById(const DatasetPtr& dataset_ptr, const Config& config);
#endif
protected:
virtual std::shared_ptr<faiss::IVFSearchParameters>
GenParams(const Config& config);

View File

@ -95,38 +95,20 @@ IndexHNSW::Train(const DatasetPtr& dataset_ptr, const Config& config) {
}
void
IndexHNSW::Add(const DatasetPtr& dataset_ptr, const Config& config) {
IndexHNSW::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
if (!index_) {
KNOWHERE_THROW_MSG("index not initialize");
}
std::lock_guard<std::mutex> lk(mutex_);
GETTENSORWITHIDS(dataset_ptr)
GETTENSOR(dataset_ptr)
// if (normalize) {
// std::vector<float> ep_norm_vector(Dim());
// normalize_vector((float*)(p_data), ep_norm_vector.data(), Dim());
// index_->addPoint((void*)(ep_norm_vector.data()), p_ids[0]);
// #pragma omp parallel for
// for (int i = 1; i < rows; ++i) {
// std::vector<float> norm_vector(Dim());
// normalize_vector((float*)(p_data + Dim() * i), norm_vector.data(), Dim());
// index_->addPoint((void*)(norm_vector.data()), p_ids[i]);
// }
// } else {
// index_->addPoint((void*)(p_data), p_ids[0]);
// #pragma omp parallel for
// for (int i = 1; i < rows; ++i) {
// index_->addPoint((void*)(p_data + Dim() * i), p_ids[i]);
// }
// }
index_->addPoint(p_data, p_ids[0]);
index_->addPoint(p_data, 0);
#pragma omp parallel for
for (int i = 1; i < rows; ++i) {
faiss::BuilderSuspend::check_wait();
index_->addPoint(((float*)p_data + Dim() * i), p_ids[i]);
index_->addPoint(((float*)p_data + Dim() * i), i);
}
}
@ -154,13 +136,6 @@ IndexHNSW::Query(const DatasetPtr& dataset_ptr, const Config& config) {
std::vector<P> ret;
const float* single_query = (float*)p_data + i * Dim();
// if (normalize) {
// std::vector<float> norm_vector(Dim());
// normalize_vector((float*)(single_query), norm_vector.data(), Dim());
// ret = index_->searchKnn((float*)(norm_vector.data()), config[meta::TOPK].get<int64_t>(), compare);
// } else {
// ret = index_->searchKnn((float*)single_query, config[meta::TOPK].get<int64_t>(), compare);
// }
ret = index_->searchKnn((float*)single_query, k, compare, blacklist);
while (ret.size() < k) {
@ -179,6 +154,7 @@ IndexHNSW::Query(const DatasetPtr& dataset_ptr, const Config& config) {
std::transform(ret.begin(), ret.end(), std::back_inserter(ids),
[](const std::pair<float, int64_t>& e) { return e.second; });
MapOffsetToUid(ids.data(), ids.size());
memcpy(p_dist + i * k, dist.data(), dist_size);
memcpy(p_id + i * k, ids.data(), id_size);
}

View File

@ -38,12 +38,7 @@ class IndexHNSW : public VecIndex {
Train(const DatasetPtr& dataset_ptr, const Config& config) override;
void
Add(const DatasetPtr& dataset_ptr, const Config& config) override;
void
AddWithoutIds(const DatasetPtr&, const Config&) override {
KNOWHERE_THROW_MSG("Incremental index is not supported");
}
AddWithoutIds(const DatasetPtr&, const Config&) override;
DatasetPtr
Query(const DatasetPtr& dataset_ptr, const Config& config) override;

View File

@ -15,7 +15,6 @@
#include <faiss/IndexFlat.h>
#include <faiss/MetaIndexes.h>
#include <faiss/clone_index.h>
#include <faiss/index_factory.h>
#include <faiss/index_io.h>
#ifdef MILVUS_GPU_VERSION
#include <faiss/gpu/GpuCloner.h>
@ -54,22 +53,10 @@ IDMAP::Load(const BinarySet& binary_set) {
void
IDMAP::Train(const DatasetPtr& dataset_ptr, const Config& config) {
const char* desc = "IDMap,Flat";
int64_t dim = config[meta::DIM].get<int64_t>();
faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
auto index = faiss::index_factory(dim, desc, metric_type);
index_.reset(index);
}
void
IDMAP::Add(const DatasetPtr& dataset_ptr, const Config& config) {
if (!index_) {
KNOWHERE_THROW_MSG("index not initialize");
}
std::lock_guard<std::mutex> lk(mutex_);
GETTENSORWITHIDS(dataset_ptr)
index_->add_with_ids(rows, (float*)p_data, p_ids);
auto index = std::make_shared<faiss::IndexFlat>(dim, metric_type);
index_ = index;
}
void
@ -79,16 +66,8 @@ IDMAP::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
}
std::lock_guard<std::mutex> lk(mutex_);
auto rows = dataset_ptr->Get<int64_t>(meta::ROWS);
auto p_data = dataset_ptr->Get<const void*>(meta::TENSOR);
// TODO: caiyd need check
std::vector<int64_t> new_ids(rows);
for (int i = 0; i < rows; ++i) {
new_ids[i] = i;
}
index_->add_with_ids(rows, (float*)p_data, new_ids.data());
GETTENSOR(dataset_ptr)
index_->add(rows, (float*)p_data);
}
DatasetPtr
@ -105,7 +84,6 @@ IDMAP::Query(const DatasetPtr& dataset_ptr, const Config& config) {
auto p_id = (int64_t*)malloc(p_id_size);
auto p_dist = (float*)malloc(p_dist_size);
// QueryImpl(rows, (float*)p_data, k, p_dist, p_id, Config());
QueryImpl(rows, (float*)p_data, k, p_dist, p_id, config);
auto ret_ds = std::make_shared<Dataset>();
ret_ds->Set(meta::IDS, p_id);
@ -113,35 +91,6 @@ IDMAP::Query(const DatasetPtr& dataset_ptr, const Config& config) {
return ret_ds;
}
#if 0
// Searches the index using stored vectors, referenced by id, as the queries.
//
// dataset_ptr - provides meta::ROWS (number of ids) and meta::IDS (the ids)
// config      - provides meta::TOPK
//
// Returns a Dataset holding malloc'ed meta::IDS and meta::DISTANCE arrays of ROWS * TOPK entries.
// Throws (KNOWHERE_THROW_MSG) when the index is missing or the buffers cannot be allocated;
// exceptions raised by the search are propagated after the buffers are released.
DatasetPtr
IDMAP::QueryById(const DatasetPtr& dataset_ptr, const Config& config) {
    if (!index_) {
        KNOWHERE_THROW_MSG("index not initialize");
    }

    auto rows = dataset_ptr->Get<int64_t>(meta::ROWS);
    auto p_data = dataset_ptr->Get<const int64_t*>(meta::IDS);

    int64_t k = config[meta::TOPK].get<int64_t>();
    auto elems = rows * k;
    size_t p_id_size = sizeof(int64_t) * elems;
    size_t p_dist_size = sizeof(float) * elems;
    auto p_id = static_cast<int64_t*>(malloc(p_id_size));
    auto p_dist = static_cast<float*>(malloc(p_dist_size));
    if (elems > 0 && (p_id == nullptr || p_dist == nullptr)) {
        free(p_id);
        free(p_dist);
        KNOWHERE_THROW_MSG("failed to allocate result buffers");
    }

    // todo: enable search by id (zhiru)
    try {
        index_->search_by_id(rows, p_data, k, p_dist, p_id, bitset_);
    } catch (...) {
        // Nothing owns the buffers yet, so release them before passing the error on.
        free(p_id);
        free(p_dist);
        throw;
    }

    auto ret_ds = std::make_shared<Dataset>();
    ret_ds->Set(meta::IDS, p_id);
    ret_ds->Set(meta::DISTANCE, p_dist);
    return ret_ds;
}
#endif
int64_t
IDMAP::Count() {
if (!index_) {
@ -179,54 +128,22 @@ IDMAP::CopyCpuToGpu(const int64_t device_id, const Config& config) {
const float*
IDMAP::GetRawVectors() {
try {
auto file_index = dynamic_cast<faiss::IndexIDMap*>(index_.get());
auto flat_index = dynamic_cast<faiss::IndexFlat*>(file_index->index);
auto flat_index = dynamic_cast<faiss::IndexFlat*>(index_.get());
return flat_index->xb.data();
} catch (std::exception& e) {
KNOWHERE_THROW_MSG(e.what());
}
}
// Returns a pointer to the id_map array of the IDMap index.
// Throws (KNOWHERE_THROW_MSG) when index_ is not a faiss::IndexIDMap.
const int64_t*
IDMAP::GetRawIds() {
    // dynamic_cast on a pointer returns nullptr on mismatch; it never throws, so the former
    // try/catch was dead code and a failed cast was dereferenced.
    auto file_index = dynamic_cast<faiss::IndexIDMap*>(index_.get());
    if (file_index == nullptr) {
        KNOWHERE_THROW_MSG("index is not a faiss::IndexIDMap");
    }

    return file_index->id_map.data();
}
#if 0
// Looks up a single stored vector by id.
//
// dataset_ptr - provides meta::IDS (the id to fetch) and meta::DIM (number of floats to return)
//
// Returns a Dataset whose meta::TENSOR entry is a malloc'ed float array of DIM elements; the
// Dataset does not release it, so the consumer of TENSOR has to free() it.
// Throws (KNOWHERE_THROW_MSG) when the index is missing or the buffer cannot be allocated;
// exceptions raised by the lookup are propagated after the buffer is released.
DatasetPtr
IDMAP::GetVectorById(const DatasetPtr& dataset_ptr, const Config& config) {
    if (!index_) {
        KNOWHERE_THROW_MSG("index not initialize");
    }

    auto p_data = dataset_ptr->Get<const int64_t*>(meta::IDS);
    auto elems = dataset_ptr->Get<int64_t>(meta::DIM);

    size_t p_x_size = sizeof(float) * elems;
    auto p_x = static_cast<float*>(malloc(p_x_size));
    if (p_x == nullptr && p_x_size > 0) {
        KNOWHERE_THROW_MSG("failed to allocate vector buffer");
    }

    try {
        index_->get_vector_by_id(1, p_data, p_x, bitset_);
    } catch (...) {
        // Nothing owns the buffer yet, so release it before passing the error on.
        free(p_x);
        throw;
    }

    auto ret_ds = std::make_shared<Dataset>();
    ret_ds->Set(meta::TENSOR, p_x);
    return ret_ds;
}
#endif
// Runs a top-k search for `n` query vectors, writing into `distances` and `labels`.
// If `config` carries Metric::TYPE, the index's metric_type is overridden for the duration
// of the search and restored afterwards. The `n * k` labels are then passed through
// MapOffsetToUid.
// NOTE(review): this block comes from a diff rendering with the +/- markers stripped. The
// `flat_index->...` statements and the `index_->...` statements look like the old and new
// revisions of the same three steps (`default_type` is declared twice) — confirm against
// the real file.
// NOTE(review): no lock is visible around the temporary metric_type change.
void
IDMAP::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& config) {
auto flat_index = dynamic_cast<faiss::IndexIDMap*>(index_.get())->index;
auto default_type = flat_index->metric_type;
auto default_type = index_->metric_type;
if (config.contains(Metric::TYPE))
flat_index->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
flat_index->search(n, (float*)data, k, distances, labels, GetBlacklist());
flat_index->metric_type = default_type;
index_->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
index_->search(n, (float*)data, k, distances, labels, GetBlacklist());
index_->metric_type = default_type;
MapOffsetToUid(labels, static_cast<size_t>(n * k));
}
} // namespace knowhere

View File

@ -39,20 +39,12 @@ class IDMAP : public VecIndex, public FaissBaseIndex {
void
Train(const DatasetPtr&, const Config&) override;
void
Add(const DatasetPtr&, const Config&) override;
void
AddWithoutIds(const DatasetPtr&, const Config&) override;
DatasetPtr
Query(const DatasetPtr&, const Config&) override;
#if 0
DatasetPtr
QueryById(const DatasetPtr& dataset, const Config& config) override;
#endif
int64_t
Count() override;
@ -64,20 +56,12 @@ class IDMAP : public VecIndex, public FaissBaseIndex {
return Count() * Dim() * sizeof(FloatType);
}
#if 0
DatasetPtr
GetVectorById(const DatasetPtr& dataset, const Config& config) override;
#endif
VecIndexPtr
CopyCpuToGpu(const int64_t, const Config&);
virtual const float*
GetRawVectors();
virtual const int64_t*
GetRawIds();
protected:
virtual void
QueryImpl(int64_t, const float*, int64_t, float*, int64_t*, const Config&);

View File

@ -16,7 +16,6 @@
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/clone_index.h>
#include <faiss/index_factory.h>
#include <faiss/index_io.h>
#ifdef MILVUS_GPU_VERSION
#include <faiss/gpu/GpuAutoTune.h>
@ -77,17 +76,6 @@ IVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
index_ = index;
}
// Adds the dataset's rows to the faiss index together with the ids carried in the dataset.
// The call is rejected through KNOWHERE_THROW_MSG unless an index exists and reports
// is_trained. The insertion itself runs while holding `mutex_`.
void
IVF::Add(const DatasetPtr& dataset_ptr, const Config& config) {
    const bool usable = index_ && index_->is_trained;
    if (!usable) {
        KNOWHERE_THROW_MSG("index not initialize or trained");
    }

    std::lock_guard<std::mutex> guard(mutex_);

    // The macro declares dim, rows, p_data and p_ids from the dataset.
    GETTENSORWITHIDS(dataset_ptr)
    index_->add_with_ids(rows, (float*)p_data, p_ids);
}
void
IVF::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
if (!index_ || !index_->is_trained) {
@ -120,19 +108,6 @@ IVF::Query(const DatasetPtr& dataset_ptr, const Config& config) {
QueryImpl(rows, (float*)p_data, k, p_dist, p_id, config);
// std::stringstream ss_res_id, ss_res_dist;
// for (int i = 0; i < 10; ++i) {
// printf("%llu", p_id[i]);
// printf("\n");
// printf("%.6f", p_dist[i]);
// printf("\n");
// ss_res_id << p_id[i] << " ";
// ss_res_dist << p_dist[i] << " ";
// }
// std::cout << std::endl << "after search: " << std::endl;
// std::cout << ss_res_id.str() << std::endl;
// std::cout << ss_res_dist.str() << std::endl << std::endl;
auto ret_ds = std::make_shared<Dataset>();
ret_ds->Set(meta::IDS, p_id);
ret_ds->Set(meta::DISTANCE, p_dist);
@ -339,6 +314,8 @@ IVF::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int64_
<< ", data search cost: " << faiss::indexIVF_stats.search_time;
faiss::indexIVF_stats.quantization_time = 0;
faiss::indexIVF_stats.search_time = 0;
MapOffsetToUid(labels, static_cast<size_t>(n * k));
}
void

View File

@ -44,9 +44,6 @@ class IVF : public VecIndex, public FaissBaseIndex {
void
Train(const DatasetPtr&, const Config&) override;
void
Add(const DatasetPtr&, const Config&) override;
void
AddWithoutIds(const DatasetPtr&, const Config&) override;

View File

@ -98,6 +98,8 @@ NSG::Query(const DatasetPtr& dataset_ptr, const Config& config) {
blacklist);
}
MapOffsetToUid(p_id, static_cast<size_t>(elems));
auto ret_ds = std::make_shared<Dataset>();
ret_ds->Set(meta::IDS, p_id);
ret_ds->Set(meta::DISTANCE, p_dist);
@ -139,7 +141,7 @@ NSG::Train(const DatasetPtr& dataset_ptr, const Config& config) {
b_params.out_degree = config[IndexParams::out_degree];
b_params.search_length = config[IndexParams::search_length];
GETTENSORWITHIDS(dataset_ptr)
GETTENSOR(dataset_ptr)
impl::NsgIndex::Metric_Type metric;
auto metric_str = config[Metric::TYPE].get<std::string>();
@ -153,7 +155,7 @@ NSG::Train(const DatasetPtr& dataset_ptr, const Config& config) {
index_ = std::make_shared<impl::NsgIndex>(dim, rows, metric);
index_->SetKnnGraph(knng);
index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params);
index_->Build(rows, (float*)p_data, nullptr, b_params);
}
int64_t

View File

@ -48,11 +48,6 @@ class NSG : public VecIndex {
void
Train(const DatasetPtr&, const Config&) override;
// Override that ignores both (unnamed) arguments and only reports
// "Incremental index is not supported" through KNOWHERE_THROW_MSG.
void
Add(const DatasetPtr&, const Config&) override {
KNOWHERE_THROW_MSG("Incremental index is not supported");
}
void
AddWithoutIds(const DatasetPtr&, const Config&) override {
KNOWHERE_THROW_MSG("Addwithoutids is not supported");

View File

@ -195,7 +195,7 @@ CPUSPTAGRNG::Query(const DatasetPtr& dataset_ptr, const Config& config) {
index_ptr_->SearchIndex(query_results[i]);
}
return ConvertToDataset(query_results);
return ConvertToDataset(query_results, uids_);
}
int64_t

View File

@ -41,11 +41,6 @@ class CPUSPTAGRNG : public VecIndex {
void
Train(const DatasetPtr& dataset_ptr, const Config& config) override;
// Override that ignores both (unnamed) arguments and only reports
// "Incremental index is not supported" through KNOWHERE_THROW_MSG.
void
Add(const DatasetPtr&, const Config&) override {
KNOWHERE_THROW_MSG("Incremental index is not supported");
}
void
AddWithoutIds(const DatasetPtr&, const Config&) override {
KNOWHERE_THROW_MSG("Incremental index is not supported");

View File

@ -31,34 +31,18 @@ class VecIndex : public Index {
// Convenience entry point: trains on the dataset, then adds the same dataset to the index.
// Virtual, so a subclass may replace the whole sequence.
// NOTE(review): this block comes from a diff rendering with the +/- markers stripped. The
// Add(...) and AddWithoutIds(...) calls look like the old and new revisions of the same
// step rather than two consecutive insertions — confirm against the real file.
virtual void
BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
Train(dataset_ptr, config);
Add(dataset_ptr, config);
AddWithoutIds(dataset_ptr, config);
}
virtual void
Train(const DatasetPtr& dataset, const Config& config) = 0;
virtual void
Add(const DatasetPtr& dataset, const Config& config) = 0;
virtual void
AddWithoutIds(const DatasetPtr& dataset, const Config& config) = 0;
virtual DatasetPtr
Query(const DatasetPtr& dataset, const Config& config) = 0;
// Preprocessor-disabled (`#if 0`): not compiled.
// Default QueryById that ignores its arguments and returns nullptr.
#if 0
virtual DatasetPtr
QueryById(const DatasetPtr& dataset, const Config& config) {
return nullptr;
}
#endif
// virtual DatasetPtr
// QueryByRange(const DatasetPtr&, const Config&) = 0;
//
// virtual MetricType
// metric_type() = 0;
virtual int64_t
Dim() = 0;
@ -75,13 +59,6 @@ class VecIndex : public Index {
return index_mode_;
}
// Preprocessor-disabled (`#if 0`): not compiled.
// Default GetVectorById that ignores its arguments and returns nullptr.
#if 0
virtual DatasetPtr
GetVectorById(const DatasetPtr& dataset, const Config& config) {
return nullptr;
}
#endif
faiss::ConcurrentBitsetPtr
GetBlacklist() {
std::unique_lock<std::mutex> lck(mutex_);
@ -104,6 +81,17 @@ class VecIndex : public Index {
uids_ = uids;
}
// Rewrites the first `n` entries of `id` in place: each non-negative entry is used as a
// position into `uids_` and replaced by the value stored there. Negative entries are left
// as they are, and nothing happens at all when `uids_` is not set.
// The lookup uses at(), so an entry beyond the end of `uids_` raises std::out_of_range.
void
MapOffsetToUid(IDType* id, size_t n) {
    if (!uids_) {
        return;
    }

    for (size_t pos = 0; pos < n; ++pos) {
        IDType& slot = id[pos];
        if (slot < 0) {
            continue;
        }
        slot = uids_->at(slot);
    }
}
size_t
BlacklistSize() {
std::unique_lock<std::mutex> lck(mutex_);

View File

@ -18,13 +18,15 @@ namespace knowhere {
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr& dataset_ptr) {
auto elems = dataset_ptr->Get<int64_t>(meta::ROWS);
auto p_data = dataset_ptr->Get<const int64_t*>(meta::IDS);
auto p_id = (int64_t*)malloc(sizeof(int64_t) * elems);
for (int64_t i = 0; i < elems; ++i) p_id[i] = i;
auto p_offset = (int64_t*)malloc(sizeof(int64_t) * (elems + 1));
for (auto i = 0; i <= elems; ++i) p_offset[i] = i * 8;
for (int64_t i = 0; i <= elems; ++i) p_offset[i] = i * 8;
std::shared_ptr<SPTAG::MetadataSet> metaset(
new SPTAG::MemMetadataSet(SPTAG::ByteArray((std::uint8_t*)p_data, elems * sizeof(int64_t), false),
new SPTAG::MemMetadataSet(SPTAG::ByteArray((std::uint8_t*)p_id, elems * sizeof(int64_t), true),
SPTAG::ByteArray((std::uint8_t*)p_offset, elems * sizeof(int64_t), true), elems));
return metaset;
@ -54,7 +56,7 @@ ConvertToQueryResult(const DatasetPtr& dataset_ptr, const Config& config) {
}
DatasetPtr
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results, std::shared_ptr<std::vector<int64_t>> uid) {
auto k = query_results[0].GetResultNum();
auto elems = query_results.size() * k;
@ -64,12 +66,18 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
auto p_dist = (float*)malloc(p_dist_size);
#pragma omp parallel for
for (auto i = 0; i < query_results.size(); ++i) {
for (size_t i = 0; i < query_results.size(); ++i) {
auto results = query_results[i].GetResults();
auto num_result = query_results[i].GetResultNum();
for (auto j = 0; j < num_result; ++j) {
// p_id[i * k + j] = results[j].VID;
p_id[i * k + j] = *(int64_t*)query_results[i].GetMetadata(j).Data();
auto id = *(int64_t*)query_results[i].GetMetadata(j).Data();
if (uid != nullptr) {
if (id >= 0) {
id = uid->at(id);
}
}
p_id[i * k + j] = id;
p_dist[i * k + j] = results[j].Dist;
}
}

View File

@ -31,7 +31,7 @@ std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr& dataset_ptr, const Config& config);
DatasetPtr
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results);
ConvertToDataset(std::vector<SPTAG::QueryResult> query_results, std::shared_ptr<std::vector<int64_t>> uid);
} // namespace knowhere
} // namespace milvus

View File

@ -18,16 +18,6 @@
namespace milvus {
namespace knowhere {
// Packs a row count, a dimension, a data pointer and an id pointer into a fresh Dataset
// under meta::ROWS, meta::DIM, meta::TENSOR and meta::IDS respectively.
// Only the pointers are stored here; the pointed-to buffers are not copied by this function.
DatasetPtr
GenDatasetWithIds(const int64_t nb, const int64_t dim, const void* xb, const int64_t* ids) {
    auto packed = std::make_shared<Dataset>();

    packed->Set(meta::ROWS, nb);
    packed->Set(meta::DIM, dim);
    packed->Set(meta::TENSOR, xb);
    packed->Set(meta::IDS, ids);

    return packed;
}
DatasetPtr
GenDataset(const int64_t nb, const int64_t dim, const void* xb) {
auto ret_ds = std::make_shared<Dataset>();

View File

@ -23,15 +23,6 @@ namespace knowhere {
int64_t rows = dataset_ptr->Get<int64_t>(meta::ROWS); \
const void* p_data = dataset_ptr->Get<const void*>(meta::TENSOR);
// Declares four locals read from `dataset_ptr`: `dim` (meta::DIM), `rows` (meta::ROWS),
// `p_data` (meta::TENSOR) and `p_ids` (meta::IDS).
// It expands to bare declarations, so the names land in the enclosing scope and the macro
// can appear only once per scope.
#define GETTENSORWITHIDS(dataset_ptr) \
int64_t dim = dataset_ptr->Get<int64_t>(meta::DIM); \
int64_t rows = dataset_ptr->Get<int64_t>(meta::ROWS); \
const void* p_data = dataset_ptr->Get<const void*>(meta::TENSOR); \
const int64_t* p_ids = dataset_ptr->Get<const int64_t*>(meta::IDS);
extern DatasetPtr
GenDatasetWithIds(const int64_t nb, const int64_t dim, const void* xb, const int64_t* ids);
extern DatasetPtr
GenDataset(const int64_t nb, const int64_t dim, const void* xb);

View File

@ -98,21 +98,17 @@ GPUIDMAP::GetRawVectors() {
KNOWHERE_THROW_MSG("Not support");
}
// GPU variant: never returns a pointer; it only reports "Not support" through
// KNOWHERE_THROW_MSG.
const int64_t*
GPUIDMAP::GetRawIds() {
KNOWHERE_THROW_MSG("Not support");
}
// GPU top-k search for `n` query vectors, writing into `distances` and `labels`.
// A ResScope built from `res_` and `gpu_id_` is held for the whole call. If `config`
// carries Metric::TYPE, the index's metric_type is overridden for the search and restored
// afterwards. The `n * k` labels are then passed through MapOffsetToUid.
// NOTE(review): this block comes from a diff rendering with the +/- markers stripped. The
// `flat_index->...` statements and the `index_->...` statements look like the old and new
// revisions of the same three steps (`default_type` is declared twice) — confirm against
// the real file.
void
GPUIDMAP::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& config) {
ResScope rs(res_, gpu_id_);
auto flat_index = dynamic_cast<faiss::IndexIDMap*>(index_.get())->index;
auto default_type = flat_index->metric_type;
auto default_type = index_->metric_type;
if (config.contains(Metric::TYPE))
flat_index->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
flat_index->search(n, (float*)data, k, distances, labels, GetBlacklist());
flat_index->metric_type = default_type;
index_->metric_type = GetMetricType(config[Metric::TYPE].get<std::string>());
index_->search(n, (float*)data, k, distances, labels, GetBlacklist());
index_->metric_type = default_type;
MapOffsetToUid(labels, static_cast<size_t>(n * k));
}
void

View File

@ -39,9 +39,6 @@ class GPUIDMAP : public IDMAP, public GPUIndex {
const float*
GetRawVectors() override;
const int64_t*
GetRawIds() override;
void
GenGraph(const float*, const int64_t, GraphType&, const Config&);

View File

@ -53,10 +53,10 @@ GPUIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
}
void
GPUIVF::Add(const DatasetPtr& dataset_ptr, const Config& config) {
GPUIVF::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
if (auto spt = res_.lock()) {
ResScope rs(res_, gpu_id_);
IVF::Add(dataset_ptr, config);
IVF::AddWithoutIds(dataset_ptr, config);
} else {
KNOWHERE_THROW_MSG("Add IVF can't get gpu resource");
}
@ -152,6 +152,8 @@ GPUIVF::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int
device_index->search(search_size, (float*)data + i * dim, k, distances + i * k, labels + i * k,
GetBlacklist());
}
MapOffsetToUid(labels, static_cast<size_t>(n * k));
} else {
KNOWHERE_THROW_MSG("Not a GpuIndexIVF type.");
}

View File

@ -35,7 +35,7 @@ class GPUIVF : public IVF, public GPUIndex {
Train(const DatasetPtr&, const Config&) override;
void
Add(const DatasetPtr&, const Config&) override;
AddWithoutIds(const DatasetPtr&, const Config&) override;
VecIndexPtr
CopyGpuToCpu(const Config&) override;

View File

@ -14,7 +14,6 @@
#include <faiss/IndexIVFPQ.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/index_factory.h>
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"

View File

@ -47,12 +47,18 @@ NsgIndex::~NsgIndex() {
}
void
NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) {
NsgIndex::Build(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) {
ntotal = nb;
ori_data_ = new float[ntotal * dimension];
ids_ = new int64_t[ntotal];
memcpy((void*)ori_data_, (void*)data, sizeof(float) * ntotal * dimension);
memcpy((void*)ids_, (void*)ids, sizeof(int64_t) * ntotal);
if (ids == nullptr) {
for (size_t i = 0; i < nb; i++) {
ids_[i] = i;
}
} else {
memcpy((void*)ids_, (void*)ids, sizeof(int64_t) * ntotal);
}
search_length = parameters.search_length;
out_degree = parameters.out_degree;

View File

@ -80,7 +80,7 @@ class NsgIndex {
SetKnnGraph(Graph& knng);
virtual void
Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters);
Build(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters);
void
Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, int64_t* ids,

View File

@ -55,7 +55,6 @@ TEST_P(AnnoyTest, annoy_basic) {
ASSERT_ANY_THROW(index_->Train(base_dataset, conf));
ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
ASSERT_ANY_THROW(index_->Serialize(conf));
ASSERT_ANY_THROW(index_->Add(base_dataset, conf));
ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, conf));
ASSERT_ANY_THROW(index_->Count());
ASSERT_ANY_THROW(index_->Dim());

View File

@ -53,16 +53,14 @@ TEST_P(BinaryIDMAPTest, binaryidmap_basic) {
{
ASSERT_ANY_THROW(index_->Serialize());
ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
ASSERT_ANY_THROW(index_->Add(nullptr, conf));
ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
}
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);
ASSERT_TRUE(index_->GetRawVectors() != nullptr);
ASSERT_TRUE(index_->GetRawIds() != nullptr);
auto result = index_->Query(query_dataset, conf);
AssertAnns(result, nq, k);
// PrintResult(result, nq, k);

View File

@ -64,7 +64,6 @@ TEST_P(BinaryIVFTest, binaryivf_basic) {
{
ASSERT_ANY_THROW(index_->Serialize());
ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
ASSERT_ANY_THROW(index_->Add(nullptr, conf));
ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
}

View File

@ -54,7 +54,7 @@ TEST_F(SingleIndexTest, IVFSQHybrid) {
fiu_init(0);
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);

View File

@ -73,7 +73,7 @@ TEST_F(GPURESTEST, copyandsearch) {
auto conf = ParamGenerator::GetInstance().Gen(index_type_);
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
auto result = index_->Query(query_dataset, conf);
AssertAnns(result, nq, k);
@ -128,7 +128,7 @@ TEST_F(GPURESTEST, trainandsearch) {
auto conf = ParamGenerator::GetInstance().Gen(index_type_);
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
index_->SetIndexSize(nb * dim * sizeof(float));
auto cpu_idx = milvus::knowhere::cloner::CopyGpuToCpu(index_, milvus::knowhere::Config());
milvus::knowhere::IVFPtr ivf_idx = std::dynamic_pointer_cast<milvus::knowhere::IVF>(cpu_idx);
@ -140,7 +140,7 @@ TEST_F(GPURESTEST, trainandsearch) {
auto train_stage = [&] {
for (int i = 0; i < train_count; ++i) {
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
}
};
auto search_stage = [&](milvus::knowhere::VecIndexPtr& search_idx) {

View File

@ -51,14 +51,13 @@ TEST_P(HNSWTest, HNSW_basic) {
{
ASSERT_ANY_THROW(index_->Serialize());
ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
ASSERT_ANY_THROW(index_->Add(nullptr, conf));
ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
ASSERT_ANY_THROW(index_->Count());
ASSERT_ANY_THROW(index_->Dim());
}
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);
@ -70,7 +69,7 @@ TEST_P(HNSWTest, HNSW_delete) {
assert(!xb.empty());
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);
@ -120,7 +119,7 @@ TEST_P(HNSWTest, HNSW_serialize) {
{
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
auto binaryset = index_->Serialize();
auto bin = binaryset.GetByName("HNSW");

View File

@ -74,16 +74,14 @@ TEST_P(IDMAPTest, idmap_basic) {
{
ASSERT_ANY_THROW(index_->Serialize());
ASSERT_ANY_THROW(index_->Query(query_dataset, conf));
ASSERT_ANY_THROW(index_->Add(nullptr, conf));
ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
}
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);
ASSERT_TRUE(index_->GetRawVectors() != nullptr);
ASSERT_TRUE(index_->GetRawIds() != nullptr);
auto result = index_->Query(query_dataset, conf);
AssertAnns(result, nq, k);
// PrintResult(result, nq, k);
@ -144,7 +142,7 @@ TEST_P(IDMAPTest, idmap_serialize) {
{
// serialize index
index_->Train(base_dataset, conf);
index_->Add(base_dataset, milvus::knowhere::Config());
index_->AddWithoutIds(base_dataset, milvus::knowhere::Config());
if (index_mode_ == milvus::knowhere::IndexMode::MODE_GPU) {
#ifdef MILVUS_GPU_VERSION
@ -187,11 +185,10 @@ TEST_P(IDMAPTest, idmap_copy) {
{milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}};
index_->Train(base_dataset, conf);
index_->Add(base_dataset, conf);
index_->AddWithoutIds(base_dataset, conf);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);
ASSERT_TRUE(index_->GetRawVectors() != nullptr);
ASSERT_TRUE(index_->GetRawIds() != nullptr);
auto result = index_->Query(query_dataset, conf);
AssertAnns(result, nq, k);
// PrintResult(result, nq, k);
@ -211,8 +208,6 @@ TEST_P(IDMAPTest, idmap_copy) {
AssertAnns(clone_result, nq, k);
ASSERT_THROW({ std::static_pointer_cast<milvus::knowhere::GPUIDMAP>(clone_index)->GetRawVectors(); },
milvus::knowhere::KnowhereException);
ASSERT_THROW({ std::static_pointer_cast<milvus::knowhere::GPUIDMAP>(clone_index)->GetRawIds(); },
milvus::knowhere::KnowhereException);
fiu_init(0);
fiu_enable("GPUIDMP.SerializeImpl.throw_exception", 1, nullptr, 0);
@ -233,7 +228,6 @@ TEST_P(IDMAPTest, idmap_copy) {
auto host_result = host_index->Query(query_dataset, conf);
AssertAnns(host_result, nq, k);
ASSERT_TRUE(std::static_pointer_cast<milvus::knowhere::IDMAP>(host_index)->GetRawVectors() != nullptr);
ASSERT_TRUE(std::static_pointer_cast<milvus::knowhere::IDMAP>(host_index)->GetRawIds() != nullptr);
// gpu to gpu
auto device_index = milvus::knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, conf);

View File

@ -98,7 +98,6 @@ TEST_P(IVFTest, ivf_basic_cpu) {
}
// null faiss index
ASSERT_ANY_THROW(index_->Add(base_dataset, conf_));
ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, conf_));
index_->Train(base_dataset, conf_);
@ -158,7 +157,6 @@ TEST_P(IVFTest, ivf_basic_gpu) {
}
// null faiss index
ASSERT_ANY_THROW(index_->Add(base_dataset, conf_));
ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, conf_));
index_->BuildAll(base_dataset, conf_);
@ -197,7 +195,7 @@ TEST_P(IVFTest, ivf_serialize) {
{
// serialize index
index_->Train(base_dataset, conf_);
index_->Add(base_dataset, conf_);
index_->AddWithoutIds(base_dataset, conf_);
auto binaryset = index_->Serialize();
auto bin = binaryset.GetByName("IVF");
@ -223,7 +221,7 @@ TEST_P(IVFTest, clone_test) {
assert(!xb.empty());
index_->Train(base_dataset, conf_);
index_->Add(base_dataset, conf_);
index_->AddWithoutIds(base_dataset, conf_);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);
@ -310,7 +308,7 @@ TEST_P(IVFTest, gpu_seal_test) {
ASSERT_ANY_THROW(index_->Seal());
index_->Train(base_dataset, conf_);
index_->Add(base_dataset, conf_);
index_->AddWithoutIds(base_dataset, conf_);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);

View File

@ -82,7 +82,6 @@ TEST_F(NSGInterfaceTest, basic_test) {
{
ASSERT_ANY_THROW(index_->Serialize());
ASSERT_ANY_THROW(index_->Query(query_dataset, search_conf));
ASSERT_ANY_THROW(index_->Add(base_dataset, search_conf));
ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, search_conf));
}

View File

@ -61,10 +61,7 @@ TEST_P(SPTAGTest, sptag_basic) {
assert(!xb.empty());
// null faiss index
{
ASSERT_ANY_THROW(index_->Add(nullptr, conf));
ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
}
ASSERT_ANY_THROW(index_->AddWithoutIds(nullptr, conf));
index_->BuildAll(base_dataset, conf);
// index_->Add(base_dataset, conf);

View File

@ -44,7 +44,7 @@ DataGen::Generate(const int dim, const int nb, const int nq, const bool is_binar
assert(xb.size() == (size_t)dim * nb);
assert(xq.size() == (size_t)dim * nq);
base_dataset = milvus::knowhere::GenDatasetWithIds(nb, dim, xb.data(), ids.data());
base_dataset = milvus::knowhere::GenDataset(nb, dim, xb.data());
query_dataset = milvus::knowhere::GenDataset(nq, dim, xq.data());
} else {
int64_t dim_x = dim / 8;
@ -52,12 +52,12 @@ DataGen::Generate(const int dim, const int nb, const int nq, const bool is_binar
assert(xb_bin.size() == (size_t)dim_x * nb);
assert(xq_bin.size() == (size_t)dim_x * nq);
base_dataset = milvus::knowhere::GenDatasetWithIds(nb, dim, xb_bin.data(), ids.data());
base_dataset = milvus::knowhere::GenDataset(nb, dim, xb_bin.data());
query_dataset = milvus::knowhere::GenDataset(nq, dim, xq_bin.data());
}
id_dataset = milvus::knowhere::GenDatasetWithIds(nq, dim, nullptr, ids.data());
xid_dataset = milvus::knowhere::GenDatasetWithIds(nq, dim, nullptr, xids.data());
id_dataset = milvus::knowhere::GenDataset(nq, dim, nullptr);
xid_dataset = milvus::knowhere::GenDataset(nq, dim, nullptr);
}
void

View File

@ -18,6 +18,7 @@
#include "db/engine/EngineFactory.h"
#include "db/engine/ExecutionEngineImpl.h"
#include "db/utils.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include <fiu-local.h>
#include <fiu-control.h>
@ -47,8 +48,11 @@ CreateExecEngine(const milvus::json& json_params, milvus::engine::MetricType met
}
}
auto status = engine_ptr->AddWithIds((int64_t)ids->size(), data.data(), ids->data());
(std::static_pointer_cast<milvus::engine::ExecutionEngineImpl>(engine_ptr))->index_->SetUids(ids);
auto engine_impl = (std::static_pointer_cast<milvus::engine::ExecutionEngineImpl>(engine_ptr));
auto dataset = milvus::knowhere::GenDataset(ROW_COUNT, DIMENSION, data.data());
engine_impl->index_->AddWithoutIds(dataset, milvus::knowhere::Config());
engine_impl->index_->SetUids(ids);
return engine_ptr;
}