mirror of https://github.com/milvus-io/milvus.git
related: #30308 Signed-off-by: MrPresent-Han <chun.han@zilliz.com>pull/30750/head
parent
18aac076de
commit
77eb6defb1
|
@ -20,6 +20,7 @@
|
|||
#include <map>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <boost/align/aligned_allocator.hpp>
|
||||
|
@ -31,6 +32,115 @@
|
|||
#include "knowhere/index_node.h"
|
||||
|
||||
namespace milvus {
|
||||
|
||||
struct OffsetDisPair {
|
||||
private:
|
||||
std::pair<int64_t, float> off_dis_;
|
||||
int iterator_idx_;
|
||||
|
||||
public:
|
||||
OffsetDisPair(std::pair<int64_t, float> off_dis, int iter_idx)
|
||||
: off_dis_(off_dis), iterator_idx_(iter_idx) {
|
||||
}
|
||||
|
||||
const std::pair<int64_t, float>&
|
||||
GetOffDis() const {
|
||||
return off_dis_;
|
||||
}
|
||||
|
||||
int
|
||||
GetIteratorIdx() const {
|
||||
return iterator_idx_;
|
||||
}
|
||||
};
|
||||
|
||||
struct OffsetDisPairComparator {
|
||||
bool
|
||||
operator()(const std::shared_ptr<OffsetDisPair>& left,
|
||||
const std::shared_ptr<OffsetDisPair>& right) const {
|
||||
if (left->GetOffDis().second != right->GetOffDis().second) {
|
||||
return left->GetOffDis().second < right->GetOffDis().second;
|
||||
}
|
||||
return left->GetOffDis().first < right->GetOffDis().first;
|
||||
}
|
||||
};
|
||||
struct VectorIterator {
|
||||
public:
|
||||
VectorIterator(int chunk_count, int64_t chunk_rows = -1)
|
||||
: chunk_rows_(chunk_rows) {
|
||||
iterators_.reserve(chunk_count);
|
||||
}
|
||||
|
||||
std::optional<std::pair<int64_t, float>>
|
||||
Next() {
|
||||
if (!heap_.empty()) {
|
||||
auto top = heap_.top();
|
||||
heap_.pop();
|
||||
if (iterators_[top->GetIteratorIdx()]->HasNext()) {
|
||||
auto origin_pair = iterators_[top->GetIteratorIdx()]->Next();
|
||||
origin_pair.first = convert_to_segment_offset(
|
||||
origin_pair.first, top->GetIteratorIdx());
|
||||
auto off_dis_pair = std::make_shared<OffsetDisPair>(
|
||||
origin_pair, top->GetIteratorIdx());
|
||||
heap_.push(off_dis_pair);
|
||||
}
|
||||
return top->GetOffDis();
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
bool
|
||||
HasNext() {
|
||||
return !heap_.empty();
|
||||
}
|
||||
bool
|
||||
AddIterator(std::shared_ptr<knowhere::IndexNode::iterator> iter) {
|
||||
if (!sealed && iter != nullptr) {
|
||||
iterators_.emplace_back(iter);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
void
|
||||
seal() {
|
||||
sealed = true;
|
||||
int idx = 0;
|
||||
for (auto& iter : iterators_) {
|
||||
if (iter->HasNext()) {
|
||||
auto off_dis_pair =
|
||||
std::make_shared<OffsetDisPair>(iter->Next(), idx++);
|
||||
heap_.push(off_dis_pair);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t
|
||||
convert_to_segment_offset(int64_t chunk_offset, int chunk_idx) {
|
||||
if (chunk_rows_ == -1) {
|
||||
AssertInfo(
|
||||
iterators_.size() == 1,
|
||||
"Wrong state for vectorIterators, which having incorrect "
|
||||
"kw_iterator count:{} "
|
||||
"without setting value for chunk_rows, "
|
||||
"cannot convert chunk_offset to segment_offset correctly",
|
||||
iterators_.size());
|
||||
return chunk_offset;
|
||||
}
|
||||
return chunk_idx * chunk_rows_ + chunk_offset;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>> iterators_;
|
||||
std::priority_queue<std::shared_ptr<OffsetDisPair>,
|
||||
std::vector<std::shared_ptr<OffsetDisPair>>,
|
||||
OffsetDisPairComparator>
|
||||
heap_;
|
||||
bool sealed = false;
|
||||
int64_t chunk_rows_ = -1;
|
||||
//currently, VectorIterator is guaranteed to be used serially without concurrent problem, in the future
|
||||
//we may need to add mutex to protect the variable sealed
|
||||
};
|
||||
|
||||
struct SearchResult {
|
||||
SearchResult() = default;
|
||||
|
||||
|
@ -45,6 +155,37 @@ struct SearchResult {
|
|||
return topk_per_nq_prefix_sum_[total_nq_];
|
||||
}
|
||||
|
||||
public:
|
||||
void
|
||||
AssembleChunkVectorIterators(
|
||||
int64_t nq,
|
||||
int chunk_count,
|
||||
int64_t rows_per_chunk,
|
||||
const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
||||
kw_iterators) {
|
||||
AssertInfo(kw_iterators.size() == nq * chunk_count,
|
||||
"kw_iterators count:{} is not equal to nq*chunk_count:{}, "
|
||||
"wrong state",
|
||||
kw_iterators.size(),
|
||||
nq * chunk_count);
|
||||
std::vector<std::shared_ptr<VectorIterator>> vector_iterators;
|
||||
vector_iterators.reserve(nq);
|
||||
for (int i = 0, vec_iter_idx = 0; i < kw_iterators.size(); i++) {
|
||||
vec_iter_idx = vec_iter_idx % nq;
|
||||
if (vector_iterators.size() < nq) {
|
||||
auto vector_iterator = std::make_shared<VectorIterator>(
|
||||
chunk_count, rows_per_chunk);
|
||||
vector_iterators.emplace_back(vector_iterator);
|
||||
}
|
||||
auto kw_iterator = kw_iterators[i];
|
||||
vector_iterators[vec_iter_idx++]->AddIterator(kw_iterator);
|
||||
}
|
||||
for (auto vector_iter : vector_iterators) {
|
||||
vector_iter->seal();
|
||||
}
|
||||
this->vector_iterators_ = vector_iterators;
|
||||
}
|
||||
|
||||
public:
|
||||
int64_t total_nq_;
|
||||
int64_t unity_topK_;
|
||||
|
@ -70,9 +211,9 @@ struct SearchResult {
|
|||
// used for reduce, filter invalid pk, get real topks count
|
||||
std::vector<size_t> topk_per_nq_prefix_sum_;
|
||||
|
||||
//knowhere iterators, used for group by or other operators in the future
|
||||
std::optional<std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
iterators;
|
||||
//Vector iterators, used for group by
|
||||
std::optional<std::vector<std::shared_ptr<VectorIterator>>>
|
||||
vector_iterators_;
|
||||
};
|
||||
|
||||
using SearchResultPtr = std::shared_ptr<SearchResult>;
|
||||
|
|
|
@ -314,10 +314,11 @@ VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
std::unique_ptr<SearchResult>
|
||||
void
|
||||
VectorDiskAnnIndex<T>::Query(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) {
|
||||
const BitsetView& bitset,
|
||||
SearchResult& search_result) const {
|
||||
AssertInfo(GetMetricType() == search_info.metric_type_,
|
||||
"Metric type of field index isn't the same with search info");
|
||||
auto num_queries = dataset->GetRows();
|
||||
|
@ -392,16 +393,21 @@ VectorDiskAnnIndex<T>::Query(const DatasetPtr dataset,
|
|||
distances[i] = std::round(distances[i] * multiplier) / multiplier;
|
||||
}
|
||||
}
|
||||
auto result = std::make_unique<SearchResult>();
|
||||
result->seg_offsets_.resize(total_num);
|
||||
result->distances_.resize(total_num);
|
||||
result->total_nq_ = num_queries;
|
||||
result->unity_topK_ = topk;
|
||||
search_result.seg_offsets_.resize(total_num);
|
||||
search_result.distances_.resize(total_num);
|
||||
search_result.total_nq_ = num_queries;
|
||||
search_result.unity_topK_ = topk;
|
||||
std::copy_n(ids, total_num, search_result.seg_offsets_.data());
|
||||
std::copy_n(distances, total_num, search_result.distances_.data());
|
||||
}
|
||||
|
||||
std::copy_n(ids, total_num, result->seg_offsets_.data());
|
||||
std::copy_n(distances, total_num, result->distances_.data());
|
||||
|
||||
return result;
|
||||
template <typename T>
|
||||
knowhere::expected<std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
VectorDiskAnnIndex<T>::VectorIterators(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) const {
|
||||
return this->index_.AnnIterator(
|
||||
*dataset, search_info.search_params_, bitset);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -86,10 +86,11 @@ class VectorDiskAnnIndex : public VectorIndex {
|
|||
void
|
||||
BuildV2(const Config& config = {}) override;
|
||||
|
||||
std::unique_ptr<SearchResult>
|
||||
void
|
||||
Query(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) override;
|
||||
const BitsetView& bitset,
|
||||
SearchResult& search_result) const override;
|
||||
|
||||
const bool
|
||||
HasRawData() const override;
|
||||
|
@ -100,6 +101,12 @@ class VectorDiskAnnIndex : public VectorIndex {
|
|||
void
|
||||
CleanLocalData() override;
|
||||
|
||||
knowhere::expected<
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
VectorIterators(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) const override;
|
||||
|
||||
private:
|
||||
knowhere::Json
|
||||
update_load_json(const Config& config);
|
||||
|
|
|
@ -54,10 +54,21 @@ class VectorIndex : public IndexBase {
|
|||
PanicInfo(Unsupported, "vector index don't support add with dataset");
|
||||
}
|
||||
|
||||
virtual std::unique_ptr<SearchResult>
|
||||
virtual void
|
||||
Query(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) = 0;
|
||||
const BitsetView& bitset,
|
||||
SearchResult& search_result) const = 0;
|
||||
|
||||
virtual knowhere::expected<
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
VectorIterators(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) const {
|
||||
throw std::runtime_error(
|
||||
"VectorIndex didn't implement VectorIterator interface, "
|
||||
"there must be sth wrong in the code");
|
||||
}
|
||||
|
||||
virtual const bool
|
||||
HasRawData() const = 0;
|
||||
|
@ -89,7 +100,7 @@ class VectorIndex : public IndexBase {
|
|||
CleanLocalData() {
|
||||
}
|
||||
|
||||
void
|
||||
virtual void
|
||||
CheckCompatible(const IndexVersion& version) {
|
||||
std::string err_msg =
|
||||
"version not support : " + std::to_string(version) +
|
||||
|
|
|
@ -128,6 +128,15 @@ VectorMemIndex<T>::UploadV2(const Config& config) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
knowhere::expected<std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
VectorMemIndex<T>::VectorIterators(const milvus::DatasetPtr dataset,
|
||||
const milvus::SearchInfo& search_info,
|
||||
const milvus::BitsetView& bitset) const {
|
||||
return this->index_.AnnIterator(
|
||||
*dataset, search_info.search_params_, bitset);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
VectorMemIndex<T>::Upload(const Config& config) {
|
||||
|
@ -522,48 +531,16 @@ VectorMemIndex<T>::AddWithDataset(const DatasetPtr& dataset,
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
std::unique_ptr<SearchResult>
|
||||
void
|
||||
VectorMemIndex<T>::Query(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) {
|
||||
const BitsetView& bitset,
|
||||
SearchResult& search_result) const {
|
||||
// AssertInfo(GetMetricType() == search_info.metric_type_,
|
||||
// "Metric type of field index isn't the same with search info");
|
||||
|
||||
auto num_queries = dataset->GetRows();
|
||||
knowhere::Json search_conf = search_info.search_params_;
|
||||
if (search_info.group_by_field_id_.has_value()) {
|
||||
auto result = std::make_unique<SearchResult>();
|
||||
if (search_conf.contains(knowhere::indexparam::EF)) {
|
||||
search_conf[knowhere::indexparam::SEED_EF] =
|
||||
search_conf[knowhere::indexparam::EF];
|
||||
}
|
||||
try {
|
||||
knowhere::expected<
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
iterators_val =
|
||||
index_.AnnIterator(*dataset, search_conf, bitset);
|
||||
if (iterators_val.has_value()) {
|
||||
result->iterators = iterators_val.value();
|
||||
} else {
|
||||
LOG_ERROR(
|
||||
"Returned knowhere iterator has non-ready iterators "
|
||||
"inside, terminate group_by operation");
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"Returned knowhere iterator has non-ready iterators "
|
||||
"inside, terminate group_by operation");
|
||||
}
|
||||
} catch (const std::runtime_error& e) {
|
||||
LOG_ERROR(
|
||||
"Caught error:{} when trying to initialize ann iterators for "
|
||||
"group_by: "
|
||||
"group_by operation will be terminated",
|
||||
e.what());
|
||||
throw e;
|
||||
}
|
||||
return result;
|
||||
//if the target index doesn't support iterators, directly return empty search result
|
||||
//and the reduce process to filter empty results
|
||||
}
|
||||
auto topk = search_info.topk_;
|
||||
// TODO :: check dim of search data
|
||||
auto final = [&] {
|
||||
|
@ -615,16 +592,12 @@ VectorMemIndex<T>::Query(const DatasetPtr dataset,
|
|||
distances[i] = std::round(distances[i] * multiplier) / multiplier;
|
||||
}
|
||||
}
|
||||
auto result = std::make_unique<SearchResult>();
|
||||
result->seg_offsets_.resize(total_num);
|
||||
result->distances_.resize(total_num);
|
||||
result->total_nq_ = num_queries;
|
||||
result->unity_topK_ = topk;
|
||||
|
||||
std::copy_n(ids, total_num, result->seg_offsets_.data());
|
||||
std::copy_n(distances, total_num, result->distances_.data());
|
||||
|
||||
return result;
|
||||
search_result.seg_offsets_.resize(total_num);
|
||||
search_result.distances_.resize(total_num);
|
||||
search_result.total_nq_ = num_queries;
|
||||
search_result.unity_topK_ = topk;
|
||||
std::copy_n(ids, total_num, search_result.seg_offsets_.data());
|
||||
std::copy_n(distances, total_num, search_result.distances_.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -73,10 +73,11 @@ class VectorMemIndex : public VectorIndex {
|
|||
return index_.Count();
|
||||
}
|
||||
|
||||
std::unique_ptr<SearchResult>
|
||||
void
|
||||
Query(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) override;
|
||||
const BitsetView& bitset,
|
||||
SearchResult& search_result) const override;
|
||||
|
||||
const bool
|
||||
HasRawData() const override;
|
||||
|
@ -90,6 +91,12 @@ class VectorMemIndex : public VectorIndex {
|
|||
BinarySet
|
||||
UploadV2(const Config& config = {}) override;
|
||||
|
||||
knowhere::expected<
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
VectorIterators(const DatasetPtr dataset,
|
||||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) const override;
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
LoadWithoutAssemble(const BinarySet& binary_set, const Config& config);
|
||||
|
|
|
@ -85,7 +85,9 @@ VecIndexCreator::Query(const milvus::DatasetPtr& dataset,
|
|||
const SearchInfo& search_info,
|
||||
const BitsetView& bitset) {
|
||||
auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());
|
||||
return vector_index->Query(dataset, search_info, bitset);
|
||||
auto search_result = std::make_unique<SearchResult>();
|
||||
vector_index->Query(dataset, search_info, bitset, *search_result);
|
||||
return search_result;
|
||||
}
|
||||
|
||||
BinarySet
|
||||
|
|
|
@ -22,33 +22,22 @@ namespace milvus {
|
|||
namespace query {
|
||||
|
||||
void
|
||||
GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
||||
iterators,
|
||||
GroupBy(const std::vector<std::shared_ptr<VectorIterator>>& iterators,
|
||||
const SearchInfo& search_info,
|
||||
std::vector<GroupByValueType>& group_by_values,
|
||||
const segcore::SegmentInternalInterface& segment,
|
||||
std::vector<int64_t>& seg_offsets,
|
||||
std::vector<float>& distances) {
|
||||
//0. check segment type, for period-1, only support group by for sealed segments
|
||||
if (!dynamic_cast<const segcore::SegmentSealedImpl*>(&segment)) {
|
||||
LOG_ERROR(
|
||||
"Not support group_by operation for non-sealed segment, "
|
||||
"segment_id:{}",
|
||||
segment.get_segment_id());
|
||||
return;
|
||||
}
|
||||
|
||||
//1. get search meta
|
||||
FieldId group_by_field_id = search_info.group_by_field_id_.value();
|
||||
auto data_type = segment.GetFieldDataType(group_by_field_id);
|
||||
|
||||
switch (data_type) {
|
||||
case DataType::INT8: {
|
||||
DataGetter<int8_t> dataGetter(segment, group_by_field_id);
|
||||
auto dataGetter = GetDataGetter<int8_t>(segment, group_by_field_id);
|
||||
GroupIteratorsByType<int8_t>(iterators,
|
||||
group_by_field_id,
|
||||
search_info.topk_,
|
||||
dataGetter,
|
||||
*dataGetter,
|
||||
group_by_values,
|
||||
seg_offsets,
|
||||
distances,
|
||||
|
@ -56,11 +45,11 @@ GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
|||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
DataGetter<int16_t> dataGetter(segment, group_by_field_id);
|
||||
auto dataGetter =
|
||||
GetDataGetter<int16_t>(segment, group_by_field_id);
|
||||
GroupIteratorsByType<int16_t>(iterators,
|
||||
group_by_field_id,
|
||||
search_info.topk_,
|
||||
dataGetter,
|
||||
*dataGetter,
|
||||
group_by_values,
|
||||
seg_offsets,
|
||||
distances,
|
||||
|
@ -68,11 +57,11 @@ GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
|||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
DataGetter<int32_t> dataGetter(segment, group_by_field_id);
|
||||
auto dataGetter =
|
||||
GetDataGetter<int32_t>(segment, group_by_field_id);
|
||||
GroupIteratorsByType<int32_t>(iterators,
|
||||
group_by_field_id,
|
||||
search_info.topk_,
|
||||
dataGetter,
|
||||
*dataGetter,
|
||||
group_by_values,
|
||||
seg_offsets,
|
||||
distances,
|
||||
|
@ -80,11 +69,11 @@ GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
|||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
DataGetter<int64_t> dataGetter(segment, group_by_field_id);
|
||||
auto dataGetter =
|
||||
GetDataGetter<int64_t>(segment, group_by_field_id);
|
||||
GroupIteratorsByType<int64_t>(iterators,
|
||||
group_by_field_id,
|
||||
search_info.topk_,
|
||||
dataGetter,
|
||||
*dataGetter,
|
||||
group_by_values,
|
||||
seg_offsets,
|
||||
distances,
|
||||
|
@ -92,11 +81,10 @@ GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
|||
break;
|
||||
}
|
||||
case DataType::BOOL: {
|
||||
DataGetter<bool> dataGetter(segment, group_by_field_id);
|
||||
auto dataGetter = GetDataGetter<bool>(segment, group_by_field_id);
|
||||
GroupIteratorsByType<bool>(iterators,
|
||||
group_by_field_id,
|
||||
search_info.topk_,
|
||||
dataGetter,
|
||||
*dataGetter,
|
||||
group_by_values,
|
||||
seg_offsets,
|
||||
distances,
|
||||
|
@ -104,11 +92,11 @@ GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
|||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
DataGetter<std::string> dataGetter(segment, group_by_field_id);
|
||||
auto dataGetter =
|
||||
GetDataGetter<std::string>(segment, group_by_field_id);
|
||||
GroupIteratorsByType<std::string>(iterators,
|
||||
group_by_field_id,
|
||||
search_info.topk_,
|
||||
dataGetter,
|
||||
*dataGetter,
|
||||
group_by_values,
|
||||
seg_offsets,
|
||||
distances,
|
||||
|
@ -127,9 +115,7 @@ GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
|||
template <typename T>
|
||||
void
|
||||
GroupIteratorsByType(
|
||||
const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
||||
iterators,
|
||||
FieldId field_id,
|
||||
const std::vector<std::shared_ptr<VectorIterator>>& iterators,
|
||||
int64_t topK,
|
||||
const DataGetter<T>& data_getter,
|
||||
std::vector<GroupByValueType>& group_by_values,
|
||||
|
@ -138,7 +124,6 @@ GroupIteratorsByType(
|
|||
const knowhere::MetricType& metrics_type) {
|
||||
for (auto& iterator : iterators) {
|
||||
GroupIteratorResult<T>(iterator,
|
||||
field_id,
|
||||
topK,
|
||||
data_getter,
|
||||
group_by_values,
|
||||
|
@ -150,15 +135,13 @@ GroupIteratorsByType(
|
|||
|
||||
template <typename T>
|
||||
void
|
||||
GroupIteratorResult(
|
||||
const std::shared_ptr<knowhere::IndexNode::iterator>& iterator,
|
||||
FieldId field_id,
|
||||
int64_t topK,
|
||||
const DataGetter<T>& data_getter,
|
||||
std::vector<GroupByValueType>& group_by_values,
|
||||
std::vector<int64_t>& offsets,
|
||||
std::vector<float>& distances,
|
||||
const knowhere::MetricType& metrics_type) {
|
||||
GroupIteratorResult(const std::shared_ptr<VectorIterator>& iterator,
|
||||
int64_t topK,
|
||||
const DataGetter<T>& data_getter,
|
||||
std::vector<GroupByValueType>& group_by_values,
|
||||
std::vector<int64_t>& offsets,
|
||||
std::vector<float>& distances,
|
||||
const knowhere::MetricType& metrics_type) {
|
||||
//1.
|
||||
std::unordered_map<T, std::pair<int64_t, float>> groupMap;
|
||||
|
||||
|
@ -171,7 +154,13 @@ GroupIteratorResult(
|
|||
return l < r;
|
||||
};
|
||||
while (iterator->HasNext() && groupMap.size() < topK) {
|
||||
auto [offset, dis] = iterator->Next();
|
||||
auto offset_dis_pair = iterator->Next();
|
||||
AssertInfo(
|
||||
offset_dis_pair.has_value(),
|
||||
"Wrong state! iterator cannot return valid result whereas it still"
|
||||
"tells hasNext, terminate groupBy operation");
|
||||
auto offset = offset_dis_pair.value().first;
|
||||
auto dis = offset_dis_pair.value().second;
|
||||
T row_data = data_getter.Get(offset);
|
||||
auto it = groupMap.find(row_data);
|
||||
if (it == groupMap.end()) {
|
||||
|
|
|
@ -19,19 +19,51 @@
|
|||
#include "common/QueryInfo.h"
|
||||
#include "knowhere/index_node.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/ConcurrentVector.h"
|
||||
#include "common/Span.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace query {
|
||||
|
||||
template <typename T>
|
||||
struct DataGetter {
|
||||
class DataGetter {
|
||||
public:
|
||||
virtual T
|
||||
Get(int64_t idx) const = 0;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class GrowingDataGetter : public DataGetter<T> {
|
||||
public:
|
||||
const segcore::ConcurrentVector<T>* growing_raw_data_;
|
||||
GrowingDataGetter(const segcore::SegmentGrowingImpl& segment,
|
||||
FieldId fieldId) {
|
||||
growing_raw_data_ =
|
||||
segment.get_insert_record().get_field_data<T>(fieldId);
|
||||
}
|
||||
|
||||
GrowingDataGetter(const GrowingDataGetter<T>& other)
|
||||
: growing_raw_data_(other.growing_raw_data_) {
|
||||
}
|
||||
|
||||
T
|
||||
Get(int64_t idx) const {
|
||||
return growing_raw_data_->operator[](idx);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class SealedDataGetter : public DataGetter<T> {
|
||||
private:
|
||||
std::shared_ptr<Span<T>> field_data_;
|
||||
std::shared_ptr<Span<std::string_view>> str_field_data_;
|
||||
const index::ScalarIndex<T>* field_index_;
|
||||
|
||||
DataGetter(const segcore::SegmentInternalInterface& segment,
|
||||
FieldId& field_id) {
|
||||
public:
|
||||
SealedDataGetter(const segcore::SegmentSealedImpl& segment,
|
||||
FieldId& field_id) {
|
||||
if (segment.HasFieldData(field_id)) {
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
auto span = segment.chunk_data<std::string_view>(field_id, 0);
|
||||
|
@ -52,7 +84,12 @@ struct DataGetter {
|
|||
}
|
||||
}
|
||||
|
||||
public:
|
||||
SealedDataGetter(const SealedDataGetter<T>& other)
|
||||
: field_data_(other.field_data_),
|
||||
str_field_data_(other.str_field_data_),
|
||||
field_index_(other.field_index_) {
|
||||
}
|
||||
|
||||
T
|
||||
Get(int64_t idx) const {
|
||||
if (field_data_ || str_field_data_) {
|
||||
|
@ -68,9 +105,65 @@ struct DataGetter {
|
|||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
static const std::shared_ptr<DataGetter<T>>
|
||||
GetDataGetter(const segcore::SegmentInternalInterface& segment,
|
||||
FieldId fieldId) {
|
||||
if (const segcore::SegmentGrowingImpl* growing_segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment)) {
|
||||
return std::make_shared<GrowingDataGetter<T>>(*growing_segment,
|
||||
fieldId);
|
||||
} else if (const segcore::SegmentSealedImpl* sealed_segment =
|
||||
dynamic_cast<const segcore::SegmentSealedImpl*>(&segment)) {
|
||||
return std::make_shared<SealedDataGetter<T>>(*sealed_segment, fieldId);
|
||||
} else {
|
||||
PanicInfo(UnexpectedError,
|
||||
"The segment used to init data getter is neither growing or "
|
||||
"sealed, wrong state");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
|
||||
int nq,
|
||||
const DatasetPtr dataset,
|
||||
SearchResult& search_result,
|
||||
const BitsetView& bitset,
|
||||
const index::VectorIndex& index) {
|
||||
if (search_info.group_by_field_id_.has_value()) {
|
||||
try {
|
||||
knowhere::expected<
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
iterators_val =
|
||||
index.VectorIterators(dataset, search_info, bitset);
|
||||
if (iterators_val.has_value()) {
|
||||
search_result.AssembleChunkVectorIterators(
|
||||
nq, 1, -1, iterators_val.value());
|
||||
} else {
|
||||
LOG_ERROR(
|
||||
"Returned knowhere iterator has non-ready iterators "
|
||||
"inside, terminate group_by operation");
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"Returned knowhere iterator has non-ready iterators "
|
||||
"inside, terminate group_by operation");
|
||||
}
|
||||
search_result.total_nq_ = dataset->GetRows();
|
||||
search_result.unity_topK_ = search_info.topk_;
|
||||
} catch (const std::runtime_error& e) {
|
||||
LOG_ERROR(
|
||||
"Caught error:{} when trying to initialize ann iterators for "
|
||||
"group_by: "
|
||||
"group_by operation will be terminated",
|
||||
e.what());
|
||||
throw e;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
||||
iterators,
|
||||
GroupBy(const std::vector<std::shared_ptr<VectorIterator>>& iterators,
|
||||
const SearchInfo& searchInfo,
|
||||
std::vector<GroupByValueType>& group_by_values,
|
||||
const segcore::SegmentInternalInterface& segment,
|
||||
|
@ -80,9 +173,7 @@ GroupBy(const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
|||
template <typename T>
|
||||
void
|
||||
GroupIteratorsByType(
|
||||
const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
||||
iterators,
|
||||
FieldId field_id,
|
||||
const std::vector<std::shared_ptr<VectorIterator>>& iterators,
|
||||
int64_t topK,
|
||||
const DataGetter<T>& data_getter,
|
||||
std::vector<GroupByValueType>& group_by_values,
|
||||
|
@ -92,15 +183,13 @@ GroupIteratorsByType(
|
|||
|
||||
template <typename T>
|
||||
void
|
||||
GroupIteratorResult(
|
||||
const std::shared_ptr<knowhere::IndexNode::iterator>& iterator,
|
||||
FieldId field_id,
|
||||
int64_t topK,
|
||||
const DataGetter<T>& data_getter,
|
||||
std::vector<GroupByValueType>& group_by_values,
|
||||
std::vector<int64_t>& offsets,
|
||||
std::vector<float>& distances,
|
||||
const knowhere::MetricType& metrics_type);
|
||||
GroupIteratorResult(const std::shared_ptr<VectorIterator>& iterator,
|
||||
int64_t topK,
|
||||
const DataGetter<T>& data_getter,
|
||||
std::vector<GroupByValueType>& group_by_values,
|
||||
std::vector<int64_t>& offsets,
|
||||
std::vector<float>& distances,
|
||||
const knowhere::MetricType& metrics_type);
|
||||
|
||||
} // namespace query
|
||||
} // namespace milvus
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include "SubSearchResult.h"
|
||||
#include "knowhere/comp/brute_force.h"
|
||||
#include "knowhere/comp/index_param.h"
|
||||
#include "knowhere/index_node.h"
|
||||
#include "log/Log.h"
|
||||
namespace milvus::query {
|
||||
|
||||
void
|
||||
|
@ -145,4 +147,61 @@ BruteForceSearch(const dataset::SearchDataset& dataset,
|
|||
sub_result.round_values();
|
||||
return sub_result;
|
||||
}
|
||||
|
||||
SubSearchResult
|
||||
BruteForceSearchIterators(const dataset::SearchDataset& dataset,
|
||||
const void* chunk_data_raw,
|
||||
int64_t chunk_rows,
|
||||
const knowhere::Json& conf,
|
||||
const BitsetView& bitset,
|
||||
DataType data_type) {
|
||||
auto nq = dataset.num_queries;
|
||||
auto dim = dataset.dim;
|
||||
auto base_dataset = knowhere::GenDataSet(chunk_rows, dim, chunk_data_raw);
|
||||
auto query_dataset = knowhere::GenDataSet(nq, dim, dataset.query_data);
|
||||
|
||||
knowhere::expected<
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
iterators_val;
|
||||
switch (data_type) {
|
||||
case DataType::VECTOR_FLOAT:
|
||||
iterators_val = knowhere::BruteForce::AnnIterator<float>(
|
||||
base_dataset, query_dataset, conf, bitset);
|
||||
break;
|
||||
case DataType::VECTOR_FLOAT16:
|
||||
iterators_val = knowhere::BruteForce::AnnIterator<float16>(
|
||||
base_dataset, query_dataset, conf, bitset);
|
||||
break;
|
||||
case DataType::VECTOR_BFLOAT16:
|
||||
iterators_val = knowhere::BruteForce::AnnIterator<bfloat16>(
|
||||
base_dataset, query_dataset, conf, bitset);
|
||||
break;
|
||||
default:
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"Unsupported dataType for chunk brute force iterator:{}",
|
||||
data_type);
|
||||
}
|
||||
if (iterators_val.has_value()) {
|
||||
AssertInfo(
|
||||
iterators_val.value().size() == nq,
|
||||
"Wrong state, initialized knowhere_iterators count:{} is not "
|
||||
"equal to nq:{} for single chunk",
|
||||
iterators_val.value().size(),
|
||||
nq);
|
||||
SubSearchResult subSearchResult(dataset.num_queries,
|
||||
dataset.topk,
|
||||
dataset.metric_type,
|
||||
dataset.round_decimal,
|
||||
iterators_val.value());
|
||||
return std::move(subSearchResult);
|
||||
} else {
|
||||
LOG_ERROR(
|
||||
"Failed to get valid knowhere brute-force-iterators from chunk, "
|
||||
"terminate search_group_by operation");
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"Returned knowhere brute-force-iterator has non-ready "
|
||||
"iterators inside, terminate search_group_by operation");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace milvus::query
|
|
@ -31,4 +31,12 @@ BruteForceSearch(const dataset::SearchDataset& dataset,
|
|||
const BitsetView& bitset,
|
||||
DataType data_type);
|
||||
|
||||
SubSearchResult
|
||||
BruteForceSearchIterators(const dataset::SearchDataset& dataset,
|
||||
const void* chunk_data_raw,
|
||||
int64_t chunk_rows,
|
||||
const knowhere::Json& conf,
|
||||
const BitsetView& bitset,
|
||||
DataType data_type);
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -24,9 +24,8 @@ FloatSegmentIndexSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
const SearchInfo& info,
|
||||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
int64_t ins_barrier,
|
||||
const BitsetView& bitset,
|
||||
SubSearchResult& results) {
|
||||
SearchResult& search_result) {
|
||||
auto& schema = segment.get_schema();
|
||||
auto& indexing_record = segment.get_indexing_record();
|
||||
auto& record = segment.get_insert_record();
|
||||
|
@ -49,9 +48,8 @@ FloatSegmentIndexSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
auto indexing = field_indexing.get_segment_indexing();
|
||||
SearchInfo search_conf = field_indexing.get_search_params(info);
|
||||
auto vec_index = dynamic_cast<index::VectorIndex*>(indexing);
|
||||
auto result =
|
||||
SearchOnIndex(search_dataset, *vec_index, search_conf, bitset);
|
||||
results.merge(result);
|
||||
SearchOnIndex(
|
||||
search_dataset, *vec_index, search_conf, bitset, search_result);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -62,7 +60,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
|||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& results) {
|
||||
SearchResult& search_result) {
|
||||
auto& schema = segment.get_schema();
|
||||
auto& record = segment.get_insert_record();
|
||||
auto active_count =
|
||||
|
@ -84,23 +82,13 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
|||
auto round_decimal = info.round_decimal_;
|
||||
|
||||
// step 2: small indexing search
|
||||
SubSearchResult final_qr(num_queries, topk, metric_type, round_decimal);
|
||||
dataset::SearchDataset search_dataset{
|
||||
metric_type, num_queries, topk, round_decimal, dim, query_data};
|
||||
|
||||
if (segment.get_indexing_record().SyncDataWithIndex(field.get_id())) {
|
||||
FloatSegmentIndexSearch(segment,
|
||||
info,
|
||||
query_data,
|
||||
num_queries,
|
||||
active_count,
|
||||
bitset,
|
||||
final_qr);
|
||||
results.distances_ = std::move(final_qr.mutable_distances());
|
||||
results.seg_offsets_ = std::move(final_qr.mutable_seg_offsets());
|
||||
results.unity_topK_ = topk;
|
||||
results.total_nq_ = num_queries;
|
||||
FloatSegmentIndexSearch(
|
||||
segment, info, query_data, num_queries, bitset, search_result);
|
||||
} else {
|
||||
SubSearchResult final_qr(num_queries, topk, metric_type, round_decimal);
|
||||
dataset::SearchDataset search_dataset{
|
||||
metric_type, num_queries, topk, round_decimal, dim, query_data};
|
||||
std::shared_lock<std::shared_mutex> read_chunk_mutex(
|
||||
segment.get_chunk_mutex());
|
||||
int32_t current_chunk_id = 0;
|
||||
|
@ -119,25 +107,44 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
|||
auto size_per_chunk = element_end - element_begin;
|
||||
|
||||
auto sub_view = bitset.subview(element_begin, size_per_chunk);
|
||||
auto sub_qr = BruteForceSearch(search_dataset,
|
||||
chunk_data,
|
||||
size_per_chunk,
|
||||
info.search_params_,
|
||||
sub_view,
|
||||
data_type);
|
||||
if (info.group_by_field_id_.has_value()) {
|
||||
auto sub_qr = BruteForceSearchIterators(search_dataset,
|
||||
chunk_data,
|
||||
size_per_chunk,
|
||||
info.search_params_,
|
||||
sub_view,
|
||||
data_type);
|
||||
final_qr.merge(sub_qr);
|
||||
} else {
|
||||
auto sub_qr = BruteForceSearch(search_dataset,
|
||||
chunk_data,
|
||||
size_per_chunk,
|
||||
info.search_params_,
|
||||
sub_view,
|
||||
data_type);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_seg_offsets()) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * vec_size_per_chunk;
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_seg_offsets()) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * vec_size_per_chunk;
|
||||
}
|
||||
}
|
||||
final_qr.merge(sub_qr);
|
||||
}
|
||||
final_qr.merge(sub_qr);
|
||||
}
|
||||
results.distances_ = std::move(final_qr.mutable_distances());
|
||||
results.seg_offsets_ = std::move(final_qr.mutable_seg_offsets());
|
||||
results.unity_topK_ = topk;
|
||||
results.total_nq_ = num_queries;
|
||||
if (info.group_by_field_id_.has_value()) {
|
||||
search_result.AssembleChunkVectorIterators(
|
||||
num_queries,
|
||||
max_chunk,
|
||||
vec_size_per_chunk,
|
||||
final_qr.chunk_iterators());
|
||||
} else {
|
||||
search_result.distances_ = std::move(final_qr.mutable_distances());
|
||||
search_result.seg_offsets_ =
|
||||
std::move(final_qr.mutable_seg_offsets());
|
||||
}
|
||||
search_result.unity_topK_ = topk;
|
||||
search_result.total_nq_ = num_queries;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,6 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
|||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& results);
|
||||
SearchResult& search_result);
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -10,33 +10,28 @@
|
|||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "SearchOnIndex.h"
|
||||
#include "query/GroupByOperator.h"
|
||||
|
||||
namespace milvus::query {
|
||||
SubSearchResult
|
||||
void
|
||||
SearchOnIndex(const dataset::SearchDataset& search_dataset,
|
||||
const index::VectorIndex& indexing,
|
||||
const SearchInfo& search_conf,
|
||||
const BitsetView& bitset) {
|
||||
const BitsetView& bitset,
|
||||
SearchResult& search_result) {
|
||||
auto num_queries = search_dataset.num_queries;
|
||||
auto topK = search_dataset.topk;
|
||||
auto dim = search_dataset.dim;
|
||||
auto metric_type = search_dataset.metric_type;
|
||||
auto round_decimal = search_dataset.round_decimal;
|
||||
auto dataset =
|
||||
knowhere::GenDataSet(num_queries, dim, search_dataset.query_data);
|
||||
|
||||
// NOTE: VecIndex Query API forget to add const qualifier
|
||||
// NOTE: use const_cast as a workaround
|
||||
auto& indexing_nonconst = const_cast<index::VectorIndex&>(indexing);
|
||||
auto ans = indexing_nonconst.Query(dataset, search_conf, bitset);
|
||||
|
||||
SubSearchResult sub_qr(num_queries, topK, metric_type, round_decimal);
|
||||
std::copy_n(
|
||||
ans->distances_.data(), num_queries * topK, sub_qr.get_distances());
|
||||
std::copy_n(
|
||||
ans->seg_offsets_.data(), num_queries * topK, sub_qr.get_seg_offsets());
|
||||
sub_qr.round_values();
|
||||
return sub_qr;
|
||||
if (!PrepareVectorIteratorsFromIndex(search_conf,
|
||||
num_queries,
|
||||
dataset,
|
||||
search_result,
|
||||
bitset,
|
||||
indexing)) {
|
||||
indexing.Query(dataset, search_conf, bitset, search_result);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -19,10 +19,11 @@
|
|||
|
||||
namespace milvus::query {
|
||||
|
||||
SubSearchResult
|
||||
void
|
||||
SearchOnIndex(const dataset::SearchDataset& search_dataset,
|
||||
const index::VectorIndex& indexing,
|
||||
const SearchInfo& search_conf,
|
||||
const BitsetView& bitset);
|
||||
const BitsetView& bitset,
|
||||
SearchResult& search_result);
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "query/SearchBruteForce.h"
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "query/helper.h"
|
||||
#include "query/GroupByOperator.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
|
@ -27,8 +28,8 @@ SearchOnSealedIndex(const Schema& schema,
|
|||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& result) {
|
||||
auto topk = search_info.topk_;
|
||||
SearchResult& search_result) {
|
||||
auto topK = search_info.topk_;
|
||||
auto round_decimal = search_info.round_decimal_;
|
||||
|
||||
auto field_id = search_info.field_id_;
|
||||
|
@ -42,20 +43,19 @@ SearchOnSealedIndex(const Schema& schema,
|
|||
AssertInfo(field_indexing->metric_type_ == search_info.metric_type_,
|
||||
"Metric type of field index isn't the same with search info");
|
||||
|
||||
auto final = [&] {
|
||||
auto ds = knowhere::GenDataSet(num_queries, dim, query_data);
|
||||
|
||||
auto vec_index =
|
||||
dynamic_cast<index::VectorIndex*>(field_indexing->indexing_.get());
|
||||
auto dataset = knowhere::GenDataSet(num_queries, dim, query_data);
|
||||
auto vec_index =
|
||||
dynamic_cast<index::VectorIndex*>(field_indexing->indexing_.get());
|
||||
if (!PrepareVectorIteratorsFromIndex(search_info,
|
||||
num_queries,
|
||||
dataset,
|
||||
search_result,
|
||||
bitset,
|
||||
*vec_index)) {
|
||||
auto index_type = vec_index->GetIndexType();
|
||||
return vec_index->Query(ds, search_info, bitset);
|
||||
}();
|
||||
if (final->iterators.has_value()) {
|
||||
result.iterators = std::move(final->iterators);
|
||||
} else {
|
||||
float* distances = final->distances_.data();
|
||||
|
||||
auto total_num = num_queries * topk;
|
||||
vec_index->Query(dataset, search_info, bitset, search_result);
|
||||
float* distances = search_result.distances_.data();
|
||||
auto total_num = num_queries * topK;
|
||||
if (round_decimal != -1) {
|
||||
const float multiplier = pow(10.0, round_decimal);
|
||||
for (int i = 0; i < total_num; i++) {
|
||||
|
@ -63,11 +63,9 @@ SearchOnSealedIndex(const Schema& schema,
|
|||
std::round(distances[i] * multiplier) / multiplier;
|
||||
}
|
||||
}
|
||||
result.seg_offsets_ = std::move(final->seg_offsets_);
|
||||
result.distances_ = std::move(final->distances_);
|
||||
}
|
||||
result.total_nq_ = num_queries;
|
||||
result.unity_topK_ = topk;
|
||||
search_result.total_nq_ = num_queries;
|
||||
search_result.unity_topK_ = topK;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -91,15 +89,26 @@ SearchOnSealed(const Schema& schema,
|
|||
|
||||
auto data_type = field.get_data_type();
|
||||
CheckBruteForceSearchParam(field, search_info);
|
||||
auto sub_qr = BruteForceSearch(dataset,
|
||||
vec_data,
|
||||
row_count,
|
||||
search_info.search_params_,
|
||||
bitset,
|
||||
data_type);
|
||||
if (search_info.group_by_field_id_.has_value()) {
|
||||
auto sub_qr = BruteForceSearchIterators(dataset,
|
||||
vec_data,
|
||||
row_count,
|
||||
search_info.search_params_,
|
||||
bitset,
|
||||
data_type);
|
||||
result.AssembleChunkVectorIterators(
|
||||
num_queries, 1, -1, sub_qr.chunk_iterators());
|
||||
} else {
|
||||
auto sub_qr = BruteForceSearch(dataset,
|
||||
vec_data,
|
||||
row_count,
|
||||
search_info.search_params_,
|
||||
bitset,
|
||||
data_type);
|
||||
|
||||
result.distances_ = std::move(sub_qr.mutable_distances());
|
||||
result.seg_offsets_ = std::move(sub_qr.mutable_seg_offsets());
|
||||
result.distances_ = std::move(sub_qr.mutable_distances());
|
||||
result.seg_offsets_ = std::move(sub_qr.mutable_seg_offsets());
|
||||
}
|
||||
result.unity_topK_ = dataset.topk;
|
||||
result.total_nq_ = dataset.num_queries;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ SearchOnSealedIndex(const Schema& schema,
|
|||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
const BitsetView& view,
|
||||
SearchResult& result);
|
||||
SearchResult& search_result);
|
||||
|
||||
void
|
||||
SearchOnSealed(const Schema& schema,
|
||||
|
|
|
@ -74,13 +74,19 @@ SubSearchResult::merge_impl(const SubSearchResult& right) {
|
|||
}
|
||||
|
||||
void
|
||||
SubSearchResult::merge(const SubSearchResult& sub_result) {
|
||||
AssertInfo(metric_type_ == sub_result.metric_type_,
|
||||
SubSearchResult::merge(const SubSearchResult& other) {
|
||||
AssertInfo(metric_type_ == other.metric_type_,
|
||||
"[SubSearchResult]Metric type check failed when merge");
|
||||
if (PositivelyRelated(metric_type_)) {
|
||||
this->merge_impl<true>(sub_result);
|
||||
if (!other.chunk_iterators_.empty()) {
|
||||
std::move(std::begin(other.chunk_iterators_),
|
||||
std::end(other.chunk_iterators_),
|
||||
std::back_inserter(this->chunk_iterators_));
|
||||
} else {
|
||||
this->merge_impl<false>(sub_result);
|
||||
if (PositivelyRelated(metric_type_)) {
|
||||
this->merge_impl<true>(other);
|
||||
} else {
|
||||
this->merge_impl<false>(other);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,21 +17,37 @@
|
|||
|
||||
#include "common/Types.h"
|
||||
#include "common/Utils.h"
|
||||
#include "knowhere/index_node.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
class SubSearchResult {
|
||||
public:
|
||||
SubSearchResult(int64_t num_queries,
|
||||
int64_t topk,
|
||||
const MetricType& metric_type,
|
||||
int64_t round_decimal)
|
||||
SubSearchResult(
|
||||
int64_t num_queries,
|
||||
int64_t topk,
|
||||
const MetricType& metric_type,
|
||||
int64_t round_decimal,
|
||||
const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
||||
iters)
|
||||
: num_queries_(num_queries),
|
||||
topk_(topk),
|
||||
round_decimal_(round_decimal),
|
||||
metric_type_(metric_type),
|
||||
seg_offsets_(num_queries * topk, INVALID_SEG_OFFSET),
|
||||
distances_(num_queries * topk, init_value(metric_type)) {
|
||||
distances_(num_queries * topk, init_value(metric_type)),
|
||||
chunk_iterators_(std::move(iters)) {
|
||||
}
|
||||
|
||||
SubSearchResult(int64_t num_queries,
|
||||
int64_t topk,
|
||||
const MetricType& metric_type,
|
||||
int64_t round_decimal)
|
||||
: SubSearchResult(
|
||||
num_queries,
|
||||
topk,
|
||||
metric_type,
|
||||
round_decimal,
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>{}) {
|
||||
}
|
||||
|
||||
SubSearchResult(SubSearchResult&& other) noexcept
|
||||
|
@ -40,7 +56,8 @@ class SubSearchResult {
|
|||
round_decimal_(other.round_decimal_),
|
||||
metric_type_(std::move(other.metric_type_)),
|
||||
seg_offsets_(std::move(other.seg_offsets_)),
|
||||
distances_(std::move(other.distances_)) {
|
||||
distances_(std::move(other.distances_)),
|
||||
chunk_iterators_(std::move(other.chunk_iterators_)) {
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -95,7 +112,12 @@ class SubSearchResult {
|
|||
round_values();
|
||||
|
||||
void
|
||||
merge(const SubSearchResult& sub_result);
|
||||
merge(const SubSearchResult& other);
|
||||
|
||||
const std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>&
|
||||
chunk_iterators() {
|
||||
return this->chunk_iterators_;
|
||||
}
|
||||
|
||||
private:
|
||||
template <bool is_desc>
|
||||
|
@ -109,6 +131,8 @@ class SubSearchResult {
|
|||
knowhere::MetricType metric_type_;
|
||||
std::vector<int64_t> seg_offsets_;
|
||||
std::vector<float> distances_;
|
||||
std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>
|
||||
chunk_iterators_;
|
||||
};
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -190,8 +190,8 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
|
|||
timestamp_,
|
||||
final_view,
|
||||
search_result);
|
||||
if (search_result.iterators.has_value()) {
|
||||
GroupBy(search_result.iterators.value(),
|
||||
if (search_result.vector_iterators_.has_value()) {
|
||||
GroupBy(search_result.vector_iterators_.value(),
|
||||
node.search_info_,
|
||||
search_result.group_by_values_,
|
||||
*segment,
|
||||
|
|
|
@ -87,23 +87,24 @@ VectorFieldIndexing::GetDataFromIndex(const int64_t* seg_offsets,
|
|||
void
|
||||
VectorFieldIndexing::AppendSegmentIndex(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const VectorBase* vec_base,
|
||||
const VectorBase* field_raw_data,
|
||||
const void* data_source) {
|
||||
AssertInfo(field_meta_.get_data_type() == DataType::VECTOR_FLOAT,
|
||||
"Data type of vector field is not VECTOR_FLOAT");
|
||||
|
||||
auto dim = field_meta_.get_dim();
|
||||
auto conf = get_build_params();
|
||||
auto source = dynamic_cast<const ConcurrentVector<FloatVector>*>(vec_base);
|
||||
auto source =
|
||||
dynamic_cast<const ConcurrentVector<FloatVector>*>(field_raw_data);
|
||||
|
||||
auto per_chunk = source->get_size_per_chunk();
|
||||
auto size_per_chunk = source->get_size_per_chunk();
|
||||
//append vector [vector_id_beg, vector_id_end] into index
|
||||
//build index [vector_id_beg, build_threshold) when index not exist
|
||||
if (!build) {
|
||||
idx_t vector_id_beg = index_cur_.load();
|
||||
idx_t vector_id_end = get_build_threshold() - 1;
|
||||
auto chunk_id_beg = vector_id_beg / per_chunk;
|
||||
auto chunk_id_end = vector_id_end / per_chunk;
|
||||
auto chunk_id_beg = vector_id_beg / size_per_chunk;
|
||||
auto chunk_id_end = vector_id_end / size_per_chunk;
|
||||
|
||||
int64_t vec_num = vector_id_end - vector_id_beg + 1;
|
||||
// for train index
|
||||
|
@ -111,7 +112,7 @@ VectorFieldIndexing::AppendSegmentIndex(int64_t reserved_offset,
|
|||
unique_ptr<float[]> vec_data;
|
||||
//all train data in one chunk
|
||||
if (chunk_id_beg == chunk_id_end) {
|
||||
data_addr = vec_base->get_chunk_data(chunk_id_beg);
|
||||
data_addr = field_raw_data->get_chunk_data(chunk_id_beg);
|
||||
} else {
|
||||
//merge data from multiple chunks together
|
||||
vec_data = std::make_unique<float[]>(vec_num * dim);
|
||||
|
@ -122,12 +123,13 @@ VectorFieldIndexing::AppendSegmentIndex(int64_t reserved_offset,
|
|||
int chunk_offset = 0;
|
||||
int chunk_copysz =
|
||||
chunk_id == chunk_id_end
|
||||
? vector_id_end - chunk_id * per_chunk + 1
|
||||
: per_chunk;
|
||||
std::memcpy(vec_data.get() + offset * dim,
|
||||
(const float*)vec_base->get_chunk_data(chunk_id) +
|
||||
chunk_offset * dim,
|
||||
chunk_copysz * dim * sizeof(float));
|
||||
? vector_id_end - chunk_id * size_per_chunk + 1
|
||||
: size_per_chunk;
|
||||
std::memcpy(
|
||||
vec_data.get() + offset * dim,
|
||||
(const float*)field_raw_data->get_chunk_data(chunk_id) +
|
||||
chunk_offset * dim,
|
||||
chunk_copysz * dim * sizeof(float));
|
||||
offset += chunk_copysz;
|
||||
}
|
||||
data_addr = vec_data.get();
|
||||
|
@ -146,8 +148,8 @@ VectorFieldIndexing::AppendSegmentIndex(int64_t reserved_offset,
|
|||
//append rest data when index has built
|
||||
idx_t vector_id_beg = index_cur_.load();
|
||||
idx_t vector_id_end = reserved_offset + size - 1;
|
||||
auto chunk_id_beg = vector_id_beg / per_chunk;
|
||||
auto chunk_id_end = vector_id_end / per_chunk;
|
||||
auto chunk_id_beg = vector_id_beg / size_per_chunk;
|
||||
auto chunk_id_end = vector_id_end / size_per_chunk;
|
||||
int64_t vec_num = vector_id_end - vector_id_beg + 1;
|
||||
|
||||
if (vec_num <= 0) {
|
||||
|
@ -163,11 +165,12 @@ VectorFieldIndexing::AppendSegmentIndex(int64_t reserved_offset,
|
|||
for (int chunk_id = chunk_id_beg; chunk_id <= chunk_id_end;
|
||||
chunk_id++) {
|
||||
int chunk_offset = chunk_id == chunk_id_beg
|
||||
? index_cur_ - chunk_id * per_chunk
|
||||
? index_cur_ - chunk_id * size_per_chunk
|
||||
: 0;
|
||||
int chunk_sz = chunk_id == chunk_id_end
|
||||
? vector_id_end % per_chunk - chunk_offset + 1
|
||||
: per_chunk - chunk_offset;
|
||||
int chunk_sz =
|
||||
chunk_id == chunk_id_end
|
||||
? vector_id_end % size_per_chunk - chunk_offset + 1
|
||||
: size_per_chunk - chunk_offset;
|
||||
auto dataset = knowhere::GenDataSet(
|
||||
chunk_sz,
|
||||
dim,
|
||||
|
|
|
@ -173,7 +173,7 @@ class VectorFieldIndexing : public FieldIndexing {
|
|||
void
|
||||
AppendSegmentIndex(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const VectorBase* vec_base,
|
||||
const VectorBase* field_raw_data,
|
||||
const void* data_source) override;
|
||||
|
||||
void
|
||||
|
@ -289,11 +289,11 @@ class IndexingRecord {
|
|||
indexing->get_field_meta().get_data_type() ==
|
||||
DataType::VECTOR_FLOAT &&
|
||||
reserved_offset + size >= indexing->get_build_threshold()) {
|
||||
auto vec_base = record.get_field_data_base(fieldId);
|
||||
auto field_raw_data = record.get_field_data_base(fieldId);
|
||||
indexing->AppendSegmentIndex(
|
||||
reserved_offset,
|
||||
size,
|
||||
vec_base,
|
||||
field_raw_data,
|
||||
stream_data->vectors().float_vector().data().data());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -441,24 +441,8 @@ SegmentGrowingImpl::vector_search(SearchInfo& search_info,
|
|||
Timestamp timestamp,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& output) const {
|
||||
auto& sealed_indexing = this->get_sealed_indexing_record();
|
||||
if (sealed_indexing.is_ready(search_info.field_id_)) {
|
||||
query::SearchOnSealedIndex(this->get_schema(),
|
||||
sealed_indexing,
|
||||
search_info,
|
||||
query_data,
|
||||
query_count,
|
||||
bitset,
|
||||
output);
|
||||
} else {
|
||||
query::SearchOnGrowing(*this,
|
||||
search_info,
|
||||
query_data,
|
||||
query_count,
|
||||
timestamp,
|
||||
bitset,
|
||||
output);
|
||||
}
|
||||
query::SearchOnGrowing(
|
||||
*this, search_info, query_data, query_count, timestamp, bitset, output);
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
|
|
|
@ -2125,10 +2125,10 @@ TEST(CApiTest, Indexing_Without_Predicate) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -2276,10 +2276,10 @@ TEST(CApiTest, Indexing_Expr_Without_Predicate) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -2456,10 +2456,10 @@ TEST(CApiTest, Indexing_With_float_Predicate_Range) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -2638,10 +2638,10 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -2812,10 +2812,10 @@ TEST(CApiTest, Indexing_With_float_Predicate_Term) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -2987,10 +2987,10 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -3169,10 +3169,10 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -3351,10 +3351,10 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -3527,10 +3527,10 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -3725,10 +3725,10 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -3945,9 +3945,9 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
|
|||
search_info.metric_type_ = knowhere::metric::L2;
|
||||
search_info.search_params_ = generate_search_conf(
|
||||
IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::metric::L2);
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
EXPECT_EQ(result_on_index->distances_.size(), num_queries * TOPK);
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
EXPECT_EQ(result_on_index.distances_.size(), num_queries * TOPK);
|
||||
|
||||
status = LoadFieldRawData(segment, 101, counter_col.data(), N);
|
||||
ASSERT_EQ(status.error_code, Success);
|
||||
|
@ -4192,10 +4192,10 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -4943,10 +4943,10 @@ TEST(CApiTest, Indexing_Without_Predicate_float16) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
@ -5095,10 +5095,10 @@ TEST(CApiTest, Indexing_Without_Predicate_bfloat16) {
|
|||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
auto result_on_index =
|
||||
vec_index->Query(query_dataset, search_info, nullptr);
|
||||
auto ids = result_on_index->seg_offsets_.data();
|
||||
auto dis = result_on_index->distances_.data();
|
||||
SearchResult result_on_index;
|
||||
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
|
||||
auto ids = result_on_index.seg_offsets_.data();
|
||||
auto dis = result_on_index.distances_.data();
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
|
|
|
@ -9,10 +9,6 @@
|
|||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
//
|
||||
// Created by zilliz on 2023/12/1.
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "common/Schema.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
@ -52,7 +48,30 @@ prepareSegmentSystemFieldData(const std::unique_ptr<SegmentSealed>& segment,
|
|||
segment->LoadFieldData(TimestampFieldID, field_data_info);
|
||||
}
|
||||
|
||||
TEST(GroupBY, Normal2) {
|
||||
int GetSearchResultBound(const SearchResult& search_result){
|
||||
int i = 0;
|
||||
for(; i < search_result.seg_offsets_.size(); i++){
|
||||
if(search_result.seg_offsets_[i]==INVALID_SEG_OFFSET) break;
|
||||
}
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
void CheckGroupBySearchResult(const SearchResult& search_result, int topK, int nq, bool strict){
|
||||
int total = topK * nq;
|
||||
ASSERT_EQ(search_result.group_by_values_.size(), total);
|
||||
ASSERT_EQ(search_result.seg_offsets_.size(), total);
|
||||
ASSERT_EQ(search_result.distances_.size(), total);
|
||||
ASSERT_TRUE(search_result.seg_offsets_[0]!=INVALID_SEG_OFFSET);
|
||||
int res_bound = GetSearchResultBound(search_result);
|
||||
ASSERT_TRUE(res_bound>0);
|
||||
if(strict){
|
||||
ASSERT_TRUE(res_bound==total-1);
|
||||
} else {
|
||||
ASSERT_TRUE(res_bound==total-1||search_result.seg_offsets_[res_bound+1]==INVALID_SEG_OFFSET);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GroupBY, SealedIndex) {
|
||||
using namespace milvus;
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
|
@ -97,6 +116,7 @@ TEST(GroupBY, Normal2) {
|
|||
load_index_info.index = std::move(indexing);
|
||||
load_index_info.index_params[METRICS_TYPE] = knowhere::metric::L2;
|
||||
segment->LoadIndex(load_index_info);
|
||||
int topK = 100;
|
||||
|
||||
//4. search group by int8
|
||||
{
|
||||
|
@ -111,7 +131,6 @@ TEST(GroupBY, Normal2) {
|
|||
placeholder_tag: "$0"
|
||||
|
||||
>)";
|
||||
|
||||
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size());
|
||||
|
@ -122,12 +141,9 @@ TEST(GroupBY, Normal2) {
|
|||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
ASSERT_EQ(search_result->group_by_values_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
ASSERT_EQ(search_result->distances_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, false);
|
||||
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
int size = group_by_values.size();
|
||||
std::unordered_set<int8_t> i8_set;
|
||||
float lastDistance = 0.0;
|
||||
|
@ -174,12 +190,9 @@ TEST(GroupBY, Normal2) {
|
|||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
ASSERT_EQ(search_result->group_by_values_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
ASSERT_EQ(search_result->distances_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, false);
|
||||
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
int size = group_by_values.size();
|
||||
std::unordered_set<int16_t> i16_set;
|
||||
float lastDistance = 0.0;
|
||||
|
@ -226,13 +239,10 @@ TEST(GroupBY, Normal2) {
|
|||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, false);
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
ASSERT_EQ(search_result->group_by_values_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
ASSERT_EQ(search_result->distances_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
|
||||
int size = group_by_values.size();
|
||||
|
||||
std::unordered_set<int32_t> i32_set;
|
||||
float lastDistance = 0.0;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
|
@ -278,11 +288,8 @@ TEST(GroupBY, Normal2) {
|
|||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, false);
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
ASSERT_EQ(search_result->group_by_values_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
ASSERT_EQ(search_result->distances_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
|
||||
int size = group_by_values.size();
|
||||
std::unordered_set<int64_t> i64_set;
|
||||
|
@ -330,12 +337,8 @@ TEST(GroupBY, Normal2) {
|
|||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, false);
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
ASSERT_EQ(search_result->group_by_values_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
ASSERT_EQ(search_result->distances_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
|
||||
int size = group_by_values.size();
|
||||
std::unordered_set<std::string> strs_set;
|
||||
float lastDistance = 0.0;
|
||||
|
@ -383,12 +386,8 @@ TEST(GroupBY, Normal2) {
|
|||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, false);
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
ASSERT_EQ(search_result->group_by_values_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
ASSERT_EQ(search_result->distances_.size(),
|
||||
search_result->seg_offsets_.size());
|
||||
|
||||
int size = group_by_values.size();
|
||||
std::unordered_set<bool> bools_set;
|
||||
int boolValCount = 0;
|
||||
|
@ -415,6 +414,92 @@ TEST(GroupBY, Normal2) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(GroupBY, SealedData){
|
||||
using namespace milvus;
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
|
||||
//0. prepare schema
|
||||
int dim = 64;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
|
||||
auto int8_fid = schema->AddDebugField("int8", DataType::INT8);
|
||||
auto int16_fid = schema->AddDebugField("int16", DataType::INT16);
|
||||
auto int32_fid = schema->AddDebugField("int32", DataType::INT32);
|
||||
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
|
||||
auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR);
|
||||
auto bool_fid = schema->AddDebugField("bool", DataType::BOOL);
|
||||
schema->set_primary_field_id(str_fid);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
size_t N = 100;
|
||||
|
||||
//2. load raw data
|
||||
auto raw_data = DataGen(schema, N);
|
||||
auto fields = schema->get_fields();
|
||||
for (auto field_data : raw_data.raw_->fields_data()) {
|
||||
int64_t field_id = field_data.field_id();
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
|
||||
segment->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
prepareSegmentSystemFieldData(segment, N, raw_data);
|
||||
|
||||
int topK = 100;
|
||||
//3. search group by int8
|
||||
{
|
||||
const char* raw_plan = R"(vector_anns: <
|
||||
field_id: 100
|
||||
query_info: <
|
||||
topk: 100
|
||||
metric_type: "L2"
|
||||
search_params: "{\"ef\": 10}"
|
||||
group_by_field_id: 101
|
||||
>
|
||||
placeholder_tag: "$0"
|
||||
|
||||
>)";
|
||||
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 1;
|
||||
auto seed = 1024;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, false);
|
||||
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
int size = group_by_values.size();
|
||||
std::unordered_set<int8_t> i8_set;
|
||||
float lastDistance = 0.0;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
if (std::holds_alternative<int8_t>(group_by_values[i])) {
|
||||
int8_t g_val = std::get<int8_t>(group_by_values[i]);
|
||||
ASSERT_FALSE(i8_set.count(g_val) >
|
||||
0); //no repetition on groupBy field
|
||||
i8_set.insert(g_val);
|
||||
auto distance = search_result->distances_.at(i);
|
||||
ASSERT_TRUE(
|
||||
lastDistance <=
|
||||
distance); //distance should be decreased as metrics_type is L2
|
||||
lastDistance = distance;
|
||||
} else {
|
||||
//check padding
|
||||
ASSERT_EQ(search_result->seg_offsets_[i], INVALID_SEG_OFFSET);
|
||||
ASSERT_EQ(search_result->distances_[i], 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GroupBY, Reduce) {
|
||||
using namespace milvus;
|
||||
using namespace milvus::query;
|
||||
|
@ -540,3 +625,171 @@ TEST(GroupBY, Reduce) {
|
|||
DeleteSegment(c_segment_1);
|
||||
DeleteSegment(c_segment_2);
|
||||
}
|
||||
|
||||
TEST(GroupBY, GrowingRawData){
|
||||
//0. set up growing segment
|
||||
int dim = 128;
|
||||
uint64_t seed = 512;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto int64_field_id = schema->AddDebugField("int64", DataType::INT64);
|
||||
auto int32_field_id = schema->AddDebugField("int32", DataType::INT32);
|
||||
auto vec_field_id = schema->AddDebugField(
|
||||
"embeddings", DataType::VECTOR_FLOAT, 128, metric_type);
|
||||
schema->set_primary_field_id(int64_field_id);
|
||||
|
||||
auto config = SegcoreConfig::default_config();
|
||||
config.set_chunk_rows(128);
|
||||
config.set_enable_interim_segment_index(false);//no growing index, test brute force
|
||||
auto segment_growing = CreateGrowingSegment(schema, nullptr, 1, config);
|
||||
auto segment_growing_impl = dynamic_cast<SegmentGrowingImpl*>(segment_growing.get());
|
||||
|
||||
//1. prepare raw data in growing segment
|
||||
int64_t rows_per_batch = 512;
|
||||
int n_batch = 3;
|
||||
for(int i = 0; i < n_batch; i++){
|
||||
auto data_set = DataGen(schema, rows_per_batch);
|
||||
auto offset = segment_growing_impl->PreInsert(rows_per_batch);
|
||||
segment_growing_impl->Insert(offset, rows_per_batch, data_set.row_ids_.data(),
|
||||
data_set.timestamps_.data(),
|
||||
data_set.raw_);
|
||||
}
|
||||
|
||||
|
||||
//2. Search group by
|
||||
const char* raw_plan = R"(vector_anns: <
|
||||
field_id: 102
|
||||
query_info: <
|
||||
topk: 100
|
||||
metric_type: "L2"
|
||||
search_params: "{\"ef\": 10}"
|
||||
group_by_field_id: 101
|
||||
>
|
||||
placeholder_tag: "$0"
|
||||
|
||||
>)";
|
||||
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
|
||||
auto plan = CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 10;
|
||||
auto topK = 100;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, true);
|
||||
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
int idx = 0;
|
||||
for (int i = 0; i < num_queries; i++){
|
||||
std::unordered_set<int32_t> i32_set;
|
||||
float lastDistance = 0.0;
|
||||
for (int j = 0; j < topK; j++){
|
||||
if (std::holds_alternative<int32_t>(group_by_values[idx])) {
|
||||
int32_t g_val = std::get<int32_t>(group_by_values[idx]);
|
||||
ASSERT_FALSE(i32_set.count(g_val) >0); //no repetition on groupBy field
|
||||
i32_set.insert(g_val);
|
||||
auto distance = search_result->distances_.at(idx);
|
||||
ASSERT_TRUE(
|
||||
lastDistance <=
|
||||
distance); //distance should be decreased as metrics_type is L2
|
||||
lastDistance = distance;
|
||||
} else {
|
||||
//check padding
|
||||
ASSERT_EQ(search_result->seg_offsets_[idx], INVALID_SEG_OFFSET);
|
||||
ASSERT_EQ(search_result->distances_[idx], 0.0);
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GroupBY, GrowingIndex){
|
||||
//0. set up growing segment
|
||||
int dim = 128;
|
||||
uint64_t seed = 512;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto int64_field_id = schema->AddDebugField("int64", DataType::INT64);
|
||||
auto int32_field_id = schema->AddDebugField("int32", DataType::INT32);
|
||||
auto vec_field_id = schema->AddDebugField(
|
||||
"embeddings", DataType::VECTOR_FLOAT, 128, metric_type);
|
||||
schema->set_primary_field_id(int64_field_id);
|
||||
|
||||
std::map<std::string, std::string> index_params = {
|
||||
{"index_type", "IVF_FLAT"},
|
||||
{"metric_type", metric_type},
|
||||
{"nlist", "128"}};
|
||||
std::map<std::string, std::string> type_params = {{"dim", "128"}};
|
||||
FieldIndexMeta fieldIndexMeta(
|
||||
vec_field_id, std::move(index_params), std::move(type_params));
|
||||
std::map<FieldId, FieldIndexMeta> fieldMap = {{vec_field_id, fieldIndexMeta}};
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(10000, std::move(fieldMap));
|
||||
|
||||
auto config = SegcoreConfig::default_config();
|
||||
config.set_chunk_rows(128);
|
||||
config.set_enable_interim_segment_index(true);//no growing index, test growing inter index
|
||||
config.set_nlist(128);
|
||||
auto segment_growing = CreateGrowingSegment(schema, metaPtr, 1, config);
|
||||
auto segment_growing_impl = dynamic_cast<SegmentGrowingImpl*>(segment_growing.get());
|
||||
|
||||
//1. prepare raw data in growing segment
|
||||
int64_t rows_per_batch = 1024;
|
||||
int n_batch = 10;
|
||||
for(int i = 0; i < n_batch; i++){
|
||||
auto data_set = DataGen(schema, rows_per_batch);
|
||||
auto offset = segment_growing_impl->PreInsert(rows_per_batch);
|
||||
segment_growing_impl->Insert(offset, rows_per_batch, data_set.row_ids_.data(),
|
||||
data_set.timestamps_.data(),
|
||||
data_set.raw_);
|
||||
}
|
||||
|
||||
|
||||
//2. Search group by int32
|
||||
const char* raw_plan = R"(vector_anns: <
|
||||
field_id: 102
|
||||
query_info: <
|
||||
topk: 100
|
||||
metric_type: "L2"
|
||||
search_params: "{\"ef\": 10}"
|
||||
group_by_field_id: 101
|
||||
>
|
||||
placeholder_tag: "$0"
|
||||
|
||||
>)";
|
||||
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
|
||||
auto plan = CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 10;
|
||||
auto topK = 100;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
auto search_result =
|
||||
segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
CheckGroupBySearchResult(*search_result, topK, num_queries, true);
|
||||
|
||||
auto& group_by_values = search_result->group_by_values_;
|
||||
int idx = 0;
|
||||
for (int i = 0; i < num_queries; i++){
|
||||
std::unordered_set<int32_t> i32_set;
|
||||
float lastDistance = 0.0;
|
||||
for (int j = 0; j < topK; j++){
|
||||
if (std::holds_alternative<int32_t>(group_by_values[idx])) {
|
||||
int32_t g_val = std::get<int32_t>(group_by_values[idx]);
|
||||
ASSERT_FALSE(i32_set.count(g_val) >0); //no repetition on groupBy field
|
||||
i32_set.insert(g_val);
|
||||
auto distance = search_result->distances_.at(idx);
|
||||
ASSERT_TRUE(
|
||||
lastDistance <=
|
||||
distance); //distance should be decreased as metrics_type is L2
|
||||
lastDistance = distance;
|
||||
} else {
|
||||
//check padding
|
||||
ASSERT_EQ(search_result->seg_offsets_[idx], INVALID_SEG_OFFSET);
|
||||
ASSERT_EQ(search_result->distances_[idx], 0.0);
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -288,13 +288,14 @@ TEST(Indexing, Naive) {
|
|||
searchInfo.metric_type_ = knowhere::metric::L2;
|
||||
searchInfo.search_params_ = search_conf;
|
||||
auto vec_index = dynamic_cast<index::VectorIndex*>(index.get());
|
||||
auto result = vec_index->Query(query_ds, searchInfo, view);
|
||||
SearchResult result;
|
||||
vec_index->Query(query_ds, searchInfo, view, result);
|
||||
|
||||
for (int i = 0; i < TOPK; ++i) {
|
||||
if (result->seg_offsets_[i] < N / 2) {
|
||||
if (result.seg_offsets_[i] < N / 2) {
|
||||
std::cout << "WRONG: ";
|
||||
}
|
||||
std::cout << result->seg_offsets_[i] << "->" << result->distances_[i]
|
||||
std::cout << result.seg_offsets_[i] << "->" << result.distances_[i]
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
@ -397,6 +398,73 @@ INSTANTIATE_TEST_CASE_P(
|
|||
#endif
|
||||
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2)));
|
||||
|
||||
TEST(Indexing, Iterator) {
|
||||
constexpr int N = 10240;
|
||||
constexpr int TOPK = 100;
|
||||
constexpr int dim = 128;
|
||||
constexpr int chunk_size = 5120;
|
||||
|
||||
auto [raw_data, timestamps, uids] = generate_data<dim>(N);
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = DataType::VECTOR_FLOAT;
|
||||
create_index_info.metric_type = knowhere::metric::L2;
|
||||
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC;
|
||||
create_index_info.index_engine_version =
|
||||
knowhere::Version::GetCurrentVersion().VersionNumber();
|
||||
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, milvus::storage::FileManagerContext());
|
||||
|
||||
auto build_conf = knowhere::Json{
|
||||
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::meta::DIM, std::to_string(dim)},
|
||||
{knowhere::indexparam::NLIST, "128"},
|
||||
};
|
||||
|
||||
auto search_conf = knowhere::Json{
|
||||
{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
|
||||
{knowhere::indexparam::NPROBE, 4},
|
||||
};
|
||||
|
||||
std::vector<knowhere::DataSetPtr> datasets;
|
||||
auto raw = raw_data.data();
|
||||
for (int beg = 0; beg < N; beg += chunk_size) {
|
||||
auto end = beg + chunk_size;
|
||||
if (end > N) {
|
||||
end = N;
|
||||
}
|
||||
std::vector<float> ft(raw + dim * beg, raw + dim * end);
|
||||
auto ds = knowhere::GenDataSet(end - beg, dim, ft.data());
|
||||
datasets.push_back(ds);
|
||||
}
|
||||
|
||||
for (auto& ds : datasets) {
|
||||
index->BuildWithDataset(ds, build_conf);
|
||||
}
|
||||
|
||||
auto bitmap = BitsetType(N, false);
|
||||
|
||||
BitsetView view = bitmap;
|
||||
auto query_ds = knowhere::GenDataSet(1, dim, raw_data.data());
|
||||
|
||||
milvus::SearchInfo searchInfo;
|
||||
searchInfo.topk_ = TOPK;
|
||||
searchInfo.metric_type_ = knowhere::metric::L2;
|
||||
searchInfo.search_params_ = search_conf;
|
||||
auto vec_index = dynamic_cast<index::VectorIndex*>(index.get());
|
||||
|
||||
knowhere::expected<std::vector<std::shared_ptr<knowhere::IndexNode::iterator>>>
|
||||
kw_iterators = vec_index->VectorIterators(query_ds, searchInfo, view);
|
||||
ASSERT_TRUE(kw_iterators.has_value());
|
||||
ASSERT_EQ(kw_iterators.value().size(), 1);
|
||||
auto iterator = kw_iterators.value()[0];
|
||||
ASSERT_TRUE(iterator->HasNext());
|
||||
while(iterator->HasNext()){
|
||||
auto [off, dis] = iterator->Next();
|
||||
ASSERT_TRUE(off >= 0);
|
||||
ASSERT_TRUE(dis >= 0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(IndexTest, BuildAndQuery) {
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.index_type = index_type;
|
||||
|
@ -439,16 +507,17 @@ TEST_P(IndexTest, BuildAndQuery) {
|
|||
search_info.topk_ = K;
|
||||
search_info.metric_type_ = metric_type;
|
||||
search_info.search_params_ = search_conf;
|
||||
auto result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
EXPECT_EQ(result->total_nq_, NQ);
|
||||
EXPECT_EQ(result->unity_topK_, K);
|
||||
EXPECT_EQ(result->distances_.size(), NQ * K);
|
||||
EXPECT_EQ(result->seg_offsets_.size(), NQ * K);
|
||||
SearchResult result;
|
||||
vec_index->Query(xq_dataset, search_info, nullptr, result);
|
||||
EXPECT_EQ(result.total_nq_, NQ);
|
||||
EXPECT_EQ(result.unity_topK_, K);
|
||||
EXPECT_EQ(result.distances_.size(), NQ * K);
|
||||
EXPECT_EQ(result.seg_offsets_.size(), NQ * K);
|
||||
if (!is_binary) {
|
||||
EXPECT_EQ(result->seg_offsets_[0], query_offset);
|
||||
EXPECT_EQ(result.seg_offsets_[0], query_offset);
|
||||
}
|
||||
search_info.search_params_ = range_search_conf;
|
||||
vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
vec_index->Query(xq_dataset, search_info, nullptr, result);
|
||||
}
|
||||
|
||||
TEST_P(IndexTest, Mmap) {
|
||||
|
@ -497,16 +566,17 @@ TEST_P(IndexTest, Mmap) {
|
|||
search_info.topk_ = K;
|
||||
search_info.metric_type_ = metric_type;
|
||||
search_info.search_params_ = search_conf;
|
||||
auto result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
EXPECT_EQ(result->total_nq_, NQ);
|
||||
EXPECT_EQ(result->unity_topK_, K);
|
||||
EXPECT_EQ(result->distances_.size(), NQ * K);
|
||||
EXPECT_EQ(result->seg_offsets_.size(), NQ * K);
|
||||
SearchResult result;
|
||||
vec_index->Query(xq_dataset, search_info, nullptr, result);
|
||||
EXPECT_EQ(result.total_nq_, NQ);
|
||||
EXPECT_EQ(result.unity_topK_, K);
|
||||
EXPECT_EQ(result.distances_.size(), NQ * K);
|
||||
EXPECT_EQ(result.seg_offsets_.size(), NQ * K);
|
||||
if (!is_binary) {
|
||||
EXPECT_EQ(result->seg_offsets_[0], query_offset);
|
||||
EXPECT_EQ(result.seg_offsets_[0], query_offset);
|
||||
}
|
||||
search_info.search_params_ = range_search_conf;
|
||||
vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
vec_index->Query(xq_dataset, search_info, nullptr, result);
|
||||
}
|
||||
|
||||
TEST_P(IndexTest, GetVector) {
|
||||
|
@ -658,7 +728,8 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
|||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
{milvus::index::DISK_ANN_QUERY_LIST, K - 1},
|
||||
};
|
||||
EXPECT_THROW(vec_index->Query(xq_dataset, search_info, nullptr),
|
||||
SearchResult result;
|
||||
EXPECT_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result),
|
||||
std::runtime_error);
|
||||
}
|
||||
|
||||
|
|
|
@ -115,8 +115,9 @@ TEST(Sealed, without_predicate) {
|
|||
searchInfo.topk_ = topK;
|
||||
searchInfo.metric_type_ = knowhere::metric::L2;
|
||||
searchInfo.search_params_ = search_conf;
|
||||
auto result = vec_index->Query(query_dataset, searchInfo, nullptr);
|
||||
auto ref_result = SearchResultToJson(*result);
|
||||
SearchResult result;
|
||||
vec_index->Query(query_dataset, searchInfo, nullptr, result);
|
||||
auto ref_result = SearchResultToJson(result);
|
||||
|
||||
LoadIndexInfo load_info;
|
||||
load_info.field_id = fake_id.get();
|
||||
|
@ -235,7 +236,8 @@ TEST(Sealed, with_predicate) {
|
|||
searchInfo.topk_ = topK;
|
||||
searchInfo.metric_type_ = knowhere::metric::L2;
|
||||
searchInfo.search_params_ = search_conf;
|
||||
auto result = vec_index->Query(query_dataset, searchInfo, nullptr);
|
||||
SearchResult result;
|
||||
vec_index->Query(query_dataset, searchInfo, nullptr, result);
|
||||
|
||||
LoadIndexInfo load_info;
|
||||
load_info.field_id = fake_id.get();
|
||||
|
|
|
@ -78,10 +78,6 @@ func initSearchRequest(ctx context.Context, t *searchTask) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if queryInfo.GroupByFieldId != 0 {
|
||||
t.SearchRequest.IgnoreGrowing = true
|
||||
// for group by operation, currently, we ignore growing segments
|
||||
}
|
||||
t.offset = offset
|
||||
|
||||
plan, err := planparserv2.CreateSearchPlan(t.schema.CollectionSchema, t.request.Dsl, annsField, queryInfo)
|
||||
|
|
Loading…
Reference in New Issue