Simplify the parameter list of the vector_search API in segcore (#18827)

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
pull/18832/head
Cai Yudong 2022-08-25 16:16:54 +08:00 committed by GitHub
parent c924f73105
commit dcf45df029
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 18 additions and 25 deletions

View File

@ -74,15 +74,16 @@ FloatIndexSearch(const segcore::SegmentGrowingImpl& segment,
void
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
int64_t ins_barrier,
const query::SearchInfo& info,
const void* query_data,
int64_t num_queries,
Timestamp timestamp,
const BitsetView& bitset,
SearchResult& results) {
auto& schema = segment.get_schema();
auto& indexing_record = segment.get_indexing_record();
auto& record = segment.get_insert_record();
auto active_count = segment.get_active_count(timestamp);
// step 1.1: get meta
// step 1.2: get which vector field to search
@ -102,19 +103,19 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
int32_t current_chunk_id = 0;
if (field.get_data_type() == DataType::VECTOR_FLOAT) {
current_chunk_id = FloatIndexSearch(segment, info, query_data, num_queries, ins_barrier, bitset, final_qr);
current_chunk_id = FloatIndexSearch(segment, info, query_data, num_queries, active_count, bitset, final_qr);
}
// step 3: brute force search where small indexing is unavailable
auto vec_ptr = record.get_field_data_base(vecfield_id);
auto vec_size_per_chunk = vec_ptr->get_size_per_chunk();
auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk);
auto max_chunk = upper_div(active_count, vec_size_per_chunk);
for (int chunk_id = current_chunk_id; chunk_id < max_chunk; ++chunk_id) {
auto chunk_data = vec_ptr->get_chunk_data(chunk_id);
auto element_begin = chunk_id * vec_size_per_chunk;
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
auto element_end = std::min(active_count, (chunk_id + 1) * vec_size_per_chunk);
auto size_per_chunk = element_end - element_begin;
auto sub_view = bitset.subview(element_begin, size_per_chunk);

View File

@ -18,10 +18,10 @@ namespace milvus::query {
void
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
int64_t ins_barrier,
const query::SearchInfo& info,
const void* query_data,
int64_t num_queries,
Timestamp timestamp,
const BitsetView& bitset,
SearchResult& results);

View File

@ -27,8 +27,7 @@ SearchOnSealed(const Schema& schema,
const void* query_data,
int64_t num_queries,
const BitsetView& bitset,
SearchResult& result,
int64_t segment_id) {
SearchResult& result) {
auto topk = search_info.topk_;
auto round_decimal = search_info.round_decimal_;

View File

@ -25,7 +25,6 @@ SearchOnSealed(const Schema& schema,
const void* query_data,
int64_t num_queries,
const BitsetView& view,
SearchResult& result,
int64_t segment_id);
SearchResult& result);
} // namespace milvus::query

View File

@ -104,8 +104,7 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
return;
}
BitsetView final_view = bitset_holder;
segment->vector_search(active_count, node.search_info_, src_data, num_queries, timestamp_, final_view,
search_result);
segment->vector_search(node.search_info_, src_data, num_queries, timestamp_, final_view, search_result);
search_result_opt_ = std::move(search_result);
}

View File

@ -176,8 +176,7 @@ SegmentGrowingImpl::num_chunk() const {
}
void
SegmentGrowingImpl::vector_search(int64_t vec_count,
query::SearchInfo& search_info,
SegmentGrowingImpl::vector_search(query::SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,
@ -185,10 +184,10 @@ SegmentGrowingImpl::vector_search(int64_t vec_count,
SearchResult& output) const {
auto& sealed_indexing = this->get_sealed_indexing_record();
if (sealed_indexing.is_ready(search_info.field_id_)) {
query::SearchOnSealed(this->get_schema(), sealed_indexing, search_info, query_data, query_count, bitset, output,
id_);
query::SearchOnSealed(this->get_schema(), sealed_indexing, search_info, query_data, query_count, bitset,
output);
} else {
SearchOnGrowing(*this, vec_count, search_info, query_data, query_count, bitset, output);
query::SearchOnGrowing(*this, search_info, query_data, query_count, timestamp, bitset, output);
}
}

View File

@ -174,8 +174,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const override;
void
vector_search(int64_t vec_count,
query::SearchInfo& search_info,
vector_search(query::SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,

View File

@ -129,8 +129,7 @@ class SegmentInternalInterface : public SegmentInterface {
public:
virtual void
vector_search(int64_t vec_count,
query::SearchInfo& search_info,
vector_search(query::SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,

View File

@ -349,8 +349,7 @@ SegmentSealedImpl::mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Tim
}
void
SegmentSealedImpl::vector_search(int64_t vec_count,
query::SearchInfo& search_info,
SegmentSealedImpl::vector_search(query::SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,
@ -364,7 +363,7 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
if (get_bit(index_ready_bitset_, field_id)) {
AssertInfo(vector_indexings_.is_ready(field_id),
"vector indexes isn't ready for field " + std::to_string(field_id.get()));
query::SearchOnSealed(*schema_, vector_indexings_, search_info, query_data, query_count, bitset, output, id_);
query::SearchOnSealed(*schema_, vector_indexings_, search_info, query_data, query_count, bitset, output);
return;
} else if (!get_bit(field_data_ready_bitset_, field_id)) {
PanicInfo("Field Data is not loaded");

View File

@ -141,8 +141,7 @@ class SegmentSealedImpl : public SegmentSealed {
mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const override;
void
vector_search(int64_t vec_count,
query::SearchInfo& search_info,
vector_search(query::SearchInfo& search_info,
const void* query_data,
int64_t query_count,
Timestamp timestamp,