mirror of https://github.com/milvus-io/milvus.git
Use bitset_view at search
Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
pull/4973/head^2
parent
7d81222550
commit
8fccf7e630
|
@ -13,6 +13,8 @@
|
|||
#include <type_traits>
|
||||
#include "common/Types.h"
|
||||
#include <cassert>
|
||||
#include "VectorTrait.h"
|
||||
|
||||
namespace milvus {
|
||||
// type erasure to work around virtual restriction
|
||||
class SpanBase {
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#pragma once
|
||||
#include "utils/Types.h"
|
||||
#include "faiss/utils/BitsetView.h"
|
||||
#include <faiss/MetricType.h>
|
||||
#include <string>
|
||||
#include <boost/align/aligned_allocator.hpp>
|
||||
|
@ -75,6 +76,14 @@ using FieldId = fluent::NamedType<int64_t, struct FieldIdTag, fluent::Comparable
|
|||
using FieldName = fluent::NamedType<std::string, struct FieldNameTag, fluent::Comparable, fluent::Hashable>;
|
||||
using FieldOffset = fluent::NamedType<int64_t, struct FieldOffsetTag, fluent::Comparable, fluent::Hashable>;
|
||||
|
||||
} // namespace milvus
|
||||
using BitsetView = faiss::BitsetView;
|
||||
// Returns a view over `size` bits of `view`, starting at bit position `offset`.
//
// An empty `view` is passed through as a default-constructed BitsetView,
// whatever `offset` and `size` are. Otherwise the result starts `offset / 8`
// bytes past `view.data()`.
// NOTE(review): the result presumably borrows the storage behind `view` rather
// than copying it, so the caller has to keep that storage alive - confirm
// against faiss::BitsetView.
//
// Preconditions (checked with assert, so only when NDEBUG is not defined):
//   - offset >= 0 and size >= 0
//   - offset is a multiple of 8
// NOTE(review): offset + size is not checked against the length of `view`;
// callers are expected to stay within the underlying bitset.
inline BitsetView
BitsetSubView(const BitsetView& view, int64_t offset, int64_t size) {
    if (view.empty()) {
        return BitsetView();
    }
    // `offset % 8 == 0` also holds for negative multiples of 8, which would move
    // the data pointer in front of the buffer; a negative size is never a valid range.
    assert(offset >= 0);
    assert(size >= 0);
    // The sub-view is addressed through a byte pointer, so it can only begin on
    // a byte boundary.
    assert(offset % 8 == 0);
    return BitsetView(view.data() + offset / 8, size);
}
|
||||
|
||||
#include "VectorTrait.h"
|
||||
} // namespace milvus
|
||||
|
|
|
@ -40,7 +40,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
const float* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmaps_opt,
|
||||
const BitsetView& bitset,
|
||||
QueryResult& results) {
|
||||
auto& schema = segment.get_schema();
|
||||
auto& indexing_record = segment.get_indexing_record();
|
||||
|
@ -79,14 +79,16 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
|
||||
// TODO: use sub_qr
|
||||
for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
|
||||
auto bitset = create_bitmap_view(bitmaps_opt, chunk_id);
|
||||
auto chunk_size = indexing_entry.get_chunk_size();
|
||||
auto indexing = indexing_entry.get_vec_indexing(chunk_id);
|
||||
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, bitset);
|
||||
|
||||
auto sub_view = BitsetSubView(bitset, chunk_id * chunk_size, chunk_size);
|
||||
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_labels()) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * indexing_entry.get_chunk_size();
|
||||
x += chunk_id * chunk_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -100,15 +102,14 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
auto max_chunk = upper_div(ins_barrier, vec_chunk_size);
|
||||
|
||||
for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
|
||||
auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id);
|
||||
|
||||
auto& chunk = vec_ptr->get_chunk(chunk_id);
|
||||
|
||||
auto element_begin = chunk_id * vec_chunk_size;
|
||||
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size);
|
||||
auto chunk_size = element_end - element_begin;
|
||||
|
||||
auto sub_qr = FloatSearchBruteForce(query_dataset, chunk.data(), chunk_size, bitmap_view);
|
||||
auto sub_view = BitsetSubView(bitset, element_begin, chunk_size);
|
||||
auto sub_qr = FloatSearchBruteForce(query_dataset, chunk.data(), chunk_size, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_labels()) {
|
||||
|
@ -133,7 +134,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
|||
const uint8_t* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmaps_opt,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results) {
|
||||
auto& schema = segment.get_schema();
|
||||
auto& indexing_record = segment.get_indexing_record();
|
||||
|
@ -160,8 +161,6 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
|||
auto total_count = topK * num_queries;
|
||||
|
||||
// step 3: small indexing search
|
||||
// TODO: this is too intrusive
|
||||
// TODO: use QuerySubResult instead
|
||||
query::dataset::BinaryQueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data};
|
||||
|
||||
auto vec_ptr = record.get_entity<BinaryVector>(vecfield_offset);
|
||||
|
@ -178,8 +177,8 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
|||
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size);
|
||||
auto nsize = element_end - element_begin;
|
||||
|
||||
auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id);
|
||||
auto sub_result = BinarySearchBruteForce(query_dataset, chunk.data(), nsize, bitmap_view);
|
||||
auto sub_view = BitsetSubView(bitset, element_begin, nsize);
|
||||
auto sub_result = BinarySearchBruteForce(query_dataset, chunk.data(), nsize, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_result.mutable_labels()) {
|
||||
|
|
|
@ -28,7 +28,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
const float* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmap_opt,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results);
|
||||
|
||||
Status
|
||||
|
@ -37,6 +37,6 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
|||
const uint8_t* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmaps_opt,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results);
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -20,8 +20,9 @@
|
|||
|
||||
namespace milvus::query {
|
||||
|
||||
// negate the bitsets and merge them into one
|
||||
aligned_vector<uint8_t>
|
||||
AssembleBitmap(const BitmapSimple& bitmap_simple) {
|
||||
AssembleNegBitmap(const BitmapSimple& bitmap_simple) {
|
||||
int64_t N = 0;
|
||||
|
||||
for (auto& bitmap : bitmap_simple) {
|
||||
|
@ -52,7 +53,7 @@ SearchOnSealed(const Schema& schema,
|
|||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmaps_opt,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& result) {
|
||||
auto topK = query_info.topK_;
|
||||
|
||||
|
@ -73,12 +74,7 @@ SearchOnSealed(const Schema& schema,
|
|||
auto conf = query_info.search_params_;
|
||||
conf[milvus::knowhere::meta::TOPK] = query_info.topK_;
|
||||
conf[milvus::knowhere::Metric::TYPE] = MetricTypeToName(indexing_entry->metric_type_);
|
||||
if (bitmaps_opt.has_value()) {
|
||||
auto bitmap = AssembleBitmap(*bitmaps_opt.value());
|
||||
return indexing_entry->indexing_->Query(ds, conf, faiss::BitsetView(bitmap.data(), num_queries));
|
||||
} else {
|
||||
return indexing_entry->indexing_->Query(ds, conf, nullptr);
|
||||
}
|
||||
return indexing_entry->indexing_->Query(ds, conf, bitset);
|
||||
}();
|
||||
|
||||
auto ids = final->Get<idx_t*>(knowhere::meta::IDS);
|
||||
|
|
|
@ -16,6 +16,10 @@
|
|||
#include "query/Search.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
aligned_vector<uint8_t>
|
||||
AssembleNegBitmap(const BitmapSimple& bitmap_simple);
|
||||
|
||||
void
|
||||
SearchOnSealed(const Schema& schema,
|
||||
const segcore::SealedIndexingRecord& record,
|
||||
|
@ -23,7 +27,7 @@ SearchOnSealed(const Schema& schema,
|
|||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
std::optional<const BitmapSimple*> bitmaps_opt,
|
||||
const faiss::BitsetView& view,
|
||||
QueryResult& result);
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -67,21 +67,20 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
|||
auto src_data = ph.get_blob<float>();
|
||||
auto num_queries = ph.num_of_queries_;
|
||||
|
||||
ExecExprVisitor::RetType bitmap_holder;
|
||||
std::optional<const ExecExprVisitor::RetType*> bitset_pack;
|
||||
|
||||
aligned_vector<uint8_t> bitset_holder;
|
||||
BitsetView view;
|
||||
if (node.predicate_.has_value()) {
|
||||
bitmap_holder = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
|
||||
bitset_pack = &bitmap_holder;
|
||||
ExecExprVisitor::RetType expr_ret = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
|
||||
bitset_holder = AssembleNegBitmap(expr_ret);
|
||||
view = BitsetView(bitset_holder.data(), bitset_holder.size() * 8);
|
||||
}
|
||||
|
||||
auto& sealed_indexing = segment->get_sealed_indexing_record();
|
||||
|
||||
if (sealed_indexing.is_ready(node.query_info_.field_offset_)) {
|
||||
SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
|
||||
bitset_pack, ret);
|
||||
view, ret);
|
||||
} else {
|
||||
FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret);
|
||||
FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
|
||||
}
|
||||
|
||||
ret_ = ret;
|
||||
|
@ -98,20 +97,20 @@ ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) {
|
|||
auto src_data = ph.get_blob<uint8_t>();
|
||||
auto num_queries = ph.num_of_queries_;
|
||||
|
||||
ExecExprVisitor::RetType bitmap_holder;
|
||||
std::optional<const ExecExprVisitor::RetType*> bitset_pack;
|
||||
|
||||
aligned_vector<uint8_t> bitset_holder;
|
||||
BitsetView view;
|
||||
if (node.predicate_.has_value()) {
|
||||
bitmap_holder = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
|
||||
bitset_pack = &bitmap_holder;
|
||||
ExecExprVisitor::RetType expr_ret = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
|
||||
bitset_holder = AssembleNegBitmap(expr_ret);
|
||||
view = BitsetView(bitset_holder.data(), bitset_holder.size() * 8);
|
||||
}
|
||||
|
||||
auto& sealed_indexing = segment->get_sealed_indexing_record();
|
||||
if (sealed_indexing.is_ready(node.query_info_.field_offset_)) {
|
||||
SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
|
||||
bitset_pack, ret);
|
||||
view, ret);
|
||||
} else {
|
||||
BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret);
|
||||
BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
|
||||
}
|
||||
ret_ = ret;
|
||||
}
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "utils/tools.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
|
@ -22,14 +21,17 @@ TEST(Span, Naive) {
|
|||
int64_t N = 1000 * 1000;
|
||||
constexpr int64_t chunk_size = 32 * 1024;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
|
||||
schema->AddDebugField("binaryvec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
|
||||
schema->AddDebugField("age", DataType::FLOAT);
|
||||
schema->AddDebugField("floatvec", DataType::VECTOR_FLOAT, 32, MetricType::METRIC_L2);
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateGrowingSegment(schema, chunk_size);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
auto vec_ptr = dataset.get_col<uint8_t>(0);
|
||||
auto age_ptr = dataset.get_col<float>(1);
|
||||
auto float_ptr = dataset.get_col<float>(2);
|
||||
SegmentInternalInterface& interface = *segment;
|
||||
auto num_chunk = interface.get_safe_num_chunk();
|
||||
ASSERT_EQ(num_chunk, upper_div(N, chunk_size));
|
||||
|
@ -38,6 +40,7 @@ TEST(Span, Naive) {
|
|||
for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
|
||||
auto vec_span = interface.chunk_data<BinaryVector>(FieldOffset(0), chunk_id);
|
||||
auto age_span = interface.chunk_data<float>(FieldOffset(1), chunk_id);
|
||||
auto float_span = interface.chunk_data<FloatVector>(FieldOffset(2), chunk_id);
|
||||
auto begin = chunk_id * chunk_size;
|
||||
auto end = std::min((chunk_id + 1) * chunk_size, N);
|
||||
auto chunk_size = end - begin;
|
||||
|
@ -47,5 +50,8 @@ TEST(Span, Naive) {
|
|||
for (int i = 0; i < chunk_size; ++i) {
|
||||
ASSERT_EQ(age_span.data()[i], age_ptr[i + begin]);
|
||||
}
|
||||
for (int i = 0; i < chunk_size; ++i) {
|
||||
ASSERT_EQ(float_span.data()[i], float_ptr[i + begin * 32]);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue