mirror of https://github.com/milvus-io/milvus.git
fix: search-group-by failed to get data from multi-chunked-segment(##… (#38383)
related: #38343 Signed-off-by: MrPresent-Han <chun.han@gmail.com> Co-authored-by: MrPresent-Han <chun.han@gmail.com>pull/38536/head
parent
3038383e36
commit
c1f9158996
|
@ -61,50 +61,53 @@ class GrowingDataGetter : public DataGetter<T> {
|
|||
template <typename T>
|
||||
class SealedDataGetter : public DataGetter<T> {
|
||||
private:
|
||||
std::shared_ptr<Span<T>> field_data_;
|
||||
std::shared_ptr<std::vector<std::string_view>> str_field_data_;
|
||||
const index::ScalarIndex<T>* field_index_;
|
||||
const segcore::SegmentSealed& segment_;
|
||||
const FieldId field_id_;
|
||||
bool from_data_;
|
||||
|
||||
mutable std::unordered_map<int64_t, std::vector<std::string_view>>
|
||||
str_view_map_;
|
||||
// Getting str_view from segment is cpu-costly, this map is to cache this view for performance
|
||||
public:
|
||||
SealedDataGetter(const segcore::SegmentSealed& segment, FieldId& field_id) {
|
||||
if (segment.HasFieldData(field_id)) {
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
str_field_data_ =
|
||||
std::make_shared<std::vector<std::string_view>>(
|
||||
segment.chunk_view<std::string_view>(field_id, 0)
|
||||
.first);
|
||||
} else {
|
||||
auto span = segment.chunk_data<T>(field_id, 0);
|
||||
field_data_ = std::make_shared<Span<T>>(
|
||||
span.data(), span.valid_data(), span.row_count());
|
||||
}
|
||||
} else if (segment.HasIndex(field_id)) {
|
||||
this->field_index_ = &(segment.chunk_scalar_index<T>(field_id, 0));
|
||||
} else {
|
||||
PanicInfo(UnexpectedError,
|
||||
"The segment used to init data getter has no effective "
|
||||
"data source, neither"
|
||||
"index or data");
|
||||
SealedDataGetter(const segcore::SegmentSealed& segment, FieldId& field_id)
|
||||
: segment_(segment), field_id_(field_id) {
|
||||
from_data_ = segment_.HasFieldData(field_id_);
|
||||
if (!from_data_ && !segment_.HasIndex(field_id_)) {
|
||||
PanicInfo(
|
||||
UnexpectedError,
|
||||
"The segment:{} used to init data getter has no effective "
|
||||
"data source, neither"
|
||||
"index or data",
|
||||
segment_.get_segment_id());
|
||||
}
|
||||
}
|
||||
|
||||
SealedDataGetter(const SealedDataGetter<T>& other)
|
||||
: field_data_(other.field_data_),
|
||||
str_field_data_(other.str_field_data_),
|
||||
field_index_(other.field_index_) {
|
||||
}
|
||||
|
||||
T
|
||||
Get(int64_t idx) const {
|
||||
if (field_data_ || str_field_data_) {
|
||||
if (from_data_) {
|
||||
auto id_offset_pair = segment_.get_chunk_by_offset(field_id_, idx);
|
||||
auto chunk_id = id_offset_pair.first;
|
||||
auto inner_offset = id_offset_pair.second;
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
if (str_view_map_.find(chunk_id) == str_view_map_.end()) {
|
||||
// for now, search_group_by does not handle null values
|
||||
auto [str_chunk_view, _] =
|
||||
segment_.chunk_view<std::string_view>(field_id_,
|
||||
chunk_id);
|
||||
str_view_map_[chunk_id] = std::move(str_chunk_view);
|
||||
}
|
||||
auto& str_chunk_view = str_view_map_[chunk_id];
|
||||
std::string_view str_val_view =
|
||||
str_field_data_->operator[](idx);
|
||||
str_chunk_view.operator[](inner_offset);
|
||||
return std::string(str_val_view.data(), str_val_view.length());
|
||||
} else {
|
||||
Span<T> span = segment_.chunk_data<T>(field_id_, chunk_id);
|
||||
auto raw = span.operator[](inner_offset);
|
||||
return raw;
|
||||
}
|
||||
return field_data_->operator[](idx);
|
||||
} else {
|
||||
auto raw = (*field_index_).Reverse_Lookup(idx);
|
||||
auto& chunk_index = segment_.chunk_scalar_index<T>(field_id_, 0);
|
||||
auto raw = chunk_index.Reverse_Lookup(idx);
|
||||
AssertInfo(raw.has_value(), "field data not found");
|
||||
return raw.value();
|
||||
}
|
||||
|
|
|
@ -134,7 +134,7 @@ class ScalarIndex : public IndexBase {
|
|||
}
|
||||
|
||||
virtual bool
|
||||
IsMmapSupported() const {
|
||||
IsMmapSupported() const override {
|
||||
return index_type_ == milvus::index::BITMAP_INDEX_TYPE ||
|
||||
index_type_ == milvus::index::HYBRID_INDEX_TYPE;
|
||||
}
|
||||
|
|
|
@ -165,7 +165,14 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
|
||||
std::pair<int64_t, int64_t>
|
||||
get_chunk_by_offset(FieldId field_id, int64_t offset) const override {
|
||||
PanicInfo(ErrorCode::Unsupported, "Not implemented");
|
||||
if (fields_.find(field_id) == fields_.end()) {
|
||||
PanicInfo(
|
||||
ErrorCode::FieldIDInvalid,
|
||||
"Failed to get chunk offset towards a non-existing field:{}",
|
||||
field_id.get());
|
||||
}
|
||||
// for sealed segment, chunk id is always zero and input offset is the target offset
|
||||
return std::make_pair(0, offset);
|
||||
}
|
||||
|
||||
int64_t
|
||||
|
|
Loading…
Reference in New Issue