mirror of https://github.com/milvus-io/milvus.git
enhance: add valid_data in span (#35030)
#31728 Signed-off-by: lixinguo <xinguo.li@zilliz.com> Co-authored-by: lixinguo <xinguo.li@zilliz.com> pull/35103/head
parent
f466129924
commit
475c333fa2
|
@ -33,6 +33,15 @@ class SpanBase {
|
|||
int64_t element_sizeof)
|
||||
: data_(data), row_count_(row_count), element_sizeof_(element_sizeof) {
|
||||
}
|
||||
explicit SpanBase(const void* data,
|
||||
const bool* valid_data,
|
||||
int64_t row_count,
|
||||
int64_t element_sizeof)
|
||||
: data_(data),
|
||||
valid_data_(valid_data),
|
||||
row_count_(row_count),
|
||||
element_sizeof_(element_sizeof) {
|
||||
}
|
||||
|
||||
int64_t
|
||||
row_count() const {
|
||||
|
@ -49,8 +58,14 @@ class SpanBase {
|
|||
return data_;
|
||||
}
|
||||
|
||||
const bool*
|
||||
valid_data() const {
|
||||
return valid_data_;
|
||||
}
|
||||
|
||||
private:
|
||||
const void* data_;
|
||||
const bool* valid_data_{nullptr};
|
||||
int64_t row_count_;
|
||||
int64_t element_sizeof_;
|
||||
};
|
||||
|
@ -65,20 +80,22 @@ class Span<T,
|
|||
std::is_same_v<T, PkType>>> {
|
||||
public:
|
||||
using embedded_type = T;
|
||||
explicit Span(const T* data, int64_t row_count)
|
||||
: data_(data), row_count_(row_count) {
|
||||
explicit Span(const T* data, const bool* valid_data, int64_t row_count)
|
||||
: data_(data), valid_data_(valid_data), row_count_(row_count) {
|
||||
}
|
||||
|
||||
explicit Span(std::string_view data) {
|
||||
Span(data.data(), data.size());
|
||||
explicit Span(std::string_view data, bool* valid_data) {
|
||||
Span(data.data(), valid_data, data.size());
|
||||
}
|
||||
|
||||
operator SpanBase() const {
|
||||
return SpanBase(data_, row_count_, sizeof(T));
|
||||
return SpanBase(data_, valid_data_, row_count_, sizeof(T));
|
||||
}
|
||||
|
||||
explicit Span(const SpanBase& base)
|
||||
: Span(reinterpret_cast<const T*>(base.data()), base.row_count()) {
|
||||
: Span(reinterpret_cast<const T*>(base.data()),
|
||||
base.valid_data(),
|
||||
base.row_count()) {
|
||||
assert(base.element_sizeof() == sizeof(T));
|
||||
}
|
||||
|
||||
|
@ -92,6 +109,11 @@ class Span<T,
|
|||
return data_;
|
||||
}
|
||||
|
||||
const bool*
|
||||
valid_data() const {
|
||||
return valid_data_;
|
||||
}
|
||||
|
||||
const T&
|
||||
operator[](int64_t offset) const {
|
||||
return data_[offset];
|
||||
|
@ -104,6 +126,7 @@ class Span<T,
|
|||
|
||||
private:
|
||||
const T* data_;
|
||||
const bool* valid_data_;
|
||||
const int64_t row_count_;
|
||||
};
|
||||
|
||||
|
|
|
@ -77,7 +77,8 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
|
|||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
auto chunk_data =
|
||||
segment_->chunk_view<std::string_view>(field_id, chunk_id).data();
|
||||
segment_->chunk_view<std::string_view>(field_id, chunk_id)
|
||||
.first.data();
|
||||
return [chunk_data](int i) -> const number {
|
||||
return std::string(chunk_data[i]);
|
||||
};
|
||||
|
|
|
@ -206,8 +206,11 @@ class SegmentExpr : public Expr {
|
|||
|
||||
auto& skip_index = segment_->GetSkipIndex();
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||
auto data_vec = segment_->get_batch_views<T>(
|
||||
field_id_, 0, current_data_chunk_pos_, need_size);
|
||||
auto data_vec =
|
||||
segment_
|
||||
->get_batch_views<T>(
|
||||
field_id_, 0, current_data_chunk_pos_, need_size)
|
||||
.first;
|
||||
|
||||
func(data_vec.data(), need_size, res, values...);
|
||||
}
|
||||
|
|
|
@ -34,6 +34,10 @@ class ChunkVectorBase {
|
|||
get_chunk_size(int64_t index) = 0;
|
||||
virtual Type
|
||||
get_element(int64_t chunk_id, int64_t chunk_offset) = 0;
|
||||
virtual int64_t
|
||||
get_element_size() = 0;
|
||||
virtual int64_t
|
||||
get_element_offset(int64_t index) = 0;
|
||||
virtual ChunkViewType<Type>
|
||||
view_element(int64_t chunk_id, int64_t chunk_offset) = 0;
|
||||
int64_t
|
||||
|
@ -166,6 +170,25 @@ class ThreadSafeChunkVector : public ChunkVectorBase<Type> {
|
|||
vec_.clear();
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_element_size() override {
|
||||
std::shared_lock<std::shared_mutex> lck(mutex_);
|
||||
if constexpr (IsMmap && std::is_same_v<std::string, Type>) {
|
||||
return sizeof(ChunkViewType<Type>);
|
||||
}
|
||||
return sizeof(Type);
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_element_offset(int64_t index) override {
|
||||
std::shared_lock<std::shared_mutex> lck(mutex_);
|
||||
int64_t offset = 0;
|
||||
for (int i = 0; i < index - 1; i++) {
|
||||
offset += vec_[i].size();
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
SpanBase
|
||||
get_span(int64_t chunk_id) override {
|
||||
std::shared_lock<std::shared_mutex> lck(mutex_);
|
||||
|
|
|
@ -72,6 +72,10 @@ class ColumnBase {
|
|||
SetPaddingSize(data_type);
|
||||
|
||||
if (IsVariableDataType(data_type)) {
|
||||
if (field_meta.is_nullable()) {
|
||||
nullable_ = true;
|
||||
valid_data_.reserve(reserve);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -214,7 +218,7 @@ class ColumnBase {
|
|||
ColumnBase(ColumnBase&& column) noexcept
|
||||
: data_(column.data_),
|
||||
nullable_(column.nullable_),
|
||||
valid_data_(column.valid_data_),
|
||||
valid_data_(std::move(column.valid_data_)),
|
||||
padding_(column.padding_),
|
||||
type_size_(column.type_size_),
|
||||
num_rows_(column.num_rows_),
|
||||
|
@ -282,7 +286,7 @@ class ColumnBase {
|
|||
"GetBatchBuffer only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::vector<std::string_view>
|
||||
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
StringViews() const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"StringViews only supported for VariableColumn");
|
||||
|
@ -519,7 +523,8 @@ class Column : public ColumnBase {
|
|||
|
||||
SpanBase
|
||||
Span() const override {
|
||||
return SpanBase(data_, num_rows_, data_cap_size_ / num_rows_);
|
||||
return SpanBase(
|
||||
data_, valid_data_.data(), num_rows_, data_cap_size_ / num_rows_);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -681,7 +686,7 @@ class VariableColumn : public ColumnBase {
|
|||
"span() interface is not implemented for variable column");
|
||||
}
|
||||
|
||||
std::vector<std::string_view>
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
StringViews() const override {
|
||||
std::vector<std::string_view> res;
|
||||
char* pos = data_;
|
||||
|
@ -692,7 +697,7 @@ class VariableColumn : public ColumnBase {
|
|||
res.emplace_back(std::string_view(pos, size));
|
||||
pos += size;
|
||||
}
|
||||
return res;
|
||||
return std::make_pair(res, valid_data_);
|
||||
}
|
||||
|
||||
[[nodiscard]] std::vector<ViewType>
|
||||
|
@ -861,7 +866,10 @@ class ArrayColumn : public ColumnBase {
|
|||
|
||||
SpanBase
|
||||
Span() const override {
|
||||
return SpanBase(views_.data(), views_.size(), sizeof(ArrayView));
|
||||
return SpanBase(views_.data(),
|
||||
valid_data_.data(),
|
||||
views_.size(),
|
||||
sizeof(ArrayView));
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::vector<ArrayView>&
|
||||
|
@ -885,8 +893,8 @@ class ArrayColumn : public ColumnBase {
|
|||
element_indices_.emplace_back(array.get_offsets());
|
||||
if (nullable_) {
|
||||
return ColumnBase::Append(static_cast<const char*>(array.data()),
|
||||
array.byte_size(),
|
||||
valid_data);
|
||||
valid_data,
|
||||
array.byte_size());
|
||||
}
|
||||
ColumnBase::Append(static_cast<const char*>(array.data()),
|
||||
array.byte_size());
|
||||
|
|
|
@ -68,11 +68,12 @@ class SealedDataGetter : public DataGetter<T> {
|
|||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
str_field_data_ =
|
||||
std::make_shared<std::vector<std::string_view>>(
|
||||
segment.chunk_view<std::string_view>(field_id, 0));
|
||||
segment.chunk_view<std::string_view>(field_id, 0)
|
||||
.first);
|
||||
} else {
|
||||
auto span = segment.chunk_data<T>(field_id, 0);
|
||||
field_data_ =
|
||||
std::make_shared<Span<T>>(span.data(), span.row_count());
|
||||
field_data_ = std::make_shared<Span<T>>(
|
||||
span.data(), span.valid_data(), span.row_count());
|
||||
}
|
||||
} else if (segment.HasIndex(field_id)) {
|
||||
this->field_index_ = &(segment.chunk_scalar_index<T>(field_id, 0));
|
||||
|
|
|
@ -128,6 +128,12 @@ class VectorBase {
|
|||
virtual int64_t
|
||||
get_chunk_size(ssize_t chunk_index) const = 0;
|
||||
|
||||
virtual int64_t
|
||||
get_element_size() const = 0;
|
||||
|
||||
virtual int64_t
|
||||
get_element_offset(ssize_t chunk_index) const = 0;
|
||||
|
||||
virtual ssize_t
|
||||
num_chunk() const = 0;
|
||||
|
||||
|
@ -245,6 +251,26 @@ class ConcurrentVectorImpl : public VectorBase {
|
|||
return chunks_ptr_->get_chunk_size(chunk_index);
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_element_size() const override {
|
||||
if constexpr (is_type_entire_row) {
|
||||
return chunks_ptr_->get_element_size();
|
||||
} else if constexpr (std::is_same_v<Type, int64_t> || // NOLINT
|
||||
std::is_same_v<Type, int>) {
|
||||
// only for testing
|
||||
PanicInfo(NotImplemented, "unimplemented");
|
||||
} else {
|
||||
static_assert(
|
||||
std::is_same_v<typename TraitType::embedded_type, Type>);
|
||||
return elements_per_row_;
|
||||
}
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_element_offset(ssize_t chunk_index) const override {
|
||||
return chunks_ptr_->get_element_offset(chunk_index);
|
||||
}
|
||||
|
||||
// just for fun, don't use it directly
|
||||
const Type*
|
||||
get_element(ssize_t element_index) const {
|
||||
|
|
|
@ -460,6 +460,13 @@ class ThreadSafeValidData {
|
|||
return data_[offset];
|
||||
}
|
||||
|
||||
bool*
|
||||
get_chunk_data(size_t offset) {
|
||||
std::shared_lock<std::shared_mutex> lck(mutex_);
|
||||
Assert(offset < length_);
|
||||
return &data_[offset];
|
||||
}
|
||||
|
||||
private:
|
||||
mutable std::shared_mutex mutex_{};
|
||||
FixedVector<bool> data_;
|
||||
|
@ -770,10 +777,30 @@ struct InsertRecord {
|
|||
}
|
||||
|
||||
bool
|
||||
is_valid_data_exist(FieldId field_id) {
|
||||
is_data_exist(FieldId field_id) const {
|
||||
return data_.find(field_id) != data_.end();
|
||||
}
|
||||
|
||||
bool
|
||||
is_valid_data_exist(FieldId field_id) const {
|
||||
return valid_data_.find(field_id) != valid_data_.end();
|
||||
}
|
||||
|
||||
// Builds the span for one chunk of `field_id`.
//
// If no validity container is registered for the field, the span produced
// by the data vector itself is returned. Otherwise a SpanBase is assembled
// from the chunk's data pointer, the validity flags starting at the chunk's
// element offset, the chunk size and the element size.
SpanBase
get_span_base(FieldId field_id, int64_t chunk_id) const {
    auto data = get_data_base(field_id);
    if (!is_valid_data_exist(field_id)) {
        return data->get_span_base(chunk_id);
    }

    const auto chunk_rows = data->get_chunk_size(chunk_id);
    // Position of this chunk's first flag inside the validity container.
    const auto first_flag = data->get_element_offset(chunk_id);
    return SpanBase(data->get_chunk_data(chunk_id),
                    get_valid_data(field_id)->get_chunk_data(first_flag),
                    chunk_rows,
                    data->get_element_size());
}
|
||||
|
||||
// append a column of scalar or sparse float vector type
|
||||
template <typename Type>
|
||||
void
|
||||
|
|
|
@ -345,11 +345,10 @@ SegmentGrowingImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) {
|
|||
|
||||
SpanBase
|
||||
SegmentGrowingImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
auto vec = get_insert_record().get_data_base(field_id);
|
||||
return vec->get_span_base(chunk_id);
|
||||
return get_insert_record().get_span_base(field_id, chunk_id);
|
||||
}
|
||||
|
||||
std::vector<std::string_view>
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
PanicInfo(ErrorCode::NotImplemented,
|
||||
"chunk view impl not implement for growing segment");
|
||||
|
|
|
@ -76,6 +76,14 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
return id_;
|
||||
}
|
||||
|
||||
bool
|
||||
is_nullable(FieldId field_id) const override {
|
||||
AssertInfo(insert_record_.is_data_exist(field_id),
|
||||
"Cannot find field_data with field_id: " +
|
||||
std::to_string(field_id.get()));
|
||||
return insert_record_.is_valid_data_exist(field_id);
|
||||
};
|
||||
|
||||
public:
|
||||
const InsertRecord<>&
|
||||
get_insert_record() const {
|
||||
|
@ -318,10 +326,10 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
SpanBase
|
||||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
std::vector<std::string_view>
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
BufferView
|
||||
std::pair<BufferView, FixedVector<bool>>
|
||||
get_chunk_buffer(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
int64_t start_offset,
|
||||
|
|
|
@ -126,6 +126,9 @@ class SegmentInterface {
|
|||
|
||||
virtual bool
|
||||
HasRawData(int64_t field_id) const = 0;
|
||||
|
||||
virtual bool
|
||||
is_nullable(FieldId field_id) const = 0;
|
||||
};
|
||||
|
||||
// internal API for DSL calculation
|
||||
|
@ -139,23 +142,26 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
}
|
||||
|
||||
template <typename ViewType>
|
||||
std::vector<ViewType>
|
||||
std::pair<std::vector<ViewType>, FixedVector<bool>>
|
||||
chunk_view(FieldId field_id, int64_t chunk_id) const {
|
||||
auto string_views = chunk_view_impl(field_id, chunk_id);
|
||||
auto chunk_info = chunk_view_impl(field_id, chunk_id);
|
||||
auto string_views = chunk_info.first;
|
||||
auto valid_data = chunk_info.second;
|
||||
if constexpr (std::is_same_v<ViewType, std::string_view>) {
|
||||
return std::move(string_views);
|
||||
return std::make_pair(std::move(string_views),
|
||||
std::move(valid_data));
|
||||
} else {
|
||||
std::vector<ViewType> res;
|
||||
res.reserve(string_views.size());
|
||||
for (const auto& view : string_views) {
|
||||
res.emplace_back(view);
|
||||
}
|
||||
return res;
|
||||
return std::make_pair(res, valid_data);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ViewType>
|
||||
std::vector<ViewType>
|
||||
std::pair<std::vector<ViewType>, FixedVector<bool>>
|
||||
get_batch_views(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
int64_t start_offset,
|
||||
|
@ -164,8 +170,9 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
PanicInfo(ErrorCode::Unsupported,
|
||||
"get chunk views not supported for growing segment");
|
||||
}
|
||||
BufferView buffer =
|
||||
auto chunk_info =
|
||||
get_chunk_buffer(field_id, chunk_id, start_offset, length);
|
||||
BufferView buffer = chunk_info.first;
|
||||
std::vector<ViewType> res;
|
||||
res.reserve(length);
|
||||
char* pos = buffer.data_;
|
||||
|
@ -176,7 +183,7 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
res.emplace_back(ViewType(pos, size));
|
||||
pos += size;
|
||||
}
|
||||
return res;
|
||||
return std::make_pair(res, chunk_info.second);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -352,16 +359,17 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
is_mmap_field(FieldId field_id) const = 0;
|
||||
|
||||
protected:
|
||||
// todo: use an Unified struct for all type in growing/seal segment to store data and valid_data.
|
||||
// internal API: return chunk_data in span
|
||||
virtual SpanBase
|
||||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const = 0;
|
||||
|
||||
// internal API: return chunk string views in vector
|
||||
virtual std::vector<std::string_view>
|
||||
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_view_impl(FieldId field_id, int64_t chunk_id) const = 0;
|
||||
|
||||
// internal API: return buffer reference to field chunk data located from start_offset
|
||||
virtual BufferView
|
||||
virtual std::pair<BufferView, FixedVector<bool>>
|
||||
get_chunk_buffer(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
int64_t start_offset,
|
||||
|
|
|
@ -644,7 +644,7 @@ SegmentSealedImpl::size_per_chunk() const {
|
|||
return get_row_count();
|
||||
}
|
||||
|
||||
BufferView
|
||||
std::pair<BufferView, FixedVector<bool>>
|
||||
SegmentSealedImpl::get_chunk_buffer(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
int64_t start_offset,
|
||||
|
@ -655,7 +655,15 @@ SegmentSealedImpl::get_chunk_buffer(FieldId field_id,
|
|||
auto& field_meta = schema_->operator[](field_id);
|
||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||
auto& field_data = it->second;
|
||||
return field_data->GetBatchBuffer(start_offset, length);
|
||||
FixedVector<bool> valid_data;
|
||||
if (field_data->IsNullable()) {
|
||||
valid_data.reserve(length);
|
||||
for (int i = 0; i < length; i++) {
|
||||
valid_data.push_back(field_data->IsValid(start_offset + i));
|
||||
}
|
||||
}
|
||||
return std::make_pair(field_data->GetBatchBuffer(start_offset, length),
|
||||
valid_data);
|
||||
}
|
||||
PanicInfo(ErrorCode::UnexpectedError,
|
||||
"get_chunk_buffer only used for variable column field");
|
||||
|
@ -680,10 +688,11 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
|
|||
auto field_data = insert_record_.get_data_base(field_id);
|
||||
AssertInfo(field_data->num_chunk() == 1,
|
||||
"num chunk not equal to 1 for sealed segment");
|
||||
// system field
|
||||
return field_data->get_span_base(0);
|
||||
}
|
||||
|
||||
std::vector<std::string_view>
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||
|
|
|
@ -117,6 +117,15 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
return insert_record_.search_pk(pk, ts);
|
||||
}
|
||||
|
||||
bool
|
||||
is_nullable(FieldId field_id) const override {
|
||||
auto it = fields_.find(field_id);
|
||||
AssertInfo(it != fields_.end(),
|
||||
"Cannot find field with field_id: " +
|
||||
std::to_string(field_id.get()));
|
||||
return it->second->IsNullable();
|
||||
};
|
||||
|
||||
public:
|
||||
int64_t
|
||||
num_chunk_index(FieldId field_id) const override;
|
||||
|
@ -167,10 +176,10 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
SpanBase
|
||||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
std::vector<std::string_view>
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
BufferView
|
||||
std::pair<BufferView, FixedVector<bool>>
|
||||
get_chunk_buffer(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
int64_t start_offset,
|
||||
|
|
|
@ -19,7 +19,7 @@ TEST(Common, Span) {
|
|||
using namespace milvus;
|
||||
using namespace milvus::segcore;
|
||||
|
||||
Span<float> s1(nullptr, 100);
|
||||
Span<float> s1(nullptr, nullptr, 100);
|
||||
Span<milvus::FloatVector> s2(nullptr, 10, 16 * sizeof(float));
|
||||
SpanBase b1 = s1;
|
||||
SpanBase b2 = s2;
|
||||
|
|
|
@ -408,6 +408,20 @@ TEST(Sealed, LoadFieldData) {
|
|||
schema->AddDebugField("json", DataType::JSON);
|
||||
schema->AddDebugField("array", DataType::ARRAY, DataType::INT64);
|
||||
schema->set_primary_field_id(counter_id);
|
||||
auto int8_nullable_id =
|
||||
schema->AddDebugField("int8_null", DataType::INT8, true);
|
||||
auto int16_nullable_id =
|
||||
schema->AddDebugField("int16_null", DataType::INT16, true);
|
||||
auto int32_nullable_id =
|
||||
schema->AddDebugField("int32_null", DataType::INT32, true);
|
||||
auto int64_nullable_id =
|
||||
schema->AddDebugField("int64_null", DataType::INT64, true);
|
||||
auto double_nullable_id =
|
||||
schema->AddDebugField("double_null", DataType::DOUBLE, true);
|
||||
auto str_nullable_id =
|
||||
schema->AddDebugField("str_null", DataType::VARCHAR, true);
|
||||
auto float_nullable_id =
|
||||
schema->AddDebugField("float_null", DataType::FLOAT, true);
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
|
||||
|
@ -500,13 +514,49 @@ TEST(Sealed, LoadFieldData) {
|
|||
auto chunk_span2 = segment->chunk_data<double>(double_id, 0);
|
||||
auto chunk_span3 =
|
||||
segment->get_batch_views<std::string_view>(str_id, 0, 0, N);
|
||||
auto chunk_span4 = segment->chunk_data<int8_t>(int8_nullable_id, 0);
|
||||
auto chunk_span5 = segment->chunk_data<int16_t>(int16_nullable_id, 0);
|
||||
auto chunk_span6 = segment->chunk_data<int32_t>(int32_nullable_id, 0);
|
||||
auto chunk_span7 = segment->chunk_data<int64_t>(int64_nullable_id, 0);
|
||||
auto chunk_span8 = segment->chunk_data<double>(double_nullable_id, 0);
|
||||
auto chunk_span9 =
|
||||
segment->get_batch_views<std::string_view>(str_nullable_id, 0, 0, N);
|
||||
|
||||
auto ref1 = dataset.get_col<int64_t>(counter_id);
|
||||
auto ref2 = dataset.get_col<double>(double_id);
|
||||
auto ref3 = dataset.get_col(str_id)->scalars().string_data().data();
|
||||
auto ref4 = dataset.get_col<int8_t>(int8_nullable_id);
|
||||
auto ref5 = dataset.get_col<int16_t>(int16_nullable_id);
|
||||
auto ref6 = dataset.get_col<int32_t>(int32_nullable_id);
|
||||
auto ref7 = dataset.get_col<int64_t>(int64_nullable_id);
|
||||
auto ref8 = dataset.get_col<double>(double_nullable_id);
|
||||
auto ref9 =
|
||||
dataset.get_col(str_nullable_id)->scalars().string_data().data();
|
||||
auto valid4 = dataset.get_col_valid(int8_nullable_id);
|
||||
auto valid5 = dataset.get_col_valid(int16_nullable_id);
|
||||
auto valid6 = dataset.get_col_valid(int32_nullable_id);
|
||||
auto valid7 = dataset.get_col_valid(int64_nullable_id);
|
||||
auto valid8 = dataset.get_col_valid(double_nullable_id);
|
||||
auto valid9 = dataset.get_col_valid(str_nullable_id);
|
||||
ASSERT_EQ(chunk_span1.valid_data(), nullptr);
|
||||
ASSERT_EQ(chunk_span2.valid_data(), nullptr);
|
||||
ASSERT_EQ(chunk_span3.second.size(), 0);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
ASSERT_EQ(chunk_span1[i], ref1[i]);
|
||||
ASSERT_EQ(chunk_span2[i], ref2[i]);
|
||||
ASSERT_EQ(chunk_span3[i], ref3[i]);
|
||||
ASSERT_EQ(chunk_span1.data()[i], ref1[i]);
|
||||
ASSERT_EQ(chunk_span2.data()[i], ref2[i]);
|
||||
ASSERT_EQ(chunk_span3.first[i], ref3[i]);
|
||||
ASSERT_EQ(chunk_span4.data()[i], ref4[i]);
|
||||
ASSERT_EQ(chunk_span5.data()[i], ref5[i]);
|
||||
ASSERT_EQ(chunk_span6.data()[i], ref6[i]);
|
||||
ASSERT_EQ(chunk_span7.data()[i], ref7[i]);
|
||||
ASSERT_EQ(chunk_span8.data()[i], ref8[i]);
|
||||
ASSERT_EQ(chunk_span9.first[i], ref9[i]);
|
||||
ASSERT_EQ(chunk_span4.valid_data()[i], valid4[i]);
|
||||
ASSERT_EQ(chunk_span5.valid_data()[i], valid5[i]);
|
||||
ASSERT_EQ(chunk_span6.valid_data()[i], valid6[i]);
|
||||
ASSERT_EQ(chunk_span7.valid_data()[i], valid7[i]);
|
||||
ASSERT_EQ(chunk_span8.valid_data()[i], valid8[i]);
|
||||
ASSERT_EQ(chunk_span9.second[i], valid9[i]);
|
||||
}
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), timestamp);
|
||||
|
@ -630,10 +680,11 @@ TEST(Sealed, ClearData) {
|
|||
auto ref1 = dataset.get_col<int64_t>(counter_id);
|
||||
auto ref2 = dataset.get_col<double>(double_id);
|
||||
auto ref3 = dataset.get_col(str_id)->scalars().string_data().data();
|
||||
ASSERT_EQ(chunk_span3.second.size(), 0);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
ASSERT_EQ(chunk_span1[i], ref1[i]);
|
||||
ASSERT_EQ(chunk_span2[i], ref2[i]);
|
||||
ASSERT_EQ(chunk_span3[i], ref3[i]);
|
||||
ASSERT_EQ(chunk_span3.first[i], ref3[i]);
|
||||
}
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), timestamp);
|
||||
|
@ -733,10 +784,11 @@ TEST(Sealed, LoadFieldDataMmap) {
|
|||
auto ref1 = dataset.get_col<int64_t>(counter_id);
|
||||
auto ref2 = dataset.get_col<double>(double_id);
|
||||
auto ref3 = dataset.get_col(str_id)->scalars().string_data().data();
|
||||
ASSERT_EQ(chunk_span3.second.size(), 0);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
ASSERT_EQ(chunk_span1[i], ref1[i]);
|
||||
ASSERT_EQ(chunk_span2[i], ref2[i]);
|
||||
ASSERT_EQ(chunk_span3[i], ref3[i]);
|
||||
ASSERT_EQ(chunk_span3.first[i], ref3[i]);
|
||||
}
|
||||
|
||||
auto sr = segment->Search(plan.get(), ph_group.get(), timestamp);
|
||||
|
|
|
@ -29,6 +29,8 @@ TEST(Span, Naive) {
|
|||
auto float_vec_fid = schema->AddDebugField(
|
||||
"floatvec", DataType::VECTOR_FLOAT, 32, knowhere::metric::L2);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
auto nullable_fid =
|
||||
schema->AddDebugField("nullable", DataType::INT64, true);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
|
@ -42,6 +44,8 @@ TEST(Span, Naive) {
|
|||
auto vec_ptr = dataset.get_col<uint8_t>(bin_vec_fid);
|
||||
auto age_ptr = dataset.get_col<float>(float_fid);
|
||||
auto float_ptr = dataset.get_col<float>(float_vec_fid);
|
||||
auto nullable_data_ptr = dataset.get_col<int64_t>(nullable_fid);
|
||||
auto nullable_valid_data_ptr = dataset.get_col_valid(nullable_fid);
|
||||
auto num_chunk = segment->num_chunk();
|
||||
ASSERT_EQ(num_chunk, upper_div(N, size_per_chunk));
|
||||
auto row_count = segment->get_row_count();
|
||||
|
@ -52,9 +56,12 @@ TEST(Span, Naive) {
|
|||
auto age_span = segment->chunk_data<float>(float_fid, chunk_id);
|
||||
auto float_span =
|
||||
segment->chunk_data<milvus::FloatVector>(float_vec_fid, chunk_id);
|
||||
auto null_field_span =
|
||||
segment->chunk_data<int64_t>(nullable_fid, chunk_id);
|
||||
auto begin = chunk_id * size_per_chunk;
|
||||
auto end = std::min((chunk_id + 1) * size_per_chunk, N);
|
||||
auto size_of_chunk = end - begin;
|
||||
ASSERT_EQ(age_span.valid_data(), nullptr);
|
||||
for (int i = 0; i < size_of_chunk * 512 / 8; ++i) {
|
||||
ASSERT_EQ(vec_span.data()[i], vec_ptr[i + begin * 512 / 8]);
|
||||
}
|
||||
|
@ -64,5 +71,12 @@ TEST(Span, Naive) {
|
|||
for (int i = 0; i < size_of_chunk; ++i) {
|
||||
ASSERT_EQ(float_span.data()[i], float_ptr[i + begin * 32]);
|
||||
}
|
||||
for (int i = 0; i < size_of_chunk; ++i) {
|
||||
ASSERT_EQ(null_field_span.data()[i], nullable_data_ptr[i + begin]);
|
||||
}
|
||||
for (int i = 0; i < size_of_chunk; ++i) {
|
||||
ASSERT_EQ(null_field_span.valid_data()[i],
|
||||
nullable_valid_data_ptr[i + begin]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue