diff --git a/internal/core/src/common/Json.h b/internal/core/src/common/Json.h index 640dc03724..8fb7b23ca0 100644 --- a/internal/core/src/common/Json.h +++ b/internal/core/src/common/Json.h @@ -157,11 +157,21 @@ class Json { return dom_doc().at_pointer(pointer).get_array(); } + size_t + size() const { + return data_.size(); + } + std::string_view data() const { return data_; } + const char* + c_str() const { + return data_.data(); + } + private: std::optional own_data_{}; // this could be empty, then the Json will be just s view on bytes diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index 28a6ea3119..8eae2a80db 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -209,10 +209,13 @@ class ColumnBase { if (data_ != nullptr) { std::memcpy(data, data_, size_); if (munmap(data_, cap_size_ + padding_)) { + auto err = errno; + munmap(data, new_size + padding_); + AssertInfo( false, "failed to unmap while expanding: {}, old_map_size={}", - strerror(errno), + strerror(err), cap_size_ + padding_); } } @@ -307,10 +310,14 @@ class VariableColumn : public ColumnBase { } void - Append(const char* data, size_t size) { - indices_.emplace_back(size_); - size_ += size; - load_buf_.emplace(data, size); + Append(storage::FieldDataPtr chunk) { + for (auto i = 0; i < chunk->get_num_rows(); i++) { + auto data = static_cast(chunk->RawValue(i)); + + indices_.emplace_back(size_); + size_ += data->size(); + } + load_buf_.emplace(std::move(chunk)); } void @@ -328,11 +335,14 @@ class VariableColumn : public ColumnBase { Expand(total_size); while (!load_buf_.empty()) { - auto data = std::move(load_buf_.front()); + auto chunk = std::move(load_buf_.front()); load_buf_.pop(); - std::copy_n(data.data(), data.length(), data_ + size_); - size_ += data.length(); + for (auto i = 0; i < chunk->get_num_rows(); i++) { + auto data = static_cast(chunk->RawValue(i)); + std::copy_n(data->c_str(), data->size(), data_ + size_); + size_ += 
data->size(); + } } } @@ -352,7 +362,7 @@ class VariableColumn : public ColumnBase { private: // loading states - std::queue load_buf_{}; + std::queue load_buf_{}; std::vector indices_{}; diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 730aaf01d0..80054ea884 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -330,15 +330,10 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { num_rows, field_meta); storage::FieldDataPtr field_data; while (data.channel->pop(field_data)) { - for (auto i = 0; i < field_data->get_num_rows(); i++) { - auto str = static_cast( - field_data->RawValue(i)); - auto str_size = str->size(); - var_column->Append(str->data(), str_size); - field_data_size += str_size; - } + var_column->Append(std::move(field_data)); } var_column->Seal(); + field_data_size = var_column->ByteSize(); column = std::move(var_column); break; } @@ -348,18 +343,10 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { num_rows, field_meta); storage::FieldDataPtr field_data; while (data.channel->pop(field_data)) { - for (auto i = 0; i < field_data->get_num_rows(); i++) { - auto padded_string = - static_cast( - field_data->RawValue(i)) - ->data(); - auto padded_string_size = padded_string.size(); - var_column->Append(padded_string.data(), - padded_string_size); - field_data_size += padded_string_size; - } + var_column->Append(std::move(field_data)); } var_column->Seal(); + field_data_size = var_column->ByteSize(); column = std::move(var_column); break; } @@ -373,6 +360,9 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { auto array = static_cast(rawValue); var_column->Append(*array); + // we store an offset for each array element, so each one costs an additional uint64_t; accumulate across all rows + field_data_size += + array->byte_size() + sizeof(uint64_t); } } 
var_column->Seal(); diff --git a/internal/core/unittest/test_disk_file_manager_test.cpp b/internal/core/unittest/test_disk_file_manager_test.cpp index 310dec776c..3a5a20c1fa 100644 --- a/internal/core/unittest/test_disk_file_manager_test.cpp +++ b/internal/core/unittest/test_disk_file_manager_test.cpp @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include #include