mirror of https://github.com/milvus-io/milvus.git
enhance: redefine variable column block size (#35040)
#35013 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com> pull/34672/head
parent
273e9203f4
commit
dd0c26cf58
|
@ -53,7 +53,10 @@ namespace milvus {
|
|||
*/
|
||||
constexpr size_t STRING_PADDING = 1;
|
||||
constexpr size_t ARRAY_PADDING = 1;
|
||||
constexpr size_t BLOCK_SIZE = 8192;
|
||||
|
||||
constexpr size_t DEFAULT_PK_VRCOL_BLOCK_SIZE = 1;
|
||||
constexpr size_t DEFAULT_MEM_VRCOL_BLOCK_SIZE = 32;
|
||||
constexpr size_t DEFAULT_MMAP_VRCOL_BLOCK_SIZE = 256;
|
||||
|
||||
class ColumnBase {
|
||||
public:
|
||||
|
@ -643,13 +646,16 @@ class VariableColumn : public ColumnBase {
|
|||
std::conditional_t<std::is_same_v<T, std::string>, std::string_view, T>;
|
||||
|
||||
// memory mode ctor
|
||||
VariableColumn(size_t cap, const FieldMeta& field_meta)
|
||||
: ColumnBase(cap, field_meta) {
|
||||
VariableColumn(size_t cap, const FieldMeta& field_meta, size_t block_size)
|
||||
: ColumnBase(cap, field_meta), block_size_(block_size) {
|
||||
}
|
||||
|
||||
// mmap mode ctor
|
||||
VariableColumn(const File& file, size_t size, const FieldMeta& field_meta)
|
||||
: ColumnBase(file, size, field_meta) {
|
||||
VariableColumn(const File& file,
|
||||
size_t size,
|
||||
const FieldMeta& field_meta,
|
||||
size_t block_size)
|
||||
: ColumnBase(file, size, field_meta), block_size_(block_size) {
|
||||
}
|
||||
// mmap with mmap manager
|
||||
VariableColumn(size_t reserve,
|
||||
|
@ -657,8 +663,10 @@ class VariableColumn : public ColumnBase {
|
|||
const DataType& data_type,
|
||||
storage::MmapChunkManagerPtr mcm,
|
||||
storage::MmapChunkDescriptorPtr descriptor,
|
||||
bool nullable)
|
||||
: ColumnBase(reserve, dim, data_type, mcm, descriptor, nullable) {
|
||||
bool nullable,
|
||||
size_t block_size)
|
||||
: ColumnBase(reserve, dim, data_type, mcm, descriptor, nullable),
|
||||
block_size_(block_size) {
|
||||
}
|
||||
|
||||
VariableColumn(VariableColumn&& column) noexcept
|
||||
|
@ -708,8 +716,8 @@ class VariableColumn : public ColumnBase {
|
|||
PanicInfo(ErrorCode::OutOfRange, "index out of range");
|
||||
}
|
||||
|
||||
char* pos = data_ + indices_[start_offset / BLOCK_SIZE];
|
||||
for (size_t j = 0; j < start_offset % BLOCK_SIZE; j++) {
|
||||
char* pos = data_ + indices_[start_offset / block_size_];
|
||||
for (size_t j = 0; j < start_offset % block_size_; j++) {
|
||||
uint32_t size;
|
||||
size = *reinterpret_cast<uint32_t*>(pos);
|
||||
pos += sizeof(uint32_t) + size;
|
||||
|
@ -723,8 +731,8 @@ class VariableColumn : public ColumnBase {
|
|||
if (i < 0 || i > num_rows_) {
|
||||
PanicInfo(ErrorCode::OutOfRange, "index out of range");
|
||||
}
|
||||
size_t batch_id = i / BLOCK_SIZE;
|
||||
size_t offset = i % BLOCK_SIZE;
|
||||
size_t batch_id = i / block_size_;
|
||||
size_t offset = i % block_size_;
|
||||
|
||||
// located in batch start location
|
||||
char* pos = data_ + indices_[batch_id];
|
||||
|
@ -801,11 +809,11 @@ class VariableColumn : public ColumnBase {
|
|||
void
|
||||
shrink_indice() {
|
||||
std::vector<uint64_t> tmp_indices;
|
||||
tmp_indices.reserve((indices_.size() + BLOCK_SIZE - 1) / BLOCK_SIZE);
|
||||
tmp_indices.reserve((indices_.size() + block_size_ - 1) / block_size_);
|
||||
|
||||
for (size_t i = 0; i < indices_.size();) {
|
||||
tmp_indices.push_back(indices_[i]);
|
||||
i += BLOCK_SIZE;
|
||||
i += block_size_;
|
||||
}
|
||||
|
||||
indices_.swap(tmp_indices);
|
||||
|
@ -814,8 +822,8 @@ class VariableColumn : public ColumnBase {
|
|||
private:
|
||||
// loading states
|
||||
std::queue<FieldDataPtr> load_buf_{};
|
||||
// raw data index, record indices located 0, interval, 2 * interval, 3 * interval
|
||||
// ... just like page index, interval set to 8192 that matches search engine's batch size
|
||||
// raw data index: records the offsets of the rows located at 0, block_size_, 2 * block_size_, 3 * block_size_, ...
|
||||
size_t block_size_;
|
||||
std::vector<uint64_t> indices_{};
|
||||
};
|
||||
|
||||
|
|
|
@ -399,6 +399,11 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
|
|||
// Don't allow raw data and index exist at the same time
|
||||
// AssertInfo(!get_bit(index_ready_bitset_, field_id),
|
||||
// "field data can't be loaded when indexing exists");
|
||||
auto get_block_size = [&]() -> size_t {
|
||||
return schema_->get_primary_field_id() == field_id
|
||||
? DEFAULT_PK_VRCOL_BLOCK_SIZE
|
||||
: DEFAULT_MEM_VRCOL_BLOCK_SIZE;
|
||||
};
|
||||
|
||||
std::shared_ptr<ColumnBase> column{};
|
||||
if (IsVariableDataType(data_type)) {
|
||||
|
@ -408,7 +413,7 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
|
|||
case milvus::DataType::VARCHAR: {
|
||||
auto var_column =
|
||||
std::make_shared<VariableColumn<std::string>>(
|
||||
num_rows, field_meta);
|
||||
num_rows, field_meta, get_block_size());
|
||||
FieldDataPtr field_data;
|
||||
while (data.channel->pop(field_data)) {
|
||||
var_column->Append(std::move(field_data));
|
||||
|
@ -423,7 +428,7 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
|
|||
case milvus::DataType::JSON: {
|
||||
auto var_column =
|
||||
std::make_shared<VariableColumn<milvus::Json>>(
|
||||
num_rows, field_meta);
|
||||
num_rows, field_meta, get_block_size());
|
||||
FieldDataPtr field_data;
|
||||
while (data.channel->pop(field_data)) {
|
||||
var_column->Append(std::move(field_data));
|
||||
|
@ -572,7 +577,10 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) {
|
|||
case milvus::DataType::STRING:
|
||||
case milvus::DataType::VARCHAR: {
|
||||
auto var_column = std::make_shared<VariableColumn<std::string>>(
|
||||
file, total_written, field_meta);
|
||||
file,
|
||||
total_written,
|
||||
field_meta,
|
||||
DEFAULT_MMAP_VRCOL_BLOCK_SIZE);
|
||||
var_column->Seal(std::move(indices));
|
||||
column = std::move(var_column);
|
||||
break;
|
||||
|
@ -580,7 +588,10 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) {
|
|||
case milvus::DataType::JSON: {
|
||||
auto var_column =
|
||||
std::make_shared<VariableColumn<milvus::Json>>(
|
||||
file, total_written, field_meta);
|
||||
file,
|
||||
total_written,
|
||||
field_meta,
|
||||
DEFAULT_MMAP_VRCOL_BLOCK_SIZE);
|
||||
var_column->Seal(std::move(indices));
|
||||
column = std::move(var_column);
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue