Optimize performance of insert & query & search (#22829)

- Avoid one data copy when inserting int8/int16 values into a growing segment
- Avoid one data copy when retrieving primary keys
- Avoid one data copy when inserting/loading/deleting/filtering primary keys
- Avoid one data copy when reducing string results

Signed-off-by: yah01 <yang.cen@zilliz.com>
pull/22843/head
yah01 2023-03-20 10:19:56 +08:00 committed by GitHub
parent 20974711a1
commit 005d178a0e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 10 deletions

View File

@ -39,13 +39,13 @@ VectorBase::set_data_raw(ssize_t element_offset,
element_count);
}
case DataType::INT8: {
auto src_data = data->scalars().int_data().data();
auto& src_data = data->scalars().int_data().data();
std::vector<int8_t> data_raw(src_data.size());
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
return set_data_raw(element_offset, data_raw.data(), element_count);
}
case DataType::INT16: {
auto src_data = data->scalars().int_data().data();
auto& src_data = data->scalars().int_data().data();
std::vector<int16_t> data_raw(src_data.size());
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
return set_data_raw(element_offset, data_raw.data(), element_count);
@ -104,13 +104,13 @@ VectorBase::fill_chunk_data(ssize_t element_count,
element_count);
}
case DataType::INT8: {
auto src_data = data->scalars().int_data().data();
auto& src_data = data->scalars().int_data().data();
std::vector<int8_t> data_raw(src_data.size());
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
return fill_chunk_data(data_raw.data(), element_count);
}
case DataType::INT16: {
auto src_data = data->scalars().int_data().data();
auto& src_data = data->scalars().int_data().data();
std::vector<int16_t> data_raw(src_data.size());
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
return fill_chunk_data(data_raw.data(), element_count);

View File

@ -133,16 +133,17 @@ SegmentInternalInterface::Retrieve(const query::RetrievePlan* plan,
switch (field_meta.get_data_type()) {
case DataType::INT64: {
auto int_ids = ids->mutable_int_id();
auto src_data = col_data->scalars().long_data();
auto& src_data = col_data->scalars().long_data();
int_ids->mutable_data()->Add(src_data.data().begin(),
src_data.data().end());
break;
}
case DataType::VARCHAR: {
auto str_ids = ids->mutable_str_id();
auto src_data = col_data->scalars().string_data();
for (auto i = 0; i < src_data.data_size(); ++i)
auto& src_data = col_data->scalars().string_data();
for (auto i = 0; i < src_data.data_size(); ++i) {
*(str_ids->mutable_data()->Add()) = src_data.data(i);
}
break;
}
default: {

View File

@ -25,7 +25,7 @@ ParsePksFromFieldData(std::vector<PkType>& pks, const DataArray& data) {
break;
}
case DataType::VARCHAR: {
auto src_data = data.scalars().string_data().data();
auto& src_data = data.scalars().string_data().data();
std::copy(src_data.begin(), src_data.end(), pks.begin());
break;
}
@ -47,7 +47,7 @@ ParsePksFromIDs(std::vector<PkType>& pks,
break;
}
case DataType::VARCHAR: {
auto source_data = data.str_id().data();
auto& source_data = data.str_id().data();
std::copy(source_data.begin(), source_data.end(), pks.begin());
break;
}
@ -363,7 +363,7 @@ MergeDataArray(
continue;
}
case DataType::VARCHAR: {
auto data = src_field_data->scalars().string_data();
auto& data = src_field_data->scalars().string_data();
auto obj = scalar_array->mutable_string_data();
*(obj->mutable_data()->Add()) = data.data(src_offset);
continue;