mirror of https://github.com/milvus-io/milvus.git
Avoid allocating and copying when filling a column with default values (#20721)
Signed-off-by: yah01 <yang.cen@zilliz.com> Signed-off-by: yah01 <yang.cen@zilliz.com>pull/20781/head
parent
958e94f6f0
commit
1eabfdb199
|
@ -10,11 +10,13 @@
|
|||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "SegmentSealedImpl.h"
|
||||
|
||||
#include "Utils.h"
|
||||
#include "common/Consts.h"
|
||||
#include "common/FieldMeta.h"
|
||||
#include "query/ScalarIndex.h"
|
||||
#include "query/SearchBruteForce.h"
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "query/ScalarIndex.h"
|
||||
#include "Utils.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -474,50 +476,10 @@ SegmentSealedImpl::bulk_subscript_impl(
|
|||
std::unique_ptr<DataArray>
|
||||
SegmentSealedImpl::fill_with_empty(FieldId field_id, int64_t count) const {
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
FixedVector<bool> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
FixedVector<int8_t> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
FixedVector<int16_t> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
FixedVector<int32_t> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
FixedVector<int64_t> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
FixedVector<float> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
FixedVector<double> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
FixedVector<std::string> output(count);
|
||||
return CreateScalarDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
|
||||
case DataType::VECTOR_FLOAT:
|
||||
case DataType::VECTOR_BINARY: {
|
||||
aligned_vector<char> output(field_meta.get_sizeof() * count);
|
||||
return CreateVectorDataArrayFrom(output.data(), count, field_meta);
|
||||
}
|
||||
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
if (datatype_is_vector(field_meta.get_data_type())) {
|
||||
return CreateVectorDataArray(count, field_meta);
|
||||
}
|
||||
return CreateScalarDataArray(count, field_meta);
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "segcore/Utils.h"
|
||||
|
||||
#include "index/ScalarIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
@ -67,6 +68,96 @@ GetSizeOfIdArray(const IdArray& data) {
|
|||
|
||||
// Note: this is a temporary solution.
|
||||
// Modify the bulk subscript implementation to make the process clearer.
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateScalarDataArray(int64_t count, const FieldMeta& field_meta) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
auto data_array = std::make_unique<DataArray>();
|
||||
data_array->set_field_id(field_meta.get_id().get());
|
||||
data_array->set_type(milvus::proto::schema::DataType(field_meta.get_data_type()));
|
||||
|
||||
auto scalar_array = data_array->mutable_scalars();
|
||||
switch (data_type) {
|
||||
case DataType::BOOL: {
|
||||
auto obj = scalar_array->mutable_bool_data();
|
||||
obj->mutable_data()->Resize(count, 0);
|
||||
break;
|
||||
}
|
||||
case DataType::INT8: {
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Resize(count, 0);
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Resize(count, 0);
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
auto obj = scalar_array->mutable_int_data();
|
||||
obj->mutable_data()->Resize(count, 0);
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
auto obj = scalar_array->mutable_long_data();
|
||||
obj->mutable_data()->Resize(count, 0);
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
auto obj = scalar_array->mutable_float_data();
|
||||
obj->mutable_data()->Resize(count, 0);
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
auto obj = scalar_array->mutable_double_data();
|
||||
obj->mutable_data()->Resize(count, 0);
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto obj = scalar_array->mutable_string_data();
|
||||
obj->mutable_data()->Reserve(count);
|
||||
for (auto i = 0; i < count; i++) *(obj->mutable_data()->Add()) = std::string();
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
}
|
||||
|
||||
return data_array;
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateVectorDataArray(int64_t count, const FieldMeta& field_meta) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
auto data_array = std::make_unique<DataArray>();
|
||||
data_array->set_field_id(field_meta.get_id().get());
|
||||
data_array->set_type(milvus::proto::schema::DataType(field_meta.get_data_type()));
|
||||
|
||||
auto vector_array = data_array->mutable_vectors();
|
||||
auto dim = field_meta.get_dim();
|
||||
vector_array->set_dim(dim);
|
||||
switch (data_type) {
|
||||
case DataType::VECTOR_FLOAT: {
|
||||
auto length = count * dim;
|
||||
auto obj = vector_array->mutable_float_vector();
|
||||
obj->mutable_data()->Resize(length, 0);
|
||||
break;
|
||||
}
|
||||
case DataType::VECTOR_BINARY: {
|
||||
AssertInfo(dim % 8 == 0, "Binary vector field dimension is not a multiple of 8");
|
||||
auto num_bytes = count * dim / 8;
|
||||
auto obj = vector_array->mutable_binary_vector();
|
||||
obj->resize(num_bytes);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
}
|
||||
return data_array;
|
||||
}
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
CreateScalarDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
|
|
|
@ -36,6 +36,12 @@ GetSizeOfIdArray(const IdArray& data);
|
|||
|
||||
// Note: this is a temporary solution.
|
||||
// Modify the bulk subscript implementation to make the process clearer.
|
||||
// Returns a DataArray for the scalar field described by `field_meta`,
// sized to hold `count` default values.
std::unique_ptr<DataArray>
|
||||
CreateScalarDataArray(int64_t count, const FieldMeta& field_meta);
|
||||
|
||||
// Returns a DataArray for the vector field described by `field_meta`,
// sized to hold `count` vectors.
std::unique_ptr<DataArray>
|
||||
CreateVectorDataArray(int64_t count, const FieldMeta& field_meta);
|
||||
|
||||
// Returns a DataArray for a scalar field built from the raw buffer `data_raw`.
// NOTE(review): presumably `data_raw` points at `count` elements of the
// field's data type -- confirm against the definition.
std::unique_ptr<DataArray>
|
||||
CreateScalarDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta);
|
||||
|
||||
|
|
Loading…
Reference in New Issue