fix: not check has_value before get value in JSON (#37128)

https://github.com/milvus-io/milvus/issues/36236
also: https://github.com/milvus-io/milvus/issues/37113

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>
pull/37160/head
smellthemoon 2024-10-25 17:19:28 +08:00 committed by GitHub
parent d7b2906318
commit 44ddcb5a63
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 43 additions and 6 deletions

View File

@ -100,7 +100,7 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
if (element_count == 0) { if (element_count == 0) {
return; return;
} }
null_count = array->null_count(); null_count_ = array->null_count();
switch (data_type_) { switch (data_type_) {
case DataType::BOOL: { case DataType::BOOL: {
AssertInfo(array->type()->id() == arrow::Type::type::BOOL, AssertInfo(array->type()->id() == arrow::Type::type::BOOL,
@ -195,6 +195,8 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
return FillFieldData(values.data(), element_count); return FillFieldData(values.data(), element_count);
} }
case DataType::JSON: { case DataType::JSON: {
// The code here is not referenced.
// A subclass named FieldDataJsonImpl is implemented, which overloads this function.
AssertInfo(array->type()->id() == arrow::Type::type::BINARY, AssertInfo(array->type()->id() == arrow::Type::type::BINARY,
"inconsistent data type"); "inconsistent data type");
auto json_array = auto json_array =

View File

@ -484,7 +484,7 @@ class FieldDataImpl : public FieldDataBase {
int64_t int64_t
get_null_count() const override { get_null_count() const override {
std::shared_lock lck(tell_mutex_); std::shared_lock lck(tell_mutex_);
return null_count; return null_count_;
} }
bool bool
@ -507,7 +507,7 @@ class FieldDataImpl : public FieldDataBase {
// number of elements data_ can hold // number of elements data_ can hold
int64_t num_rows_; int64_t num_rows_;
mutable std::shared_mutex num_rows_mutex_; mutable std::shared_mutex num_rows_mutex_;
int64_t null_count{0}; int64_t null_count_{0};
// number of actual elements in data_ // number of actual elements in data_
size_t length_{}; size_t length_{};
mutable std::shared_mutex tell_mutex_; mutable std::shared_mutex tell_mutex_;
@ -616,6 +616,7 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
if (n == 0) { if (n == 0) {
return; return;
} }
null_count_ = array->null_count();
std::lock_guard lck(tell_mutex_); std::lock_guard lck(tell_mutex_);
if (length_ + n > get_num_rows()) { if (length_ + n > get_num_rows()) {
@ -624,8 +625,11 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
auto i = 0; auto i = 0;
for (const auto& json : *array) { for (const auto& json : *array) {
data_[length_ + i] = Json(simdjson::padded_string(json.value())); if (!json.has_value()) {
i++; i++;
continue;
}
data_[length_ + i++] = Json(simdjson::padded_string(json.value()));
} }
if (IsNullable()) { if (IsNullable()) {
auto valid_data = array->null_bitmap_data(); auto valid_data = array->null_bitmap_data();

View File

@ -273,10 +273,12 @@ BaseEventData::Serialize() {
auto string_view = auto string_view =
static_cast<const Json*>(field_data->RawValue(offset)) static_cast<const Json*>(field_data->RawValue(offset))
->data(); ->data();
auto size =
field_data->is_valid(offset) ? string_view.size() : -1;
payload_writer->add_one_binary_payload( payload_writer->add_one_binary_payload(
reinterpret_cast<const uint8_t*>( reinterpret_cast<const uint8_t*>(
std::string(string_view).c_str()), std::string(string_view).c_str()),
string_view.size()); size);
} }
break; break;
} }

View File

@ -765,3 +765,32 @@ TEST(storage, InsertDataStringArrayNullable) {
ASSERT_EQ(*new_payload->ValidData(), *valid_data); ASSERT_EQ(*new_payload->ValidData(), *valid_data);
delete[] valid_data; delete[] valid_data;
} }
TEST(storage, InsertDataJsonNullable) {
FixedVector<Json> data = {Json(),
Json(simdjson::padded_string(std::string("A")))};
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::JSON, true);
uint8_t* valid_data = new uint8_t[1]{0x00};
field_data->FillFieldData(data.data(), valid_data, data.size());
storage::InsertData insert_data(field_data);
storage::FieldDataMeta field_data_meta{100, 101, 102, 103};
insert_data.SetFieldDataMeta(field_data_meta);
insert_data.SetTimestamps(0, 100);
auto serialized_bytes = insert_data.Serialize(storage::StorageType::Remote);
std::shared_ptr<uint8_t[]> serialized_data_ptr(serialized_bytes.data(),
[&](uint8_t*) {});
auto new_insert_data = storage::DeserializeFileData(
serialized_data_ptr, serialized_bytes.size());
ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType);
ASSERT_EQ(new_insert_data->GetTimeRage(),
std::make_pair(Timestamp(0), Timestamp(100)));
auto new_payload = new_insert_data->GetFieldData();
ASSERT_EQ(new_payload->get_data_type(), storage::DataType::JSON);
ASSERT_EQ(new_payload->get_num_rows(), data.size());
ASSERT_EQ(new_payload->get_null_count(), 2);
ASSERT_EQ(*new_payload->ValidData(), *valid_data);
delete[] valid_data;
}