fix: not check has_value before get value in JSON ()

https://github.com/milvus-io/milvus/issues/36236
also: https://github.com/milvus-io/milvus/issues/37113

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>
pull/37160/head
smellthemoon 2024-10-25 17:19:28 +08:00 committed by GitHub
parent d7b2906318
commit 44ddcb5a63
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 43 additions and 6 deletions

View File

@ -100,7 +100,7 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
if (element_count == 0) {
return;
}
null_count = array->null_count();
null_count_ = array->null_count();
switch (data_type_) {
case DataType::BOOL: {
AssertInfo(array->type()->id() == arrow::Type::type::BOOL,
@ -195,6 +195,8 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
return FillFieldData(values.data(), element_count);
}
case DataType::JSON: {
// The code here is not referenced.
// A subclass named FieldDataJsonImpl is implemented, which overloads this function.
AssertInfo(array->type()->id() == arrow::Type::type::BINARY,
"inconsistent data type");
auto json_array =

View File

@ -484,7 +484,7 @@ class FieldDataImpl : public FieldDataBase {
int64_t
get_null_count() const override {
std::shared_lock lck(tell_mutex_);
return null_count;
return null_count_;
}
bool
@ -507,7 +507,7 @@ class FieldDataImpl : public FieldDataBase {
// number of elements data_ can hold
int64_t num_rows_;
mutable std::shared_mutex num_rows_mutex_;
int64_t null_count{0};
int64_t null_count_{0};
// number of actual elements in data_
size_t length_{};
mutable std::shared_mutex tell_mutex_;
@ -616,6 +616,7 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
if (n == 0) {
return;
}
null_count_ = array->null_count();
std::lock_guard lck(tell_mutex_);
if (length_ + n > get_num_rows()) {
@ -624,8 +625,11 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
auto i = 0;
for (const auto& json : *array) {
data_[length_ + i] = Json(simdjson::padded_string(json.value()));
i++;
if (!json.has_value()) {
i++;
continue;
}
data_[length_ + i++] = Json(simdjson::padded_string(json.value()));
}
if (IsNullable()) {
auto valid_data = array->null_bitmap_data();

View File

@ -273,10 +273,12 @@ BaseEventData::Serialize() {
auto string_view =
static_cast<const Json*>(field_data->RawValue(offset))
->data();
auto size =
field_data->is_valid(offset) ? string_view.size() : -1;
payload_writer->add_one_binary_payload(
reinterpret_cast<const uint8_t*>(
std::string(string_view).c_str()),
string_view.size());
size);
}
break;
}

View File

@ -765,3 +765,32 @@ TEST(storage, InsertDataStringArrayNullable) {
ASSERT_EQ(*new_payload->ValidData(), *valid_data);
delete[] valid_data;
}
TEST(storage, InsertDataJsonNullable) {
FixedVector<Json> data = {Json(),
Json(simdjson::padded_string(std::string("A")))};
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::JSON, true);
uint8_t* valid_data = new uint8_t[1]{0x00};
field_data->FillFieldData(data.data(), valid_data, data.size());
storage::InsertData insert_data(field_data);
storage::FieldDataMeta field_data_meta{100, 101, 102, 103};
insert_data.SetFieldDataMeta(field_data_meta);
insert_data.SetTimestamps(0, 100);
auto serialized_bytes = insert_data.Serialize(storage::StorageType::Remote);
std::shared_ptr<uint8_t[]> serialized_data_ptr(serialized_bytes.data(),
[&](uint8_t*) {});
auto new_insert_data = storage::DeserializeFileData(
serialized_data_ptr, serialized_bytes.size());
ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType);
ASSERT_EQ(new_insert_data->GetTimeRage(),
std::make_pair(Timestamp(0), Timestamp(100)));
auto new_payload = new_insert_data->GetFieldData();
ASSERT_EQ(new_payload->get_data_type(), storage::DataType::JSON);
ASSERT_EQ(new_payload->get_num_rows(), data.size());
ASSERT_EQ(new_payload->get_null_count(), 2);
ASSERT_EQ(*new_payload->ValidData(), *valid_data);
delete[] valid_data;
}