fix: Use int32 when creating array index for element type int8/int16 (#41186)

issue: #41172 
pr: #41185 
Elements of type int8 or int16 in an Array are encoded as int32, so we
should parse them as int32 when creating the index.

Signed-off-by: sunby <sunbingyi1992@gmail.com>
pull/41221/head
Bingyi Sun 2025-04-10 11:08:26 +08:00 committed by GitHub
parent b7858d24a0
commit daa910af6f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 11 additions and 2 deletions

View File

@@ -23,6 +23,7 @@
#include <boost/uuid/random_generator.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <cstddef>
#include <type_traits>
#include <vector>
#include "InvertedIndexTantivy.h"
@@ -572,6 +573,10 @@ template <typename T>
void
InvertedIndexTantivy<T>::build_index_for_array(
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas) {
using ElementType = std::conditional_t<std::is_same<T, int8_t>::value ||
std::is_same<T, int16_t>::value,
int32_t,
T>;
int64_t offset = 0;
for (const auto& data : field_datas) {
auto n = data->get_num_rows();
@@ -584,12 +589,16 @@ InvertedIndexTantivy<T>::build_index_for_array(
auto length = data->is_valid(i) ? array_column[i].length() : 0;
if (!inverted_index_single_segment_) {
wrapper_->template add_multi_data(
reinterpret_cast<const T*>(array_column[i].data()),
reinterpret_cast<const ElementType*>(
array_column[i].data()),
length,
offset++);
} else {
wrapper_->template add_multi_data_by_single_segment_writer(
reinterpret_cast<const T*>(array_column[i].data()), length);
reinterpret_cast<const ElementType*>(
array_column[i].data()),
length);
offset++;
}
}
}