mirror of https://github.com/milvus-io/milvus.git
enhance: support null value in index (#35238)
#31728

---------

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>
pull/35525/head
parent
f87af9bc54
commit
80dbe87759
|
@ -69,3 +69,5 @@ const int64_t DEFAULT_MAX_OUTPUT_SIZE = 67108864; // bytes, 64MB
|
|||
const int64_t DEFAULT_CHUNK_MANAGER_REQUEST_TIMEOUT_MS = 10000;
|
||||
|
||||
const int64_t DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND = 500;
|
||||
|
||||
const size_t MARISA_NULL_KEY_ID = -1;
|
||||
|
|
|
@ -69,8 +69,8 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
|
|||
ssize_t byte_count = (element_count + 7) / 8;
|
||||
// Note: if 'nullable == true` and valid_data is nullptr
|
||||
// means null_count == 0, will fill it with 0xFF
|
||||
if (valid_data == nullptr) {
|
||||
valid_data_.resize(byte_count, 0xFF);
|
||||
if (!valid_data) {
|
||||
valid_data_.assign(byte_count, 0xFF);
|
||||
} else {
|
||||
std::copy_n(valid_data, byte_count, valid_data_.data());
|
||||
}
|
||||
|
|
|
@ -476,7 +476,7 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
|
|||
if (IsNullable()) {
|
||||
auto valid_data = array->null_bitmap_data();
|
||||
if (valid_data == nullptr) {
|
||||
valid_data_.resize((n + 7) / 8, 0xFF);
|
||||
valid_data_.assign((n + 7) / 8, 0xFF);
|
||||
} else {
|
||||
std::copy_n(valid_data, (n + 7) / 8, valid_data_.data());
|
||||
}
|
||||
|
|
|
@ -69,11 +69,14 @@ BitmapIndex<T>::Build(size_t n, const T* data) {
|
|||
PanicInfo(DataIsEmpty, "BitmapIndex can not build null values");
|
||||
}
|
||||
|
||||
total_num_rows_ = n;
|
||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
||||
|
||||
T* p = const_cast<T*>(data);
|
||||
for (int i = 0; i < n; ++i, ++p) {
|
||||
data_[*p].add(i);
|
||||
valid_bitset.set(i);
|
||||
}
|
||||
total_num_rows_ = n;
|
||||
|
||||
if (data_.size() < DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) {
|
||||
for (auto it = data_.begin(); it != data_.end(); ++it) {
|
||||
|
@ -95,8 +98,11 @@ BitmapIndex<T>::BuildPrimitiveField(
|
|||
for (const auto& data : field_datas) {
|
||||
auto slice_row_num = data->get_num_rows();
|
||||
for (size_t i = 0; i < slice_row_num; ++i) {
|
||||
auto val = reinterpret_cast<const T*>(data->RawValue(i));
|
||||
data_[*val].add(offset);
|
||||
if (data->is_valid(i)) {
|
||||
auto val = reinterpret_cast<const T*>(data->RawValue(i));
|
||||
data_[*val].add(offset);
|
||||
valid_bitset.set(offset);
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
@ -114,6 +120,7 @@ BitmapIndex<T>::BuildWithFieldData(
|
|||
PanicInfo(DataIsEmpty, "scalar bitmap index can not build null values");
|
||||
}
|
||||
total_num_rows_ = total_num_rows;
|
||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
||||
|
||||
switch (schema_.data_type()) {
|
||||
case proto::schema::DataType::Bool:
|
||||
|
@ -151,12 +158,14 @@ BitmapIndex<T>::BuildArrayField(const std::vector<FieldDataPtr>& field_datas) {
|
|||
for (const auto& data : field_datas) {
|
||||
auto slice_row_num = data->get_num_rows();
|
||||
for (size_t i = 0; i < slice_row_num; ++i) {
|
||||
auto array =
|
||||
reinterpret_cast<const milvus::Array*>(data->RawValue(i));
|
||||
|
||||
for (size_t j = 0; j < array->length(); ++j) {
|
||||
auto val = static_cast<T>(array->template get_data<GetType>(j));
|
||||
data_[val].add(offset);
|
||||
if (data->is_valid(i)) {
|
||||
auto array =
|
||||
reinterpret_cast<const milvus::Array*>(data->RawValue(i));
|
||||
for (size_t j = 0; j < array->length(); ++j) {
|
||||
auto val = array->template get_data<T>(j);
|
||||
data_[val].add(offset);
|
||||
}
|
||||
valid_bitset.set(offset);
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
|
@ -330,6 +339,9 @@ BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
|
|||
} else {
|
||||
data_[key] = value;
|
||||
}
|
||||
for (const auto& v : value) {
|
||||
valid_bitset.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -355,6 +367,9 @@ BitmapIndex<std::string>::DeserializeIndexData(const uint8_t* data_ptr,
|
|||
} else {
|
||||
data_[key] = value;
|
||||
}
|
||||
for (const auto& v : value) {
|
||||
valid_bitset.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -367,6 +382,7 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
|
|||
index_meta_buffer->size);
|
||||
auto index_length = index_meta.first;
|
||||
total_num_rows_ = index_meta.second;
|
||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
||||
|
||||
auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA);
|
||||
DeserializeIndexData(index_data_buffer->data.get(), index_length);
|
||||
|
@ -389,7 +405,7 @@ BitmapIndex<T>::Load(milvus::tracer::TraceContext ctx, const Config& config) {
|
|||
AssembleIndexDatas(index_datas);
|
||||
BinarySet binary_set;
|
||||
for (auto& [key, data] : index_datas) {
|
||||
auto size = data->Size();
|
||||
auto size = data->DataSize();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
auto buf = std::shared_ptr<uint8_t[]>(
|
||||
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||
|
@ -442,6 +458,8 @@ BitmapIndex<T>::NotIn(const size_t n, const T* values) {
|
|||
}
|
||||
}
|
||||
}
|
||||
// NotIn(null) and In(null) are both false, so mask the result with the not-null bitmap
|
||||
res &= valid_bitset;
|
||||
return res;
|
||||
} else {
|
||||
TargetBitmap res(total_num_rows_, false);
|
||||
|
@ -452,10 +470,31 @@ BitmapIndex<T>::NotIn(const size_t n, const T* values) {
|
|||
}
|
||||
}
|
||||
res.flip();
|
||||
// NotIn(null) and In(null) are both false, so mask the result with the not-null bitmap
|
||||
res &= valid_bitset;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const TargetBitmap
|
||||
BitmapIndex<T>::IsNull() {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap res(total_num_rows_, true);
|
||||
res &= valid_bitset;
|
||||
res.flip();
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const TargetBitmap
|
||||
BitmapIndex<T>::IsNotNull() {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap res(total_num_rows_, true);
|
||||
res &= valid_bitset;
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
TargetBitmap
|
||||
BitmapIndex<T>::RangeForBitset(const T value, const OpType op) {
|
||||
|
|
|
@ -82,6 +82,12 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
const TargetBitmap
|
||||
NotIn(size_t n, const T* values) override;
|
||||
|
||||
const TargetBitmap
|
||||
IsNull() override;
|
||||
|
||||
const TargetBitmap
|
||||
IsNotNull() override;
|
||||
|
||||
const TargetBitmap
|
||||
Range(T value, OpType op) override;
|
||||
|
||||
|
@ -205,6 +211,9 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
size_t total_num_rows_{0};
|
||||
proto::schema::FieldSchema schema_;
|
||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||
|
||||
// valid_bitset marks the non-null rows; it is kept to speed up the NotIn, IsNull and IsNotNull operations
|
||||
TargetBitmap valid_bitset;
|
||||
};
|
||||
|
||||
} // namespace index
|
||||
|
|
|
@ -358,7 +358,7 @@ HybridScalarIndex<T>::Load(milvus::tracer::TraceContext ctx,
|
|||
AssembleIndexDatas(index_datas);
|
||||
BinarySet binary_set;
|
||||
for (auto& [key, data] : index_datas) {
|
||||
auto size = data->Size();
|
||||
auto size = data->DataSize();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
auto buf = std::shared_ptr<uint8_t[]>(
|
||||
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||
|
|
|
@ -87,6 +87,16 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
return internal_index_->NotIn(n, values);
|
||||
}
|
||||
|
||||
// Forwards the null-row query to the wrapped internal index.
const TargetBitmap
IsNull() override {
    auto& delegate = *internal_index_;
    return delegate.IsNull();
}
|
||||
|
||||
// Forwards the not-null-row query to the wrapped internal index.
const TargetBitmap
IsNotNull() override {
    auto& delegate = *internal_index_;
    return delegate.IsNotNull();
}
|
||||
|
||||
const TargetBitmap
|
||||
Query(const DatasetPtr& dataset) override {
|
||||
return internal_index_->Query(dataset);
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include <boost/filesystem.hpp>
|
||||
#include <boost/uuid/random_generator.hpp>
|
||||
#include <boost/uuid/uuid_io.hpp>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "InvertedIndexTantivy.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
@ -105,8 +107,14 @@ InvertedIndexTantivy<T>::finish() {
|
|||
template <typename T>
|
||||
BinarySet
|
||||
InvertedIndexTantivy<T>::Serialize(const Config& config) {
|
||||
auto index_valid_data_length = null_offset.size() * sizeof(size_t);
|
||||
std::shared_ptr<uint8_t[]> index_valid_data(
|
||||
new uint8_t[index_valid_data_length]);
|
||||
memcpy(index_valid_data.get(), null_offset.data(), index_valid_data_length);
|
||||
BinarySet res_set;
|
||||
|
||||
res_set.Append(
|
||||
"index_null_offset", index_valid_data, index_valid_data_length);
|
||||
milvus::Disassemble(res_set);
|
||||
return res_set;
|
||||
}
|
||||
|
||||
|
@ -137,7 +145,8 @@ InvertedIndexTantivy<T>::Upload(const Config& config) {
|
|||
for (auto& file : remote_paths_to_size) {
|
||||
ret.Append(file.first, nullptr, file.second);
|
||||
}
|
||||
|
||||
auto binary_set = Serialize(config);
|
||||
mem_file_manager_->AddFile(binary_set);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -173,6 +182,26 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
|
|||
files_value.end());
|
||||
disk_file_manager_->CacheIndexToDisk(files_value);
|
||||
wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
|
||||
auto index_valid_data_file =
|
||||
mem_file_manager_->GetRemoteIndexObjectPrefix() +
|
||||
std::string("/index_null_offset");
|
||||
std::vector<std::string> file;
|
||||
file.push_back(index_valid_data_file);
|
||||
auto index_datas = mem_file_manager_->LoadIndexToMemory(file);
|
||||
AssembleIndexDatas(index_datas);
|
||||
BinarySet binary_set;
|
||||
for (auto& [key, data] : index_datas) {
|
||||
auto size = data->DataSize();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
auto buf = std::shared_ptr<uint8_t[]>(
|
||||
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||
binary_set.Append(key, buf, size);
|
||||
}
|
||||
auto index_valid_data = binary_set.GetByName("index_null_offset");
|
||||
null_offset.resize((size_t)index_valid_data->size / sizeof(size_t));
|
||||
memcpy(null_offset.data(),
|
||||
index_valid_data->data.get(),
|
||||
(size_t)index_valid_data->size);
|
||||
}
|
||||
|
||||
inline void
|
||||
|
@ -212,6 +241,27 @@ InvertedIndexTantivy<T>::In(size_t n, const T* values) {
|
|||
return bitset;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const TargetBitmap
|
||||
InvertedIndexTantivy<T>::IsNull() {
|
||||
TargetBitmap bitset(Count());
|
||||
|
||||
for (size_t i = 0; i < null_offset.size(); ++i) {
|
||||
bitset.set(null_offset[i]);
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const TargetBitmap
|
||||
InvertedIndexTantivy<T>::IsNotNull() {
|
||||
TargetBitmap bitset(Count(), true);
|
||||
for (size_t i = 0; i < null_offset.size(); ++i) {
|
||||
bitset.reset(null_offset[i]);
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const TargetBitmap
|
||||
InvertedIndexTantivy<T>::InApplyFilter(
|
||||
|
@ -242,6 +292,9 @@ InvertedIndexTantivy<T>::NotIn(size_t n, const T* values) {
|
|||
auto array = wrapper_->term_query(values[i]);
|
||||
apply_hits(bitset, array, false);
|
||||
}
|
||||
for (size_t i = 0; i < null_offset.size(); ++i) {
|
||||
bitset.reset(null_offset[i]);
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
|
||||
|
@ -378,6 +431,13 @@ template <typename T>
|
|||
void
|
||||
InvertedIndexTantivy<T>::BuildWithFieldData(
|
||||
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas) {
|
||||
if (schema_.nullable()) {
|
||||
int64_t total = 0;
|
||||
for (const auto& data : field_datas) {
|
||||
total += data->get_null_count();
|
||||
}
|
||||
null_offset.reserve(total);
|
||||
}
|
||||
switch (schema_.data_type()) {
|
||||
case proto::schema::DataType::Bool:
|
||||
case proto::schema::DataType::Int8:
|
||||
|
@ -390,6 +450,17 @@ InvertedIndexTantivy<T>::BuildWithFieldData(
|
|||
case proto::schema::DataType::VarChar: {
|
||||
for (const auto& data : field_datas) {
|
||||
auto n = data->get_num_rows();
|
||||
if (schema_.nullable()) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (!data->is_valid(i)) {
|
||||
null_offset.push_back(i);
|
||||
}
|
||||
wrapper_->add_multi_data<T>(
|
||||
static_cast<const T*>(data->RawValue(i)),
|
||||
data->is_valid(i));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
wrapper_->add_data<T>(static_cast<const T*>(data->Data()), n);
|
||||
}
|
||||
break;
|
||||
|
@ -417,9 +488,12 @@ InvertedIndexTantivy<T>::build_index_for_array(
|
|||
for (int64_t i = 0; i < n; i++) {
|
||||
assert(array_column[i].get_element_type() ==
|
||||
static_cast<DataType>(schema_.element_type()));
|
||||
if (schema_.nullable() && !data->is_valid(i)) {
|
||||
null_offset.push_back(i);
|
||||
}
|
||||
auto length = data->is_valid(i) ? array_column[i].length() : 0;
|
||||
wrapper_->template add_multi_data(
|
||||
reinterpret_cast<const T*>(array_column[i].data()),
|
||||
array_column[i].length());
|
||||
reinterpret_cast<const T*>(array_column[i].data()), length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -435,12 +509,16 @@ InvertedIndexTantivy<std::string>::build_index_for_array(
|
|||
Assert(IsStringDataType(array_column[i].get_element_type()));
|
||||
Assert(IsStringDataType(
|
||||
static_cast<DataType>(schema_.element_type())));
|
||||
if (schema_.nullable() && !data->is_valid(i)) {
|
||||
null_offset.push_back(i);
|
||||
}
|
||||
std::vector<std::string> output;
|
||||
for (int64_t j = 0; j < array_column[i].length(); j++) {
|
||||
output.push_back(
|
||||
array_column[i].template get_data<std::string>(j));
|
||||
}
|
||||
wrapper_->template add_multi_data(output.data(), output.size());
|
||||
auto length = data->is_valid(i) ? output.size() : 0;
|
||||
wrapper_->template add_multi_data(output.data(), length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/RegexQuery.h"
|
||||
#include "index/Index.h"
|
||||
#include "storage/FileManager.h"
|
||||
|
@ -80,12 +82,8 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
const void* values,
|
||||
const Config& config = {}) override;
|
||||
|
||||
/*
|
||||
* deprecated.
|
||||
* TODO: why not remove this?
|
||||
*/
|
||||
BinarySet
|
||||
Serialize(const Config& config /* not used */) override;
|
||||
Serialize(const Config& config) override;
|
||||
|
||||
BinarySet
|
||||
Upload(const Config& config = {}) override;
|
||||
|
@ -101,6 +99,12 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
const TargetBitmap
|
||||
In(size_t n, const T* values) override;
|
||||
|
||||
const TargetBitmap
|
||||
IsNull() override;
|
||||
|
||||
const TargetBitmap
|
||||
IsNotNull() override;
|
||||
|
||||
const TargetBitmap
|
||||
InApplyFilter(
|
||||
size_t n,
|
||||
|
@ -193,5 +197,9 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
*/
|
||||
MemFileManagerPtr mem_file_manager_;
|
||||
DiskFileManagerPtr disk_file_manager_;
|
||||
|
||||
// all data need to be built to align the offset
|
||||
// so need to store null_offset in inverted index additionally
|
||||
std::vector<size_t> null_offset{};
|
||||
};
|
||||
} // namespace milvus::index
|
||||
|
|
|
@ -82,6 +82,12 @@ class ScalarIndex : public IndexBase {
|
|||
virtual const TargetBitmap
|
||||
In(size_t n, const T* values) = 0;
|
||||
|
||||
virtual const TargetBitmap
|
||||
IsNull() = 0;
|
||||
|
||||
virtual const TargetBitmap
|
||||
IsNotNull() = 0;
|
||||
|
||||
virtual const TargetBitmap
|
||||
InApplyFilter(size_t n,
|
||||
const T* values,
|
||||
|
|
|
@ -68,10 +68,13 @@ ScalarIndexSort<T>::Build(size_t n, const T* values) {
|
|||
PanicInfo(DataIsEmpty, "ScalarIndexSort cannot build null values!");
|
||||
}
|
||||
data_.reserve(n);
|
||||
total_num_rows_ = n;
|
||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
||||
idx_to_offsets_.resize(n);
|
||||
T* p = const_cast<T*>(values);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
data_.emplace_back(IndexStructure(*p++, i));
|
||||
valid_bitset.set(i);
|
||||
}
|
||||
std::sort(data_.begin(), data_.end());
|
||||
for (size_t i = 0; i < data_.size(); ++i) {
|
||||
|
@ -84,28 +87,33 @@ template <typename T>
|
|||
void
|
||||
ScalarIndexSort<T>::BuildWithFieldData(
|
||||
const std::vector<milvus::FieldDataPtr>& field_datas) {
|
||||
int64_t total_num_rows = 0;
|
||||
int64_t length = 0;
|
||||
for (const auto& data : field_datas) {
|
||||
total_num_rows += data->get_num_rows();
|
||||
total_num_rows_ += data->get_num_rows();
|
||||
length += data->get_num_rows() - data->get_null_count();
|
||||
}
|
||||
if (total_num_rows == 0) {
|
||||
if (length == 0) {
|
||||
PanicInfo(DataIsEmpty, "ScalarIndexSort cannot build null values!");
|
||||
}
|
||||
|
||||
data_.reserve(total_num_rows);
|
||||
data_.reserve(length);
|
||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
||||
int64_t offset = 0;
|
||||
for (const auto& data : field_datas) {
|
||||
auto slice_num = data->get_num_rows();
|
||||
for (size_t i = 0; i < slice_num; ++i) {
|
||||
auto value = reinterpret_cast<const T*>(data->RawValue(i));
|
||||
data_.emplace_back(IndexStructure(*value, offset));
|
||||
if (data->is_valid(i)) {
|
||||
auto value = reinterpret_cast<const T*>(data->RawValue(i));
|
||||
data_.emplace_back(IndexStructure(*value, offset));
|
||||
valid_bitset.set(offset);
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(data_.begin(), data_.end());
|
||||
idx_to_offsets_.resize(total_num_rows);
|
||||
for (size_t i = 0; i < total_num_rows; ++i) {
|
||||
idx_to_offsets_.resize(total_num_rows_);
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
idx_to_offsets_[data_[i].idx_] = i;
|
||||
}
|
||||
is_built_ = true;
|
||||
|
@ -124,9 +132,13 @@ ScalarIndexSort<T>::Serialize(const Config& config) {
|
|||
auto index_size = data_.size();
|
||||
memcpy(index_length.get(), &index_size, sizeof(size_t));
|
||||
|
||||
std::shared_ptr<uint8_t[]> index_num_rows(new uint8_t[sizeof(size_t)]);
|
||||
memcpy(index_num_rows.get(), &total_num_rows_, sizeof(size_t));
|
||||
|
||||
BinarySet res_set;
|
||||
res_set.Append("index_data", index_data, index_data_size);
|
||||
res_set.Append("index_length", index_length, sizeof(size_t));
|
||||
res_set.Append("index_num_rows", index_num_rows, sizeof(size_t));
|
||||
|
||||
milvus::Disassemble(res_set);
|
||||
|
||||
|
@ -158,11 +170,18 @@ ScalarIndexSort<T>::LoadWithoutAssemble(const BinarySet& index_binary,
|
|||
|
||||
auto index_data = index_binary.GetByName("index_data");
|
||||
data_.resize(index_size);
|
||||
idx_to_offsets_.resize(index_size);
|
||||
auto index_num_rows = index_binary.GetByName("index_num_rows");
|
||||
memcpy(&total_num_rows_,
|
||||
index_num_rows->data.get(),
|
||||
(size_t)index_num_rows->size);
|
||||
idx_to_offsets_.resize(total_num_rows_);
|
||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
||||
memcpy(data_.data(), index_data->data.get(), (size_t)index_data->size);
|
||||
for (size_t i = 0; i < data_.size(); ++i) {
|
||||
idx_to_offsets_[data_[i].idx_] = i;
|
||||
valid_bitset.set(data_[i].idx_);
|
||||
}
|
||||
|
||||
is_built_ = true;
|
||||
}
|
||||
|
||||
|
@ -185,7 +204,7 @@ ScalarIndexSort<T>::Load(milvus::tracer::TraceContext ctx,
|
|||
AssembleIndexDatas(index_datas);
|
||||
BinarySet binary_set;
|
||||
for (auto& [key, data] : index_datas) {
|
||||
auto size = data->Size();
|
||||
auto size = data->DataSize();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
auto buf = std::shared_ptr<uint8_t[]>(
|
||||
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||
|
@ -199,7 +218,7 @@ template <typename T>
|
|||
const TargetBitmap
|
||||
ScalarIndexSort<T>::In(const size_t n, const T* values) {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap bitset(data_.size());
|
||||
TargetBitmap bitset(Count());
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
auto lb = std::lower_bound(
|
||||
data_.begin(), data_.end(), IndexStructure<T>(*(values + i)));
|
||||
|
@ -221,7 +240,7 @@ template <typename T>
|
|||
const TargetBitmap
|
||||
ScalarIndexSort<T>::NotIn(const size_t n, const T* values) {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap bitset(data_.size(), true);
|
||||
TargetBitmap bitset(Count(), true);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
auto lb = std::lower_bound(
|
||||
data_.begin(), data_.end(), IndexStructure<T>(*(values + i)));
|
||||
|
@ -236,6 +255,27 @@ ScalarIndexSort<T>::NotIn(const size_t n, const T* values) {
|
|||
bitset[lb->idx_] = false;
|
||||
}
|
||||
}
|
||||
// NotIn(null) and In(null) are both false, so mask the result with the not-null bitmap
|
||||
bitset &= valid_bitset;
|
||||
return bitset;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const TargetBitmap
|
||||
ScalarIndexSort<T>::IsNull() {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap bitset(total_num_rows_, true);
|
||||
bitset &= valid_bitset;
|
||||
bitset.flip();
|
||||
return bitset;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const TargetBitmap
|
||||
ScalarIndexSort<T>::IsNotNull() {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap bitset(total_num_rows_, true);
|
||||
bitset &= valid_bitset;
|
||||
return bitset;
|
||||
}
|
||||
|
||||
|
@ -243,7 +283,7 @@ template <typename T>
|
|||
const TargetBitmap
|
||||
ScalarIndexSort<T>::Range(const T value, const OpType op) {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap bitset(data_.size());
|
||||
TargetBitmap bitset(Count());
|
||||
auto lb = data_.begin();
|
||||
auto ub = data_.end();
|
||||
if (ShouldSkip(value, value, op)) {
|
||||
|
@ -283,7 +323,7 @@ ScalarIndexSort<T>::Range(T lower_bound_value,
|
|||
T upper_bound_value,
|
||||
bool ub_inclusive) {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap bitset(data_.size());
|
||||
TargetBitmap bitset(Count());
|
||||
if (lower_bound_value > upper_bound_value ||
|
||||
(lower_bound_value == upper_bound_value &&
|
||||
!(lb_inclusive && ub_inclusive))) {
|
||||
|
|
|
@ -47,7 +47,7 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
|
||||
int64_t
|
||||
Count() override {
|
||||
return data_.size();
|
||||
return total_num_rows_;
|
||||
}
|
||||
|
||||
ScalarIndexType
|
||||
|
@ -67,6 +67,12 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
const TargetBitmap
|
||||
NotIn(size_t n, const T* values) override;
|
||||
|
||||
const TargetBitmap
|
||||
IsNull() override;
|
||||
|
||||
const TargetBitmap
|
||||
IsNotNull() override;
|
||||
|
||||
const TargetBitmap
|
||||
Range(T value, OpType op) override;
|
||||
|
||||
|
@ -120,6 +126,9 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||
std::vector<int32_t> idx_to_offsets_; // used to retrieve.
|
||||
std::vector<IndexStructure<T>> data_;
|
||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||
size_t total_num_rows_{0};
|
||||
// valid_bitset marks the non-null rows; it is kept to speed up the NotIn, IsNull and IsNotNull operations
|
||||
TargetBitmap valid_bitset;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -33,67 +33,74 @@ SkipIndex::LoadPrimitive(milvus::FieldId field_id,
|
|||
int64_t chunk_id,
|
||||
milvus::DataType data_type,
|
||||
const void* chunk_data,
|
||||
const bool* valid_data,
|
||||
int64_t count) {
|
||||
auto chunkMetrics = std::make_unique<FieldChunkMetrics>();
|
||||
|
||||
if (count > 0) {
|
||||
chunkMetrics->hasValue_ = true;
|
||||
switch (data_type) {
|
||||
case DataType::INT8: {
|
||||
const int8_t* typedData =
|
||||
static_cast<const int8_t*>(chunk_data);
|
||||
std::pair<int8_t, int8_t> minMax =
|
||||
ProcessFieldMetrics<int8_t>(typedData, count);
|
||||
chunkMetrics->min_ = Metrics(minMax.first);
|
||||
chunkMetrics->max_ = Metrics(minMax.second);
|
||||
auto info =
|
||||
ProcessFieldMetrics<int8_t>(typedData, valid_data, count);
|
||||
chunkMetrics->min_ = Metrics(info.min_);
|
||||
chunkMetrics->max_ = Metrics(info.max_);
|
||||
chunkMetrics->null_count_ = info.null_count_;
|
||||
break;
|
||||
}
|
||||
case DataType::INT16: {
|
||||
const int16_t* typedData =
|
||||
static_cast<const int16_t*>(chunk_data);
|
||||
std::pair<int16_t, int16_t> minMax =
|
||||
ProcessFieldMetrics<int16_t>(typedData, count);
|
||||
chunkMetrics->min_ = Metrics(minMax.first);
|
||||
chunkMetrics->max_ = Metrics(minMax.second);
|
||||
auto info =
|
||||
ProcessFieldMetrics<int16_t>(typedData, valid_data, count);
|
||||
chunkMetrics->min_ = Metrics(info.min_);
|
||||
chunkMetrics->max_ = Metrics(info.max_);
|
||||
chunkMetrics->null_count_ = info.null_count_;
|
||||
break;
|
||||
}
|
||||
case DataType::INT32: {
|
||||
const int32_t* typedData =
|
||||
static_cast<const int32_t*>(chunk_data);
|
||||
std::pair<int32_t, int32_t> minMax =
|
||||
ProcessFieldMetrics<int32_t>(typedData, count);
|
||||
chunkMetrics->min_ = Metrics(minMax.first);
|
||||
chunkMetrics->max_ = Metrics(minMax.second);
|
||||
auto info =
|
||||
ProcessFieldMetrics<int32_t>(typedData, valid_data, count);
|
||||
chunkMetrics->min_ = Metrics(info.min_);
|
||||
chunkMetrics->max_ = Metrics(info.max_);
|
||||
chunkMetrics->null_count_ = info.null_count_;
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
const int64_t* typedData =
|
||||
static_cast<const int64_t*>(chunk_data);
|
||||
std::pair<int64_t, int64_t> minMax =
|
||||
ProcessFieldMetrics<int64_t>(typedData, count);
|
||||
chunkMetrics->min_ = Metrics(minMax.first);
|
||||
chunkMetrics->max_ = Metrics(minMax.second);
|
||||
auto info =
|
||||
ProcessFieldMetrics<int64_t>(typedData, valid_data, count);
|
||||
chunkMetrics->min_ = Metrics(info.min_);
|
||||
chunkMetrics->max_ = Metrics(info.max_);
|
||||
chunkMetrics->null_count_ = info.null_count_;
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
const float* typedData = static_cast<const float*>(chunk_data);
|
||||
std::pair<float, float> minMax =
|
||||
ProcessFieldMetrics<float>(typedData, count);
|
||||
chunkMetrics->min_ = Metrics(minMax.first);
|
||||
chunkMetrics->max_ = Metrics(minMax.second);
|
||||
auto info =
|
||||
ProcessFieldMetrics<float>(typedData, valid_data, count);
|
||||
chunkMetrics->min_ = Metrics(info.min_);
|
||||
chunkMetrics->max_ = Metrics(info.max_);
|
||||
chunkMetrics->null_count_ = info.null_count_;
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
const double* typedData =
|
||||
static_cast<const double*>(chunk_data);
|
||||
std::pair<double, double> minMax =
|
||||
ProcessFieldMetrics<double>(typedData, count);
|
||||
chunkMetrics->min_ = Metrics(minMax.first);
|
||||
chunkMetrics->max_ = Metrics(minMax.second);
|
||||
auto info =
|
||||
ProcessFieldMetrics<double>(typedData, valid_data, count);
|
||||
chunkMetrics->min_ = Metrics(info.min_);
|
||||
chunkMetrics->max_ = Metrics(info.max_);
|
||||
chunkMetrics->null_count_ = info.null_count_;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
chunkMetrics->hasValue_ = chunkMetrics->null_count_ == count ? false : true;
|
||||
std::unique_lock lck(mutex_);
|
||||
if (fieldChunkMetrics_.count(field_id) == 0) {
|
||||
fieldChunkMetrics_.insert(std::make_pair(
|
||||
|
@ -111,21 +118,15 @@ SkipIndex::LoadString(milvus::FieldId field_id,
|
|||
int num_rows = var_column.NumRows();
|
||||
auto chunkMetrics = std::make_unique<FieldChunkMetrics>();
|
||||
if (num_rows > 0) {
|
||||
chunkMetrics->hasValue_ = true;
|
||||
std::string_view min_string = var_column.RawAt(0);
|
||||
std::string_view max_string = var_column.RawAt(0);
|
||||
for (size_t i = 1; i < num_rows; i++) {
|
||||
const auto& val = var_column.RawAt(i);
|
||||
if (val < min_string) {
|
||||
min_string = val;
|
||||
}
|
||||
if (val > max_string) {
|
||||
max_string = val;
|
||||
}
|
||||
}
|
||||
chunkMetrics->min_ = Metrics(min_string);
|
||||
chunkMetrics->max_ = Metrics(max_string);
|
||||
auto info = ProcessStringFieldMetrics(var_column);
|
||||
chunkMetrics->min_ = Metrics(info.min_);
|
||||
chunkMetrics->max_ = Metrics(info.max_);
|
||||
chunkMetrics->null_count_ = info.null_count_;
|
||||
}
|
||||
|
||||
chunkMetrics->hasValue_ =
|
||||
chunkMetrics->null_count_ == num_rows ? false : true;
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
if (fieldChunkMetrics_.count(field_id) == 0) {
|
||||
fieldChunkMetrics_.insert(std::make_pair(
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
#include <cstddef>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/Types.h"
|
||||
|
@ -29,6 +30,7 @@ struct FieldChunkMetrics {
|
|||
Metrics min_;
|
||||
Metrics max_;
|
||||
bool hasValue_;
|
||||
int64_t null_count_;
|
||||
|
||||
FieldChunkMetrics() : hasValue_(false){};
|
||||
};
|
||||
|
@ -73,6 +75,7 @@ class SkipIndex {
|
|||
int64_t chunk_id,
|
||||
milvus::DataType data_type,
|
||||
const void* chunk_data,
|
||||
const bool* valid_data,
|
||||
int64_t count);
|
||||
|
||||
void
|
||||
|
@ -217,17 +220,43 @@ class SkipIndex {
|
|||
return should_skip;
|
||||
}
|
||||
|
||||
// todo: support some null_count_ skip
|
||||
|
||||
template <typename T>
|
||||
std::pair<T, T>
|
||||
ProcessFieldMetrics(const T* data, int64_t count) {
|
||||
struct metricInfo {
|
||||
T min_;
|
||||
T max_;
|
||||
int64_t null_count_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
metricInfo<T>
|
||||
ProcessFieldMetrics(const T* data, const bool* valid_data, int64_t count) {
|
||||
// double check to avoid a crash
|
||||
if (data == nullptr || count == 0) {
|
||||
return {T(), T()};
|
||||
}
|
||||
T minValue = data[0];
|
||||
T maxValue = data[0];
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
// find first not null value
|
||||
int64_t start = 0;
|
||||
for (int64_t i = start; i < count; i++) {
|
||||
if (valid_data != nullptr && !valid_data[i]) {
|
||||
start++;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (start > count - 1) {
|
||||
return {T(), T(), count};
|
||||
}
|
||||
T minValue = data[start];
|
||||
T maxValue = data[start];
|
||||
int64_t null_count = start;
|
||||
for (int64_t i = start; i < count; i++) {
|
||||
T value = data[i];
|
||||
if (valid_data != nullptr && !valid_data[i]) {
|
||||
null_count++;
|
||||
continue;
|
||||
}
|
||||
if (value < minValue) {
|
||||
minValue = value;
|
||||
}
|
||||
|
@ -235,7 +264,42 @@ class SkipIndex {
|
|||
maxValue = value;
|
||||
}
|
||||
}
|
||||
return {minValue, maxValue};
|
||||
return {minValue, maxValue, null_count};
|
||||
}
|
||||
|
||||
// Scans a string column and reports the smallest and largest values among the
// rows for which IsValid() is true, plus the number of rows for which it is
// false. When no row is valid (including an empty column), the result holds
// two empty views and a null count equal to the row count.
metricInfo<std::string_view>
ProcessStringFieldMetrics(
    const milvus::VariableColumn<std::string>& var_column) {
    int num_rows = var_column.NumRows();

    // Skip the leading run of null rows to locate the first usable value.
    int64_t first_valid = 0;
    while (first_valid < num_rows && !var_column.IsValid(first_valid)) {
        ++first_valid;
    }
    if (first_valid >= num_rows) {
        return {std::string_view(), std::string_view(), num_rows};
    }

    std::string_view smallest = var_column.RawAt(first_valid);
    std::string_view largest = var_column.RawAt(first_valid);
    // Every row before the first valid one has already been seen to be null.
    int64_t nulls = first_valid;
    for (int64_t row = first_valid; row < num_rows; ++row) {
        const auto& current = var_column.RawAt(row);
        if (!var_column.IsValid(row)) {
            ++nulls;
            continue;
        }
        if (current < smallest) {
            smallest = current;
        }
        if (current > largest) {
            largest = current;
        }
    }
    return {smallest, largest, nulls};
}
|
||||
|
||||
private:
|
||||
|
|
|
@ -83,23 +83,29 @@ StringIndexMarisa::BuildWithFieldData(
|
|||
for (const auto& data : field_datas) {
|
||||
auto slice_num = data->get_num_rows();
|
||||
for (int64_t i = 0; i < slice_num; ++i) {
|
||||
keyset.push_back(
|
||||
(*static_cast<const std::string*>(data->RawValue(i))).c_str());
|
||||
if (data->is_valid(i)) {
|
||||
keyset.push_back(
|
||||
(*static_cast<const std::string*>(data->RawValue(i)))
|
||||
.c_str());
|
||||
}
|
||||
}
|
||||
total_num_rows += slice_num;
|
||||
}
|
||||
trie_.build(keyset);
|
||||
|
||||
// fill str_ids_
|
||||
str_ids_.resize(total_num_rows);
|
||||
str_ids_.resize(total_num_rows, MARISA_NULL_KEY_ID);
|
||||
int64_t offset = 0;
|
||||
for (const auto& data : field_datas) {
|
||||
auto slice_num = data->get_num_rows();
|
||||
for (int64_t i = 0; i < slice_num; ++i) {
|
||||
auto str_id =
|
||||
lookup(*static_cast<const std::string*>(data->RawValue(i)));
|
||||
AssertInfo(valid_str_id(str_id), "invalid marisa key");
|
||||
str_ids_[offset++] = str_id;
|
||||
if (data->is_valid(offset)) {
|
||||
auto str_id =
|
||||
lookup(*static_cast<const std::string*>(data->RawValue(i)));
|
||||
AssertInfo(valid_str_id(str_id), "invalid marisa key");
|
||||
str_ids_[offset] = str_id;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -228,7 +234,7 @@ StringIndexMarisa::Load(milvus::tracer::TraceContext ctx,
|
|||
AssembleIndexDatas(index_datas);
|
||||
BinarySet binary_set;
|
||||
for (auto& [key, data] : index_datas) {
|
||||
auto size = data->Size();
|
||||
auto size = data->DataSize();
|
||||
auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction
|
||||
auto buf = std::shared_ptr<uint8_t[]>(
|
||||
(uint8_t*)const_cast<void*>(data->Data()), deleter);
|
||||
|
@ -267,6 +273,32 @@ StringIndexMarisa::NotIn(size_t n, const std::string* values) {
|
|||
}
|
||||
}
|
||||
}
|
||||
// NotIn(null) and In(null) is both false, need to mask with IsNotNull operate
|
||||
auto offsets = str_ids_to_offsets_[MARISA_NULL_KEY_ID];
|
||||
for (size_t i = 0; i < offsets.size(); i++) {
|
||||
bitset.reset(offsets[i]);
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
|
||||
// Returns a bitmap with one bit per indexed row (str_ids_.size() bits) in
// which the bits of null rows are set. Null rows are the offsets stored under
// MARISA_NULL_KEY_ID in str_ids_to_offsets_.
const TargetBitmap
StringIndexMarisa::IsNull() {
    TargetBitmap bitset(str_ids_.size());
    // Use find() rather than operator[]: the offsets are read in place
    // instead of being copied, and a query never adds an empty entry to the
    // container when the index holds no null rows.
    const auto null_entry = str_ids_to_offsets_.find(MARISA_NULL_KEY_ID);
    if (null_entry != str_ids_to_offsets_.end()) {
        for (const auto row_offset : null_entry->second) {
            bitset.set(row_offset);
        }
    }
    return bitset;
}
|
||||
|
||||
// Returns a bitmap with one bit per indexed row (str_ids_.size() bits) in
// which the bits of non-null rows are set. It marks the offsets stored under
// MARISA_NULL_KEY_ID in str_ids_to_offsets_ and then inverts the bitmap.
const TargetBitmap
StringIndexMarisa::IsNotNull() {
    TargetBitmap bitset(str_ids_.size());
    // Use find() rather than operator[]: the offsets are read in place
    // instead of being copied, and a query never adds an empty entry to the
    // container when the index holds no null rows.
    const auto null_entry = str_ids_to_offsets_.find(MARISA_NULL_KEY_ID);
    if (null_entry != str_ids_to_offsets_.end()) {
        for (const auto row_offset : null_entry->second) {
            bitset.set(row_offset);
        }
    }
    // Null rows are marked at this point; invert to get the non-null rows.
    bitset.flip();
    return bitset;
}
|
||||
|
||||
|
|
|
@ -69,6 +69,12 @@ class StringIndexMarisa : public StringIndex {
|
|||
const TargetBitmap
|
||||
NotIn(size_t n, const std::string* values) override;
|
||||
|
||||
const TargetBitmap
|
||||
IsNull() override;
|
||||
|
||||
const TargetBitmap
|
||||
IsNotNull() override;
|
||||
|
||||
const TargetBitmap
|
||||
Range(std::string value, OpType op) override;
|
||||
|
||||
|
|
|
@ -242,14 +242,15 @@ void
|
|||
AssembleIndexDatas(std::map<std::string, FieldDataPtr>& index_datas) {
|
||||
if (index_datas.find(INDEX_FILE_SLICE_META) != index_datas.end()) {
|
||||
auto slice_meta = index_datas.at(INDEX_FILE_SLICE_META);
|
||||
Config meta_data = Config::parse(std::string(
|
||||
static_cast<const char*>(slice_meta->Data()), slice_meta->Size()));
|
||||
Config meta_data = Config::parse(
|
||||
std::string(static_cast<const char*>(slice_meta->Data()),
|
||||
slice_meta->DataSize()));
|
||||
|
||||
for (auto& item : meta_data[META]) {
|
||||
std::string prefix = item[NAME];
|
||||
int slice_num = item[SLICE_NUM];
|
||||
auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
|
||||
// todo: support nullable index
|
||||
// build index skip null value, so not need to set nullable == true
|
||||
auto new_field_data =
|
||||
storage::CreateFieldData(DataType::INT8, false, 1, total_len);
|
||||
|
||||
|
@ -258,7 +259,7 @@ AssembleIndexDatas(std::map<std::string, FieldDataPtr>& index_datas) {
|
|||
AssertInfo(index_datas.find(file_name) != index_datas.end(),
|
||||
"lost index slice data");
|
||||
auto data = index_datas.at(file_name);
|
||||
auto len = data->Size();
|
||||
auto len = data->DataSize();
|
||||
new_field_data->FillFieldData(data->Data(), len);
|
||||
index_datas.erase(file_name);
|
||||
}
|
||||
|
@ -282,13 +283,13 @@ AssembleIndexDatas(std::map<std::string, FieldDataChannelPtr>& index_datas,
|
|||
index_datas.erase(INDEX_FILE_SLICE_META);
|
||||
Config metadata = Config::parse(
|
||||
std::string(static_cast<const char*>(raw_metadata->Data()),
|
||||
raw_metadata->Size()));
|
||||
raw_metadata->DataSize()));
|
||||
|
||||
for (auto& item : metadata[META]) {
|
||||
std::string prefix = item[NAME];
|
||||
int slice_num = item[SLICE_NUM];
|
||||
auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
|
||||
// todo: support nullable index
|
||||
// build index skip null value, so not need to set nullable == true
|
||||
auto new_field_data =
|
||||
storage::CreateFieldData(DataType::INT8, false, 1, total_len);
|
||||
|
||||
|
@ -299,7 +300,7 @@ AssembleIndexDatas(std::map<std::string, FieldDataChannelPtr>& index_datas,
|
|||
auto& channel = it->second;
|
||||
auto data_array = storage::CollectFieldDataChannel(channel);
|
||||
auto data = storage::MergeFieldData(data_array);
|
||||
auto len = data->Size();
|
||||
auto len = data->DataSize();
|
||||
new_field_data->FillFieldData(data->Data(), len);
|
||||
index_datas.erase(file_name);
|
||||
}
|
||||
|
|
|
@ -195,7 +195,6 @@ VectorMemIndex<T>::Load(milvus::tracer::TraceContext ctx,
|
|||
std::string prefix = item[NAME];
|
||||
int slice_num = item[SLICE_NUM];
|
||||
auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
|
||||
// todo: support nullable index
|
||||
auto new_field_data = milvus::storage::CreateFieldData(
|
||||
DataType::INT8, false, 1, total_len);
|
||||
|
||||
|
|
|
@ -245,7 +245,10 @@ class ColumnBase {
|
|||
|
||||
bool
|
||||
IsValid(size_t offset) const {
|
||||
return valid_data_[offset];
|
||||
if (nullable_) {
|
||||
return valid_data_[offset];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -357,8 +357,10 @@ SegmentInternalInterface::LoadPrimitiveSkipIndex(milvus::FieldId field_id,
|
|||
int64_t chunk_id,
|
||||
milvus::DataType data_type,
|
||||
const void* chunk_data,
|
||||
const bool* valid_data,
|
||||
int64_t count) {
|
||||
skip_index_.LoadPrimitive(field_id, chunk_id, data_type, chunk_data, count);
|
||||
skip_index_.LoadPrimitive(
|
||||
field_id, chunk_id, data_type, chunk_data, valid_data, count);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -248,6 +248,7 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
int64_t chunk_id,
|
||||
DataType data_type,
|
||||
const void* chunk_data,
|
||||
const bool* valid_data,
|
||||
int64_t count);
|
||||
|
||||
void
|
||||
|
|
|
@ -423,8 +423,12 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
|
|||
column->AppendBatch(field_data);
|
||||
stats_.mem_size += field_data->Size();
|
||||
}
|
||||
LoadPrimitiveSkipIndex(
|
||||
field_id, 0, data_type, column->Span().data(), num_rows);
|
||||
LoadPrimitiveSkipIndex(field_id,
|
||||
0,
|
||||
data_type,
|
||||
column->Span().data(),
|
||||
column->Span().valid_data(),
|
||||
num_rows);
|
||||
}
|
||||
|
||||
AssertInfo(column->NumRows() == num_rows,
|
||||
|
|
|
@ -213,7 +213,7 @@ DiskFileManagerImpl::CacheIndexToDisk(
|
|||
auto index_chunks = GetObjectData(rcm_.get(), batch_remote_files);
|
||||
for (auto& chunk : index_chunks) {
|
||||
auto index_data = chunk.get()->GetFieldData();
|
||||
auto index_size = index_data->Size();
|
||||
auto index_size = index_data->DataSize();
|
||||
auto chunk_data = reinterpret_cast<uint8_t*>(
|
||||
const_cast<void*>(index_data->Data()));
|
||||
file.Write(chunk_data, index_size);
|
||||
|
|
|
@ -528,6 +528,7 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
|
|||
IndexMeta index_meta,
|
||||
FieldDataMeta field_meta,
|
||||
std::string object_key) {
|
||||
// index not use valid_data, so no need to set nullable==true
|
||||
auto field_data = CreateFieldData(DataType::INT8, false);
|
||||
field_data->FillFieldData(buf, batch_size);
|
||||
auto indexData = std::make_shared<IndexData>(field_data);
|
||||
|
@ -551,8 +552,8 @@ EncodeAndUploadFieldSlice(ChunkManager* chunk_manager,
|
|||
auto dim = IsSparseFloatVectorDataType(field_meta.get_data_type())
|
||||
? -1
|
||||
: field_meta.get_dim();
|
||||
auto field_data = CreateFieldData(
|
||||
field_meta.get_data_type(), field_meta.is_nullable(), dim, 0);
|
||||
auto field_data =
|
||||
CreateFieldData(field_meta.get_data_type(), false, dim, 0);
|
||||
field_data->FillFieldData(buf, element_count);
|
||||
auto insertData = std::make_shared<InsertData>(field_data);
|
||||
insertData->SetFieldDataMeta(field_data_meta);
|
||||
|
|
|
@ -162,6 +162,7 @@ class ArrayBitmapIndexTest : public testing::Test {
|
|||
int64_t index_version) {
|
||||
proto::schema::FieldSchema field_schema;
|
||||
field_schema.set_data_type(proto::schema::DataType::Array);
|
||||
field_schema.set_nullable(nullable_);
|
||||
proto::schema::DataType element_type;
|
||||
if constexpr (std::is_same_v<int8_t, T>) {
|
||||
element_type = proto::schema::DataType::Int8;
|
||||
|
@ -185,9 +186,26 @@ class ArrayBitmapIndexTest : public testing::Test {
|
|||
segment_id, field_id, index_build_id, index_version};
|
||||
|
||||
data_ = GenerateArrayData(element_type, cardinality_, nb_, 10);
|
||||
|
||||
auto field_data = storage::CreateFieldData(DataType::ARRAY);
|
||||
field_data->FillFieldData(data_.data(), data_.size());
|
||||
auto field_data = storage::CreateFieldData(DataType::ARRAY, nullable_);
|
||||
if (nullable_) {
|
||||
valid_data_.reserve(nb_);
|
||||
uint8_t* ptr = new uint8_t[(nb_ + 7) / 8];
|
||||
for (int i = 0; i < nb_; i++) {
|
||||
int byteIndex = i / 8;
|
||||
int bitIndex = i % 8;
|
||||
if (i % 2 == 0) {
|
||||
valid_data_.push_back(true);
|
||||
ptr[byteIndex] |= (1 << bitIndex);
|
||||
} else {
|
||||
valid_data_.push_back(false);
|
||||
ptr[byteIndex] &= ~(1 << bitIndex);
|
||||
}
|
||||
}
|
||||
field_data->FillFieldData(data_.data(), ptr, data_.size());
|
||||
delete[] ptr;
|
||||
} else {
|
||||
field_data->FillFieldData(data_.data(), data_.size());
|
||||
}
|
||||
storage::InsertData insert_data(field_data);
|
||||
insert_data.SetFieldDataMeta(field_meta);
|
||||
insert_data.SetTimestamps(0, 100);
|
||||
|
@ -237,6 +255,7 @@ class ArrayBitmapIndexTest : public testing::Test {
|
|||
SetParam() {
|
||||
nb_ = 10000;
|
||||
cardinality_ = 30;
|
||||
nullable_ = false;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -293,6 +312,9 @@ class ArrayBitmapIndexTest : public testing::Test {
|
|||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
auto ref = [&]() -> bool {
|
||||
milvus::Array array = data_[i];
|
||||
if (nullable_ && !valid_data_[i]) {
|
||||
return false;
|
||||
}
|
||||
for (size_t j = 0; j < array.length(); ++j) {
|
||||
auto val = array.template get_data<T>(j);
|
||||
if (s.find(val) != s.end()) {
|
||||
|
@ -313,7 +335,9 @@ class ArrayBitmapIndexTest : public testing::Test {
|
|||
IndexBasePtr index_;
|
||||
size_t nb_;
|
||||
size_t cardinality_;
|
||||
bool nullable_;
|
||||
std::vector<milvus::Array> data_;
|
||||
FixedVector<bool> valid_data_;
|
||||
};
|
||||
|
||||
TYPED_TEST_SUITE_P(ArrayBitmapIndexTest);
|
||||
|
@ -350,6 +374,7 @@ class ArrayBitmapIndexTestV1 : public ArrayBitmapIndexTest<T> {
|
|||
SetParam() override {
|
||||
this->nb_ = 10000;
|
||||
this->cardinality_ = 200;
|
||||
this->nullable_ = false;
|
||||
}
|
||||
|
||||
virtual ~ArrayBitmapIndexTestV1() {
|
||||
|
@ -363,10 +388,36 @@ TYPED_TEST_P(ArrayBitmapIndexTestV1, CountFuncTest) {
|
|||
EXPECT_EQ(count, this->nb_);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class ArrayBitmapIndexTestNullable : public ArrayBitmapIndexTest<T> {
|
||||
public:
|
||||
virtual void
|
||||
SetParam() override {
|
||||
this->nb_ = 10000;
|
||||
this->cardinality_ = 30;
|
||||
this->nullable_ = true;
|
||||
}
|
||||
|
||||
virtual ~ArrayBitmapIndexTestNullable() {
|
||||
}
|
||||
};
|
||||
|
||||
TYPED_TEST_SUITE_P(ArrayBitmapIndexTestNullable);
|
||||
|
||||
// Count() is expected to equal the number of generated rows (nb_).
TYPED_TEST_P(ArrayBitmapIndexTestNullable, CountFuncTest) {
    EXPECT_EQ(this->index_->Count(), this->nb_);
}
|
||||
|
||||
using BitmapTypeV1 = testing::Types<int32_t, int64_t, std::string>;
|
||||
|
||||
REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTestV1, CountFuncTest);
|
||||
REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTestNullable, CountFuncTest);
|
||||
|
||||
INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheckV1,
|
||||
ArrayBitmapIndexTestV1,
|
||||
BitmapTypeV1);
|
||||
|
||||
INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheckV1,
|
||||
ArrayBitmapIndexTestNullable,
|
||||
BitmapTypeV1);
|
|
@ -72,6 +72,7 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
int64_t index_build_id,
|
||||
int64_t index_version) {
|
||||
proto::schema::FieldSchema field_schema;
|
||||
field_schema.set_nullable(nullable_);
|
||||
if constexpr (std::is_same_v<int8_t, T>) {
|
||||
field_schema.set_data_type(proto::schema::DataType::Int8);
|
||||
} else if constexpr (std::is_same_v<int16_t, T>) {
|
||||
|
@ -98,8 +99,26 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
data_.push_back(x);
|
||||
}
|
||||
|
||||
auto field_data = storage::CreateFieldData(type_);
|
||||
field_data->FillFieldData(data_.data(), data_.size());
|
||||
auto field_data = storage::CreateFieldData(type_, nullable_);
|
||||
if (nullable_) {
|
||||
valid_data_.reserve(nb_);
|
||||
uint8_t* ptr = new uint8_t[(nb_ + 7) / 8];
|
||||
for (int i = 0; i < nb_; i++) {
|
||||
int byteIndex = i / 8;
|
||||
int bitIndex = i % 8;
|
||||
if (i % 2 == 0) {
|
||||
valid_data_.push_back(true);
|
||||
ptr[byteIndex] |= (1 << bitIndex);
|
||||
} else {
|
||||
valid_data_.push_back(false);
|
||||
ptr[byteIndex] &= ~(1 << bitIndex);
|
||||
}
|
||||
}
|
||||
field_data->FillFieldData(data_.data(), ptr, data_.size());
|
||||
delete[] ptr;
|
||||
} else {
|
||||
field_data->FillFieldData(data_.data(), data_.size());
|
||||
}
|
||||
storage::InsertData insert_data(field_data);
|
||||
insert_data.SetFieldDataMeta(field_meta);
|
||||
insert_data.SetTimestamps(0, 100);
|
||||
|
@ -149,6 +168,7 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
SetParam() {
|
||||
nb_ = 10000;
|
||||
cardinality_ = 30;
|
||||
nullable_ = false;
|
||||
}
|
||||
void
|
||||
SetUp() override {
|
||||
|
@ -171,7 +191,7 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
int64_t field_id = 101;
|
||||
int64_t index_build_id = 1000;
|
||||
int64_t index_version = 10000;
|
||||
std::string root_path = "/tmp/test-bitmap-index/";
|
||||
std::string root_path = "/tmp/test-bitmap-index";
|
||||
|
||||
storage::StorageConfig storage_config;
|
||||
storage_config.storage_type = "local";
|
||||
|
@ -204,7 +224,11 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
dynamic_cast<index::HybridScalarIndex<T>*>(index_.get());
|
||||
auto bitset = index_ptr->In(test_data.size(), test_data.data());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end());
|
||||
if (nullable_ && !valid_data_[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -221,7 +245,39 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
dynamic_cast<index::HybridScalarIndex<T>*>(index_.get());
|
||||
auto bitset = index_ptr->NotIn(test_data.size(), test_data.data());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
ASSERT_EQ(bitset[i], s.find(data_[i]) == s.end());
|
||||
if (nullable_ && !valid_data_[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
ASSERT_NE(bitset[i], s.find(data_[i]) != s.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TestIsNullFunc() {
|
||||
auto index_ptr =
|
||||
dynamic_cast<index::HybridScalarIndex<T>*>(index_.get());
|
||||
auto bitset = index_ptr->IsNull();
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
if (nullable_ && !valid_data_[i]) {
|
||||
ASSERT_EQ(bitset[i], true);
|
||||
} else {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TestIsNotNullFunc() {
|
||||
auto index_ptr =
|
||||
dynamic_cast<index::HybridScalarIndex<T>*>(index_.get());
|
||||
auto bitset = index_ptr->IsNotNull();
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
if (nullable_ && !valid_data_[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
ASSERT_EQ(bitset[i], true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -250,9 +306,15 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
auto ans = bitset[i];
|
||||
auto should = ref(i);
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "op: " << op << ", @" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
if (nullable_ && !valid_data_[i]) {
|
||||
ASSERT_EQ(ans, false)
|
||||
<< "op: " << op << ", @" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
} else {
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "op: " << op << ", @" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -309,10 +371,17 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
auto ans = bitset[i];
|
||||
auto should = test_case.ref(i);
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "lower:" << test_case.lower_val
|
||||
<< "upper:" << test_case.upper_val << ", @" << i
|
||||
<< ", ans: " << ans << ", ref: " << should;
|
||||
if (nullable_ && !valid_data_[i]) {
|
||||
ASSERT_EQ(ans, false)
|
||||
<< "lower:" << test_case.lower_val
|
||||
<< "upper:" << test_case.upper_val << ", @" << i
|
||||
<< ", ans: " << ans << ", ref: " << false;
|
||||
} else {
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "lower:" << test_case.lower_val
|
||||
<< "upper:" << test_case.upper_val << ", @" << i
|
||||
<< ", ans: " << ans << ", ref: " << should;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -325,6 +394,8 @@ class HybridIndexTestV1 : public testing::Test {
|
|||
size_t cardinality_;
|
||||
boost::container::vector<T> data_;
|
||||
std::shared_ptr<storage::ChunkManager> chunk_manager_;
|
||||
bool nullable_;
|
||||
FixedVector<bool> valid_data_;
|
||||
};
|
||||
|
||||
TYPED_TEST_SUITE_P(HybridIndexTestV1);
|
||||
|
@ -342,6 +413,14 @@ TYPED_TEST_P(HybridIndexTestV1, NotINFuncTest) {
|
|||
this->TestNotInFunc();
|
||||
}
|
||||
|
||||
// Runs the fixture's IsNull() check on HybridIndexTestV1.
TYPED_TEST_P(HybridIndexTestV1, IsNullFuncTest) {
    this->TestIsNullFunc();
}
|
||||
|
||||
// Runs the fixture's IsNotNull() check on HybridIndexTestV1.
TYPED_TEST_P(HybridIndexTestV1, IsNotNullFuncTest) {
    this->TestIsNotNullFunc();
}
|
||||
|
||||
// Runs the fixture's single-value comparison check on HybridIndexTestV1.
TYPED_TEST_P(HybridIndexTestV1, CompareValFuncTest) {
    this->TestCompareValueFunc();
}
|
||||
|
@ -356,6 +435,8 @@ using BitmapType =
|
|||
REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV1,
|
||||
CountFuncTest,
|
||||
INFuncTest,
|
||||
IsNullFuncTest,
|
||||
IsNotNullFuncTest,
|
||||
NotINFuncTest,
|
||||
CompareValFuncTest,
|
||||
TestRangeCompareFuncTest);
|
||||
|
@ -371,6 +452,7 @@ class HybridIndexTestV2 : public HybridIndexTestV1<T> {
|
|||
SetParam() override {
|
||||
this->nb_ = 10000;
|
||||
this->cardinality_ = 2000;
|
||||
this->nullable_ = false;
|
||||
}
|
||||
|
||||
virtual ~HybridIndexTestV2() {
|
||||
|
@ -392,6 +474,14 @@ TYPED_TEST_P(HybridIndexTestV2, NotINFuncTest) {
|
|||
this->TestNotInFunc();
|
||||
}
|
||||
|
||||
// Runs the fixture's IsNull() check on HybridIndexTestV2.
TYPED_TEST_P(HybridIndexTestV2, IsNullFuncTest) {
    this->TestIsNullFunc();
}
|
||||
|
||||
// Runs the fixture's IsNotNull() check on HybridIndexTestV2.
TYPED_TEST_P(HybridIndexTestV2, IsNotNullFuncTest) {
    this->TestIsNotNullFunc();
}
|
||||
|
||||
// Runs the fixture's single-value comparison check on HybridIndexTestV2.
TYPED_TEST_P(HybridIndexTestV2, CompareValFuncTest) {
    this->TestCompareValueFunc();
}
|
||||
|
@ -400,12 +490,68 @@ TYPED_TEST_P(HybridIndexTestV2, TestRangeCompareFuncTest) {
|
|||
this->TestRangeCompareFunc();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class HybridIndexTestNullable : public HybridIndexTestV1<T> {
|
||||
public:
|
||||
virtual void
|
||||
SetParam() override {
|
||||
this->nb_ = 10000;
|
||||
this->cardinality_ = 2000;
|
||||
this->nullable_ = true;
|
||||
}
|
||||
|
||||
virtual ~HybridIndexTestNullable() {
|
||||
}
|
||||
};
|
||||
|
||||
TYPED_TEST_SUITE_P(HybridIndexTestNullable);
|
||||
|
||||
// Count() is expected to equal the number of generated rows (nb_).
TYPED_TEST_P(HybridIndexTestNullable, CountFuncTest) {
    EXPECT_EQ(this->index_->Count(), this->nb_);
}
|
||||
|
||||
// Runs the fixture's In() check on the nullable fixture.
TYPED_TEST_P(HybridIndexTestNullable, INFuncTest) {
    this->TestInFunc();
}
|
||||
|
||||
// Runs the fixture's NotIn() check on the nullable fixture.
TYPED_TEST_P(HybridIndexTestNullable, NotINFuncTest) {
    this->TestNotInFunc();
}
|
||||
|
||||
// Runs the fixture's IsNull() check on the nullable fixture.
TYPED_TEST_P(HybridIndexTestNullable, IsNullFuncTest) {
    this->TestIsNullFunc();
}
|
||||
|
||||
// Runs the fixture's IsNotNull() check on the nullable fixture.
TYPED_TEST_P(HybridIndexTestNullable, IsNotNullFuncTest) {
    this->TestIsNotNullFunc();
}
|
||||
|
||||
// Runs the fixture's single-value comparison check on the nullable fixture.
TYPED_TEST_P(HybridIndexTestNullable, CompareValFuncTest) {
    this->TestCompareValueFunc();
}
|
||||
|
||||
// Runs the fixture's two-sided range comparison check on the nullable fixture.
TYPED_TEST_P(HybridIndexTestNullable, TestRangeCompareFuncTest) {
    this->TestRangeCompareFunc();
}
|
||||
|
||||
using BitmapType =
|
||||
testing::Types<int8_t, int16_t, int32_t, int64_t, std::string>;
|
||||
|
||||
REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV2,
|
||||
CountFuncTest,
|
||||
INFuncTest,
|
||||
IsNullFuncTest,
|
||||
IsNotNullFuncTest,
|
||||
NotINFuncTest,
|
||||
CompareValFuncTest,
|
||||
TestRangeCompareFuncTest);
|
||||
|
||||
REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestNullable,
|
||||
CountFuncTest,
|
||||
INFuncTest,
|
||||
IsNullFuncTest,
|
||||
IsNotNullFuncTest,
|
||||
NotINFuncTest,
|
||||
CompareValFuncTest,
|
||||
TestRangeCompareFuncTest);
|
||||
|
@ -413,3 +559,7 @@ REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV2,
|
|||
INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_HighCardinality,
|
||||
HybridIndexTestV2,
|
||||
BitmapType);
|
||||
|
||||
INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_Nullable,
|
||||
HybridIndexTestNullable,
|
||||
BitmapType);
|
||||
|
|
|
@ -32,8 +32,8 @@ gen_field_meta(int64_t collection_id = 1,
|
|||
int64_t segment_id = 3,
|
||||
int64_t field_id = 101,
|
||||
DataType data_type = DataType::NONE,
|
||||
DataType element_type = DataType::NONE)
|
||||
-> storage::FieldDataMeta {
|
||||
DataType element_type = DataType::NONE,
|
||||
bool nullable = false) -> storage::FieldDataMeta {
|
||||
auto meta = storage::FieldDataMeta{
|
||||
.collection_id = collection_id,
|
||||
.partition_id = partition_id,
|
||||
|
@ -44,6 +44,7 @@ gen_field_meta(int64_t collection_id = 1,
|
|||
static_cast<proto::schema::DataType>(data_type));
|
||||
meta.field_schema.set_element_type(
|
||||
static_cast<proto::schema::DataType>(element_type));
|
||||
meta.field_schema.set_nullable(nullable);
|
||||
return meta;
|
||||
}
|
||||
|
||||
|
@ -92,7 +93,10 @@ struct ChunkManagerWrapper {
|
|||
};
|
||||
} // namespace milvus::test
|
||||
|
||||
template <typename T, DataType dtype, DataType element_type = DataType::NONE>
|
||||
template <typename T,
|
||||
DataType dtype,
|
||||
DataType element_type = DataType::NONE,
|
||||
bool nullable = false>
|
||||
void
|
||||
test_run() {
|
||||
int64_t collection_id = 1;
|
||||
|
@ -102,8 +106,13 @@ test_run() {
|
|||
int64_t index_build_id = 1000;
|
||||
int64_t index_version = 10000;
|
||||
|
||||
auto field_meta = test::gen_field_meta(
|
||||
collection_id, partition_id, segment_id, field_id, dtype, element_type);
|
||||
auto field_meta = test::gen_field_meta(collection_id,
|
||||
partition_id,
|
||||
segment_id,
|
||||
field_id,
|
||||
dtype,
|
||||
element_type,
|
||||
nullable);
|
||||
auto index_meta = test::gen_index_meta(
|
||||
segment_id, field_id, index_build_id, index_version);
|
||||
|
||||
|
@ -114,6 +123,7 @@ test_run() {
|
|||
size_t nb = 10000;
|
||||
std::vector<T> data_gen;
|
||||
boost::container::vector<T> data;
|
||||
FixedVector<bool> valid_data;
|
||||
if constexpr (!std::is_same_v<T, bool>) {
|
||||
data_gen = GenSortedArr<T>(nb);
|
||||
} else {
|
||||
|
@ -121,12 +131,36 @@ test_run() {
|
|||
data_gen.push_back(rand() % 2 == 0);
|
||||
}
|
||||
}
|
||||
if (nullable) {
|
||||
valid_data.reserve(nb);
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
valid_data.push_back(rand() % 2 == 0);
|
||||
}
|
||||
}
|
||||
for (auto x : data_gen) {
|
||||
data.push_back(x);
|
||||
}
|
||||
|
||||
auto field_data = storage::CreateFieldData(dtype);
|
||||
field_data->FillFieldData(data.data(), data.size());
|
||||
auto field_data = storage::CreateFieldData(dtype, nullable);
|
||||
if (nullable) {
|
||||
int byteSize = (nb + 7) / 8;
|
||||
uint8_t* valid_data_ = new uint8_t[byteSize];
|
||||
for (int i = 0; i < nb; i++) {
|
||||
bool value = valid_data[i];
|
||||
int byteIndex = i / 8;
|
||||
int bitIndex = i % 8;
|
||||
if (value) {
|
||||
valid_data_[byteIndex] |= (1 << bitIndex);
|
||||
} else {
|
||||
valid_data_[byteIndex] &= ~(1 << bitIndex);
|
||||
}
|
||||
}
|
||||
field_data->FillFieldData(data.data(), valid_data_, data.size());
|
||||
delete[] valid_data_;
|
||||
} else {
|
||||
field_data->FillFieldData(data.data(), data.size());
|
||||
}
|
||||
// std::cout << "length:" << field_data->get_num_rows() << std::endl;
|
||||
storage::InsertData insert_data(field_data);
|
||||
insert_data.SetFieldDataMeta(field_meta);
|
||||
insert_data.SetTimestamps(0, 100);
|
||||
|
@ -197,7 +231,11 @@ test_run() {
|
|||
real_index->In(test_data.size(), test_data.data());
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
ASSERT_EQ(bitset[i], s.find(data[i]) != s.end());
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
ASSERT_EQ(bitset[i], s.find(data[i]) != s.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -213,7 +251,35 @@ test_run() {
|
|||
real_index->NotIn(test_data.size(), test_data.data());
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
ASSERT_NE(bitset[i], s.find(data[i]) != s.end());
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
ASSERT_NE(bitset[i], s.find(data[i]) != s.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = real_index->IsNull();
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], true);
|
||||
} else {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = real_index->IsNotNull();
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
ASSERT_EQ(bitset[i], true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -241,12 +307,16 @@ test_run() {
|
|||
for (const auto& [test_value, op, ref] : test_cases) {
|
||||
auto bitset = real_index->Range(test_value, op);
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
auto ans = bitset[i];
|
||||
auto should = ref(i);
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "op: " << op << ", @" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(ans, false);
|
||||
} else {
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "op: " << op << ", @" << i
|
||||
<< ", ans: " << ans << ", ref: " << should;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -287,11 +357,16 @@ test_run() {
|
|||
auto bitset =
|
||||
real_index->Range(lb, lb_inclusive, ub, ub_inclusive);
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
auto ans = bitset[i];
|
||||
auto should = ref(i);
|
||||
ASSERT_EQ(ans, should) << "@" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(ans, false);
|
||||
} else {
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "@" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -299,6 +374,7 @@ test_run() {
|
|||
}
|
||||
}
|
||||
|
||||
template <bool nullable = false>
|
||||
void
|
||||
test_string() {
|
||||
using T = std::string;
|
||||
|
@ -316,7 +392,8 @@ test_string() {
|
|||
segment_id,
|
||||
field_id,
|
||||
dtype,
|
||||
DataType::NONE);
|
||||
DataType::NONE,
|
||||
nullable);
|
||||
auto index_meta = test::gen_index_meta(
|
||||
segment_id, field_id, index_build_id, index_version);
|
||||
|
||||
|
@ -326,12 +403,36 @@ test_string() {
|
|||
|
||||
size_t nb = 10000;
|
||||
boost::container::vector<T> data;
|
||||
FixedVector<bool> valid_data;
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
data.push_back(std::to_string(rand()));
|
||||
}
|
||||
if (nullable) {
|
||||
valid_data.reserve(nb);
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
valid_data.push_back(rand() % 2 == 0);
|
||||
}
|
||||
}
|
||||
|
||||
auto field_data = storage::CreateFieldData(dtype, false);
|
||||
field_data->FillFieldData(data.data(), data.size());
|
||||
auto field_data = storage::CreateFieldData(dtype, nullable);
|
||||
if (nullable) {
|
||||
int byteSize = (nb + 7) / 8;
|
||||
uint8_t* valid_data_ = new uint8_t[byteSize];
|
||||
for (int i = 0; i < nb; i++) {
|
||||
bool value = valid_data[i];
|
||||
int byteIndex = i / 8;
|
||||
int bitIndex = i % 8;
|
||||
if (value) {
|
||||
valid_data_[byteIndex] |= (1 << bitIndex);
|
||||
} else {
|
||||
valid_data_[byteIndex] &= ~(1 << bitIndex);
|
||||
}
|
||||
}
|
||||
field_data->FillFieldData(data.data(), valid_data_, data.size());
|
||||
delete[] valid_data_;
|
||||
} else {
|
||||
field_data->FillFieldData(data.data(), data.size());
|
||||
}
|
||||
storage::InsertData insert_data(field_data);
|
||||
insert_data.SetFieldDataMeta(field_meta);
|
||||
insert_data.SetTimestamps(0, 100);
|
||||
|
@ -399,7 +500,11 @@ test_string() {
|
|||
auto bitset = real_index->In(test_data.size(), test_data.data());
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
ASSERT_EQ(bitset[i], s.find(data[i]) != s.end());
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
ASSERT_EQ(bitset[i], s.find(data[i]) != s.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -414,7 +519,11 @@ test_string() {
|
|||
auto bitset = real_index->NotIn(test_data.size(), test_data.data());
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
ASSERT_NE(bitset[i], s.find(data[i]) != s.end());
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
ASSERT_NE(bitset[i], s.find(data[i]) != s.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -441,9 +550,13 @@ test_string() {
|
|||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
auto ans = bitset[i];
|
||||
auto should = ref(i);
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "op: " << op << ", @" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(ans, false);
|
||||
} else {
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "op: " << op << ", @" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -484,11 +597,15 @@ test_string() {
|
|||
auto bitset =
|
||||
real_index->Range(lb, lb_inclusive, ub, ub_inclusive);
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
auto ans = bitset[i];
|
||||
auto should = ref(i);
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "@" << i << ", ans: " << ans << ", ref: " << should;
|
||||
if (nullable && !valid_data[i]) {
|
||||
ASSERT_EQ(ans, false);
|
||||
} else {
|
||||
ASSERT_EQ(ans, should) << "@" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -501,7 +618,11 @@ test_string() {
|
|||
auto bitset = real_index->Query(dataset);
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
ASSERT_EQ(bitset[i], boost::starts_with(data[i], prefix));
|
||||
auto should = boost::starts_with(data[i], prefix);
|
||||
if (nullable && !valid_data[i]) {
|
||||
should = false;
|
||||
}
|
||||
ASSERT_EQ(bitset[i], should);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -511,7 +632,11 @@ test_string() {
|
|||
auto bitset = real_index->RegexQuery(prefix + "(.|\n)*");
|
||||
ASSERT_EQ(cnt, bitset.size());
|
||||
for (size_t i = 0; i < bitset.size(); i++) {
|
||||
ASSERT_EQ(bitset[i], boost::starts_with(data[i], prefix));
|
||||
auto should = boost::starts_with(data[i], prefix);
|
||||
if (nullable && !valid_data[i]) {
|
||||
should = false;
|
||||
}
|
||||
ASSERT_EQ(bitset[i], should);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -529,4 +654,15 @@ TEST(InvertedIndex, Naive) {
|
|||
test_run<double, DataType::DOUBLE>();
|
||||
|
||||
test_string();
|
||||
test_run<int8_t, DataType::INT8, DataType::NONE, true>();
|
||||
test_run<int16_t, DataType::INT16, DataType::NONE, true>();
|
||||
test_run<int32_t, DataType::INT32, DataType::NONE, true>();
|
||||
test_run<int64_t, DataType::INT64, DataType::NONE, true>();
|
||||
|
||||
test_run<bool, DataType::BOOL, DataType::NONE, true>();
|
||||
|
||||
test_run<float, DataType::FLOAT, DataType::NONE, true>();
|
||||
test_run<double, DataType::DOUBLE, DataType::NONE, true>();
|
||||
|
||||
test_string<true>();
|
||||
}
|
||||
|
|
|
@ -1872,7 +1872,7 @@ TEST(Sealed, SkipIndexSkipUnaryRange) {
|
|||
storage::CreateFieldData(DataType::INT64, false, 1, 10);
|
||||
pk_field_data->FillFieldData(pks.data(), N);
|
||||
segment->LoadPrimitiveSkipIndex(
|
||||
pk_fid, 0, DataType::INT64, pk_field_data->Data(), N);
|
||||
pk_fid, 0, DataType::INT64, pk_field_data->Data(), nullptr, N);
|
||||
auto& skip_index = segment->GetSkipIndex();
|
||||
bool equal_5_skip =
|
||||
skip_index.CanSkipUnaryRange<int64_t>(pk_fid, 0, OpType::Equal, 5);
|
||||
|
@ -1914,7 +1914,7 @@ TEST(Sealed, SkipIndexSkipUnaryRange) {
|
|||
storage::CreateFieldData(DataType::INT32, false, 1, 10);
|
||||
int32_field_data->FillFieldData(int32s.data(), N);
|
||||
segment->LoadPrimitiveSkipIndex(
|
||||
i32_fid, 0, DataType::INT32, int32_field_data->Data(), N);
|
||||
i32_fid, 0, DataType::INT32, int32_field_data->Data(), nullptr, N);
|
||||
less_than_1_skip =
|
||||
skip_index.CanSkipUnaryRange<int32_t>(i32_fid, 0, OpType::LessThan, 1);
|
||||
ASSERT_TRUE(less_than_1_skip);
|
||||
|
@ -1925,7 +1925,7 @@ TEST(Sealed, SkipIndexSkipUnaryRange) {
|
|||
storage::CreateFieldData(DataType::INT16, false, 1, 10);
|
||||
int16_field_data->FillFieldData(int16s.data(), N);
|
||||
segment->LoadPrimitiveSkipIndex(
|
||||
i16_fid, 0, DataType::INT16, int16_field_data->Data(), N);
|
||||
i16_fid, 0, DataType::INT16, int16_field_data->Data(), nullptr, N);
|
||||
bool less_than_12_skip =
|
||||
skip_index.CanSkipUnaryRange<int16_t>(i16_fid, 0, OpType::LessThan, 12);
|
||||
ASSERT_FALSE(less_than_12_skip);
|
||||
|
@ -1936,7 +1936,7 @@ TEST(Sealed, SkipIndexSkipUnaryRange) {
|
|||
storage::CreateFieldData(DataType::INT8, false, 1, 10);
|
||||
int8_field_data->FillFieldData(int8s.data(), N);
|
||||
segment->LoadPrimitiveSkipIndex(
|
||||
i8_fid, 0, DataType::INT8, int8_field_data->Data(), N);
|
||||
i8_fid, 0, DataType::INT8, int8_field_data->Data(), nullptr, N);
|
||||
bool greater_than_12_skip = skip_index.CanSkipUnaryRange<int8_t>(
|
||||
i8_fid, 0, OpType::GreaterThan, 12);
|
||||
ASSERT_TRUE(greater_than_12_skip);
|
||||
|
@ -1948,7 +1948,7 @@ TEST(Sealed, SkipIndexSkipUnaryRange) {
|
|||
storage::CreateFieldData(DataType::FLOAT, false, 1, 10);
|
||||
float_field_data->FillFieldData(floats.data(), N);
|
||||
segment->LoadPrimitiveSkipIndex(
|
||||
float_fid, 0, DataType::FLOAT, float_field_data->Data(), N);
|
||||
float_fid, 0, DataType::FLOAT, float_field_data->Data(), nullptr, N);
|
||||
greater_than_10_skip = skip_index.CanSkipUnaryRange<float>(
|
||||
float_fid, 0, OpType::GreaterThan, 10.0);
|
||||
ASSERT_TRUE(greater_than_10_skip);
|
||||
|
@ -1960,7 +1960,7 @@ TEST(Sealed, SkipIndexSkipUnaryRange) {
|
|||
storage::CreateFieldData(DataType::DOUBLE, false, 1, 10);
|
||||
double_field_data->FillFieldData(doubles.data(), N);
|
||||
segment->LoadPrimitiveSkipIndex(
|
||||
double_fid, 0, DataType::DOUBLE, double_field_data->Data(), N);
|
||||
double_fid, 0, DataType::DOUBLE, double_field_data->Data(), nullptr, N);
|
||||
greater_than_10_skip = skip_index.CanSkipUnaryRange<double>(
|
||||
double_fid, 0, OpType::GreaterThan, 10.0);
|
||||
ASSERT_TRUE(greater_than_10_skip);
|
||||
|
@ -1984,7 +1984,7 @@ TEST(Sealed, SkipIndexSkipBinaryRange) {
|
|||
storage::CreateFieldData(DataType::INT64, false, 1, 10);
|
||||
pk_field_data->FillFieldData(pks.data(), N);
|
||||
segment->LoadPrimitiveSkipIndex(
|
||||
pk_fid, 0, DataType::INT64, pk_field_data->Data(), N);
|
||||
pk_fid, 0, DataType::INT64, pk_field_data->Data(), nullptr, N);
|
||||
auto& skip_index = segment->GetSkipIndex();
|
||||
ASSERT_FALSE(
|
||||
skip_index.CanSkipBinaryRange<int64_t>(pk_fid, 0, -3, 1, true, true));
|
||||
|
@ -2002,6 +2002,117 @@ TEST(Sealed, SkipIndexSkipBinaryRange) {
|
|||
skip_index.CanSkipBinaryRange<int64_t>(pk_fid, 0, 10, 12, true, true));
|
||||
}
|
||||
|
||||
// Checks CanSkipUnaryRange on a nullable INT64 chunk whose skip index is
// loaded with a validity mask. The chunk holds {1, 2, 3, 4, 5} but only the
// first two rows are marked valid, and the assertions below expect the skip
// decisions to be made as if the chunk contained just {1, 2}.
TEST(Sealed, SkipIndexSkipUnaryRangeNullable) {
    auto schema = std::make_shared<Schema>();
    auto dim = 128;
    auto metrics_type = "L2";
    auto fake_vec_fid = schema->AddDebugField(
        "fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type);
    auto i64_fid = schema->AddDebugField("int64_field", DataType::INT64, true);

    auto dataset = DataGen(schema, 5);
    auto segment = CreateSealedSegment(schema);

    //test for int64
    std::vector<int64_t> int64s = {1, 2, 3, 4, 5};
    // Packed validity bitmap handed to FillFieldData: 0x03 has bits 0 and 1
    // set, mirroring valid_flags below. It lives on the stack because
    // FillFieldData copies the bitmap into the field data, so no heap buffer
    // has to be allocated (and later freed) by the test.
    uint8_t valid_bitmap[1] = {0x03};
    // Per-row validity flags handed to LoadPrimitiveSkipIndex.
    FixedVector<bool> valid_flags = {true, true, false, false, false};
    auto int64s_field_data =
        storage::CreateFieldData(DataType::INT64, true, 1, 5);

    int64s_field_data->FillFieldData(int64s.data(), valid_bitmap, 5);
    segment->LoadPrimitiveSkipIndex(i64_fid,
                                    0,
                                    DataType::INT64,
                                    int64s_field_data->Data(),
                                    valid_flags.data(),
                                    5);
    auto& skip_index = segment->GetSkipIndex();

    // Equal: 4 and 5 only occur in null rows, 1 and 2 occur in valid rows.
    bool equal_5_skip =
        skip_index.CanSkipUnaryRange<int64_t>(i64_fid, 0, OpType::Equal, 5);
    bool equal_4_skip =
        skip_index.CanSkipUnaryRange<int64_t>(i64_fid, 0, OpType::Equal, 4);
    bool equal_2_skip =
        skip_index.CanSkipUnaryRange<int64_t>(i64_fid, 0, OpType::Equal, 2);
    bool equal_1_skip =
        skip_index.CanSkipUnaryRange<int64_t>(i64_fid, 0, OpType::Equal, 1);
    ASSERT_TRUE(equal_5_skip);
    ASSERT_TRUE(equal_4_skip);
    ASSERT_FALSE(equal_2_skip);
    ASSERT_FALSE(equal_1_skip);

    // LessThan
    bool less_than_1_skip =
        skip_index.CanSkipUnaryRange<int64_t>(i64_fid, 0, OpType::LessThan, 1);
    bool less_than_5_skip =
        skip_index.CanSkipUnaryRange<int64_t>(i64_fid, 0, OpType::LessThan, 5);
    ASSERT_TRUE(less_than_1_skip);
    ASSERT_FALSE(less_than_5_skip);

    // LessEqual: both probes use OpType::LessEqual so the operator under
    // test matches the variable names.
    bool less_equal_than_1_skip =
        skip_index.CanSkipUnaryRange<int64_t>(i64_fid, 0, OpType::LessEqual, 1);
    bool less_equal_than_15_skip = skip_index.CanSkipUnaryRange<int64_t>(
        i64_fid, 0, OpType::LessEqual, 15);
    ASSERT_FALSE(less_equal_than_1_skip);
    ASSERT_FALSE(less_equal_than_15_skip);

    // GreaterThan: values above 2 exist only in null rows.
    bool greater_than_10_skip = skip_index.CanSkipUnaryRange<int64_t>(
        i64_fid, 0, OpType::GreaterThan, 10);
    bool greater_than_5_skip = skip_index.CanSkipUnaryRange<int64_t>(
        i64_fid, 0, OpType::GreaterThan, 5);
    bool greater_than_2_skip = skip_index.CanSkipUnaryRange<int64_t>(
        i64_fid, 0, OpType::GreaterThan, 2);
    bool greater_than_1_skip = skip_index.CanSkipUnaryRange<int64_t>(
        i64_fid, 0, OpType::GreaterThan, 1);
    ASSERT_TRUE(greater_than_10_skip);
    ASSERT_TRUE(greater_than_5_skip);
    ASSERT_TRUE(greater_than_2_skip);
    ASSERT_FALSE(greater_than_1_skip);

    // GreaterEqual
    bool greater_equal_than_3_skip = skip_index.CanSkipUnaryRange<int64_t>(
        i64_fid, 0, OpType::GreaterEqual, 3);
    bool greater_equal_than_2_skip = skip_index.CanSkipUnaryRange<int64_t>(
        i64_fid, 0, OpType::GreaterEqual, 2);
    ASSERT_TRUE(greater_equal_than_3_skip);
    ASSERT_FALSE(greater_equal_than_2_skip);
}
|
||||
|
||||
// Checks CanSkipBinaryRange on a nullable INT64 chunk whose skip index is
// loaded with a validity mask. The chunk holds {1, 2, 3, 4, 5} but only the
// first two rows are marked valid, and the assertions below expect the skip
// decisions to be made as if the chunk contained just {1, 2}.
TEST(Sealed, SkipIndexSkipBinaryRangeNullable) {
    auto schema = std::make_shared<Schema>();
    auto dim = 128;
    auto metrics_type = "L2";
    auto fake_vec_fid = schema->AddDebugField(
        "fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type);
    auto i64_fid = schema->AddDebugField("int64_field", DataType::INT64, true);
    auto dataset = DataGen(schema, 5);
    auto segment = CreateSealedSegment(schema);

    //test for int64
    std::vector<int64_t> int64s = {1, 2, 3, 4, 5};
    // Packed validity bitmap handed to FillFieldData: 0x03 has bits 0 and 1
    // set, mirroring valid_flags below. It lives on the stack because
    // FillFieldData copies the bitmap into the field data, so no heap buffer
    // has to be allocated (and later freed) by the test.
    uint8_t valid_bitmap[1] = {0x03};
    // Per-row validity flags handed to LoadPrimitiveSkipIndex.
    FixedVector<bool> valid_flags = {true, true, false, false, false};
    auto int64s_field_data =
        storage::CreateFieldData(DataType::INT64, true, 1, 5);

    int64s_field_data->FillFieldData(int64s.data(), valid_bitmap, 5);
    segment->LoadPrimitiveSkipIndex(i64_fid,
                                    0,
                                    DataType::INT64,
                                    int64s_field_data->Data(),
                                    valid_flags.data(),
                                    5);
    auto& skip_index = segment->GetSkipIndex();

    // Range [-3, 1] touches the valid value 1; [-3, 1) does not.
    ASSERT_FALSE(
        skip_index.CanSkipBinaryRange<int64_t>(i64_fid, 0, -3, 1, true, true));
    ASSERT_TRUE(
        skip_index.CanSkipBinaryRange<int64_t>(i64_fid, 0, -3, 1, true, false));

    // Ranges starting at 1 (inclusive) always overlap the valid rows.
    ASSERT_FALSE(
        skip_index.CanSkipBinaryRange<int64_t>(i64_fid, 0, 1, 3, true, true));
    ASSERT_FALSE(
        skip_index.CanSkipBinaryRange<int64_t>(i64_fid, 0, 1, 2, true, false));

    // (2, 3] only covers 3, which sits in a null row; [2, 3] covers the
    // valid value 2.
    ASSERT_TRUE(
        skip_index.CanSkipBinaryRange<int64_t>(i64_fid, 0, 2, 3, false, true));
    ASSERT_FALSE(
        skip_index.CanSkipBinaryRange<int64_t>(i64_fid, 0, 2, 3, true, true));
}
|
||||
|
||||
TEST(Sealed, SkipIndexSkipStringRange) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
|
|
Loading…
Reference in New Issue