mirror of https://github.com/milvus-io/milvus.git
enhance: support bitmap mmap (#35399)
#32900
Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
pull/35808/head
parent
3a0c61a455
commit
4d2f96c760
|
@ -35,6 +35,7 @@
|
|||
#include "simdjson/error.h"
|
||||
#include "simdjson/padded_string.h"
|
||||
#include "rapidjson/document.h"
|
||||
#include "rapidjson/error/en.h"
|
||||
#include "rapidjson/writer.h"
|
||||
#include "rapidjson/stringbuffer.h"
|
||||
|
||||
|
@ -49,7 +50,7 @@ ExtractSubJson(const std::string& json, const std::vector<std::string>& keys) {
|
|||
if (doc.HasParseError()) {
|
||||
PanicInfo(ErrorCode::UnexpectedError,
|
||||
"json parse failed, error:{}",
|
||||
doc.GetParseError());
|
||||
rapidjson::GetParseError_En(doc.GetParseError()));
|
||||
}
|
||||
|
||||
rapidjson::Document result_doc;
|
||||
|
|
|
@ -15,10 +15,14 @@
|
|||
// limitations under the License.
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <sys/errno.h>
|
||||
#include <unistd.h>
|
||||
#include <yaml-cpp/yaml.h>
|
||||
|
||||
#include "index/BitmapIndex.h"
|
||||
|
||||
#include "common/File.h"
|
||||
#include "common/Slice.h"
|
||||
#include "common/Common.h"
|
||||
#include "index/Meta.h"
|
||||
|
@ -33,8 +37,10 @@ namespace index {
|
|||
template <typename T>
|
||||
BitmapIndex<T>::BitmapIndex(
|
||||
const storage::FileManagerContext& file_manager_context)
|
||||
: is_built_(false),
|
||||
schema_(file_manager_context.fieldDataMeta.field_schema) {
|
||||
: ScalarIndex<T>(BITMAP_INDEX_TYPE),
|
||||
is_built_(false),
|
||||
schema_(file_manager_context.fieldDataMeta.field_schema),
|
||||
is_mmap_(false) {
|
||||
if (file_manager_context.Valid()) {
|
||||
file_manager_ =
|
||||
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
|
||||
|
@ -42,6 +48,19 @@ BitmapIndex<T>::BitmapIndex(
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
BitmapIndex<T>::UnmapIndexData() {
|
||||
if (mmap_data_ != nullptr && mmap_data_ != MAP_FAILED) {
|
||||
if (munmap(mmap_data_, mmap_size_) != 0) {
|
||||
AssertInfo(
|
||||
true, "failed to unmap bitmap index, err={}", strerror(errno));
|
||||
}
|
||||
mmap_data_ = nullptr;
|
||||
mmap_size_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
BitmapIndex<T>::Build(const Config& config) {
|
||||
|
@ -348,20 +367,30 @@ BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
|
|||
template <typename T>
|
||||
void
|
||||
BitmapIndex<T>::BuildOffsetCache() {
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
data_offsets_cache_.resize(total_num_rows_);
|
||||
for (auto it = data_.begin(); it != data_.end(); it++) {
|
||||
for (const auto& v : it->second) {
|
||||
data_offsets_cache_[v] = it;
|
||||
if (is_mmap_) {
|
||||
mmap_offsets_cache_.resize(total_num_rows_);
|
||||
for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end();
|
||||
++it) {
|
||||
for (const auto& v : AccessBitmap(it->second)) {
|
||||
mmap_offsets_cache_[v] = it;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
data_offsets_cache_.resize(total_num_rows_);
|
||||
for (auto it = data_.begin(); it != data_.end(); it++) {
|
||||
for (const auto& v : it->second) {
|
||||
data_offsets_cache_[v] = it;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bitsets_offsets_cache_.resize(total_num_rows_);
|
||||
const auto& bits = it->second;
|
||||
for (int i = 0; i < bits.size(); i++) {
|
||||
if (bits[i]) {
|
||||
bitsets_offsets_cache_[i] = it;
|
||||
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
|
||||
const auto& bits = it->second;
|
||||
for (int i = 0; i < bits.size(); i++) {
|
||||
if (bits[i]) {
|
||||
bitsets_offsets_cache_[i] = it;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -398,6 +427,83 @@ BitmapIndex<std::string>::DeserializeIndexData(const uint8_t* data_ptr,
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
BitmapIndex<T>::DeserializeIndexDataForMmap(const char* data_ptr,
|
||||
size_t index_length) {
|
||||
for (size_t i = 0; i < index_length; ++i) {
|
||||
T key;
|
||||
memcpy(&key, data_ptr, sizeof(T));
|
||||
data_ptr += sizeof(T);
|
||||
|
||||
roaring::Roaring value;
|
||||
value = roaring::Roaring::read(reinterpret_cast<const char*>(data_ptr));
|
||||
auto size = value.getSizeInBytes();
|
||||
|
||||
bitmap_info_map_[key] = {static_cast<size_t>(data_ptr - mmap_data_),
|
||||
size};
|
||||
data_ptr += size;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void
|
||||
BitmapIndex<std::string>::DeserializeIndexDataForMmap(const char* data_ptr,
|
||||
size_t index_length) {
|
||||
for (size_t i = 0; i < index_length; ++i) {
|
||||
size_t key_size;
|
||||
memcpy(&key_size, data_ptr, sizeof(size_t));
|
||||
data_ptr += sizeof(size_t);
|
||||
|
||||
std::string key(reinterpret_cast<const char*>(data_ptr), key_size);
|
||||
data_ptr += key_size;
|
||||
|
||||
roaring::Roaring value;
|
||||
value = roaring::Roaring::read(reinterpret_cast<const char*>(data_ptr));
|
||||
auto size = value.getSizeInBytes();
|
||||
|
||||
bitmap_info_map_[key] = {static_cast<size_t>(data_ptr - mmap_data_),
|
||||
size};
|
||||
data_ptr += size;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
BitmapIndex<T>::MMapIndexData(const std::string& file_name,
|
||||
const uint8_t* data_ptr,
|
||||
size_t data_size,
|
||||
size_t index_length) {
|
||||
std::filesystem::create_directories(
|
||||
std::filesystem::path(file_name).parent_path());
|
||||
|
||||
auto file = File::Open(file_name, O_RDWR | O_CREAT | O_TRUNC);
|
||||
auto written = file.Write(data_ptr, data_size);
|
||||
if (written != data_size) {
|
||||
file.Close();
|
||||
remove(file_name.c_str());
|
||||
PanicInfo(ErrorCode::UnistdError,
|
||||
fmt::format("write index to fd error: {}", strerror(errno)));
|
||||
}
|
||||
|
||||
file.Seek(0, SEEK_SET);
|
||||
mmap_data_ = static_cast<char*>(
|
||||
mmap(NULL, data_size, PROT_READ, MAP_PRIVATE, file.Descriptor(), 0));
|
||||
if (mmap_data_ == MAP_FAILED) {
|
||||
file.Close();
|
||||
remove(file_name.c_str());
|
||||
PanicInfo(
|
||||
ErrorCode::UnexpectedError, "failed to mmap: {}", strerror(errno));
|
||||
}
|
||||
|
||||
mmap_size_ = data_size;
|
||||
unlink(file_name.c_str());
|
||||
|
||||
char* ptr = mmap_data_;
|
||||
DeserializeIndexDataForMmap(ptr, index_length);
|
||||
is_mmap_ = true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
|
||||
|
@ -413,15 +519,37 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
|
|||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
||||
|
||||
auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA);
|
||||
DeserializeIndexData(index_data_buffer->data.get(), index_length);
|
||||
|
||||
ChooseIndexLoadMode(index_length);
|
||||
|
||||
// only using mmap when build mode is raw roaring bitmap
|
||||
if (config.contains(MMAP_FILE_PATH) &&
|
||||
build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
auto mmap_filepath =
|
||||
GetValueFromConfig<std::string>(config, MMAP_FILE_PATH);
|
||||
AssertInfo(mmap_filepath.has_value(),
|
||||
"mmap filepath is empty when load index");
|
||||
MMapIndexData(mmap_filepath.value(),
|
||||
index_data_buffer->data.get(),
|
||||
index_data_buffer->size,
|
||||
index_length);
|
||||
} else {
|
||||
DeserializeIndexData(index_data_buffer->data.get(), index_length);
|
||||
}
|
||||
|
||||
if (enable_offset_cache.has_value() && enable_offset_cache.value()) {
|
||||
BuildOffsetCache();
|
||||
}
|
||||
|
||||
LOG_INFO("load bitmap index with cardinality = {}, num_rows = {}",
|
||||
Cardinality(),
|
||||
total_num_rows_);
|
||||
auto file_index_meta = file_manager_->GetIndexMeta();
|
||||
LOG_INFO(
|
||||
"load bitmap index with cardinality = {}, num_rows = {} for segment_id "
|
||||
"= {}, field_id = {}, mmap = {}",
|
||||
Cardinality(),
|
||||
total_num_rows_,
|
||||
file_index_meta.segment_id,
|
||||
file_index_meta.field_id,
|
||||
is_mmap_);
|
||||
|
||||
is_built_ = true;
|
||||
}
|
||||
|
@ -429,6 +557,7 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
|
|||
template <typename T>
|
||||
void
|
||||
BitmapIndex<T>::Load(milvus::tracer::TraceContext ctx, const Config& config) {
|
||||
LOG_DEBUG("load bitmap index with config {}", config.dump());
|
||||
auto index_files =
|
||||
GetValueFromConfig<std::vector<std::string>>(config, "index_files");
|
||||
AssertInfo(index_files.has_value(),
|
||||
|
@ -453,6 +582,18 @@ BitmapIndex<T>::In(const size_t n, const T* values) {
|
|||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap res(total_num_rows_, false);
|
||||
|
||||
if (is_mmap_) {
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
auto val = values[i];
|
||||
auto it = bitmap_info_map_.find(val);
|
||||
if (it != bitmap_info_map_.end()) {
|
||||
for (const auto& v : AccessBitmap(it->second)) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
auto val = values[i];
|
||||
|
@ -479,6 +620,19 @@ const TargetBitmap
|
|||
BitmapIndex<T>::NotIn(const size_t n, const T* values) {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
|
||||
if (is_mmap_) {
|
||||
TargetBitmap res(total_num_rows_, true);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto val = values[i];
|
||||
auto it = bitmap_info_map_.find(val);
|
||||
if (it != bitmap_info_map_.end()) {
|
||||
for (const auto& v : AccessBitmap(it->second)) {
|
||||
res.reset(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
TargetBitmap res(total_num_rows_, true);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
|
@ -590,12 +744,76 @@ BitmapIndex<T>::RangeForBitset(const T value, const OpType op) {
|
|||
template <typename T>
|
||||
const TargetBitmap
|
||||
BitmapIndex<T>::Range(const T value, OpType op) {
|
||||
if (is_mmap_) {
|
||||
return std::move(RangeForMmap(value, op));
|
||||
}
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
return std::move(RangeForRoaring(value, op));
|
||||
} else {
|
||||
return std::move(RangeForBitset(value, op));
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
TargetBitmap
|
||||
BitmapIndex<T>::RangeForMmap(const T value, const OpType op) {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap res(total_num_rows_, false);
|
||||
if (ShouldSkip(value, value, op)) {
|
||||
return res;
|
||||
}
|
||||
auto lb = bitmap_info_map_.begin();
|
||||
auto ub = bitmap_info_map_.end();
|
||||
|
||||
switch (op) {
|
||||
case OpType::LessThan: {
|
||||
ub = std::lower_bound(bitmap_info_map_.begin(),
|
||||
bitmap_info_map_.end(),
|
||||
std::make_pair(value, TargetBitmap()),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
break;
|
||||
}
|
||||
case OpType::LessEqual: {
|
||||
ub = std::upper_bound(bitmap_info_map_.begin(),
|
||||
bitmap_info_map_.end(),
|
||||
std::make_pair(value, TargetBitmap()),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
break;
|
||||
}
|
||||
case OpType::GreaterThan: {
|
||||
lb = std::upper_bound(bitmap_info_map_.begin(),
|
||||
bitmap_info_map_.end(),
|
||||
std::make_pair(value, TargetBitmap()),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
break;
|
||||
}
|
||||
case OpType::GreaterEqual: {
|
||||
lb = std::lower_bound(bitmap_info_map_.begin(),
|
||||
bitmap_info_map_.end(),
|
||||
std::make_pair(value, TargetBitmap()),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo(OpTypeInvalid,
|
||||
fmt::format("Invalid OperatorType: {}", op));
|
||||
}
|
||||
}
|
||||
|
||||
for (; lb != ub; lb++) {
|
||||
for (const auto& v : AccessBitmap(lb->second)) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
TargetBitmap
|
||||
|
@ -721,6 +939,10 @@ BitmapIndex<T>::Range(const T lower_value,
|
|||
bool lb_inclusive,
|
||||
const T upper_value,
|
||||
bool ub_inclusive) {
|
||||
if (is_mmap_) {
|
||||
return RangeForMmap(
|
||||
lower_value, lb_inclusive, upper_value, ub_inclusive);
|
||||
}
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
return RangeForRoaring(
|
||||
lower_value, lb_inclusive, upper_value, ub_inclusive);
|
||||
|
@ -730,6 +952,65 @@ BitmapIndex<T>::Range(const T lower_value,
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
TargetBitmap
|
||||
BitmapIndex<T>::RangeForMmap(const T lower_value,
|
||||
bool lb_inclusive,
|
||||
const T upper_value,
|
||||
bool ub_inclusive) {
|
||||
AssertInfo(is_built_, "index has not been built");
|
||||
TargetBitmap res(total_num_rows_, false);
|
||||
if (lower_value > upper_value ||
|
||||
(lower_value == upper_value && !(lb_inclusive && ub_inclusive))) {
|
||||
return res;
|
||||
}
|
||||
if (ShouldSkip(lower_value, upper_value, OpType::Range)) {
|
||||
return res;
|
||||
}
|
||||
|
||||
auto lb = bitmap_info_map_.begin();
|
||||
auto ub = bitmap_info_map_.end();
|
||||
|
||||
if (lb_inclusive) {
|
||||
lb = std::lower_bound(bitmap_info_map_.begin(),
|
||||
bitmap_info_map_.end(),
|
||||
std::make_pair(lower_value, TargetBitmap()),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
} else {
|
||||
lb = std::upper_bound(bitmap_info_map_.begin(),
|
||||
bitmap_info_map_.end(),
|
||||
std::make_pair(lower_value, TargetBitmap()),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
}
|
||||
|
||||
if (ub_inclusive) {
|
||||
ub = std::upper_bound(bitmap_info_map_.begin(),
|
||||
bitmap_info_map_.end(),
|
||||
std::make_pair(upper_value, TargetBitmap()),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
} else {
|
||||
ub = std::lower_bound(bitmap_info_map_.begin(),
|
||||
bitmap_info_map_.end(),
|
||||
std::make_pair(upper_value, TargetBitmap()),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
}
|
||||
|
||||
for (; lb != ub; lb++) {
|
||||
for (const auto& v : AccessBitmap(lb->second)) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
TargetBitmap
|
||||
BitmapIndex<T>::RangeForRoaring(const T lower_value,
|
||||
|
@ -792,6 +1073,11 @@ BitmapIndex<T>::RangeForRoaring(const T lower_value,
|
|||
template <typename T>
|
||||
T
|
||||
BitmapIndex<T>::Reverse_Lookup_InCache(size_t idx) const {
|
||||
if (is_mmap_) {
|
||||
Assert(build_mode_ == BitmapIndexBuildMode::ROARING);
|
||||
return mmap_offsets_cache_[idx]->first;
|
||||
}
|
||||
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
return data_offsets_cache_[idx]->first;
|
||||
} else {
|
||||
|
@ -809,18 +1095,29 @@ BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
|
|||
return Reverse_Lookup_InCache(idx);
|
||||
}
|
||||
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
for (auto it = data_.begin(); it != data_.end(); it++) {
|
||||
for (const auto& v : it->second) {
|
||||
if (is_mmap_) {
|
||||
for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end();
|
||||
it++) {
|
||||
for (const auto& v : AccessBitmap(it->second)) {
|
||||
if (v == idx) {
|
||||
return it->first;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
|
||||
if (it->second[idx]) {
|
||||
return it->first;
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
for (auto it = data_.begin(); it != data_.end(); it++) {
|
||||
for (const auto& v : it->second) {
|
||||
if (v == idx) {
|
||||
return it->first;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
|
||||
if (it->second[idx]) {
|
||||
return it->first;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -873,6 +1170,15 @@ BitmapIndex<T>::ShouldSkip(const T lower_value,
|
|||
return should_skip;
|
||||
};
|
||||
|
||||
if (is_mmap_) {
|
||||
if (!bitmap_info_map_.empty()) {
|
||||
auto lower_bound = bitmap_info_map_.begin()->first;
|
||||
auto upper_bound = bitmap_info_map_.rbegin()->first;
|
||||
bool should_skip = skip(op, lower_bound, upper_bound);
|
||||
return should_skip;
|
||||
}
|
||||
}
|
||||
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
if (!data_.empty()) {
|
||||
auto lower_bound = data_.begin()->first;
|
||||
|
@ -906,6 +1212,19 @@ BitmapIndex<std::string>::Query(const DatasetPtr& dataset) {
|
|||
if (op == OpType::PrefixMatch) {
|
||||
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
|
||||
TargetBitmap res(total_num_rows_, false);
|
||||
if (is_mmap_) {
|
||||
for (auto it = bitmap_info_map_.begin();
|
||||
it != bitmap_info_map_.end();
|
||||
++it) {
|
||||
const auto& key = it->first;
|
||||
if (milvus::query::Match(key, prefix, op)) {
|
||||
for (const auto& v : AccessBitmap(it->second)) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
for (auto it = data_.begin(); it != data_.end(); ++it) {
|
||||
const auto& key = it->first;
|
||||
|
@ -943,6 +1262,18 @@ BitmapIndex<std::string>::RegexQuery(const std::string& regex_pattern) {
|
|||
AssertInfo(is_built_, "index has not been built");
|
||||
RegexMatcher matcher(regex_pattern);
|
||||
TargetBitmap res(total_num_rows_, false);
|
||||
if (is_mmap_) {
|
||||
for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end();
|
||||
++it) {
|
||||
const auto& key = it->first;
|
||||
if (matcher(key)) {
|
||||
for (const auto& v : AccessBitmap(it->second)) {
|
||||
res.set(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
for (auto it = data_.begin(); it != data_.end(); ++it) {
|
||||
const auto& key = it->first;
|
||||
|
|
|
@ -30,6 +30,11 @@
|
|||
namespace milvus {
|
||||
namespace index {
|
||||
|
||||
// Location of one serialized bitmap inside a larger buffer.
// Members default to zero so a default-constructed value is well defined;
// the type remains an aggregate, so `{offset, size}` initialization still
// works.
struct BitmapInfo {
    // Byte offset of the bitmap from the start of the buffer.
    size_t offset_{0};
    // Length of the serialized bitmap in bytes.
    size_t size_{0};
};
|
||||
|
||||
enum class BitmapIndexBuildMode {
|
||||
ROARING,
|
||||
BITSET,
|
||||
|
@ -46,7 +51,11 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
const storage::FileManagerContext& file_manager_context =
|
||||
storage::FileManagerContext());
|
||||
|
||||
~BitmapIndex() override = default;
|
||||
~BitmapIndex() {
|
||||
if (is_mmap_) {
|
||||
UnmapIndexData();
|
||||
}
|
||||
}
|
||||
|
||||
BinarySet
|
||||
Serialize(const Config& config) override;
|
||||
|
@ -146,6 +155,10 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
public:
|
||||
int64_t
|
||||
Cardinality() {
|
||||
if (is_mmap_) {
|
||||
return bitmap_info_map_.size();
|
||||
}
|
||||
|
||||
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
|
||||
return data_.size();
|
||||
} else {
|
||||
|
@ -172,6 +185,9 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
std::pair<size_t, size_t>
|
||||
DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size);
|
||||
|
||||
void
|
||||
DeserializeIndexDataForMmap(const char* data_ptr, size_t index_length);
|
||||
|
||||
void
|
||||
DeserializeIndexData(const uint8_t* data_ptr, size_t index_length);
|
||||
|
||||
|
@ -196,6 +212,9 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
TargetBitmap
|
||||
RangeForBitset(T value, OpType op);
|
||||
|
||||
TargetBitmap
|
||||
RangeForMmap(T value, OpType op);
|
||||
|
||||
TargetBitmap
|
||||
RangeForRoaring(T lower_bound_value,
|
||||
bool lb_inclusive,
|
||||
|
@ -208,12 +227,35 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
T upper_bound_value,
|
||||
bool ub_inclusive);
|
||||
|
||||
TargetBitmap
|
||||
RangeForMmap(T lower_bound_value,
|
||||
bool lb_inclusive,
|
||||
T upper_bound_value,
|
||||
bool ub_inclusive);
|
||||
|
||||
void
|
||||
MMapIndexData(const std::string& filepath,
|
||||
const uint8_t* data,
|
||||
size_t data_size,
|
||||
size_t index_length);
|
||||
|
||||
// Decodes the bitmap described by `info` from the mapped index data and
// returns it by value.
//
// Uses readSafe(), whose second argument is the number of bytes that may be
// read. The two-argument form of read() takes a `bool portable` flag there,
// so passing the byte size to it did not bound the read at all.
roaring::Roaring
AccessBitmap(const BitmapInfo& info) const {
    return roaring::Roaring::readSafe(mmap_data_ + info.offset_, info.size_);
}
|
||||
|
||||
void
|
||||
UnmapIndexData();
|
||||
|
||||
public:
|
||||
bool is_built_{false};
|
||||
Config config_;
|
||||
BitmapIndexBuildMode build_mode_;
|
||||
std::map<T, roaring::Roaring> data_;
|
||||
std::map<T, TargetBitmap> bitsets_;
|
||||
bool is_mmap_{false};
|
||||
char* mmap_data_;
|
||||
int64_t mmap_size_;
|
||||
std::map<T, BitmapInfo> bitmap_info_map_;
|
||||
size_t total_num_rows_{0};
|
||||
proto::schema::FieldSchema schema_;
|
||||
bool use_offset_cache_{false};
|
||||
|
@ -221,6 +263,7 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
data_offsets_cache_;
|
||||
std::vector<typename std::map<T, TargetBitmap>::iterator>
|
||||
bitsets_offsets_cache_;
|
||||
std::vector<typename std::map<T, BitmapInfo>::iterator> mmap_offsets_cache_;
|
||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||
|
||||
// generate valid_bitset to speed up NotIn and IsNull and IsNotNull operate
|
||||
|
|
|
@ -30,7 +30,8 @@ namespace index {
|
|||
template <typename T>
|
||||
HybridScalarIndex<T>::HybridScalarIndex(
|
||||
const storage::FileManagerContext& file_manager_context)
|
||||
: is_built_(false),
|
||||
: ScalarIndex<T>(HYBRID_INDEX_TYPE),
|
||||
is_built_(false),
|
||||
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
|
||||
file_manager_context_(file_manager_context) {
|
||||
if (file_manager_context.Valid()) {
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "knowhere/dataset.h"
|
||||
#include "common/Tracer.h"
|
||||
#include "common/Types.h"
|
||||
#include "index/Meta.h"
|
||||
|
||||
namespace milvus::index {
|
||||
|
||||
|
@ -73,7 +74,10 @@ class IndexBase {
|
|||
index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP ||
|
||||
index_type_ ==
|
||||
knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
|
||||
index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND;
|
||||
index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND ||
|
||||
// support mmap for bitmap/hybrid index
|
||||
index_type_ == milvus::index::BITMAP_INDEX_TYPE ||
|
||||
index_type_ == milvus::index::HYBRID_INDEX_TYPE;
|
||||
}
|
||||
|
||||
const IndexType&
|
||||
|
|
|
@ -71,7 +71,8 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
|
|||
template <typename T>
|
||||
InvertedIndexTantivy<T>::InvertedIndexTantivy(
|
||||
const storage::FileManagerContext& ctx)
|
||||
: schema_(ctx.fieldDataMeta.field_schema) {
|
||||
: ScalarIndex<T>(INVERTED_INDEX_TYPE),
|
||||
schema_(ctx.fieldDataMeta.field_schema) {
|
||||
mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
|
||||
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
|
||||
auto field =
|
||||
|
|
|
@ -35,7 +35,9 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
using DiskFileManager = storage::DiskFileManagerImpl;
|
||||
using DiskFileManagerPtr = std::shared_ptr<DiskFileManager>;
|
||||
|
||||
InvertedIndexTantivy() = default;
|
||||
InvertedIndexTantivy() : ScalarIndex<T>(INVERTED_INDEX_TYPE) {
|
||||
}
|
||||
|
||||
explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx);
|
||||
|
||||
~InvertedIndexTantivy();
|
||||
|
|
|
@ -60,6 +60,9 @@ ToString(ScalarIndexType type) {
|
|||
template <typename T>
|
||||
class ScalarIndex : public IndexBase {
|
||||
public:
|
||||
ScalarIndex(const std::string& index_type) : IndexBase(index_type) {
|
||||
}
|
||||
|
||||
void
|
||||
BuildWithRawData(size_t n,
|
||||
const void* values,
|
||||
|
|
|
@ -36,7 +36,7 @@ namespace milvus::index {
|
|||
template <typename T>
|
||||
ScalarIndexSort<T>::ScalarIndexSort(
|
||||
const storage::FileManagerContext& file_manager_context)
|
||||
: is_built_(false), data_() {
|
||||
: ScalarIndex<T>(ASCENDING_SORT), is_built_(false), data_() {
|
||||
if (file_manager_context.Valid()) {
|
||||
file_manager_ =
|
||||
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
|
||||
|
|
|
@ -29,6 +29,10 @@ namespace milvus::index {
|
|||
|
||||
class StringIndex : public ScalarIndex<std::string> {
|
||||
public:
|
||||
StringIndex(const std::string& index_type)
|
||||
: ScalarIndex<std::string>(index_type) {
|
||||
}
|
||||
|
||||
const TargetBitmap
|
||||
Query(const DatasetPtr& dataset) override {
|
||||
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
|
||||
|
|
|
@ -40,7 +40,8 @@
|
|||
namespace milvus::index {
|
||||
|
||||
StringIndexMarisa::StringIndexMarisa(
|
||||
const storage::FileManagerContext& file_manager_context) {
|
||||
const storage::FileManagerContext& file_manager_context)
|
||||
: StringIndex(MARISA_TRIE) {
|
||||
if (file_manager_context.Valid()) {
|
||||
file_manager_ =
|
||||
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
|
||||
|
|
|
@ -31,6 +31,9 @@ namespace milvus::storage {
|
|||
struct FileManagerContext {
|
||||
FileManagerContext() : chunkManagerPtr(nullptr) {
|
||||
}
|
||||
FileManagerContext(const ChunkManagerPtr& chunkManagerPtr)
|
||||
: chunkManagerPtr(chunkManagerPtr) {
|
||||
}
|
||||
FileManagerContext(const FieldDataMeta& fieldDataMeta,
|
||||
const IndexMeta& indexMeta,
|
||||
const ChunkManagerPtr& chunkManagerPtr)
|
||||
|
|
|
@ -105,7 +105,7 @@ class BitmapIndexTest : public testing::Test {
|
|||
auto serialized_bytes = insert_data.Serialize(storage::Remote);
|
||||
|
||||
auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}",
|
||||
"/tmp/test_bitmap/",
|
||||
"/tmp/test-bitmap-index/",
|
||||
collection_id,
|
||||
partition_id,
|
||||
segment_id,
|
||||
|
@ -137,6 +137,16 @@ class BitmapIndexTest : public testing::Test {
|
|||
|
||||
config["index_files"] = index_files;
|
||||
|
||||
if (is_mmap_) {
|
||||
config["enable_mmap"] = "true";
|
||||
config["mmap_filepath"] = fmt::format("/{}/{}/{}/{}/{}",
|
||||
"/tmp/test-bitmap-index/",
|
||||
collection_id,
|
||||
1,
|
||||
segment_id,
|
||||
field_id);
|
||||
;
|
||||
}
|
||||
index_ =
|
||||
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
|
||||
index_->Load(milvus::tracer::TraceContext{}, config);
|
||||
|
@ -247,7 +257,7 @@ class BitmapIndexTest : public testing::Test {
|
|||
auto should = ref(i);
|
||||
ASSERT_EQ(ans, should)
|
||||
<< "op: " << op << ", @" << i << ", ans: " << ans
|
||||
<< ", ref: " << should;
|
||||
<< ", ref: " << should << "|" << data_[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -318,6 +328,7 @@ class BitmapIndexTest : public testing::Test {
|
|||
DataType type_;
|
||||
size_t nb_;
|
||||
size_t cardinality_;
|
||||
bool is_mmap_ = false;
|
||||
boost::container::vector<T> data_;
|
||||
std::shared_ptr<storage::ChunkManager> chunk_manager_;
|
||||
};
|
||||
|
@ -400,4 +411,55 @@ REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTestV2,
|
|||
|
||||
INSTANTIATE_TYPED_TEST_SUITE_P(BitmapIndexE2ECheck_HighCardinality,
|
||||
BitmapIndexTestV2,
|
||||
BitmapType);
|
||||
|
||||
template <typename T>
|
||||
class BitmapIndexTestV3 : public BitmapIndexTest<T> {
|
||||
public:
|
||||
virtual void
|
||||
SetParam() override {
|
||||
this->nb_ = 10000;
|
||||
this->cardinality_ = 2000;
|
||||
this->is_mmap_ = true;
|
||||
}
|
||||
|
||||
virtual ~BitmapIndexTestV3() {
|
||||
}
|
||||
};
|
||||
|
||||
TYPED_TEST_SUITE_P(BitmapIndexTestV3);
|
||||
|
||||
TYPED_TEST_P(BitmapIndexTestV3, CountFuncTest) {
|
||||
auto count = this->index_->Count();
|
||||
EXPECT_EQ(count, this->nb_);
|
||||
}
|
||||
|
||||
TYPED_TEST_P(BitmapIndexTestV3, INFuncTest) {
|
||||
this->TestInFunc();
|
||||
}
|
||||
|
||||
TYPED_TEST_P(BitmapIndexTestV3, NotINFuncTest) {
|
||||
this->TestNotInFunc();
|
||||
}
|
||||
|
||||
TYPED_TEST_P(BitmapIndexTestV3, CompareValFuncTest) {
|
||||
this->TestCompareValueFunc();
|
||||
}
|
||||
|
||||
TYPED_TEST_P(BitmapIndexTestV3, TestRangeCompareFuncTest) {
|
||||
this->TestRangeCompareFunc();
|
||||
}
|
||||
|
||||
using BitmapType =
|
||||
testing::Types<int8_t, int16_t, int32_t, int64_t, std::string>;
|
||||
|
||||
REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTestV3,
|
||||
CountFuncTest,
|
||||
INFuncTest,
|
||||
NotINFuncTest,
|
||||
CompareValFuncTest,
|
||||
TestRangeCompareFuncTest);
|
||||
|
||||
INSTANTIATE_TYPED_TEST_SUITE_P(BitmapIndexE2ECheck_Mmap,
|
||||
BitmapIndexTestV3,
|
||||
BitmapType);
|
|
@ -27,6 +27,7 @@
|
|||
#include <boost/filesystem.hpp>
|
||||
#include "test_utils/storage_test_utils.h"
|
||||
#include "test_utils/TmpPath.h"
|
||||
#include "storage/Util.h"
|
||||
|
||||
constexpr int64_t nb = 100;
|
||||
namespace indexcgo = milvus::proto::indexcgo;
|
||||
|
@ -55,7 +56,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Dummy) {
|
|||
|
||||
auto
|
||||
GetTempFileManagerCtx(CDataType data_type) {
|
||||
auto ctx = milvus::storage::FileManagerContext();
|
||||
milvus::storage::StorageConfig storage_config;
|
||||
storage_config.storage_type = "local";
|
||||
storage_config.root_path = "/tmp/local/";
|
||||
auto chunk_manager = milvus::storage::CreateChunkManager(storage_config);
|
||||
auto ctx = milvus::storage::FileManagerContext(chunk_manager);
|
||||
ctx.fieldDataMeta.field_schema.set_data_type(
|
||||
static_cast<milvus::proto::schema::DataType>(data_type));
|
||||
return ctx;
|
||||
|
|
|
@ -82,7 +82,9 @@ func IsDiskIndex(indexType IndexType) bool {
|
|||
}
|
||||
|
||||
func IsScalarMmapIndex(indexType IndexType) bool {
|
||||
return indexType == IndexINVERTED
|
||||
return indexType == IndexINVERTED ||
|
||||
indexType == IndexBitmap ||
|
||||
indexType == IndexHybrid
|
||||
}
|
||||
|
||||
func ValidateMmapIndexParams(indexType IndexType, indexParams map[string]string) error {
|
||||
|
@ -110,7 +112,7 @@ func ValidateOffsetCacheIndexParams(indexType IndexType, indexParams map[string]
|
|||
if err != nil {
|
||||
return fmt.Errorf("invalid %s value: %s, expected: true, false", common.IndexOffsetCacheEnabledKey, offsetCacheEnable)
|
||||
}
|
||||
if enable && IsOffsetCacheSupported(indexType) {
|
||||
if enable && !IsOffsetCacheSupported(indexType) {
|
||||
return fmt.Errorf("only bitmap index support %s now", common.IndexOffsetCacheEnabledKey)
|
||||
}
|
||||
return nil
|
||||
|
|
Loading…
Reference in New Issue