enhance: support bitmap mmap (#35399)

#32900

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
pull/35808/head
zhagnlu 2024-08-27 16:34:59 +08:00 committed by GitHub
parent 3a0c61a455
commit 4d2f96c760
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 499 additions and 36 deletions

View File

@ -35,6 +35,7 @@
#include "simdjson/error.h"
#include "simdjson/padded_string.h"
#include "rapidjson/document.h"
#include "rapidjson/error/en.h"
#include "rapidjson/writer.h"
#include "rapidjson/stringbuffer.h"
@ -49,7 +50,7 @@ ExtractSubJson(const std::string& json, const std::vector<std::string>& keys) {
if (doc.HasParseError()) {
PanicInfo(ErrorCode::UnexpectedError,
"json parse failed, error:{}",
doc.GetParseError());
rapidjson::GetParseError_En(doc.GetParseError()));
}
rapidjson::Document result_doc;

View File

@ -15,10 +15,14 @@
// limitations under the License.
#include <algorithm>
#include <boost/algorithm/string.hpp>
#include <sys/errno.h>
#include <unistd.h>
#include <yaml-cpp/yaml.h>
#include "index/BitmapIndex.h"
#include "common/File.h"
#include "common/Slice.h"
#include "common/Common.h"
#include "index/Meta.h"
@ -33,8 +37,10 @@ namespace index {
template <typename T>
BitmapIndex<T>::BitmapIndex(
const storage::FileManagerContext& file_manager_context)
: is_built_(false),
schema_(file_manager_context.fieldDataMeta.field_schema) {
: ScalarIndex<T>(BITMAP_INDEX_TYPE),
is_built_(false),
schema_(file_manager_context.fieldDataMeta.field_schema),
is_mmap_(false) {
if (file_manager_context.Valid()) {
file_manager_ =
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
@ -42,6 +48,19 @@ BitmapIndex<T>::BitmapIndex(
}
}
/// Releases the memory mapping that backs an mmap-loaded bitmap index.
///
/// Does nothing when no mapping is held (`mmap_data_` is null or MAP_FAILED).
/// A failing munmap() is reported through the log instead of an assertion,
/// because this routine is invoked from the destructor and must not throw.
/// The bookkeeping fields are reset in either case.
template <typename T>
void
BitmapIndex<T>::UnmapIndexData() {
    if (mmap_data_ == nullptr || mmap_data_ == MAP_FAILED) {
        return;
    }
    if (munmap(mmap_data_, mmap_size_) != 0) {
        // The previous `AssertInfo(true, ...)` could never trigger, so the
        // failure was silently discarded; make it visible.
        LOG_WARN("failed to unmap bitmap index, err={}", strerror(errno));
    }
    mmap_data_ = nullptr;
    mmap_size_ = 0;
}
template <typename T>
void
BitmapIndex<T>::Build(const Config& config) {
@ -348,20 +367,30 @@ BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
template <typename T>
void
BitmapIndex<T>::BuildOffsetCache() {
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
data_offsets_cache_.resize(total_num_rows_);
for (auto it = data_.begin(); it != data_.end(); it++) {
for (const auto& v : it->second) {
data_offsets_cache_[v] = it;
if (is_mmap_) {
mmap_offsets_cache_.resize(total_num_rows_);
for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end();
++it) {
for (const auto& v : AccessBitmap(it->second)) {
mmap_offsets_cache_[v] = it;
}
}
} else {
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
data_offsets_cache_.resize(total_num_rows_);
for (auto it = data_.begin(); it != data_.end(); it++) {
for (const auto& v : it->second) {
data_offsets_cache_[v] = it;
}
}
} else {
bitsets_offsets_cache_.resize(total_num_rows_);
const auto& bits = it->second;
for (int i = 0; i < bits.size(); i++) {
if (bits[i]) {
bitsets_offsets_cache_[i] = it;
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
const auto& bits = it->second;
for (int i = 0; i < bits.size(); i++) {
if (bits[i]) {
bitsets_offsets_cache_[i] = it;
}
}
}
}
@ -398,6 +427,83 @@ BitmapIndex<std::string>::DeserializeIndexData(const uint8_t* data_ptr,
}
}
/// Walks `index_length` serialized (key, roaring bitmap) entries starting at
/// `data_ptr` and records, for every key, where its bitmap sits relative to
/// `mmap_data_` and how many bytes it occupies. The decoded bitmap itself is
/// discarded; only its location is stored in `bitmap_info_map_`.
template <typename T>
void
BitmapIndex<T>::DeserializeIndexDataForMmap(const char* data_ptr,
                                            size_t index_length) {
    const char* cursor = data_ptr;
    for (size_t entry = 0; entry < index_length; ++entry) {
        // Fixed-width key stored as raw bytes.
        T key;
        memcpy(&key, cursor, sizeof(T));
        cursor += sizeof(T);

        // Decode the bitmap only to learn its serialized length.
        const roaring::Roaring bitmap = roaring::Roaring::read(cursor);
        const auto bitmap_bytes = bitmap.getSizeInBytes();

        bitmap_info_map_[key] = {static_cast<size_t>(cursor - mmap_data_),
                                 bitmap_bytes};
        cursor += bitmap_bytes;
    }
}
/// String-key variant: every entry is laid out as a `size_t` key length, the
/// key bytes, then the serialized roaring bitmap. For each key the bitmap's
/// offset (relative to `mmap_data_`) and byte size are stored in
/// `bitmap_info_map_`; the decoded bitmap itself is discarded.
template <>
void
BitmapIndex<std::string>::DeserializeIndexDataForMmap(const char* data_ptr,
                                                      size_t index_length) {
    const char* cursor = data_ptr;
    for (size_t entry = 0; entry < index_length; ++entry) {
        // Length-prefixed key.
        size_t key_len;
        memcpy(&key_len, cursor, sizeof(size_t));
        cursor += sizeof(size_t);
        std::string key(cursor, key_len);
        cursor += key_len;

        // Decode the bitmap only to learn its serialized length.
        const roaring::Roaring bitmap = roaring::Roaring::read(cursor);
        const auto bitmap_bytes = bitmap.getSizeInBytes();

        bitmap_info_map_[key] = {static_cast<size_t>(cursor - mmap_data_),
                                 bitmap_bytes};
        cursor += bitmap_bytes;
    }
}
/// Spills the serialized index bytes to `file_name`, maps that file
/// read-only into memory and indexes the bitmaps it contains.
///
/// @param file_name     Path of the backing file; parent directories are
///                      created as needed and the file is unlinked once the
///                      mapping exists.
/// @param data_ptr      Serialized index data to write.
/// @param data_size     Number of bytes at `data_ptr`.
/// @param index_length  Number of (key, bitmap) entries in the data.
///
/// Panics (via PanicInfo) when the write is short or mmap() fails; the
/// backing file is removed in both cases.
template <typename T>
void
BitmapIndex<T>::MMapIndexData(const std::string& file_name,
                              const uint8_t* data_ptr,
                              size_t data_size,
                              size_t index_length) {
    std::filesystem::create_directories(
        std::filesystem::path(file_name).parent_path());

    auto file = File::Open(file_name, O_RDWR | O_CREAT | O_TRUNC);
    auto written = file.Write(data_ptr, data_size);
    if (written != data_size) {
        // Snapshot errno before cleanup: Close()/remove() may overwrite it.
        const int write_errno = errno;
        file.Close();
        remove(file_name.c_str());
        PanicInfo(ErrorCode::UnistdError,
                  fmt::format("write index to fd error: {}",
                              strerror(write_errno)));
    }

    file.Seek(0, SEEK_SET);
    mmap_data_ = static_cast<char*>(mmap(
        nullptr, data_size, PROT_READ, MAP_PRIVATE, file.Descriptor(), 0));
    if (mmap_data_ == MAP_FAILED) {
        const int mmap_errno = errno;
        // Do not leave the MAP_FAILED sentinel behind in the member.
        mmap_data_ = nullptr;
        file.Close();
        remove(file_name.c_str());
        PanicInfo(ErrorCode::UnexpectedError,
                  "failed to mmap: {}",
                  strerror(mmap_errno));
    }

    mmap_size_ = data_size;
    // Drop the directory entry right away; the data is accessed through the
    // mapping from here on.
    unlink(file_name.c_str());

    char* ptr = mmap_data_;
    DeserializeIndexDataForMmap(ptr, index_length);
    is_mmap_ = true;
}
template <typename T>
void
BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
@ -413,15 +519,37 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
valid_bitset = TargetBitmap(total_num_rows_, false);
auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA);
DeserializeIndexData(index_data_buffer->data.get(), index_length);
ChooseIndexLoadMode(index_length);
// only using mmap when build mode is raw roaring bitmap
if (config.contains(MMAP_FILE_PATH) &&
build_mode_ == BitmapIndexBuildMode::ROARING) {
auto mmap_filepath =
GetValueFromConfig<std::string>(config, MMAP_FILE_PATH);
AssertInfo(mmap_filepath.has_value(),
"mmap filepath is empty when load index");
MMapIndexData(mmap_filepath.value(),
index_data_buffer->data.get(),
index_data_buffer->size,
index_length);
} else {
DeserializeIndexData(index_data_buffer->data.get(), index_length);
}
if (enable_offset_cache.has_value() && enable_offset_cache.value()) {
BuildOffsetCache();
}
LOG_INFO("load bitmap index with cardinality = {}, num_rows = {}",
Cardinality(),
total_num_rows_);
auto file_index_meta = file_manager_->GetIndexMeta();
LOG_INFO(
"load bitmap index with cardinality = {}, num_rows = {} for segment_id "
"= {}, field_id = {}, mmap = {}",
Cardinality(),
total_num_rows_,
file_index_meta.segment_id,
file_index_meta.field_id,
is_mmap_);
is_built_ = true;
}
@ -429,6 +557,7 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
template <typename T>
void
BitmapIndex<T>::Load(milvus::tracer::TraceContext ctx, const Config& config) {
LOG_DEBUG("load bitmap index with config {}", config.dump());
auto index_files =
GetValueFromConfig<std::vector<std::string>>(config, "index_files");
AssertInfo(index_files.has_value(),
@ -453,6 +582,18 @@ BitmapIndex<T>::In(const size_t n, const T* values) {
AssertInfo(is_built_, "index has not been built");
TargetBitmap res(total_num_rows_, false);
if (is_mmap_) {
for (size_t i = 0; i < n; ++i) {
auto val = values[i];
auto it = bitmap_info_map_.find(val);
if (it != bitmap_info_map_.end()) {
for (const auto& v : AccessBitmap(it->second)) {
res.set(v);
}
}
}
return res;
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
for (size_t i = 0; i < n; ++i) {
auto val = values[i];
@ -479,6 +620,19 @@ const TargetBitmap
BitmapIndex<T>::NotIn(const size_t n, const T* values) {
AssertInfo(is_built_, "index has not been built");
if (is_mmap_) {
TargetBitmap res(total_num_rows_, true);
for (int i = 0; i < n; ++i) {
auto val = values[i];
auto it = bitmap_info_map_.find(val);
if (it != bitmap_info_map_.end()) {
for (const auto& v : AccessBitmap(it->second)) {
res.reset(v);
}
}
}
return res;
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
TargetBitmap res(total_num_rows_, true);
for (int i = 0; i < n; ++i) {
@ -590,12 +744,76 @@ BitmapIndex<T>::RangeForBitset(const T value, const OpType op) {
/// Single-bound range query: dispatches to the implementation matching how
/// the index is held (mmap-backed, roaring bitmaps, or plain bitsets).
///
/// @param value  The bound to compare keys against.
/// @param op     Comparison operator, forwarded unchanged to the selected
///               implementation.
/// @return Bitmap produced by the selected implementation.
template <typename T>
const TargetBitmap
BitmapIndex<T>::Range(const T value, OpType op) {
    // The helpers return by value; returning the call expression directly
    // lets the compiler elide the copy (wrapping it in std::move prevents
    // that).
    if (is_mmap_) {
        return RangeForMmap(value, op);
    }
    if (build_mode_ == BitmapIndexBuildMode::ROARING) {
        return RangeForRoaring(value, op);
    }
    return RangeForBitset(value, op);
}
/// Single-bound range query over the mmap-backed index.
///
/// Selects the contiguous run of keys in `bitmap_info_map_` that satisfy
/// `key <op> value` and sets, in the result, every row id contained in the
/// bitmaps of those keys.
///
/// @param value  The bound to compare keys against.
/// @param op     One of LessThan, LessEqual, GreaterThan, GreaterEqual; any
///               other operator panics with OpTypeInvalid.
/// @return Bitmap of `total_num_rows_` bits.
template <typename T>
TargetBitmap
BitmapIndex<T>::RangeForMmap(const T value, const OpType op) {
    AssertInfo(is_built_, "index has not been built");
    TargetBitmap res(total_num_rows_, false);
    if (ShouldSkip(value, value, op)) {
        return res;
    }

    // Use the map's own ordered lookups: they are logarithmic, whereas the
    // generic std::lower_bound/upper_bound must step through the map's
    // bidirectional iterators one by one.
    auto lb = bitmap_info_map_.begin();
    auto ub = bitmap_info_map_.end();
    switch (op) {
        case OpType::LessThan: {
            ub = bitmap_info_map_.lower_bound(value);
            break;
        }
        case OpType::LessEqual: {
            ub = bitmap_info_map_.upper_bound(value);
            break;
        }
        case OpType::GreaterThan: {
            lb = bitmap_info_map_.upper_bound(value);
            break;
        }
        case OpType::GreaterEqual: {
            lb = bitmap_info_map_.lower_bound(value);
            break;
        }
        default: {
            PanicInfo(OpTypeInvalid,
                      fmt::format("Invalid OperatorType: {}", op));
        }
    }

    for (; lb != ub; ++lb) {
        for (const auto& v : AccessBitmap(lb->second)) {
            res.set(v);
        }
    }
    return res;
}
template <typename T>
TargetBitmap
@ -721,6 +939,10 @@ BitmapIndex<T>::Range(const T lower_value,
bool lb_inclusive,
const T upper_value,
bool ub_inclusive) {
if (is_mmap_) {
return RangeForMmap(
lower_value, lb_inclusive, upper_value, ub_inclusive);
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
return RangeForRoaring(
lower_value, lb_inclusive, upper_value, ub_inclusive);
@ -730,6 +952,65 @@ BitmapIndex<T>::Range(const T lower_value,
}
}
/// Two-sided range query over the mmap-backed index.
///
/// Sets, in the result, every row id whose key lies between `lower_value`
/// and `upper_value`, with each end included or excluded according to the
/// corresponding flag.
///
/// @param lower_value   Lower bound of the key range.
/// @param lb_inclusive  Whether `lower_value` itself is part of the range.
/// @param upper_value   Upper bound of the key range.
/// @param ub_inclusive  Whether `upper_value` itself is part of the range.
/// @return Bitmap of `total_num_rows_` bits; all zero for an empty range.
template <typename T>
TargetBitmap
BitmapIndex<T>::RangeForMmap(const T lower_value,
                             bool lb_inclusive,
                             const T upper_value,
                             bool ub_inclusive) {
    AssertInfo(is_built_, "index has not been built");
    TargetBitmap res(total_num_rows_, false);
    // Reject inverted ranges and single-point ranges with an open end.
    if (lower_value > upper_value ||
        (lower_value == upper_value && !(lb_inclusive && ub_inclusive))) {
        return res;
    }
    if (ShouldSkip(lower_value, upper_value, OpType::Range)) {
        return res;
    }

    // The map's member lookups are logarithmic; the generic algorithms would
    // have to step through the bidirectional iterators linearly.
    auto lb = lb_inclusive ? bitmap_info_map_.lower_bound(lower_value)
                           : bitmap_info_map_.upper_bound(lower_value);
    auto ub = ub_inclusive ? bitmap_info_map_.upper_bound(upper_value)
                           : bitmap_info_map_.lower_bound(upper_value);

    for (; lb != ub; ++lb) {
        for (const auto& v : AccessBitmap(lb->second)) {
            res.set(v);
        }
    }
    return res;
}
template <typename T>
TargetBitmap
BitmapIndex<T>::RangeForRoaring(const T lower_value,
@ -792,6 +1073,11 @@ BitmapIndex<T>::RangeForRoaring(const T lower_value,
template <typename T>
T
BitmapIndex<T>::Reverse_Lookup_InCache(size_t idx) const {
if (is_mmap_) {
Assert(build_mode_ == BitmapIndexBuildMode::ROARING);
return mmap_offsets_cache_[idx]->first;
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
return data_offsets_cache_[idx]->first;
} else {
@ -809,18 +1095,29 @@ BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
return Reverse_Lookup_InCache(idx);
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
for (auto it = data_.begin(); it != data_.end(); it++) {
for (const auto& v : it->second) {
if (is_mmap_) {
for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end();
it++) {
for (const auto& v : AccessBitmap(it->second)) {
if (v == idx) {
return it->first;
}
}
}
} else {
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
if (it->second[idx]) {
return it->first;
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
for (auto it = data_.begin(); it != data_.end(); it++) {
for (const auto& v : it->second) {
if (v == idx) {
return it->first;
}
}
}
} else {
for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) {
if (it->second[idx]) {
return it->first;
}
}
}
}
@ -873,6 +1170,15 @@ BitmapIndex<T>::ShouldSkip(const T lower_value,
return should_skip;
};
if (is_mmap_) {
if (!bitmap_info_map_.empty()) {
auto lower_bound = bitmap_info_map_.begin()->first;
auto upper_bound = bitmap_info_map_.rbegin()->first;
bool should_skip = skip(op, lower_bound, upper_bound);
return should_skip;
}
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
if (!data_.empty()) {
auto lower_bound = data_.begin()->first;
@ -906,6 +1212,19 @@ BitmapIndex<std::string>::Query(const DatasetPtr& dataset) {
if (op == OpType::PrefixMatch) {
auto prefix = dataset->Get<std::string>(PREFIX_VALUE);
TargetBitmap res(total_num_rows_, false);
if (is_mmap_) {
for (auto it = bitmap_info_map_.begin();
it != bitmap_info_map_.end();
++it) {
const auto& key = it->first;
if (milvus::query::Match(key, prefix, op)) {
for (const auto& v : AccessBitmap(it->second)) {
res.set(v);
}
}
}
return res;
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
for (auto it = data_.begin(); it != data_.end(); ++it) {
const auto& key = it->first;
@ -943,6 +1262,18 @@ BitmapIndex<std::string>::RegexQuery(const std::string& regex_pattern) {
AssertInfo(is_built_, "index has not been built");
RegexMatcher matcher(regex_pattern);
TargetBitmap res(total_num_rows_, false);
if (is_mmap_) {
for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end();
++it) {
const auto& key = it->first;
if (matcher(key)) {
for (const auto& v : AccessBitmap(it->second)) {
res.set(v);
}
}
}
return res;
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
for (auto it = data_.begin(); it != data_.end(); ++it) {
const auto& key = it->first;

View File

@ -30,6 +30,11 @@
namespace milvus {
namespace index {
/// Location of one serialized roaring bitmap inside the mmap-ed index data.
struct BitmapInfo {
    // Byte offset of the bitmap, measured from the start of the mapping.
    size_t offset_{0};
    // Serialized length of the bitmap in bytes.
    size_t size_{0};
};
enum class BitmapIndexBuildMode {
ROARING,
BITSET,
@ -46,7 +51,11 @@ class BitmapIndex : public ScalarIndex<T> {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());
~BitmapIndex() override = default;
// Releases the mapped index data when the index was loaded through mmap.
// `override` keeps the compiler checking that the base destructor is
// virtual, as the defaulted declaration this replaces did.
~BitmapIndex() override {
    if (is_mmap_) {
        UnmapIndexData();
    }
}
BinarySet
Serialize(const Config& config) override;
@ -146,6 +155,10 @@ class BitmapIndex : public ScalarIndex<T> {
public:
int64_t
Cardinality() {
if (is_mmap_) {
return bitmap_info_map_.size();
}
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
return data_.size();
} else {
@ -172,6 +185,9 @@ class BitmapIndex : public ScalarIndex<T> {
std::pair<size_t, size_t>
DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size);
void
DeserializeIndexDataForMmap(const char* data_ptr, size_t index_length);
void
DeserializeIndexData(const uint8_t* data_ptr, size_t index_length);
@ -196,6 +212,9 @@ class BitmapIndex : public ScalarIndex<T> {
TargetBitmap
RangeForBitset(T value, OpType op);
TargetBitmap
RangeForMmap(T value, OpType op);
TargetBitmap
RangeForRoaring(T lower_bound_value,
bool lb_inclusive,
@ -208,12 +227,35 @@ class BitmapIndex : public ScalarIndex<T> {
T upper_bound_value,
bool ub_inclusive);
TargetBitmap
RangeForMmap(T lower_bound_value,
bool lb_inclusive,
T upper_bound_value,
bool ub_inclusive);
void
MMapIndexData(const std::string& filepath,
const uint8_t* data,
size_t data_size,
size_t index_length);
// Decodes the roaring bitmap described by `info` from the mapped index data.
// The result is a freshly deserialized copy, returned by value.
roaring::Roaring
AccessBitmap(const BitmapInfo& info) const {
    // Roaring::read's second parameter is `bool portable`, so passing the
    // byte count there only toggled the format flag. readSafe takes the
    // length and refuses to read past it.
    return roaring::Roaring::readSafe(mmap_data_ + info.offset_, info.size_);
}
void
UnmapIndexData();
public:
bool is_built_{false};
Config config_;
BitmapIndexBuildMode build_mode_;
std::map<T, roaring::Roaring> data_;
std::map<T, TargetBitmap> bitsets_;
// True once the index data has been mapped from a file.
bool is_mmap_{false};
// Start of the mapped region; null while nothing is mapped.
char* mmap_data_{nullptr};
// Length in bytes of the mapped region.
int64_t mmap_size_{0};
// Per-key location of the serialized bitmap inside the mapped region.
std::map<T, BitmapInfo> bitmap_info_map_;
size_t total_num_rows_{0};
proto::schema::FieldSchema schema_;
bool use_offset_cache_{false};
@ -221,6 +263,7 @@ class BitmapIndex : public ScalarIndex<T> {
data_offsets_cache_;
std::vector<typename std::map<T, TargetBitmap>::iterator>
bitsets_offsets_cache_;
std::vector<typename std::map<T, BitmapInfo>::iterator> mmap_offsets_cache_;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
// generate valid_bitset to speed up NotIn and IsNull and IsNotNull operate

View File

@ -30,7 +30,8 @@ namespace index {
template <typename T>
HybridScalarIndex<T>::HybridScalarIndex(
const storage::FileManagerContext& file_manager_context)
: is_built_(false),
: ScalarIndex<T>(HYBRID_INDEX_TYPE),
is_built_(false),
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
file_manager_context_(file_manager_context) {
if (file_manager_context.Valid()) {

View File

@ -24,6 +24,7 @@
#include "knowhere/dataset.h"
#include "common/Tracer.h"
#include "common/Types.h"
#include "index/Meta.h"
namespace milvus::index {
@ -73,7 +74,10 @@ class IndexBase {
index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP ||
index_type_ ==
knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND;
index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND ||
// support mmap for bitmap/hybrid index
index_type_ == milvus::index::BITMAP_INDEX_TYPE ||
index_type_ == milvus::index::HYBRID_INDEX_TYPE;
}
const IndexType&

View File

@ -71,7 +71,8 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
template <typename T>
InvertedIndexTantivy<T>::InvertedIndexTantivy(
const storage::FileManagerContext& ctx)
: schema_(ctx.fieldDataMeta.field_schema) {
: ScalarIndex<T>(INVERTED_INDEX_TYPE),
schema_(ctx.fieldDataMeta.field_schema) {
mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
auto field =

View File

@ -35,7 +35,9 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
using DiskFileManager = storage::DiskFileManagerImpl;
using DiskFileManagerPtr = std::shared_ptr<DiskFileManager>;
InvertedIndexTantivy() = default;
// Default construction only tags the base class with the inverted index type.
InvertedIndexTantivy()
    : ScalarIndex<T>(INVERTED_INDEX_TYPE) {}
explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx);
~InvertedIndexTantivy();

View File

@ -60,6 +60,9 @@ ToString(ScalarIndexType type) {
template <typename T>
class ScalarIndex : public IndexBase {
public:
ScalarIndex(const std::string& index_type) : IndexBase(index_type) {
}
void
BuildWithRawData(size_t n,
const void* values,

View File

@ -36,7 +36,7 @@ namespace milvus::index {
template <typename T>
ScalarIndexSort<T>::ScalarIndexSort(
const storage::FileManagerContext& file_manager_context)
: is_built_(false), data_() {
: ScalarIndex<T>(ASCENDING_SORT), is_built_(false), data_() {
if (file_manager_context.Valid()) {
file_manager_ =
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);

View File

@ -29,6 +29,10 @@ namespace milvus::index {
class StringIndex : public ScalarIndex<std::string> {
public:
StringIndex(const std::string& index_type)
: ScalarIndex<std::string>(index_type) {
}
const TargetBitmap
Query(const DatasetPtr& dataset) override {
auto op = dataset->Get<OpType>(OPERATOR_TYPE);

View File

@ -40,7 +40,8 @@
namespace milvus::index {
StringIndexMarisa::StringIndexMarisa(
const storage::FileManagerContext& file_manager_context) {
const storage::FileManagerContext& file_manager_context)
: StringIndex(MARISA_TRIE) {
if (file_manager_context.Valid()) {
file_manager_ =
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);

View File

@ -31,6 +31,9 @@ namespace milvus::storage {
struct FileManagerContext {
FileManagerContext() : chunkManagerPtr(nullptr) {
}
// Builds a context from a chunk manager alone; this constructor initializes
// only `chunkManagerPtr`.
FileManagerContext(const ChunkManagerPtr& chunkManagerPtr)
    : chunkManagerPtr(chunkManagerPtr) {}
FileManagerContext(const FieldDataMeta& fieldDataMeta,
const IndexMeta& indexMeta,
const ChunkManagerPtr& chunkManagerPtr)

View File

@ -105,7 +105,7 @@ class BitmapIndexTest : public testing::Test {
auto serialized_bytes = insert_data.Serialize(storage::Remote);
auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}",
"/tmp/test_bitmap/",
"/tmp/test-bitmap-index/",
collection_id,
partition_id,
segment_id,
@ -137,6 +137,16 @@ class BitmapIndexTest : public testing::Test {
config["index_files"] = index_files;
if (is_mmap_) {
config["enable_mmap"] = "true";
config["mmap_filepath"] = fmt::format("/{}/{}/{}/{}/{}",
"/tmp/test-bitmap-index/",
collection_id,
1,
segment_id,
field_id);
;
}
index_ =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index_->Load(milvus::tracer::TraceContext{}, config);
@ -247,7 +257,7 @@ class BitmapIndexTest : public testing::Test {
auto should = ref(i);
ASSERT_EQ(ans, should)
<< "op: " << op << ", @" << i << ", ans: " << ans
<< ", ref: " << should;
<< ", ref: " << should << "|" << data_[i];
}
}
}
@ -318,6 +328,7 @@ class BitmapIndexTest : public testing::Test {
DataType type_;
size_t nb_;
size_t cardinality_;
bool is_mmap_ = false;
boost::container::vector<T> data_;
std::shared_ptr<storage::ChunkManager> chunk_manager_;
};
@ -400,4 +411,55 @@ REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTestV2,
INSTANTIATE_TYPED_TEST_SUITE_P(BitmapIndexE2ECheck_HighCardinality,
BitmapIndexTestV2,
BitmapType);
// Fixture variant that turns on `is_mmap_`, so the index under test is
// loaded with the mmap-related config entries set.
template <typename T>
class BitmapIndexTestV3 : public BitmapIndexTest<T> {
 public:
    ~BitmapIndexTestV3() override {
    }

    // 10000 rows spread over 2000 distinct values, mmap enabled.
    void
    SetParam() override {
        this->is_mmap_ = true;
        this->cardinality_ = 2000;
        this->nb_ = 10000;
    }
};
TYPED_TEST_SUITE_P(BitmapIndexTestV3);
// The loaded index must report exactly as many rows as were generated.
TYPED_TEST_P(BitmapIndexTestV3, CountFuncTest) {
auto count = this->index_->Count();
EXPECT_EQ(count, this->nb_);
}
// Runs the shared In() check of the base fixture against this fixture's index.
TYPED_TEST_P(BitmapIndexTestV3, INFuncTest) {
this->TestInFunc();
}
// Runs the shared NotIn() check of the base fixture.
TYPED_TEST_P(BitmapIndexTestV3, NotINFuncTest) {
this->TestNotInFunc();
}
// Runs the shared compare-against-value check of the base fixture.
TYPED_TEST_P(BitmapIndexTestV3, CompareValFuncTest) {
this->TestCompareValueFunc();
}
// Runs the shared range-compare check of the base fixture.
TYPED_TEST_P(BitmapIndexTestV3, TestRangeCompareFuncTest) {
this->TestRangeCompareFunc();
}
using BitmapType =
testing::Types<int8_t, int16_t, int32_t, int64_t, std::string>;
REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTestV3,
CountFuncTest,
INFuncTest,
NotINFuncTest,
CompareValFuncTest,
TestRangeCompareFuncTest);
INSTANTIATE_TYPED_TEST_SUITE_P(BitmapIndexE2ECheck_Mmap,
BitmapIndexTestV3,
BitmapType);

View File

@ -27,6 +27,7 @@
#include <boost/filesystem.hpp>
#include "test_utils/storage_test_utils.h"
#include "test_utils/TmpPath.h"
#include "storage/Util.h"
constexpr int64_t nb = 100;
namespace indexcgo = milvus::proto::indexcgo;
@ -55,7 +56,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Dummy) {
auto
GetTempFileManagerCtx(CDataType data_type) {
auto ctx = milvus::storage::FileManagerContext();
milvus::storage::StorageConfig storage_config;
storage_config.storage_type = "local";
storage_config.root_path = "/tmp/local/";
auto chunk_manager = milvus::storage::CreateChunkManager(storage_config);
auto ctx = milvus::storage::FileManagerContext(chunk_manager);
ctx.fieldDataMeta.field_schema.set_data_type(
static_cast<milvus::proto::schema::DataType>(data_type));
return ctx;

View File

@ -82,7 +82,9 @@ func IsDiskIndex(indexType IndexType) bool {
}
func IsScalarMmapIndex(indexType IndexType) bool {
return indexType == IndexINVERTED
return indexType == IndexINVERTED ||
indexType == IndexBitmap ||
indexType == IndexHybrid
}
func ValidateMmapIndexParams(indexType IndexType, indexParams map[string]string) error {
@ -110,7 +112,7 @@ func ValidateOffsetCacheIndexParams(indexType IndexType, indexParams map[string]
if err != nil {
return fmt.Errorf("invalid %s value: %s, expected: true, false", common.IndexOffsetCacheEnabledKey, offsetCacheEnable)
}
if enable && IsOffsetCacheSupported(indexType) {
if enable && !IsOffsetCacheSupported(indexType) {
return fmt.Errorf("only bitmap index support %s now", common.IndexOffsetCacheEnabledKey)
}
return nil