mirror of https://github.com/milvus-io/milvus.git
Structured index (#2418)
* Support Structured Index Based on Sort Signed-off-by: cmli <chengming.li@zilliz.com> * fix lint errors and make clang-format Signed-off-by: cmli <chengming.li@zilliz.com> * pick some advices from laojin Signed-off-by: cmli <chengming.li@zilliz.com> * make clang-format Signed-off-by: cmli <chengming.li@zilliz.com> * rename n_ to size_, replace self-implemented lower_bound and upper_bound by std implemention as laojin requires Signed-off-by: cmli <chengming.li@zilliz.com> * fix lint error Signed-off-by: cmli <chengming.li@zilliz.com> * update implementation by shengjun's advice Signed-off-by: cmli <chengming.li@zilliz.com> * remove index member size_ Signed-off-by: cmli <chengming.li@zilliz.com> Co-authored-by: cmli <chengming.li@zilliz.com>pull/2435/head^2
parent
9aa26d4e9c
commit
098f5a534d
|
@ -56,6 +56,7 @@ Please mark all change in change log and use the issue from GitHub
|
|||
- \#2240 Obtain running rpc requests information
|
||||
- \#2268 Intelligently detect openblas library in system to avoid installing from source code every time
|
||||
- \#2283 Suspend the building tasks when any query command arrives.
|
||||
- \#2417 Support Structured Index Based on Sort
|
||||
|
||||
## Improvement
|
||||
- \#221 Refactor LOG macro
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "faiss/utils/ConcurrentBitset.h"
|
||||
#include "knowhere/index/Index.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
// Comparison operator selector taken by the single-bound StructuredIndex::Range(value, op) query.
enum OperatorType { LT = 0, LE, GT, GE };
|
||||
|
||||
template <typename T>
|
||||
class StructuredIndex : public Index {
|
||||
public:
|
||||
virtual void
|
||||
Build(const size_t n, const T* values) = 0;
|
||||
|
||||
virtual const faiss::ConcurrentBitsetPtr
|
||||
In(const size_t n, const T* values) = 0;
|
||||
|
||||
virtual const faiss::ConcurrentBitsetPtr
|
||||
NotIn(const size_t n, const T* values) = 0;
|
||||
|
||||
virtual const faiss::ConcurrentBitsetPtr
|
||||
Range(const T value, const OperatorType op) = 0;
|
||||
|
||||
virtual const faiss::ConcurrentBitsetPtr
|
||||
Range(const T lower_bound_value, bool lb_inclusive, const T upper_bound_value, bool ub_inclusive) = 0;
|
||||
};
|
||||
|
||||
// Shared-ownership (std::shared_ptr) handle to a StructuredIndex<T>.
template <typename T>
|
||||
using StructuredIndexPtr = std::shared_ptr<StructuredIndex<T>>;
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
|
@ -0,0 +1,199 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <src/index/knowhere/knowhere/common/Log.h>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "knowhere/index/structured_index/StructuredIndexSort.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
template <typename T>
|
||||
StructuredIndexSort<T>::StructuredIndexSort() : is_built_(false), data_(nullptr) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
StructuredIndexSort<T>::StructuredIndexSort(const size_t n, const T* values) : is_built_(false) {
|
||||
Build(n, values);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
StructuredIndexSort<T>::~StructuredIndexSort() {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
StructuredIndexSort<T>::Build(const size_t n, const T* values) {
|
||||
data_.reserve(n);
|
||||
T* p = const_cast<T*>(values);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
data_.emplace_back(IndexStructure(*p++, i));
|
||||
}
|
||||
build();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
StructuredIndexSort<T>::build() {
|
||||
if (is_built_)
|
||||
return;
|
||||
if (data_.size() == 0) {
|
||||
// todo: throw an exception
|
||||
KNOWHERE_THROW_MSG("StructuredIndexSort cannot build null values!");
|
||||
}
|
||||
std::sort(data_.begin(), data_.end());
|
||||
is_built_ = true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BinarySet
|
||||
StructuredIndexSort<T>::Serialize(const milvus::knowhere::Config& config) {
|
||||
if (!is_built_) {
|
||||
build();
|
||||
}
|
||||
|
||||
auto index_data_size = data_.size() * sizeof(IndexStructure<T>);
|
||||
std::shared_ptr<uint8_t[]> index_data(new uint8_t[index_data_size]);
|
||||
memcpy(index_data.get(), data_.data(), index_data_size);
|
||||
|
||||
std::shared_ptr<uint8_t[]> index_length(new uint8_t[sizeof(size_t)]);
|
||||
auto index_size = data_.size();
|
||||
memcpy(index_length.get(), &index_size, sizeof(size_t));
|
||||
|
||||
BinarySet res_set;
|
||||
res_set.Append("index_data", index_data, index_data_size);
|
||||
res_set.Append("index_length", index_length, sizeof(size_t));
|
||||
return res_set;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
StructuredIndexSort<T>::Load(const milvus::knowhere::BinarySet& index_binary) {
|
||||
try {
|
||||
size_t index_size;
|
||||
auto index_length = index_binary.GetByName("index_length");
|
||||
memcpy(&index_size, index_length->data.get(), (size_t)index_length->size);
|
||||
|
||||
auto index_data = index_binary.GetByName("index_data");
|
||||
data_.resize(index_size);
|
||||
memcpy(data_.data(), index_data->data.get(), (size_t)index_data->size);
|
||||
is_built_ = true;
|
||||
} catch (...) {
|
||||
KNOHWERE_ERROR_MSG("StructuredIndexSort Load failed!");
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const faiss::ConcurrentBitsetPtr
|
||||
StructuredIndexSort<T>::In(const size_t n, const T* values) {
|
||||
if (!is_built_) {
|
||||
build();
|
||||
}
|
||||
faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(data_.size());
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
auto lb = std::lower_bound(data_.begin(), data_.end(), IndexStructure<T>(*(values + i)));
|
||||
auto ub = std::upper_bound(data_.begin(), data_.end(), IndexStructure<T>(*(values + i)));
|
||||
for (; lb < ub; ++lb) {
|
||||
if (lb->a_ != *(values + i)) {
|
||||
LOG_KNOWHERE_ERROR_ << "error happens in StructuredIndexSort<T>::In, experted value is: "
|
||||
<< *(values + i) << ", but real value is: " << lb->a_;
|
||||
}
|
||||
bitset->set(lb->idx_);
|
||||
}
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const faiss::ConcurrentBitsetPtr
|
||||
StructuredIndexSort<T>::NotIn(const size_t n, const T* values) {
|
||||
if (!is_built_) {
|
||||
build();
|
||||
}
|
||||
faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(data_.size(), 0xff);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
auto lb = std::lower_bound(data_.begin(), data_.end(), IndexStructure<T>(*(values + i)));
|
||||
auto ub = std::upper_bound(data_.begin(), data_.end(), IndexStructure<T>(*(values + i)));
|
||||
for (; lb < ub; ++lb) {
|
||||
if (lb->a_ != *(values + i)) {
|
||||
LOG_KNOWHERE_ERROR_ << "error happens in StructuredIndexSort<T>::NotIn, experted value is: "
|
||||
<< *(values + i) << ", but real value is: " << lb->a_;
|
||||
}
|
||||
bitset->clear(lb->idx_);
|
||||
}
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const faiss::ConcurrentBitsetPtr
|
||||
StructuredIndexSort<T>::Range(const T value, const OperatorType op) {
|
||||
if (!is_built_) {
|
||||
build();
|
||||
}
|
||||
faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(data_.size());
|
||||
auto lb = data_.begin();
|
||||
auto ub = data_.end();
|
||||
switch (op) {
|
||||
case OperatorType::LT:
|
||||
ub = std::lower_bound(data_.begin(), data_.end(), IndexStructure<T>(value));
|
||||
break;
|
||||
case OperatorType::LE:
|
||||
ub = std::upper_bound(data_.begin(), data_.end(), IndexStructure<T>(value));
|
||||
break;
|
||||
case OperatorType::GT:
|
||||
lb = std::upper_bound(data_.begin(), data_.end(), IndexStructure<T>(value));
|
||||
break;
|
||||
case OperatorType::GE:
|
||||
lb = std::lower_bound(data_.begin(), data_.end(), IndexStructure<T>(value));
|
||||
break;
|
||||
default:
|
||||
KNOWHERE_THROW_MSG("Invalid OperatorType:" + std::to_string((int)op) + "!");
|
||||
}
|
||||
for (; lb < ub; ++lb) {
|
||||
bitset->set(lb->idx_);
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const faiss::ConcurrentBitsetPtr
|
||||
StructuredIndexSort<T>::Range(T lower_bound_value, bool lb_inclusive, T upper_bound_value, bool ub_inclusive) {
|
||||
if (!is_built_) {
|
||||
build();
|
||||
}
|
||||
faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(data_.size());
|
||||
if (lower_bound_value > upper_bound_value) {
|
||||
std::swap(lower_bound_value, upper_bound_value);
|
||||
std::swap(lb_inclusive, ub_inclusive);
|
||||
}
|
||||
auto lb = data_.begin();
|
||||
auto ub = data_.end();
|
||||
if (lb_inclusive) {
|
||||
lb = std::lower_bound(data_.begin(), data_.end(), IndexStructure<T>(lower_bound_value));
|
||||
} else {
|
||||
lb = std::upper_bound(data_.begin(), data_.end(), IndexStructure<T>(lower_bound_value));
|
||||
}
|
||||
if (ub_inclusive) {
|
||||
ub = std::upper_bound(data_.begin(), data_.end(), IndexStructure<T>(upper_bound_value));
|
||||
} else {
|
||||
ub = std::lower_bound(data_.begin(), data_.end(), IndexStructure<T>(upper_bound_value));
|
||||
}
|
||||
for (; lb < ub; ++lb) {
|
||||
bitset->set(lb->idx_);
|
||||
}
|
||||
return bitset;
|
||||
}
|
||||
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
|
@ -0,0 +1,100 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "knowhere/common/Exception.h"
|
||||
#include "knowhere/index/structured_index/StructuredIndex.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
/**
 * One entry of the sorted index: a value plus the offset it had in the
 * original input array.
 *
 * Ordering and equality look at the value only, so entries with equal values
 * compare equal regardless of their offsets.
 */
template <typename T>
struct IndexStructure {
    // Value-initializes the value (0 for arithmetic types, the default-constructed
    // object otherwise) instead of constructing it from the literal 0, which is
    // wrong for types such as std::string.
    IndexStructure() : a_(), idx_(0) {
    }

    // Search key: carries only a value; the offset is left at 0.
    explicit IndexStructure(const T a) : a_(a), idx_(0) {
    }

    // Full entry: value `a` found at offset `idx`.
    IndexStructure(const T a, const size_t idx) : a_(a), idx_(idx) {
    }

    bool
    operator<(const IndexStructure& b) const {
        return a_ < b.a_;
    }

    bool
    operator==(const IndexStructure& b) const {
        return a_ == b.a_;
    }

    T a_;         // indexed value
    size_t idx_;  // offset of the value in the original input
};
|
||||
|
||||
template <typename T>
|
||||
class StructuredIndexSort : public StructuredIndex<T> {
|
||||
public:
|
||||
StructuredIndexSort();
|
||||
StructuredIndexSort(const size_t n, const T* values);
|
||||
~StructuredIndexSort();
|
||||
|
||||
BinarySet
|
||||
Serialize(const Config& config = Config()) override;
|
||||
|
||||
void
|
||||
Load(const BinarySet& index_binary) override;
|
||||
|
||||
void
|
||||
Build(const size_t n, const T* values) override;
|
||||
|
||||
void
|
||||
build();
|
||||
|
||||
const faiss::ConcurrentBitsetPtr
|
||||
In(const size_t n, const T* values) override;
|
||||
|
||||
const faiss::ConcurrentBitsetPtr
|
||||
NotIn(const size_t n, const T* values) override;
|
||||
|
||||
const faiss::ConcurrentBitsetPtr
|
||||
Range(const T value, const OperatorType op) override;
|
||||
|
||||
const faiss::ConcurrentBitsetPtr
|
||||
Range(T lower_bound_value, bool lb_inclusive, T upper_bound_value, bool ub_inclusive) override;
|
||||
|
||||
const std::vector<IndexStructure<T>>&
|
||||
GetData() {
|
||||
return data_;
|
||||
}
|
||||
|
||||
int64_t
|
||||
Size() override {
|
||||
return (int64_t)data_.size();
|
||||
}
|
||||
|
||||
bool
|
||||
IsBuilt() const {
|
||||
return is_built_;
|
||||
}
|
||||
|
||||
private:
|
||||
bool is_built_;
|
||||
std::vector<IndexStructure<T>> data_;
|
||||
};
|
||||
|
||||
// Shared-ownership (std::shared_ptr) handle to a StructuredIndexSort<T>.
template <typename T>
|
||||
using StructuredIndexSortPtr = std::shared_ptr<StructuredIndexSort<T>>;
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
||||
|
||||
#include "knowhere/index/structured_index/StructuredIndexSort-inl.h"
|
|
@ -15,11 +15,15 @@
|
|||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <cstring>
|
||||
#include "ConcurrentBitset.h"
|
||||
|
||||
namespace faiss {
|
||||
|
||||
// Creates a bitset with room for `capacity` bits (rounded up to whole bytes).
// When `init_value` is non-zero, every backing byte is set to it.
ConcurrentBitset::ConcurrentBitset(id_type_t capacity, uint8_t init_value)
    : capacity_(capacity), bitset_((capacity + 8 - 1) >> 3) {
    if (init_value) {
        // Write through the atomic interface instead of memset-ing raw bytes
        // over std::atomic objects. Relaxed ordering is enough here: the object
        // is still under construction, so no other thread can observe it yet.
        for (auto& byte : bitset_) {
            byte.store(init_value, std::memory_order_relaxed);
        }
    }
}
|
||||
|
||||
std::vector<std::atomic<uint8_t>>&
|
||||
|
@ -202,4 +206,8 @@ ConcurrentBitset::data() {
|
|||
return reinterpret_cast<const uint8_t*>(bitset_.data());
|
||||
}
|
||||
|
||||
uint8_t*
|
||||
ConcurrentBitset::mutable_data() {
|
||||
return reinterpret_cast<uint8_t*>(bitset_.data());
|
||||
}
|
||||
} // namespace faiss
|
||||
|
|
|
@ -27,7 +27,7 @@ class ConcurrentBitset {
|
|||
public:
|
||||
using id_type_t = int64_t;
|
||||
|
||||
explicit ConcurrentBitset(id_type_t size);
|
||||
explicit ConcurrentBitset(id_type_t size, uint8_t init_value = 0);
|
||||
|
||||
// ConcurrentBitset(const ConcurrentBitset&) = delete;
|
||||
// ConcurrentBitset&
|
||||
|
@ -69,6 +69,9 @@ class ConcurrentBitset {
|
|||
const uint8_t*
|
||||
data();
|
||||
|
||||
uint8_t*
|
||||
mutable_data();
|
||||
|
||||
private:
|
||||
size_t capacity_;
|
||||
std::vector<std::atomic<uint8_t>> bitset_;
|
||||
|
|
|
@ -198,6 +198,16 @@ endif ()
|
|||
target_link_libraries(test_annoy ${depend_libs} ${unittest_libs} ${basic_libs})
|
||||
install(TARGETS test_annoy DESTINATION unittest)
|
||||
|
||||
################################################################################
#<STRUCTURED-INDEX-SORT-TEST>
# Unit-test executable for the sort-based structured index.
set(structured_index_sort_srcs ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/structured_index/StructuredIndexSort-inl.h)
if (NOT TARGET test_structured_index_sort)
    add_executable(test_structured_index_sort
            test_structured_index_sort.cpp
            ${structured_index_sort_srcs}
            ${util_srcs})
endif ()
target_link_libraries(test_structured_index_sort ${depend_libs} ${unittest_libs} ${basic_libs})
install(TARGETS test_structured_index_sort DESTINATION unittest)
|
||||
|
||||
#add_subdirectory(faiss_ori)
|
||||
#add_subdirectory(faiss_benchmark)
|
||||
|
|
|
@ -0,0 +1,247 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "knowhere/index/structured_index/StructuredIndexSort.h"
|
||||
|
||||
#include "unittest/utils.h"
|
||||
|
||||
// Allocates `n` ints with malloc and fills them with pseudo-random values in
// [0, range). The buffer is handed back through `p`; the caller releases it
// with free(). `p` is nullptr when the allocation fails.
void
gen_rand_data(int range, int n, int*& p) {
    // random() takes its seed from srandom(); srand() seeds rand() and is not
    // guaranteed to affect random() on every platform.
    srandom(static_cast<unsigned int>(time(nullptr)));
    p = static_cast<int*>(malloc(static_cast<size_t>(n) * sizeof(int)));
    if (p == nullptr) {
        return;
    }
    for (int i = 0; i < n; ++i) {
        p[i] = static_cast<int>(random() % range);
    }
}
|
||||
|
||||
// Building the index must leave its entries in the same order as the sorted input.
TEST(STRUCTUREDINDEXSORT_TEST, test_build) {
    const int range = 100;
    const int n = 1000;
    int* values = nullptr;
    gen_rand_data(range, n, values);

    // The two-argument constructor builds the index right away.
    milvus::knowhere::StructuredIndexSort<int> index((size_t)n, values);

    std::sort(values, values + n);
    const std::vector<milvus::knowhere::IndexStructure<int>> entries = index.GetData();
    for (int pos = 0; pos < n; ++pos) {
        ASSERT_EQ(values[pos], entries[pos].a_);
    }
    free(values);
}
|
||||
|
||||
// Serializes an index, pushes both blobs through files on disk, loads them
// back and checks that the restored entries equal the sorted input.
TEST(STRUCTUREDINDEXSORT_TEST, test_serialize_and_load) {
    // Writes `blob` to `path`, then reads the same number of bytes back into `out`.
    auto round_trip = [](const std::string& path, milvus::knowhere::BinaryPtr& blob, uint8_t* out) {
        {
            // The writer lives in its own scope so it is gone before the file is read back.
            FileIOWriter writer(path);
            writer(static_cast<void*>(blob->data.get()), blob->size);
        }

        FileIOReader reader(path);
        reader(out, blob->size);
    };

    const int range = 100;
    const int n = 1000;
    int* values = nullptr;
    gen_rand_data(range, n, values);

    milvus::knowhere::StructuredIndexSort<int> index((size_t)n, values);
    auto binaryset = index.Serialize();

    auto data_blob = binaryset.GetByName("index_data");
    const std::string data_path = "/tmp/sort_test_data_serialize.bin";
    auto* data_copy = new uint8_t[data_blob->size];
    round_trip(data_path, data_blob, data_copy);

    auto length_blob = binaryset.GetByName("index_length");
    const std::string length_path = "/tmp/sort_test_length_serialize.bin";
    auto* length_copy = new uint8_t[length_blob->size];
    round_trip(length_path, length_blob, length_copy);

    // Rebuild the binary set purely from the bytes that came back from disk.
    binaryset.clear();
    std::shared_ptr<uint8_t[]> reloaded_data(data_copy);
    binaryset.Append("index_data", reloaded_data, data_blob->size);

    std::shared_ptr<uint8_t[]> reloaded_length(length_copy);
    binaryset.Append("index_length", reloaded_length, length_blob->size);

    index.Load(binaryset);
    EXPECT_EQ(n, (int)index.Size());
    EXPECT_EQ(true, index.IsBuilt());

    std::sort(values, values + n);
    const std::vector<milvus::knowhere::IndexStructure<int>> entries = index.GetData();
    for (int pos = 0; pos < n; ++pos) {
        ASSERT_EQ(values[pos], entries[pos].a_);
    }

    free(values);
}
|
||||
|
||||
// In() must set the bit of every offset whose value was part of the query.
TEST(STRUCTUREDINDEXSORT_TEST, test_in) {
    const int range = 1000;
    const int n = 1000;
    int* values = nullptr;
    gen_rand_data(range, n, values);
    milvus::knowhere::StructuredIndexSort<int> index((size_t)n, values);

    size_t test_times = 10;
    std::vector<int> test_vals;
    std::vector<int> test_off;
    test_vals.reserve(test_times);
    test_off.reserve(test_times);

    // Pick random offsets and query for the values stored there.
    for (size_t round = 0; round < test_times; ++round) {
        auto offset = random() % n;
        test_vals.emplace_back(values[offset]);
        test_off.emplace_back(offset);
    }

    auto res = index.In(test_times, test_vals.data());
    for (size_t round = 0; round < test_times; ++round) {
        ASSERT_EQ(true, res->test(test_off[round]));
    }

    free(values);
}
|
||||
|
||||
// NotIn() must clear the bit of every offset whose value was part of the query.
TEST(STRUCTUREDINDEXSORT_TEST, test_not_in) {
    const int range = 10000;
    const int n = 1000;
    int* values = nullptr;
    gen_rand_data(range, n, values);
    milvus::knowhere::StructuredIndexSort<int> index((size_t)n, values);

    size_t test_times = 10;
    std::vector<int> test_vals;
    std::vector<int> test_off;
    test_vals.reserve(test_times);
    test_off.reserve(test_times);

    // Pick random offsets and query for the values stored there.
    for (size_t round = 0; round < test_times; ++round) {
        auto offset = random() % n;
        test_vals.emplace_back(values[offset]);
        test_off.emplace_back(offset);
    }

    auto res = index.NotIn(test_times, test_vals.data());
    for (size_t round = 0; round < test_times; ++round) {
        ASSERT_EQ(false, res->test(test_off[round]));
    }

    free(values);
}
|
||||
|
||||
// For each single-bound operator, the bit at every offset must equal the result
// of applying that comparison to the original value stored at that offset.
TEST(STRUCTUREDINDEXSORT_TEST, test_single_border_range) {
    const int range = 100;
    const int n = 1000;
    int* values = nullptr;
    gen_rand_data(range, n, values);
    milvus::knowhere::StructuredIndexSort<int> index((size_t)n, values);

    srand((unsigned int)time(nullptr));
    int val;

    // LT
    val = (int)random() % 100;
    auto lt_res = index.Range(val, milvus::knowhere::OperatorType::LT);
    for (int pos = 0; pos < n; ++pos) {
        ASSERT_EQ(values[pos] < val, lt_res->test(pos));
    }

    // LE
    val = (int)random() % 100;
    auto le_res = index.Range(val, milvus::knowhere::OperatorType::LE);
    for (int pos = 0; pos < n; ++pos) {
        ASSERT_EQ(values[pos] <= val, le_res->test(pos));
    }

    // GE
    val = (int)random() % 100;
    auto ge_res = index.Range(val, milvus::knowhere::OperatorType::GE);
    for (int pos = 0; pos < n; ++pos) {
        ASSERT_EQ(values[pos] >= val, ge_res->test(pos));
    }

    // GT
    val = (int)random() % 100;
    auto gt_res = index.Range(val, milvus::knowhere::OperatorType::GT);
    for (int pos = 0; pos < n; ++pos) {
        ASSERT_EQ(values[pos] > val, gt_res->test(pos));
    }

    free(values);
}
|
||||
|
||||
// For each of the four inclusive/exclusive combinations, the bit at every
// offset must equal "the original value lies inside the interval".
TEST(STRUCTUREDINDEXSORT_TEST, test_double_border_range) {
    const int range = 100;
    const int n = 1000;
    int* values = nullptr;
    gen_rand_data(range, n, values);
    milvus::knowhere::StructuredIndexSort<int> index((size_t)n, values);

    srand((unsigned int)time(nullptr));
    int lb, ub;

    // [lb, ub]
    lb = (int)random() % 100;
    ub = (int)random() % 100;
    if (lb > ub) {
        std::swap(lb, ub);
    }
    auto closed_res = index.Range(lb, true, ub, true);
    for (int pos = 0; pos < n; ++pos) {
        const bool inside = values[pos] >= lb && values[pos] <= ub;
        ASSERT_EQ(inside, closed_res->test(pos));
    }

    // [lb, ub)
    lb = (int)random() % 100;
    ub = (int)random() % 100;
    if (lb > ub) {
        std::swap(lb, ub);
    }
    auto right_open_res = index.Range(lb, true, ub, false);
    for (int pos = 0; pos < n; ++pos) {
        const bool inside = values[pos] >= lb && values[pos] < ub;
        ASSERT_EQ(inside, right_open_res->test(pos));
    }

    // (lb, ub]
    lb = (int)random() % 100;
    ub = (int)random() % 100;
    if (lb > ub) {
        std::swap(lb, ub);
    }
    auto left_open_res = index.Range(lb, false, ub, true);
    for (int pos = 0; pos < n; ++pos) {
        const bool inside = values[pos] > lb && values[pos] <= ub;
        ASSERT_EQ(inside, left_open_res->test(pos));
    }

    // (lb, ub)
    lb = (int)random() % 100;
    ub = (int)random() % 100;
    if (lb > ub) {
        std::swap(lb, ub);
    }
    auto open_res = index.Range(lb, false, ub, false);
    for (int pos = 0; pos < n; ++pos) {
        const bool inside = values[pos] > lb && values[pos] < ub;
        ASSERT_EQ(inside, open_res->test(pos));
    }

    free(values);
}
|
Loading…
Reference in New Issue