mirror of https://github.com/milvus-io/milvus.git
parent
04c36eb8dd
commit
66146223ca
|
@ -15,6 +15,7 @@
|
||||||
#include "utils/EasyAssert.h"
|
#include "utils/EasyAssert.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
inline int
|
inline int
|
||||||
|
@ -82,7 +83,7 @@ datatype_is_vector(DataType datatype) {
|
||||||
return datatype == DataType::VECTOR_BINARY || datatype == DataType::VECTOR_FLOAT;
|
return datatype == DataType::VECTOR_BINARY || datatype == DataType::VECTOR_FLOAT;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct FieldMeta {
|
class FieldMeta {
|
||||||
public:
|
public:
|
||||||
FieldMeta(const FieldMeta&) = delete;
|
FieldMeta(const FieldMeta&) = delete;
|
||||||
FieldMeta(FieldMeta&&) = default;
|
FieldMeta(FieldMeta&&) = default;
|
||||||
|
@ -95,7 +96,7 @@ struct FieldMeta {
|
||||||
Assert(!is_vector());
|
Assert(!is_vector());
|
||||||
}
|
}
|
||||||
|
|
||||||
FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t dim, MetricType metric_type)
|
FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t dim, std::optional<MetricType> metric_type)
|
||||||
: name_(name), id_(id), type_(type), vector_info_(VectorInfo{dim, metric_type}) {
|
: name_(name), id_(id), type_(type), vector_info_(VectorInfo{dim, metric_type}) {
|
||||||
Assert(is_vector());
|
Assert(is_vector());
|
||||||
}
|
}
|
||||||
|
@ -113,7 +114,7 @@ struct FieldMeta {
|
||||||
return vector_info_->dim_;
|
return vector_info_->dim_;
|
||||||
}
|
}
|
||||||
|
|
||||||
MetricType
|
std::optional<MetricType>
|
||||||
get_metric_type() const {
|
get_metric_type() const {
|
||||||
Assert(is_vector());
|
Assert(is_vector());
|
||||||
Assert(vector_info_.has_value());
|
Assert(vector_info_.has_value());
|
||||||
|
@ -147,7 +148,7 @@ struct FieldMeta {
|
||||||
private:
|
private:
|
||||||
struct VectorInfo {
|
struct VectorInfo {
|
||||||
int64_t dim_;
|
int64_t dim_;
|
||||||
MetricType metric_type_;
|
std::optional<MetricType> metric_type_;
|
||||||
};
|
};
|
||||||
FieldName name_;
|
FieldName name_;
|
||||||
FieldId id_;
|
FieldId id_;
|
||||||
|
|
|
@ -34,7 +34,7 @@ class Schema {
|
||||||
|
|
||||||
// auto gen field_id for convenience
|
// auto gen field_id for convenience
|
||||||
FieldId
|
FieldId
|
||||||
AddDebugField(const std::string& name, DataType data_type, int64_t dim, MetricType metric_type) {
|
AddDebugField(const std::string& name, DataType data_type, int64_t dim, std::optional<MetricType> metric_type) {
|
||||||
static int64_t debug_id = 2001;
|
static int64_t debug_id = 2001;
|
||||||
auto field_id = FieldId(debug_id);
|
auto field_id = FieldId(debug_id);
|
||||||
debug_id += 2;
|
debug_id += 2;
|
||||||
|
|
|
@ -70,35 +70,40 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
||||||
// std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
|
// std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
|
||||||
SubQueryResult final_qr(num_queries, topK, metric_type);
|
SubQueryResult final_qr(num_queries, topK, metric_type);
|
||||||
dataset::QueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data};
|
dataset::QueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data};
|
||||||
|
|
||||||
auto max_indexed_id = indexing_record.get_finished_ack();
|
|
||||||
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset);
|
|
||||||
auto search_conf = field_indexing.get_search_conf(topK);
|
|
||||||
|
|
||||||
for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
|
|
||||||
auto size_per_chunk = field_indexing.get_size_per_chunk();
|
|
||||||
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
|
|
||||||
|
|
||||||
auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk);
|
|
||||||
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
|
|
||||||
|
|
||||||
// convert chunk uid to segment uid
|
|
||||||
for (auto& x : sub_qr.mutable_labels()) {
|
|
||||||
if (x != -1) {
|
|
||||||
x += chunk_id * size_per_chunk;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final_qr.merge(sub_qr);
|
|
||||||
}
|
|
||||||
auto vec_ptr = record.get_field_data<FloatVector>(vecfield_offset);
|
auto vec_ptr = record.get_field_data<FloatVector>(vecfield_offset);
|
||||||
|
|
||||||
|
int current_chunk_id = 0;
|
||||||
|
|
||||||
|
if (indexing_record.is_in(vecfield_offset)) {
|
||||||
|
auto max_indexed_id = indexing_record.get_finished_ack();
|
||||||
|
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset);
|
||||||
|
auto search_conf = field_indexing.get_search_conf(topK);
|
||||||
|
Assert(vec_ptr->get_size_per_chunk() == field_indexing.get_size_per_chunk());
|
||||||
|
|
||||||
|
for (int chunk_id = current_chunk_id; chunk_id < max_indexed_id; ++chunk_id) {
|
||||||
|
auto size_per_chunk = field_indexing.get_size_per_chunk();
|
||||||
|
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
|
||||||
|
|
||||||
|
auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk);
|
||||||
|
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
|
||||||
|
|
||||||
|
// convert chunk uid to segment uid
|
||||||
|
for (auto& x : sub_qr.mutable_labels()) {
|
||||||
|
if (x != -1) {
|
||||||
|
x += chunk_id * size_per_chunk;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final_qr.merge(sub_qr);
|
||||||
|
}
|
||||||
|
current_chunk_id = max_indexed_id;
|
||||||
|
}
|
||||||
|
|
||||||
// step 4: brute force search where small indexing is unavailable
|
// step 4: brute force search where small indexing is unavailable
|
||||||
auto vec_size_per_chunk = vec_ptr->get_size_per_chunk();
|
auto vec_size_per_chunk = vec_ptr->get_size_per_chunk();
|
||||||
Assert(vec_size_per_chunk == field_indexing.get_size_per_chunk());
|
|
||||||
auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk);
|
auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk);
|
||||||
|
|
||||||
for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
|
for (int chunk_id = current_chunk_id; chunk_id < max_chunk; ++chunk_id) {
|
||||||
auto& chunk = vec_ptr->get_chunk(chunk_id);
|
auto& chunk = vec_ptr->get_chunk(chunk_id);
|
||||||
|
|
||||||
auto element_begin = chunk_id * vec_size_per_chunk;
|
auto element_begin = chunk_id * vec_size_per_chunk;
|
||||||
|
@ -116,6 +121,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
||||||
}
|
}
|
||||||
final_qr.merge(sub_qr);
|
final_qr.merge(sub_qr);
|
||||||
}
|
}
|
||||||
|
current_chunk_id = max_chunk;
|
||||||
|
|
||||||
results.result_distances_ = std::move(final_qr.mutable_values());
|
results.result_distances_ = std::move(final_qr.mutable_values());
|
||||||
results.internal_seg_offsets_ = std::move(final_qr.mutable_labels());
|
results.internal_seg_offsets_ = std::move(final_qr.mutable_labels());
|
||||||
|
|
|
@ -39,21 +39,26 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vec
|
||||||
|
|
||||||
knowhere::Config
|
knowhere::Config
|
||||||
VectorFieldIndexing::get_build_conf() const {
|
VectorFieldIndexing::get_build_conf() const {
|
||||||
|
// TODO
|
||||||
|
auto type_opt = field_meta_.get_metric_type();
|
||||||
|
Assert(type_opt.has_value());
|
||||||
|
auto type_name = MetricTypeToName(type_opt.value());
|
||||||
return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
|
return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
|
||||||
{knowhere::IndexParams::nlist, 100},
|
{knowhere::IndexParams::nlist, 100},
|
||||||
{knowhere::IndexParams::nprobe, 4},
|
{knowhere::IndexParams::nprobe, 4},
|
||||||
{knowhere::Metric::TYPE, MetricTypeToName(field_meta_.get_metric_type())},
|
{knowhere::Metric::TYPE, type_name},
|
||||||
{knowhere::meta::DEVICEID, 0}};
|
{knowhere::meta::DEVICEID, 0}};
|
||||||
}
|
}
|
||||||
|
|
||||||
knowhere::Config
|
knowhere::Config
|
||||||
VectorFieldIndexing::get_search_conf(int top_K) const {
|
VectorFieldIndexing::get_search_conf(int top_K) const {
|
||||||
return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
|
// TODO
|
||||||
{knowhere::meta::TOPK, top_K},
|
auto type_opt = field_meta_.get_metric_type();
|
||||||
{knowhere::IndexParams::nlist, 100},
|
Assert(type_opt.has_value());
|
||||||
{knowhere::IndexParams::nprobe, 4},
|
auto type_name = MetricTypeToName(type_opt.value());
|
||||||
{knowhere::Metric::TYPE, MetricTypeToName(field_meta_.get_metric_type())},
|
return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()}, {knowhere::meta::TOPK, top_K},
|
||||||
{knowhere::meta::DEVICEID, 0}};
|
{knowhere::IndexParams::nlist, 100}, {knowhere::IndexParams::nprobe, 4},
|
||||||
|
{knowhere::Metric::TYPE, type_name}, {knowhere::meta::DEVICEID, 0}};
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -109,14 +109,25 @@ class IndexingRecord {
|
||||||
|
|
||||||
void
|
void
|
||||||
Initialize() {
|
Initialize() {
|
||||||
int offset = 0;
|
int offset_id = 0;
|
||||||
for (auto& field : schema_) {
|
for (const FieldMeta& field : schema_) {
|
||||||
if (field.get_data_type() != DataType::VECTOR_BINARY) {
|
auto offset = FieldOffset(offset_id);
|
||||||
field_indexings_.try_emplace(FieldOffset(offset), CreateIndex(field, size_per_chunk_));
|
++offset_id;
|
||||||
|
|
||||||
|
if (field.is_vector()) {
|
||||||
|
// TODO: skip binary small index now, reenable after config.yaml is ready
|
||||||
|
if (field.get_data_type() == DataType::VECTOR_BINARY) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// flat should be skipped
|
||||||
|
if (!field.get_metric_type().has_value()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
++offset;
|
|
||||||
|
field_indexings_.try_emplace(offset, CreateIndex(field, size_per_chunk_));
|
||||||
}
|
}
|
||||||
assert(offset == schema_.size());
|
assert(offset_id == schema_.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// concurrent, reentrant
|
// concurrent, reentrant
|
||||||
|
@ -131,7 +142,7 @@ class IndexingRecord {
|
||||||
|
|
||||||
const FieldIndexing&
|
const FieldIndexing&
|
||||||
get_field_indexing(FieldOffset field_offset) const {
|
get_field_indexing(FieldOffset field_offset) const {
|
||||||
assert(field_indexings_.count(field_offset));
|
Assert(field_indexings_.count(field_offset));
|
||||||
return *field_indexings_.at(field_offset);
|
return *field_indexings_.at(field_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,6 +153,12 @@ class IndexingRecord {
|
||||||
AssertInfo(ptr, "invalid indexing");
|
AssertInfo(ptr, "invalid indexing");
|
||||||
return *ptr;
|
return *ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
is_in(FieldOffset field_offset) const {
|
||||||
|
return field_indexings_.count(field_offset);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
auto
|
auto
|
||||||
get_scalar_field_indexing(FieldOffset field_offset) const -> const ScalarFieldIndexing<T>& {
|
get_scalar_field_indexing(FieldOffset field_offset) const -> const ScalarFieldIndexing<T>& {
|
||||||
|
|
|
@ -178,7 +178,8 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
|
||||||
query::dataset::QueryDataset dataset;
|
query::dataset::QueryDataset dataset;
|
||||||
dataset.query_data = query_data;
|
dataset.query_data = query_data;
|
||||||
dataset.num_queries = query_count;
|
dataset.num_queries = query_count;
|
||||||
dataset.metric_type = field_meta.get_metric_type();
|
// if(field_meta.is)
|
||||||
|
dataset.metric_type = query_info.metric_type_;
|
||||||
dataset.topk = query_info.topK_;
|
dataset.topk = query_info.topK_;
|
||||||
dataset.dim = field_meta.get_dim();
|
dataset.dim = field_meta.get_dim();
|
||||||
|
|
||||||
|
|
|
@ -29,17 +29,8 @@ FetchContent_Declare(
|
||||||
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/googletest-build
|
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/googletest-build
|
||||||
DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} )
|
DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} )
|
||||||
|
|
||||||
FetchContent_GetProperties( googletest )
|
FetchContent_MakeAvailable(googletest)
|
||||||
if ( NOT googletest_POPULATED )
|
# include(GoogleTest)
|
||||||
|
|
||||||
FetchContent_Populate( googletest )
|
|
||||||
|
|
||||||
# Adding the following targets:
|
|
||||||
# gtest, gtest_main, gmock, gmock_main
|
|
||||||
add_subdirectory( ${googletest_SOURCE_DIR}
|
|
||||||
${googletest_BINARY_DIR}
|
|
||||||
EXCLUDE_FROM_ALL )
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# ****************************************************************
|
# ****************************************************************
|
||||||
# Create ALIAS Target
|
# Create ALIAS Target
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
enable_testing()
|
|
||||||
|
|
||||||
include_directories(${CMAKE_HOME_DIRECTORY}/src)
|
include_directories(${CMAKE_HOME_DIRECTORY}/src)
|
||||||
include_directories(${CMAKE_HOME_DIRECTORY}/src/index/knowhere)
|
include_directories(${CMAKE_HOME_DIRECTORY}/src/index/knowhere)
|
||||||
set(MILVUS_TEST_FILES
|
set(MILVUS_TEST_FILES
|
||||||
|
|
|
@ -411,6 +411,50 @@ TEST(Query, ExecEmpty) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(Query, ExecWithoutPredicateFlat) {
|
||||||
|
using namespace milvus::query;
|
||||||
|
using namespace milvus::segcore;
|
||||||
|
auto schema = std::make_shared<Schema>();
|
||||||
|
schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, std::nullopt);
|
||||||
|
schema->AddDebugField("age", DataType::FLOAT);
|
||||||
|
std::string dsl = R"({
|
||||||
|
"bool": {
|
||||||
|
"must": [
|
||||||
|
{
|
||||||
|
"vector": {
|
||||||
|
"fakevec": {
|
||||||
|
"metric_type": "L2",
|
||||||
|
"params": {
|
||||||
|
"nprobe": 10
|
||||||
|
},
|
||||||
|
"query": "$0",
|
||||||
|
"topk": 5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
})";
|
||||||
|
auto plan = CreatePlan(*schema, dsl);
|
||||||
|
int64_t N = 1000 * 1000;
|
||||||
|
auto dataset = DataGen(schema, N);
|
||||||
|
auto segment = CreateGrowingSegment(schema);
|
||||||
|
segment->PreInsert(N);
|
||||||
|
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||||
|
|
||||||
|
auto num_queries = 5;
|
||||||
|
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||||
|
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||||
|
QueryResult qr;
|
||||||
|
Timestamp time = 1000000;
|
||||||
|
std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
|
||||||
|
qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1);
|
||||||
|
std::vector<std::vector<std::string>> results;
|
||||||
|
int topk = 5;
|
||||||
|
auto json = QueryResultToJson(qr);
|
||||||
|
std::cout << json.dump(2);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(Query, ExecWithoutPredicate) {
|
TEST(Query, ExecWithoutPredicate) {
|
||||||
using namespace milvus::query;
|
using namespace milvus::query;
|
||||||
using namespace milvus::segcore;
|
using namespace milvus::segcore;
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
// #include "utils/Json.h"
|
// #include "utils/Json.h"
|
||||||
#include "test_utils/DataGen.h"
|
#include "test_utils/DataGen.h"
|
||||||
#include <random>
|
#include <random>
|
||||||
|
#include <optional>
|
||||||
using std::cin;
|
using std::cin;
|
||||||
using std::cout;
|
using std::cout;
|
||||||
using std::endl;
|
using std::endl;
|
||||||
|
|
Loading…
Reference in New Issue