mirror of https://github.com/milvus-io/milvus.git
parent
04c36eb8dd
commit
66146223ca
|
@ -15,6 +15,7 @@
|
|||
#include "utils/EasyAssert.h"
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include <optional>
|
||||
|
||||
namespace milvus {
|
||||
inline int
|
||||
|
@ -82,7 +83,7 @@ datatype_is_vector(DataType datatype) {
|
|||
return datatype == DataType::VECTOR_BINARY || datatype == DataType::VECTOR_FLOAT;
|
||||
}
|
||||
|
||||
struct FieldMeta {
|
||||
class FieldMeta {
|
||||
public:
|
||||
FieldMeta(const FieldMeta&) = delete;
|
||||
FieldMeta(FieldMeta&&) = default;
|
||||
|
@ -95,7 +96,7 @@ struct FieldMeta {
|
|||
Assert(!is_vector());
|
||||
}
|
||||
|
||||
FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t dim, MetricType metric_type)
|
||||
FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t dim, std::optional<MetricType> metric_type)
|
||||
: name_(name), id_(id), type_(type), vector_info_(VectorInfo{dim, metric_type}) {
|
||||
Assert(is_vector());
|
||||
}
|
||||
|
@ -113,7 +114,7 @@ struct FieldMeta {
|
|||
return vector_info_->dim_;
|
||||
}
|
||||
|
||||
MetricType
|
||||
std::optional<MetricType>
|
||||
get_metric_type() const {
|
||||
Assert(is_vector());
|
||||
Assert(vector_info_.has_value());
|
||||
|
@ -147,7 +148,7 @@ struct FieldMeta {
|
|||
private:
|
||||
struct VectorInfo {
|
||||
int64_t dim_;
|
||||
MetricType metric_type_;
|
||||
std::optional<MetricType> metric_type_;
|
||||
};
|
||||
FieldName name_;
|
||||
FieldId id_;
|
||||
|
|
|
@ -34,7 +34,7 @@ class Schema {
|
|||
|
||||
// auto gen field_id for convenience
|
||||
FieldId
|
||||
AddDebugField(const std::string& name, DataType data_type, int64_t dim, MetricType metric_type) {
|
||||
AddDebugField(const std::string& name, DataType data_type, int64_t dim, std::optional<MetricType> metric_type) {
|
||||
static int64_t debug_id = 2001;
|
||||
auto field_id = FieldId(debug_id);
|
||||
debug_id += 2;
|
||||
|
|
|
@ -70,35 +70,40 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
// std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
|
||||
SubQueryResult final_qr(num_queries, topK, metric_type);
|
||||
dataset::QueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data};
|
||||
|
||||
auto max_indexed_id = indexing_record.get_finished_ack();
|
||||
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset);
|
||||
auto search_conf = field_indexing.get_search_conf(topK);
|
||||
|
||||
for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
|
||||
auto size_per_chunk = field_indexing.get_size_per_chunk();
|
||||
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
|
||||
|
||||
auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk);
|
||||
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_labels()) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * size_per_chunk;
|
||||
}
|
||||
}
|
||||
|
||||
final_qr.merge(sub_qr);
|
||||
}
|
||||
auto vec_ptr = record.get_field_data<FloatVector>(vecfield_offset);
|
||||
|
||||
int current_chunk_id = 0;
|
||||
|
||||
if (indexing_record.is_in(vecfield_offset)) {
|
||||
auto max_indexed_id = indexing_record.get_finished_ack();
|
||||
const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset);
|
||||
auto search_conf = field_indexing.get_search_conf(topK);
|
||||
Assert(vec_ptr->get_size_per_chunk() == field_indexing.get_size_per_chunk());
|
||||
|
||||
for (int chunk_id = current_chunk_id; chunk_id < max_indexed_id; ++chunk_id) {
|
||||
auto size_per_chunk = field_indexing.get_size_per_chunk();
|
||||
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
|
||||
|
||||
auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk);
|
||||
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
for (auto& x : sub_qr.mutable_labels()) {
|
||||
if (x != -1) {
|
||||
x += chunk_id * size_per_chunk;
|
||||
}
|
||||
}
|
||||
|
||||
final_qr.merge(sub_qr);
|
||||
}
|
||||
current_chunk_id = max_indexed_id;
|
||||
}
|
||||
|
||||
// step 4: brute force search where small indexing is unavailable
|
||||
auto vec_size_per_chunk = vec_ptr->get_size_per_chunk();
|
||||
Assert(vec_size_per_chunk == field_indexing.get_size_per_chunk());
|
||||
auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk);
|
||||
|
||||
for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
|
||||
for (int chunk_id = current_chunk_id; chunk_id < max_chunk; ++chunk_id) {
|
||||
auto& chunk = vec_ptr->get_chunk(chunk_id);
|
||||
|
||||
auto element_begin = chunk_id * vec_size_per_chunk;
|
||||
|
@ -116,6 +121,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
|||
}
|
||||
final_qr.merge(sub_qr);
|
||||
}
|
||||
current_chunk_id = max_chunk;
|
||||
|
||||
results.result_distances_ = std::move(final_qr.mutable_values());
|
||||
results.internal_seg_offsets_ = std::move(final_qr.mutable_labels());
|
||||
|
|
|
@ -39,21 +39,26 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vec
|
|||
|
||||
// Builds the knowhere configuration used to construct the index for this
// vector field. The dimension and metric type come from the field metadata;
// nlist, nprobe and the device id are fixed values.
//
// The field's metric type is optional, so it is asserted to be present and
// unwrapped exactly once before being converted to its knowhere name.
knowhere::Config
VectorFieldIndexing::get_build_conf() const {
    // TODO
    auto type_opt = field_meta_.get_metric_type();
    Assert(type_opt.has_value());
    auto type_name = MetricTypeToName(type_opt.value());
    return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
                            {knowhere::IndexParams::nlist, 100},
                            {knowhere::IndexParams::nprobe, 4},
                            {knowhere::Metric::TYPE, type_name},
                            {knowhere::meta::DEVICEID, 0}};
}
|
||||
|
||||
// Builds the knowhere configuration used when searching this field's index.
//
// top_K: number of results requested per query, stored under meta::TOPK.
//
// The dimension and metric type come from the field metadata; nlist, nprobe
// and the device id are fixed values. The field's metric type is optional, so
// it is asserted to be present and unwrapped before being converted to its
// knowhere name.
knowhere::Config
VectorFieldIndexing::get_search_conf(int top_K) const {
    // TODO
    auto type_opt = field_meta_.get_metric_type();
    Assert(type_opt.has_value());
    auto type_name = MetricTypeToName(type_opt.value());
    return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()},
                            {knowhere::meta::TOPK, top_K},
                            {knowhere::IndexParams::nlist, 100},
                            {knowhere::IndexParams::nprobe, 4},
                            {knowhere::Metric::TYPE, type_name},
                            {knowhere::meta::DEVICEID, 0}};
}
|
||||
|
||||
void
|
||||
|
|
|
@ -109,14 +109,25 @@ class IndexingRecord {
|
|||
|
||||
void
|
||||
Initialize() {
|
||||
int offset = 0;
|
||||
for (auto& field : schema_) {
|
||||
if (field.get_data_type() != DataType::VECTOR_BINARY) {
|
||||
field_indexings_.try_emplace(FieldOffset(offset), CreateIndex(field, size_per_chunk_));
|
||||
int offset_id = 0;
|
||||
for (const FieldMeta& field : schema_) {
|
||||
auto offset = FieldOffset(offset_id);
|
||||
++offset_id;
|
||||
|
||||
if (field.is_vector()) {
|
||||
// TODO: skip binary small index now, reenable after config.yaml is ready
|
||||
if (field.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
continue;
|
||||
}
|
||||
// flat should be skipped
|
||||
if (!field.get_metric_type().has_value()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
++offset;
|
||||
|
||||
field_indexings_.try_emplace(offset, CreateIndex(field, size_per_chunk_));
|
||||
}
|
||||
assert(offset == schema_.size());
|
||||
assert(offset_id == schema_.size());
|
||||
}
|
||||
|
||||
// concurrent, reentrant
|
||||
|
@ -131,7 +142,7 @@ class IndexingRecord {
|
|||
|
||||
// Returns the indexing entry stored for `field_offset`.
//
// The offset must already have an entry; this is asserted before the lookup.
// Callers that are unsure whether an entry exists can check with is_in().
const FieldIndexing&
get_field_indexing(FieldOffset field_offset) const {
    Assert(field_indexings_.count(field_offset));
    return *field_indexings_.at(field_offset);
}
|
||||
|
||||
|
@ -142,6 +153,12 @@ class IndexingRecord {
|
|||
AssertInfo(ptr, "invalid indexing");
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
bool
|
||||
is_in(FieldOffset field_offset) const {
|
||||
return field_indexings_.count(field_offset);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
get_scalar_field_indexing(FieldOffset field_offset) const -> const ScalarFieldIndexing<T>& {
|
||||
|
|
|
@ -178,7 +178,8 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
|
|||
query::dataset::QueryDataset dataset;
|
||||
dataset.query_data = query_data;
|
||||
dataset.num_queries = query_count;
|
||||
dataset.metric_type = field_meta.get_metric_type();
|
||||
// if(field_meta.is)
|
||||
dataset.metric_type = query_info.metric_type_;
|
||||
dataset.topk = query_info.topK_;
|
||||
dataset.dim = field_meta.get_dim();
|
||||
|
||||
|
|
|
@ -29,17 +29,8 @@ FetchContent_Declare(
|
|||
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/googletest-build
|
||||
DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} )
|
||||
|
||||
FetchContent_GetProperties( googletest )
|
||||
if ( NOT googletest_POPULATED )
|
||||
|
||||
FetchContent_Populate( googletest )
|
||||
|
||||
# Adding the following targets:
|
||||
# gtest, gtest_main, gmock, gmock_main
|
||||
add_subdirectory( ${googletest_SOURCE_DIR}
|
||||
${googletest_BINARY_DIR}
|
||||
EXCLUDE_FROM_ALL )
|
||||
endif()
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
# include(GoogleTest)
|
||||
|
||||
# ****************************************************************
|
||||
# Create ALIAS Target
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
enable_testing()
|
||||
|
||||
include_directories(${CMAKE_HOME_DIRECTORY}/src)
|
||||
include_directories(${CMAKE_HOME_DIRECTORY}/src/index/knowhere)
|
||||
set(MILVUS_TEST_FILES
|
||||
|
|
|
@ -411,6 +411,50 @@ TEST(Query, ExecEmpty) {
|
|||
}
|
||||
}
|
||||
|
||||
// Runs a predicate-free vector search on a growing segment whose vector field
// is declared without a metric type (std::nullopt); the metric ("L2") is given
// only in the query DSL.
//
// The test inserts N generated rows, searches with 5 query vectors and prints
// the result as JSON. It makes no assertions on the result content.
// NOTE(review): presumably this exercises the search path for fields that
// have no indexing entry — confirm against the search implementation.
TEST(Query, ExecWithoutPredicateFlat) {
    using namespace milvus::query;
    using namespace milvus::segcore;
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, std::nullopt);
    schema->AddDebugField("age", DataType::FLOAT);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";
    auto plan = CreatePlan(*schema, dsl);
    int64_t N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto num_queries = 5;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    QueryResult qr;
    Timestamp time = 1000000;
    std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
    qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1);
    auto json = QueryResultToJson(qr);
    std::cout << json.dump(2);
}
|
||||
|
||||
TEST(Query, ExecWithoutPredicate) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
// #include "utils/Json.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
#include <random>
|
||||
#include <optional>
|
||||
using std::cin;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
|
|
Loading…
Reference in New Issue