diff --git a/internal/core/src/common/FieldMeta.h b/internal/core/src/common/FieldMeta.h index b9bd2102a5..fa997cf4c0 100644 --- a/internal/core/src/common/FieldMeta.h +++ b/internal/core/src/common/FieldMeta.h @@ -15,6 +15,7 @@ #include "utils/EasyAssert.h" #include #include +#include namespace milvus { inline int @@ -82,7 +83,7 @@ datatype_is_vector(DataType datatype) { return datatype == DataType::VECTOR_BINARY || datatype == DataType::VECTOR_FLOAT; } -struct FieldMeta { +class FieldMeta { public: FieldMeta(const FieldMeta&) = delete; FieldMeta(FieldMeta&&) = default; @@ -95,7 +96,7 @@ struct FieldMeta { Assert(!is_vector()); } - FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t dim, MetricType metric_type) + FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t dim, std::optional metric_type) : name_(name), id_(id), type_(type), vector_info_(VectorInfo{dim, metric_type}) { Assert(is_vector()); } @@ -113,7 +114,7 @@ struct FieldMeta { return vector_info_->dim_; } - MetricType + std::optional get_metric_type() const { Assert(is_vector()); Assert(vector_info_.has_value()); @@ -147,7 +148,7 @@ struct FieldMeta { private: struct VectorInfo { int64_t dim_; - MetricType metric_type_; + std::optional metric_type_; }; FieldName name_; FieldId id_; diff --git a/internal/core/src/common/Schema.h b/internal/core/src/common/Schema.h index 2a23e0d428..cadf62e8cb 100644 --- a/internal/core/src/common/Schema.h +++ b/internal/core/src/common/Schema.h @@ -34,7 +34,7 @@ class Schema { // auto gen field_id for convenience FieldId - AddDebugField(const std::string& name, DataType data_type, int64_t dim, MetricType metric_type) { + AddDebugField(const std::string& name, DataType data_type, int64_t dim, std::optional metric_type) { static int64_t debug_id = 2001; auto field_id = FieldId(debug_id); debug_id += 2; diff --git a/internal/core/src/query/SearchOnGrowing.cpp b/internal/core/src/query/SearchOnGrowing.cpp index 0e15fc107b..1e6f1c7252 100644 --- a/internal/core/src/query/SearchOnGrowing.cpp +++ b/internal/core/src/query/SearchOnGrowing.cpp @@ -70,35 +70,40 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment, // std::vector final_dis(total_count, std::numeric_limits::max()); SubQueryResult final_qr(num_queries, topK, metric_type); dataset::QueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data}; - - auto max_indexed_id = indexing_record.get_finished_ack(); - const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset); - auto search_conf = field_indexing.get_search_conf(topK); - - for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) { - auto size_per_chunk = field_indexing.get_size_per_chunk(); - auto indexing = field_indexing.get_chunk_indexing(chunk_id); - - auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk); - auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view); - - // convert chunk uid to segment uid - for (auto& x : sub_qr.mutable_labels()) { - if (x != -1) { - x += chunk_id * size_per_chunk; - } - } - - final_qr.merge(sub_qr); - } auto vec_ptr = record.get_field_data(vecfield_offset); + int current_chunk_id = 0; + + if (indexing_record.is_in(vecfield_offset)) { + auto max_indexed_id = indexing_record.get_finished_ack(); + const auto& field_indexing = indexing_record.get_vec_field_indexing(vecfield_offset); + auto search_conf = field_indexing.get_search_conf(topK); + Assert(vec_ptr->get_size_per_chunk() == field_indexing.get_size_per_chunk()); + + for (int chunk_id = current_chunk_id; chunk_id < max_indexed_id; ++chunk_id) { + auto size_per_chunk = field_indexing.get_size_per_chunk(); + auto indexing = field_indexing.get_chunk_indexing(chunk_id); + + auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk); + auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view); + + // convert chunk uid to segment uid + for (auto& x : sub_qr.mutable_labels()) { + if (x != -1) { + x += chunk_id * size_per_chunk; + } + } + + final_qr.merge(sub_qr); + } + current_chunk_id = max_indexed_id; + } + // step 4: brute force search where small indexing is unavailable auto vec_size_per_chunk = vec_ptr->get_size_per_chunk(); - Assert(vec_size_per_chunk == field_indexing.get_size_per_chunk()); auto max_chunk = upper_div(ins_barrier, vec_size_per_chunk); - for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) { + for (int chunk_id = current_chunk_id; chunk_id < max_chunk; ++chunk_id) { auto& chunk = vec_ptr->get_chunk(chunk_id); auto element_begin = chunk_id * vec_size_per_chunk; @@ -116,6 +121,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment, } final_qr.merge(sub_qr); } + current_chunk_id = max_chunk; results.result_distances_ = std::move(final_qr.mutable_values()); results.internal_seg_offsets_ = std::move(final_qr.mutable_labels()); diff --git a/internal/core/src/segcore/FieldIndexing.cpp b/internal/core/src/segcore/FieldIndexing.cpp index 1309e2eb96..57c7654c85 100644 --- a/internal/core/src/segcore/FieldIndexing.cpp +++ b/internal/core/src/segcore/FieldIndexing.cpp @@ -39,21 +39,26 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vec knowhere::Config VectorFieldIndexing::get_build_conf() const { + // TODO + auto type_opt = field_meta_.get_metric_type(); + Assert(type_opt.has_value()); + auto type_name = MetricTypeToName(type_opt.value()); return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()}, {knowhere::IndexParams::nlist, 100}, {knowhere::IndexParams::nprobe, 4}, - {knowhere::Metric::TYPE, MetricTypeToName(field_meta_.get_metric_type())}, + {knowhere::Metric::TYPE, type_name}, {knowhere::meta::DEVICEID, 0}}; } knowhere::Config VectorFieldIndexing::get_search_conf(int top_K) const { - return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()}, - {knowhere::meta::TOPK, top_K}, - {knowhere::IndexParams::nlist, 100}, - {knowhere::IndexParams::nprobe, 4}, - {knowhere::Metric::TYPE, MetricTypeToName(field_meta_.get_metric_type())}, - {knowhere::meta::DEVICEID, 0}}; + // TODO + auto type_opt = field_meta_.get_metric_type(); + Assert(type_opt.has_value()); + auto type_name = MetricTypeToName(type_opt.value()); + return knowhere::Config{{knowhere::meta::DIM, field_meta_.get_dim()}, {knowhere::meta::TOPK, top_K}, + {knowhere::IndexParams::nlist, 100}, {knowhere::IndexParams::nprobe, 4}, + {knowhere::Metric::TYPE, type_name}, {knowhere::meta::DEVICEID, 0}}; } void diff --git a/internal/core/src/segcore/FieldIndexing.h b/internal/core/src/segcore/FieldIndexing.h index c18bc9d04f..be15ff5562 100644 --- a/internal/core/src/segcore/FieldIndexing.h +++ b/internal/core/src/segcore/FieldIndexing.h @@ -109,14 +109,25 @@ class IndexingRecord { void Initialize() { - int offset = 0; - for (auto& field : schema_) { - if (field.get_data_type() != DataType::VECTOR_BINARY) { - field_indexings_.try_emplace(FieldOffset(offset), CreateIndex(field, size_per_chunk_)); + int offset_id = 0; + for (const FieldMeta& field : schema_) { + auto offset = FieldOffset(offset_id); + ++offset_id; + + if (field.is_vector()) { + // TODO: skip binary small index now, reenable after config.yaml is ready + if (field.get_data_type() == DataType::VECTOR_BINARY) { + continue; + } + // flat should be skipped + if (!field.get_metric_type().has_value()) { + continue; + } } - ++offset; + + field_indexings_.try_emplace(offset, CreateIndex(field, size_per_chunk_)); } - assert(offset == schema_.size()); + assert(offset_id == schema_.size()); } // concurrent, reentrant @@ -131,7 +142,7 @@ class IndexingRecord { const FieldIndexing& get_field_indexing(FieldOffset field_offset) const { - assert(field_indexings_.count(field_offset)); + Assert(field_indexings_.count(field_offset)); return *field_indexings_.at(field_offset); } @@ -142,6 +153,12 @@ class IndexingRecord { AssertInfo(ptr, "invalid indexing"); return *ptr; } + + bool + is_in(FieldOffset field_offset) const { + return field_indexings_.count(field_offset); + } + template auto get_scalar_field_indexing(FieldOffset field_offset) const -> const ScalarFieldIndexing& { diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 7505e5410d..769e3e0867 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -178,7 +178,8 @@ SegmentSealedImpl::vector_search(int64_t vec_count, query::dataset::QueryDataset dataset; dataset.query_data = query_data; dataset.num_queries = query_count; - dataset.metric_type = field_meta.get_metric_type(); + // if(field_meta.is) + dataset.metric_type = query_info.metric_type_; dataset.topk = query_info.topK_; dataset.dim = field_meta.get_dim(); diff --git a/internal/core/thirdparty/gtest/CMakeLists.txt b/internal/core/thirdparty/gtest/CMakeLists.txt index 0e6b65313e..895cae7166 100644 --- a/internal/core/thirdparty/gtest/CMakeLists.txt +++ b/internal/core/thirdparty/gtest/CMakeLists.txt @@ -29,17 +29,8 @@ FetchContent_Declare( BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/googletest-build DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} ) -FetchContent_GetProperties( googletest ) -if ( NOT googletest_POPULATED ) - - FetchContent_Populate( googletest ) - - # Adding the following targets: - # gtest, gtest_main, gmock, gmock_main - add_subdirectory( ${googletest_SOURCE_DIR} - ${googletest_BINARY_DIR} - EXCLUDE_FROM_ALL ) -endif() +FetchContent_MakeAvailable(googletest) +# include(GoogleTest) # **************************************************************** # Create ALIAS Target diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index bfc06eea76..51bc740e6e 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -1,5 +1,3 @@ -enable_testing() - include_directories(${CMAKE_HOME_DIRECTORY}/src) include_directories(${CMAKE_HOME_DIRECTORY}/src/index/knowhere) set(MILVUS_TEST_FILES diff --git a/internal/core/unittest/test_query.cpp b/internal/core/unittest/test_query.cpp index b80f92dbad..c6a69f98d2 100644 --- a/internal/core/unittest/test_query.cpp +++ b/internal/core/unittest/test_query.cpp @@ -411,6 +411,50 @@ TEST(Query, ExecEmpty) { } } +TEST(Query, ExecWithoutPredicateFlat) { + using namespace milvus::query; + using namespace milvus::segcore; + auto schema = std::make_shared(); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, std::nullopt); + schema->AddDebugField("age", DataType::FLOAT); + std::string dsl = R"({ + "bool": { + "must": [ + { + "vector": { + "fakevec": { + "metric_type": "L2", + "params": { + "nprobe": 10 + }, + "query": "$0", + "topk": 5 + } + } + } + ] + } + })"; + auto plan = CreatePlan(*schema, dsl); + int64_t N = 1000 * 1000; + auto dataset = DataGen(schema, N); + auto segment = CreateGrowingSegment(schema); + segment->PreInsert(N); + segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); + + auto num_queries = 5; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + QueryResult qr; + Timestamp time = 1000000; + std::vector ph_group_arr = {ph_group.get()}; + qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); + std::vector> results; + int topk = 5; + auto json = QueryResultToJson(qr); + std::cout << json.dump(2); +} + TEST(Query, ExecWithoutPredicate) { using namespace milvus::query; using namespace milvus::segcore; diff --git a/internal/core/unittest/test_segcore.cpp b/internal/core/unittest/test_segcore.cpp index d1e0befc57..598b414f7e 100644 --- a/internal/core/unittest/test_segcore.cpp +++ b/internal/core/unittest/test_segcore.cpp @@ -21,6 +21,7 @@ // #include "utils/Json.h" #include "test_utils/DataGen.h" #include +#include using std::cin; using std::cout; using std::endl;