From aba4993c6ce8d5e9b865dbddd81035d53affbf2a Mon Sep 17 00:00:00 2001 From: cqy123456 <39671710+cqy123456@users.noreply.github.com> Date: Sun, 7 Apr 2024 01:13:16 -0500 Subject: [PATCH] fix: fix some fp16/bf16 code miss in segcore. (#31771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit issue:https://github.com/milvus-io/milvus/issues/22837 Signed-off-by: cqy123456 --- internal/core/src/index/IndexFactory.cpp | 42 ++-- internal/core/src/index/VectorDiskIndex.cpp | 4 +- internal/core/src/segcore/Utils.cpp | 9 + .../core/src/storage/DiskFileManagerImpl.cpp | 27 +- .../core/src/storage/DiskFileManagerImpl.h | 2 + internal/core/src/storage/Util.cpp | 18 ++ internal/core/unittest/test_c_api.cpp | 32 ++- internal/core/unittest/test_group_by.cpp | 4 + internal/core/unittest/test_indexing.cpp | 232 ++++++++++++------ .../test_utils/indexbuilder_test_utils.h | 6 +- pkg/util/funcutil/func.go | 3 +- 11 files changed, 274 insertions(+), 105 deletions(-) diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index 06b1629543..e6e8a4cf93 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -189,15 +189,14 @@ IndexFactory::CreateVectorIndex( return std::make_unique>( index_type, metric_type, version, file_manager_context); } - // // Uncomment after adding diskann part - // case DataType::VECTOR_FLOAT16: { - // return std::make_unique>( - // index_type, metric_type, version, file_manager_context); - // } - // case DataType::VECTOR_BFLOAT16: { - // return std::make_unique>( - // index_type, metric_type, version, file_manager_context); - // } + case DataType::VECTOR_FLOAT16: { + return std::make_unique>( + index_type, metric_type, version, file_manager_context); + } + case DataType::VECTOR_BFLOAT16: { + return std::make_unique>( + index_type, metric_type, version, file_manager_context); + } default: throw SegcoreError( DataTypeInvalid, @@ -296,15 +295,22 @@ IndexFactory::CreateVectorIndex( space, file_manager_context); } - // // Uncomment after adding diskann part - // case DataType::VECTOR_FLOAT16: { - // return std::make_unique>( - // index_type, metric_type, version, file_manager_context); - // } - // case DataType::VECTOR_BFLOAT16: { - // return std::make_unique>( - // index_type, metric_type, version, file_manager_context); - // } + case DataType::VECTOR_FLOAT16: { + return std::make_unique>( + index_type, + metric_type, + version, + space, + file_manager_context); + } + case DataType::VECTOR_BFLOAT16: { + return std::make_unique>( + index_type, + metric_type, + version, + space, + file_manager_context); + } default: throw SegcoreError( DataTypeInvalid, diff --git a/internal/core/src/index/VectorDiskIndex.cpp b/internal/core/src/index/VectorDiskIndex.cpp index d2c4ef3a03..f4856a1cbb 100644 --- a/internal/core/src/index/VectorDiskIndex.cpp +++ b/internal/core/src/index/VectorDiskIndex.cpp @@ -177,7 +177,7 @@ VectorDiskAnnIndex::BuildV2(const Config& config) { knowhere::Json build_config; build_config.update(config); - auto local_data_path = file_manager_->CacheRawDataToDisk(space_); + auto local_data_path = file_manager_->CacheRawDataToDisk(space_); build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path; auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix(); @@ -224,7 +224,7 @@ VectorDiskAnnIndex::Build(const Config& config) { AssertInfo(insert_files.has_value(), "insert file paths is empty when build disk ann index"); auto local_data_path = - file_manager_->CacheRawDataToDisk(insert_files.value()); + file_manager_->CacheRawDataToDisk(insert_files.value()); build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path; auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix(); diff --git a/internal/core/src/segcore/Utils.cpp b/internal/core/src/segcore/Utils.cpp index 2d73f0223a..0adc911c2e 100644 --- a/internal/core/src/segcore/Utils.cpp +++ b/internal/core/src/segcore/Utils.cpp @@ -557,6 +557,15 @@ MergeDataArray( auto obj = vector_array->mutable_float_vector(); obj->mutable_data()->Add(data + src_offset * dim, data + (src_offset + 1) * dim); + } else if (field_meta.get_data_type() == DataType::VECTOR_FLOAT16) { + auto data = VEC_FIELD_DATA(src_field_data, float16); + auto obj = vector_array->mutable_float16_vector(); + obj->assign(data, dim * sizeof(float16)); + } else if (field_meta.get_data_type() == + DataType::VECTOR_BFLOAT16) { + auto data = VEC_FIELD_DATA(src_field_data, bfloat16); + auto obj = vector_array->mutable_bfloat16_vector(); + obj->assign(data, dim * sizeof(bfloat16)); } else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) { AssertInfo( dim % 8 == 0, diff --git a/internal/core/src/storage/DiskFileManagerImpl.cpp b/internal/core/src/storage/DiskFileManagerImpl.cpp index 9b0080170d..57dbce8728 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.cpp +++ b/internal/core/src/storage/DiskFileManagerImpl.cpp @@ -377,6 +377,7 @@ DiskFileManagerImpl::CacheBatchIndexFilesToDiskV2( } return offset; } +template std::string DiskFileManagerImpl::CacheRawDataToDisk( std::shared_ptr space) { @@ -413,7 +414,7 @@ DiskFileManagerImpl::CacheRawDataToDisk( field_data->FillFieldData(col_data); dim = field_data->get_dim(); auto data_size = - field_data->get_num_rows() * index_meta_.dim * sizeof(float); + field_data->get_num_rows() * index_meta_.dim * sizeof(DataType); local_chunk_manager->Write(local_data_path, write_offset, const_cast(field_data->Data()), @@ -441,7 +442,7 @@ SortByPath(std::vector& paths) { std::stol(b.substr(b.find_last_of("/") + 1)); }); } - +template std::string DiskFileManagerImpl::CacheRawDataToDisk(std::vector remote_files) { SortByPath(remote_files); @@ -476,7 +477,8 @@ DiskFileManagerImpl::CacheRawDataToDisk(std::vector remote_files) { "inconsistent dim value in multi binlogs!"); dim = field_data->get_dim(); - auto data_size = field_data->get_num_rows() * dim * sizeof(float); + auto data_size = + field_data->get_num_rows() * dim * sizeof(DataType); local_chunk_manager->Write(local_data_path, write_offset, const_cast(field_data->Data()), @@ -825,4 +827,23 @@ DiskFileManagerImpl::IsExisted(const std::string& file) noexcept { return isExist; } +template std::string +DiskFileManagerImpl::CacheRawDataToDisk( + std::vector remote_files); +template std::string +DiskFileManagerImpl::CacheRawDataToDisk( + std::vector remote_files); +template std::string +DiskFileManagerImpl::CacheRawDataToDisk( + std::vector remote_files); +template std::string +DiskFileManagerImpl::CacheRawDataToDisk( + std::shared_ptr space); +template std::string +DiskFileManagerImpl::CacheRawDataToDisk( + std::shared_ptr space); +template std::string +DiskFileManagerImpl::CacheRawDataToDisk( + std::shared_ptr space); + } // namespace milvus::storage diff --git a/internal/core/src/storage/DiskFileManagerImpl.h b/internal/core/src/storage/DiskFileManagerImpl.h index 66d5830659..9a6b27d591 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.h +++ b/internal/core/src/storage/DiskFileManagerImpl.h @@ -96,9 +96,11 @@ class DiskFileManagerImpl : public FileManagerImpl { const std::vector& remote_files, const std::vector& remote_file_sizes); + template std::string CacheRawDataToDisk(std::vector remote_files); + template std::string CacheRawDataToDisk(std::shared_ptr space); diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 0437cd0785..0e714f0a97 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -415,6 +415,24 @@ GetDimensionFromArrowArray(std::shared_ptr data, std::dynamic_pointer_cast(data); return array->byte_width() * 8; } + case DataType::VECTOR_FLOAT16: { + AssertInfo( + data->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY, + "inconsistent data type: {}", + data->type_id()); + auto array = + std::dynamic_pointer_cast(data); + return array->byte_width() / sizeof(float16); + } + case DataType::VECTOR_BFLOAT16: { + AssertInfo( + data->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY, + "inconsistent data type: {}", + data->type_id()); + auto array = + std::dynamic_pointer_cast(data); + return array->byte_width() / sizeof(bfloat16); + } default: PanicInfo(DataTypeInvalid, "unsupported data type {}", data_type); } diff --git a/internal/core/unittest/test_c_api.cpp b/internal/core/unittest/test_c_api.cpp index bbb7be66de..f9c87ca0b8 100644 --- a/internal/core/unittest/test_c_api.cpp +++ b/internal/core/unittest/test_c_api.cpp @@ -1787,6 +1787,7 @@ TEST(CApiTest, ReduceRemoveDuplicates) { DeleteSegment(segment); } +template void testReduceSearchWithExpr(int N, int topK, @@ -1794,8 +1795,19 @@ testReduceSearchWithExpr(int N, bool filter_all = false) { std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", " << num_queries << ")" << std::endl; - - auto collection = NewCollection(get_default_schema_config()); + std::function schema_fun; + std::function query_gen_fun; + if constexpr (std::is_same_v) { + schema_fun = get_default_schema_config; + query_gen_fun = generate_query_data; + } else if constexpr (std::is_same_v) { + schema_fun = get_float16_schema_config; + query_gen_fun = generate_query_data_float16; + } else if constexpr (std::is_same_v) { + schema_fun = get_bfloat16_schema_config; + query_gen_fun = generate_query_data_bfloat16; + } + auto collection = NewCollection(schema_fun()); CSegmentInterface segment; auto status = NewSegment(collection, Growing, -1, &segment); ASSERT_EQ(status.error_code, Success); @@ -1853,7 +1865,7 @@ testReduceSearchWithExpr(int N, topK % N; } auto serialized_expr_plan = fmt.str(); - auto blob = generate_query_data(num_queries); + auto blob = query_gen_fun(num_queries); void* plan = nullptr; auto binary_plan = @@ -1942,17 +1954,29 @@ testReduceSearchWithExpr(int N, } TEST(CApiTest, ReduceSearchWithExpr) { + //float32 testReduceSearchWithExpr(2, 1, 1); testReduceSearchWithExpr(2, 10, 10); testReduceSearchWithExpr(100, 1, 1); testReduceSearchWithExpr(100, 10, 10); testReduceSearchWithExpr(10000, 1, 1); testReduceSearchWithExpr(10000, 10, 10); + //float16 + testReduceSearchWithExpr(2, 10, 10, false); + testReduceSearchWithExpr(100, 10, 10, false); + //bfloat16 + testReduceSearchWithExpr(2, 10, 10, false); + testReduceSearchWithExpr(100, 10, 10, false); } TEST(CApiTest, ReduceSearchWithExprFilterAll) { + //float32 testReduceSearchWithExpr(2, 1, 1, true); testReduceSearchWithExpr(2, 10, 10, true); + //float16 + testReduceSearchWithExpr(2, 1, 1, true); + //bfloat16 + testReduceSearchWithExpr(2, 1, 1, true); } TEST(CApiTest, LoadIndexInfo) { @@ -5277,4 +5301,4 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_BFLOAT16) { TEST(CApiTest, IsLoadWithDisk) { ASSERT_TRUE(IsLoadWithDisk(INVERTED_INDEX_TYPE, 0)); -} +} \ No newline at end of file diff --git a/internal/core/unittest/test_group_by.cpp b/internal/core/unittest/test_group_by.cpp index d26c04dde4..bddfb40fe6 100644 --- a/internal/core/unittest/test_group_by.cpp +++ b/internal/core/unittest/test_group_by.cpp @@ -520,6 +520,10 @@ TEST(GroupBY, Reduce) { auto vec_fid = schema->AddDebugField( "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2); auto int64_fid = schema->AddDebugField("int64", DataType::INT64); + auto fp16_fid = schema->AddDebugField( + "fakevec_fp16", DataType::VECTOR_FLOAT16, dim, knowhere::metric::L2); + auto bf16_fid = schema->AddDebugField( + "fakevec_bf16", DataType::VECTOR_BFLOAT16, dim, knowhere::metric::L2); schema->set_primary_field_id(int64_fid); auto segment1 = CreateSealedSegment(schema); auto segment2 = CreateSealedSegment(schema); diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 2262958572..b94c442ead 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -689,7 +689,7 @@ TEST_P(IndexTest, GetVector) { #ifdef BUILD_DISK_ANN TEST(Indexing, SearchDiskAnnWithInvalidParam) { - int64_t NB = 10000; + int64_t NB = 1000; IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN; MetricType metric_type = knowhere::metric::L2; milvus::index::CreateIndexInfo create_index_info; @@ -720,8 +720,8 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) { auto build_conf = Config{ {knowhere::meta::METRIC_TYPE, metric_type}, {knowhere::meta::DIM, std::to_string(DIM)}, - {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)}, - {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(128)}, + {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)}, + {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)}, {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)}, {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)}, {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)}, @@ -768,85 +768,169 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) { std::runtime_error); } -// TEST(Indexing, SearchDiskAnnWithInvalidParam_Float16) { -// int64_t NB = 10000; -// IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN; -// MetricType metric_type = knowhere::metric::L2; -// milvus::index::CreateIndexInfo create_index_info; -// create_index_info.index_type = index_type; -// create_index_info.metric_type = metric_type; -// create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16; -// create_index_info.index_engine_version = -// knowhere::Version::GetCurrentVersion().VersionNumber(); +TEST(Indexing, SearchDiskAnnWithFloat16) { + int64_t NB = 1000; + int64_t NQ = 2; + int64_t K = 4; + IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN; + MetricType metric_type = knowhere::metric::L2; + milvus::index::CreateIndexInfo create_index_info; + create_index_info.index_type = index_type; + create_index_info.metric_type = metric_type; + create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16; + create_index_info.index_engine_version = + knowhere::Version::GetCurrentVersion().VersionNumber(); -// int64_t collection_id = 1; -// int64_t partition_id = 2; -// int64_t segment_id = 3; -// int64_t field_id = 100; -// int64_t build_id = 1000; -// int64_t index_version = 1; + int64_t collection_id = 1; + int64_t partition_id = 2; + int64_t segment_id = 3; + int64_t field_id = 100; + int64_t build_id = 1000; + int64_t index_version = 1; -// StorageConfig storage_config = get_default_local_storage_config(); -// milvus::storage::FieldDataMeta field_data_meta{ -// collection_id, partition_id, segment_id, field_id}; -// milvus::storage::IndexMeta index_meta{ -// segment_id, field_id, build_id, index_version}; -// auto chunk_manager = storage::CreateChunkManager(storage_config); -// milvus::storage::FileManagerContext file_manager_context( -// field_data_meta, index_meta, chunk_manager); -// auto index = milvus::index::IndexFactory::GetInstance().CreateIndex( -// create_index_info, file_manager_context); + StorageConfig storage_config = get_default_local_storage_config(); + milvus::storage::FieldDataMeta field_data_meta{ + collection_id, partition_id, segment_id, field_id}; + milvus::storage::IndexMeta index_meta{ + segment_id, field_id, build_id, index_version}; + auto chunk_manager = storage::CreateChunkManager(storage_config); + milvus::storage::FileManagerContext file_manager_context( + field_data_meta, index_meta, chunk_manager); + auto index = milvus::index::IndexFactory::GetInstance().CreateIndex( + create_index_info, file_manager_context); -// auto build_conf = Config{ -// {knowhere::meta::METRIC_TYPE, metric_type}, -// {knowhere::meta::DIM, std::to_string(DIM)}, -// {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)}, -// {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(128)}, -// {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)}, -// {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)}, -// {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)}, -// }; + auto build_conf = Config{ + {knowhere::meta::METRIC_TYPE, metric_type}, + {knowhere::meta::DIM, std::to_string(DIM)}, + {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)}, + {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)}, + {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)}, + {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)}, + {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)}, + }; -// // build disk ann index -// auto dataset = GenDatasetWithDataType( -// NB, metric_type, milvus::DataType::VECTOR_FLOAT16); -// FixedVector xb_data = -// dataset.get_col(milvus::FieldId(field_id)); -// knowhere::DataSetPtr xb_dataset = -// knowhere::GenDataSet(NB, DIM, xb_data.data()); -// ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf)); + // build disk ann index + auto dataset = GenDatasetWithDataType( + NB, metric_type, milvus::DataType::VECTOR_FLOAT16); + FixedVector xb_data = + dataset.get_col(milvus::FieldId(field_id)); + knowhere::DataSetPtr xb_dataset = + knowhere::GenDataSet(NB, DIM, xb_data.data()); + ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf)); -// // serialize and load disk index, disk index can only be search after loading for now -// auto binary_set = index->Upload(); -// index.reset(); + // serialize and load disk index, disk index can only be search after loading for now + auto binary_set = index->Upload(); + index.reset(); -// auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex( -// create_index_info, file_manager_context); -// auto vec_index = dynamic_cast(new_index.get()); -// std::vector index_files; -// for (auto& binary : binary_set.binary_map_) { -// index_files.emplace_back(binary.first); -// } -// auto load_conf = generate_load_conf(index_type, metric_type, NB); -// load_conf["index_files"] = index_files; -// vec_index->Load(load_conf); -// EXPECT_EQ(vec_index->Count(), NB); + auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex( + create_index_info, file_manager_context); + auto vec_index = dynamic_cast(new_index.get()); + std::vector index_files; + for (auto& binary : binary_set.binary_map_) { + index_files.emplace_back(binary.first); + } + auto load_conf = generate_load_conf(index_type, metric_type, NB); + load_conf["index_files"] = index_files; + vec_index->Load(milvus::tracer::TraceContext{}, load_conf); + EXPECT_EQ(vec_index->Count(), NB); -// // search disk index with search_list == limit -// int query_offset = 100; -// knowhere::DataSetPtr xq_dataset = -// knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset); + // search disk index with search_list == limit + int query_offset = 100; + knowhere::DataSetPtr xq_dataset = + knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset); -// milvus::SearchInfo search_info; -// search_info.topk_ = K; -// search_info.metric_type_ = metric_type; -// search_info.search_params_ = milvus::Config{ -// {knowhere::meta::METRIC_TYPE, metric_type}, -// {milvus::index::DISK_ANN_QUERY_LIST, K - 1}, -// }; -// EXPECT_THROW(vec_index->Query(xq_dataset, search_info, nullptr), -// std::runtime_error); -// } + milvus::SearchInfo search_info; + search_info.topk_ = K; + search_info.metric_type_ = metric_type; + search_info.search_params_ = milvus::Config{ + {knowhere::meta::METRIC_TYPE, metric_type}, + {milvus::index::DISK_ANN_QUERY_LIST, K * 2}, + }; + SearchResult result; + EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result)); +} + +TEST(Indexing, SearchDiskAnnWithBFloat16) { + int64_t NB = 1000; + int64_t NQ = 2; + int64_t K = 4; + IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN; + MetricType metric_type = knowhere::metric::L2; + milvus::index::CreateIndexInfo create_index_info; + create_index_info.index_type = index_type; + create_index_info.metric_type = metric_type; + create_index_info.field_type = milvus::DataType::VECTOR_BFLOAT16; + create_index_info.index_engine_version = + knowhere::Version::GetCurrentVersion().VersionNumber(); + + int64_t collection_id = 1; + int64_t partition_id = 2; + int64_t segment_id = 3; + int64_t field_id = 100; + int64_t build_id = 1000; + int64_t index_version = 1; + + StorageConfig storage_config = get_default_local_storage_config(); + milvus::storage::FieldDataMeta field_data_meta{ + collection_id, partition_id, segment_id, field_id}; + milvus::storage::IndexMeta index_meta{ + segment_id, field_id, build_id, index_version}; + auto chunk_manager = storage::CreateChunkManager(storage_config); + milvus::storage::FileManagerContext file_manager_context( + field_data_meta, index_meta, chunk_manager); + auto index = milvus::index::IndexFactory::GetInstance().CreateIndex( + create_index_info, file_manager_context); + + auto build_conf = Config{ + {knowhere::meta::METRIC_TYPE, metric_type}, + {knowhere::meta::DIM, std::to_string(DIM)}, + {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)}, + {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)}, + {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)}, + {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)}, + {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)}, + }; + + // build disk ann index + auto dataset = GenDatasetWithDataType( + NB, metric_type, milvus::DataType::VECTOR_BFLOAT16); + FixedVector xb_data = + dataset.get_col(milvus::FieldId(field_id)); + knowhere::DataSetPtr xb_dataset = + knowhere::GenDataSet(NB, DIM, xb_data.data()); + ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf)); + + // serialize and load disk index, disk index can only be search after loading for now + auto binary_set = index->Upload(); + index.reset(); + + auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex( + create_index_info, file_manager_context); + auto vec_index = dynamic_cast(new_index.get()); + std::vector index_files; + for (auto& binary : binary_set.binary_map_) { + index_files.emplace_back(binary.first); + } + auto load_conf = generate_load_conf(index_type, metric_type, NB); + load_conf["index_files"] = index_files; + vec_index->Load(milvus::tracer::TraceContext{}, load_conf); + EXPECT_EQ(vec_index->Count(), NB); + + // search disk index with search_list == limit + int query_offset = 100; + knowhere::DataSetPtr xq_dataset = + knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset); + + milvus::SearchInfo search_info; + search_info.topk_ = K; + search_info.metric_type_ = metric_type; + search_info.search_params_ = milvus::Config{ + {knowhere::meta::METRIC_TYPE, metric_type}, + {milvus::index::DISK_ANN_QUERY_LIST, K * 2}, + }; + SearchResult result; + EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result)); +} #endif //class IndexTestV2 diff --git a/internal/core/unittest/test_utils/indexbuilder_test_utils.h b/internal/core/unittest/test_utils/indexbuilder_test_utils.h index dd2b08cf7a..2c4a283cd0 100644 --- a/internal/core/unittest/test_utils/indexbuilder_test_utils.h +++ b/internal/core/unittest/test_utils/indexbuilder_test_utils.h @@ -108,7 +108,8 @@ generate_build_conf(const milvus::IndexType& index_type, return knowhere::Json(); } -auto +template +inline auto generate_load_conf(const milvus::IndexType& index_type, const milvus::MetricType& metric_type, int64_t nb) { @@ -118,7 +119,8 @@ generate_load_conf(const milvus::IndexType& index_type, {knowhere::meta::DIM, std::to_string(DIM)}, {milvus::index::DISK_ANN_LOAD_THREAD_NUM, std::to_string(2)}, {milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET, - std::to_string(0.0002)}, + std::to_string(0.05 * sizeof(DataType) * nb / + (1024.0 * 1024.0 * 1024.0))}, }; } return knowhere::Json{ diff --git a/pkg/util/funcutil/func.go b/pkg/util/funcutil/func.go index 231f59c188..6a8c8d0e7c 100644 --- a/pkg/util/funcutil/func.go +++ b/pkg/util/funcutil/func.go @@ -146,11 +146,10 @@ func CheckCtxValid(ctx context.Context) bool { func GetVecFieldIDs(schema *schemapb.CollectionSchema) []int64 { var vecFieldIDs []int64 for _, field := range schema.Fields { - if field.DataType == schemapb.DataType_BinaryVector || field.DataType == schemapb.DataType_FloatVector || field.DataType == schemapb.DataType_Float16Vector || field.DataType == schemapb.DataType_BFloat16Vector || field.DataType == schemapb.DataType_SparseFloatVector { + if typeutil.IsVectorType(field.DataType) { vecFieldIDs = append(vecFieldIDs, field.FieldID) } } - return vecFieldIDs }