fix: add fp16/bf16 handling that was missing in segcore. (#31771)

issue:https://github.com/milvus-io/milvus/issues/22837

Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
pull/31957/head
cqy123456 2024-04-07 01:13:16 -05:00 committed by GitHub
parent 5429c353c5
commit aba4993c6c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 274 additions and 105 deletions

View File

@ -189,15 +189,14 @@ IndexFactory::CreateVectorIndex(
return std::make_unique<VectorDiskAnnIndex<float>>( return std::make_unique<VectorDiskAnnIndex<float>>(
index_type, metric_type, version, file_manager_context); index_type, metric_type, version, file_manager_context);
} }
// // Uncomment after adding diskann part case DataType::VECTOR_FLOAT16: {
// case DataType::VECTOR_FLOAT16: { return std::make_unique<VectorDiskAnnIndex<float16>>(
// return std::make_unique<VectorDiskAnnIndex<float16>>( index_type, metric_type, version, file_manager_context);
// index_type, metric_type, version, file_manager_context); }
// } case DataType::VECTOR_BFLOAT16: {
// case DataType::VECTOR_BFLOAT16: { return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
// return std::make_unique<VectorDiskAnnIndex<bfloat16>>( index_type, metric_type, version, file_manager_context);
// index_type, metric_type, version, file_manager_context); }
// }
default: default:
throw SegcoreError( throw SegcoreError(
DataTypeInvalid, DataTypeInvalid,
@ -296,15 +295,22 @@ IndexFactory::CreateVectorIndex(
space, space,
file_manager_context); file_manager_context);
} }
// // Uncomment after adding diskann part case DataType::VECTOR_FLOAT16: {
// case DataType::VECTOR_FLOAT16: { return std::make_unique<VectorDiskAnnIndex<float16>>(
// return std::make_unique<VectorDiskAnnIndex<float16>>( index_type,
// index_type, metric_type, version, file_manager_context); metric_type,
// } version,
// case DataType::VECTOR_BFLOAT16: { space,
// return std::make_unique<VectorDiskAnnIndex<bfloat16>>( file_manager_context);
// index_type, metric_type, version, file_manager_context); }
// } case DataType::VECTOR_BFLOAT16: {
return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
index_type,
metric_type,
version,
space,
file_manager_context);
}
default: default:
throw SegcoreError( throw SegcoreError(
DataTypeInvalid, DataTypeInvalid,

View File

@ -177,7 +177,7 @@ VectorDiskAnnIndex<T>::BuildV2(const Config& config) {
knowhere::Json build_config; knowhere::Json build_config;
build_config.update(config); build_config.update(config);
auto local_data_path = file_manager_->CacheRawDataToDisk(space_); auto local_data_path = file_manager_->CacheRawDataToDisk<T>(space_);
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path; build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix(); auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
@ -224,7 +224,7 @@ VectorDiskAnnIndex<T>::Build(const Config& config) {
AssertInfo(insert_files.has_value(), AssertInfo(insert_files.has_value(),
"insert file paths is empty when build disk ann index"); "insert file paths is empty when build disk ann index");
auto local_data_path = auto local_data_path =
file_manager_->CacheRawDataToDisk(insert_files.value()); file_manager_->CacheRawDataToDisk<T>(insert_files.value());
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path; build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix(); auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();

View File

@ -557,6 +557,15 @@ MergeDataArray(
auto obj = vector_array->mutable_float_vector(); auto obj = vector_array->mutable_float_vector();
obj->mutable_data()->Add(data + src_offset * dim, obj->mutable_data()->Add(data + src_offset * dim,
data + (src_offset + 1) * dim); data + (src_offset + 1) * dim);
} else if (field_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
auto data = VEC_FIELD_DATA(src_field_data, float16);
auto obj = vector_array->mutable_float16_vector();
obj->assign(data, dim * sizeof(float16));
} else if (field_meta.get_data_type() ==
DataType::VECTOR_BFLOAT16) {
auto data = VEC_FIELD_DATA(src_field_data, bfloat16);
auto obj = vector_array->mutable_bfloat16_vector();
obj->assign(data, dim * sizeof(bfloat16));
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) { } else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
AssertInfo( AssertInfo(
dim % 8 == 0, dim % 8 == 0,

View File

@ -377,6 +377,7 @@ DiskFileManagerImpl::CacheBatchIndexFilesToDiskV2(
} }
return offset; return offset;
} }
template <typename DataType>
std::string std::string
DiskFileManagerImpl::CacheRawDataToDisk( DiskFileManagerImpl::CacheRawDataToDisk(
std::shared_ptr<milvus_storage::Space> space) { std::shared_ptr<milvus_storage::Space> space) {
@ -413,7 +414,7 @@ DiskFileManagerImpl::CacheRawDataToDisk(
field_data->FillFieldData(col_data); field_data->FillFieldData(col_data);
dim = field_data->get_dim(); dim = field_data->get_dim();
auto data_size = auto data_size =
field_data->get_num_rows() * index_meta_.dim * sizeof(float); field_data->get_num_rows() * index_meta_.dim * sizeof(DataType);
local_chunk_manager->Write(local_data_path, local_chunk_manager->Write(local_data_path,
write_offset, write_offset,
const_cast<void*>(field_data->Data()), const_cast<void*>(field_data->Data()),
@ -441,7 +442,7 @@ SortByPath(std::vector<std::string>& paths) {
std::stol(b.substr(b.find_last_of("/") + 1)); std::stol(b.substr(b.find_last_of("/") + 1));
}); });
} }
template <typename DataType>
std::string std::string
DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) { DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) {
SortByPath(remote_files); SortByPath(remote_files);
@ -476,7 +477,8 @@ DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) {
"inconsistent dim value in multi binlogs!"); "inconsistent dim value in multi binlogs!");
dim = field_data->get_dim(); dim = field_data->get_dim();
auto data_size = field_data->get_num_rows() * dim * sizeof(float); auto data_size =
field_data->get_num_rows() * dim * sizeof(DataType);
local_chunk_manager->Write(local_data_path, local_chunk_manager->Write(local_data_path,
write_offset, write_offset,
const_cast<void*>(field_data->Data()), const_cast<void*>(field_data->Data()),
@ -825,4 +827,23 @@ DiskFileManagerImpl::IsExisted(const std::string& file) noexcept {
return isExist; return isExist;
} }
// Explicit instantiations of both CacheRawDataToDisk overloads (binlog-path
// list and milvus_storage::Space) for the element types float, float16 and
// bfloat16. The member templates are declared in the class but defined in
// this source file, so the specializations that are used must be
// instantiated here.
// NOTE(review): presumably these are the only element types requested by
// callers such as VectorDiskAnnIndex<T>; any new element type needs a
// matching pair of instantiations below - confirm against callers.
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<float>(
std::vector<std::string> remote_files);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<float16>(
std::vector<std::string> remote_files);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
std::vector<std::string> remote_files);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<float>(
std::shared_ptr<milvus_storage::Space> space);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<float16>(
std::shared_ptr<milvus_storage::Space> space);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
std::shared_ptr<milvus_storage::Space> space);
} // namespace milvus::storage } // namespace milvus::storage

View File

@ -96,9 +96,11 @@ class DiskFileManagerImpl : public FileManagerImpl {
const std::vector<std::string>& remote_files, const std::vector<std::string>& remote_files,
const std::vector<int64_t>& remote_file_sizes); const std::vector<int64_t>& remote_file_sizes);
template <typename DataType>
std::string std::string
CacheRawDataToDisk(std::vector<std::string> remote_files); CacheRawDataToDisk(std::vector<std::string> remote_files);
template <typename DataType>
std::string std::string
CacheRawDataToDisk(std::shared_ptr<milvus_storage::Space> space); CacheRawDataToDisk(std::shared_ptr<milvus_storage::Space> space);

View File

@ -415,6 +415,24 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> data,
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data); std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
return array->byte_width() * 8; return array->byte_width() * 8;
} }
case DataType::VECTOR_FLOAT16: {
AssertInfo(
data->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY,
"inconsistent data type: {}",
data->type_id());
auto array =
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
return array->byte_width() / sizeof(float16);
}
case DataType::VECTOR_BFLOAT16: {
AssertInfo(
data->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY,
"inconsistent data type: {}",
data->type_id());
auto array =
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
return array->byte_width() / sizeof(bfloat16);
}
default: default:
PanicInfo(DataTypeInvalid, "unsupported data type {}", data_type); PanicInfo(DataTypeInvalid, "unsupported data type {}", data_type);
} }

View File

@ -1787,6 +1787,7 @@ TEST(CApiTest, ReduceRemoveDuplicates) {
DeleteSegment(segment); DeleteSegment(segment);
} }
template <typename VecType = float>
void void
testReduceSearchWithExpr(int N, testReduceSearchWithExpr(int N,
int topK, int topK,
@ -1794,8 +1795,19 @@ testReduceSearchWithExpr(int N,
bool filter_all = false) { bool filter_all = false) {
std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", " std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", "
<< num_queries << ")" << std::endl; << num_queries << ")" << std::endl;
std::function<const char*()> schema_fun;
auto collection = NewCollection(get_default_schema_config()); std::function<std::string(int)> query_gen_fun;
if constexpr (std::is_same_v<VecType, float>) {
schema_fun = get_default_schema_config;
query_gen_fun = generate_query_data;
} else if constexpr (std::is_same_v<VecType, float16>) {
schema_fun = get_float16_schema_config;
query_gen_fun = generate_query_data_float16;
} else if constexpr (std::is_same_v<VecType, bfloat16>) {
schema_fun = get_bfloat16_schema_config;
query_gen_fun = generate_query_data_bfloat16;
}
auto collection = NewCollection(schema_fun());
CSegmentInterface segment; CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment); auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success); ASSERT_EQ(status.error_code, Success);
@ -1853,7 +1865,7 @@ testReduceSearchWithExpr(int N,
topK % N; topK % N;
} }
auto serialized_expr_plan = fmt.str(); auto serialized_expr_plan = fmt.str();
auto blob = generate_query_data(num_queries); auto blob = query_gen_fun(num_queries);
void* plan = nullptr; void* plan = nullptr;
auto binary_plan = auto binary_plan =
@ -1942,17 +1954,29 @@ testReduceSearchWithExpr(int N,
} }
TEST(CApiTest, ReduceSearchWithExpr) { TEST(CApiTest, ReduceSearchWithExpr) {
//float32
testReduceSearchWithExpr(2, 1, 1); testReduceSearchWithExpr(2, 1, 1);
testReduceSearchWithExpr(2, 10, 10); testReduceSearchWithExpr(2, 10, 10);
testReduceSearchWithExpr(100, 1, 1); testReduceSearchWithExpr(100, 1, 1);
testReduceSearchWithExpr(100, 10, 10); testReduceSearchWithExpr(100, 10, 10);
testReduceSearchWithExpr(10000, 1, 1); testReduceSearchWithExpr(10000, 1, 1);
testReduceSearchWithExpr(10000, 10, 10); testReduceSearchWithExpr(10000, 10, 10);
//float16
testReduceSearchWithExpr(2, 10, 10, false);
testReduceSearchWithExpr(100, 10, 10, false);
//bfloat16
testReduceSearchWithExpr(2, 10, 10, false);
testReduceSearchWithExpr(100, 10, 10, false);
} }
TEST(CApiTest, ReduceSearchWithExprFilterAll) { TEST(CApiTest, ReduceSearchWithExprFilterAll) {
//float32
testReduceSearchWithExpr(2, 1, 1, true); testReduceSearchWithExpr(2, 1, 1, true);
testReduceSearchWithExpr(2, 10, 10, true); testReduceSearchWithExpr(2, 10, 10, true);
//float16
testReduceSearchWithExpr(2, 1, 1, true);
//bfloat16
testReduceSearchWithExpr(2, 1, 1, true);
} }
TEST(CApiTest, LoadIndexInfo) { TEST(CApiTest, LoadIndexInfo) {

View File

@ -520,6 +520,10 @@ TEST(GroupBY, Reduce) {
auto vec_fid = schema->AddDebugField( auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2); "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
auto int64_fid = schema->AddDebugField("int64", DataType::INT64); auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
auto fp16_fid = schema->AddDebugField(
"fakevec_fp16", DataType::VECTOR_FLOAT16, dim, knowhere::metric::L2);
auto bf16_fid = schema->AddDebugField(
"fakevec_bf16", DataType::VECTOR_BFLOAT16, dim, knowhere::metric::L2);
schema->set_primary_field_id(int64_fid); schema->set_primary_field_id(int64_fid);
auto segment1 = CreateSealedSegment(schema); auto segment1 = CreateSealedSegment(schema);
auto segment2 = CreateSealedSegment(schema); auto segment2 = CreateSealedSegment(schema);

View File

@ -689,7 +689,7 @@ TEST_P(IndexTest, GetVector) {
#ifdef BUILD_DISK_ANN #ifdef BUILD_DISK_ANN
TEST(Indexing, SearchDiskAnnWithInvalidParam) { TEST(Indexing, SearchDiskAnnWithInvalidParam) {
int64_t NB = 10000; int64_t NB = 1000;
IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN; IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
MetricType metric_type = knowhere::metric::L2; MetricType metric_type = knowhere::metric::L2;
milvus::index::CreateIndexInfo create_index_info; milvus::index::CreateIndexInfo create_index_info;
@ -720,8 +720,8 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
auto build_conf = Config{ auto build_conf = Config{
{knowhere::meta::METRIC_TYPE, metric_type}, {knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)}, {knowhere::meta::DIM, std::to_string(DIM)},
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)}, {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
{milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(128)}, {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
{milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)}, {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
{milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)}, {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
{milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)}, {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
@ -768,85 +768,169 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
std::runtime_error); std::runtime_error);
} }
// TEST(Indexing, SearchDiskAnnWithInvalidParam_Float16) { TEST(Indexing, SearchDiskAnnWithFloat16) {
// int64_t NB = 10000; int64_t NB = 1000;
// IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN; int64_t NQ = 2;
// MetricType metric_type = knowhere::metric::L2; int64_t K = 4;
// milvus::index::CreateIndexInfo create_index_info; IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
// create_index_info.index_type = index_type; MetricType metric_type = knowhere::metric::L2;
// create_index_info.metric_type = metric_type; milvus::index::CreateIndexInfo create_index_info;
// create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16; create_index_info.index_type = index_type;
// create_index_info.index_engine_version = create_index_info.metric_type = metric_type;
// knowhere::Version::GetCurrentVersion().VersionNumber(); create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16;
create_index_info.index_engine_version =
knowhere::Version::GetCurrentVersion().VersionNumber();
// int64_t collection_id = 1; int64_t collection_id = 1;
// int64_t partition_id = 2; int64_t partition_id = 2;
// int64_t segment_id = 3; int64_t segment_id = 3;
// int64_t field_id = 100; int64_t field_id = 100;
// int64_t build_id = 1000; int64_t build_id = 1000;
// int64_t index_version = 1; int64_t index_version = 1;
// StorageConfig storage_config = get_default_local_storage_config(); StorageConfig storage_config = get_default_local_storage_config();
// milvus::storage::FieldDataMeta field_data_meta{ milvus::storage::FieldDataMeta field_data_meta{
// collection_id, partition_id, segment_id, field_id}; collection_id, partition_id, segment_id, field_id};
// milvus::storage::IndexMeta index_meta{ milvus::storage::IndexMeta index_meta{
// segment_id, field_id, build_id, index_version}; segment_id, field_id, build_id, index_version};
// auto chunk_manager = storage::CreateChunkManager(storage_config); auto chunk_manager = storage::CreateChunkManager(storage_config);
// milvus::storage::FileManagerContext file_manager_context( milvus::storage::FileManagerContext file_manager_context(
// field_data_meta, index_meta, chunk_manager); field_data_meta, index_meta, chunk_manager);
// auto index = milvus::index::IndexFactory::GetInstance().CreateIndex( auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
// create_index_info, file_manager_context); create_index_info, file_manager_context);
// auto build_conf = Config{ auto build_conf = Config{
// {knowhere::meta::METRIC_TYPE, metric_type}, {knowhere::meta::METRIC_TYPE, metric_type},
// {knowhere::meta::DIM, std::to_string(DIM)}, {knowhere::meta::DIM, std::to_string(DIM)},
// {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)}, {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
// {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(128)}, {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
// {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)}, {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
// {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)}, {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
// {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)}, {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
// }; };
// // build disk ann index // build disk ann index
// auto dataset = GenDatasetWithDataType( auto dataset = GenDatasetWithDataType(
// NB, metric_type, milvus::DataType::VECTOR_FLOAT16); NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
// FixedVector<float16> xb_data = FixedVector<float16> xb_data =
// dataset.get_col<float16>(milvus::FieldId(field_id)); dataset.get_col<float16>(milvus::FieldId(field_id));
// knowhere::DataSetPtr xb_dataset = knowhere::DataSetPtr xb_dataset =
// knowhere::GenDataSet(NB, DIM, xb_data.data()); knowhere::GenDataSet(NB, DIM, xb_data.data());
// ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf)); ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));
// // serialize and load disk index, disk index can only be search after loading for now // serialize and load disk index, disk index can only be search after loading for now
// auto binary_set = index->Upload(); auto binary_set = index->Upload();
// index.reset(); index.reset();
// auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex( auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
// create_index_info, file_manager_context); create_index_info, file_manager_context);
// auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get()); auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
// std::vector<std::string> index_files; std::vector<std::string> index_files;
// for (auto& binary : binary_set.binary_map_) { for (auto& binary : binary_set.binary_map_) {
// index_files.emplace_back(binary.first); index_files.emplace_back(binary.first);
// } }
// auto load_conf = generate_load_conf(index_type, metric_type, NB); auto load_conf = generate_load_conf<float16>(index_type, metric_type, NB);
// load_conf["index_files"] = index_files; load_conf["index_files"] = index_files;
// vec_index->Load(load_conf); vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
// EXPECT_EQ(vec_index->Count(), NB); EXPECT_EQ(vec_index->Count(), NB);
// // search disk index with search_list == limit // search disk index with search_list == limit
// int query_offset = 100; int query_offset = 100;
// knowhere::DataSetPtr xq_dataset = knowhere::DataSetPtr xq_dataset =
// knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset); knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
// milvus::SearchInfo search_info; milvus::SearchInfo search_info;
// search_info.topk_ = K; search_info.topk_ = K;
// search_info.metric_type_ = metric_type; search_info.metric_type_ = metric_type;
// search_info.search_params_ = milvus::Config{ search_info.search_params_ = milvus::Config{
// {knowhere::meta::METRIC_TYPE, metric_type}, {knowhere::meta::METRIC_TYPE, metric_type},
// {milvus::index::DISK_ANN_QUERY_LIST, K - 1}, {milvus::index::DISK_ANN_QUERY_LIST, K * 2},
// }; };
// EXPECT_THROW(vec_index->Query(xq_dataset, search_info, nullptr), SearchResult result;
// std::runtime_error); EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
// } }
// Round-trip test for a DISKANN index over bfloat16 vectors with the L2
// metric: build the index from NB generated vectors, upload it, load it
// through a freshly created index object, and check that a top-K query on
// the loaded index does not throw.
TEST(Indexing, SearchDiskAnnWithBFloat16) {
// NB base vectors, NQ query vectors, top-K results per query.
int64_t NB = 1000;
int64_t NQ = 2;
int64_t K = 4;
IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
MetricType metric_type = knowhere::metric::L2;
milvus::index::CreateIndexInfo create_index_info;
create_index_info.index_type = index_type;
create_index_info.metric_type = metric_type;
create_index_info.field_type = milvus::DataType::VECTOR_BFLOAT16;
create_index_info.index_engine_version =
knowhere::Version::GetCurrentVersion().VersionNumber();
// Fixed identifiers used to populate the field-data and index metadata.
int64_t collection_id = 1;
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 100;
int64_t build_id = 1000;
int64_t index_version = 1;
StorageConfig storage_config = get_default_local_storage_config();
milvus::storage::FieldDataMeta field_data_meta{
collection_id, partition_id, segment_id, field_id};
milvus::storage::IndexMeta index_meta{
segment_id, field_id, build_id, index_version};
auto chunk_manager = storage::CreateChunkManager(storage_config);
milvus::storage::FileManagerContext file_manager_context(
field_data_meta, index_meta, chunk_manager);
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, file_manager_context);
// DISKANN build parameters; every value except the metric type is passed
// as a string.
auto build_conf = Config{
{knowhere::meta::METRIC_TYPE, metric_type},
{knowhere::meta::DIM, std::to_string(DIM)},
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
{milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
{milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
{milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
{milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
};
// Build the DISKANN index from NB generated bfloat16 vectors of dimension DIM.
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
FixedVector<bfloat16> xb_data =
dataset.get_col<bfloat16>(milvus::FieldId(field_id));
knowhere::DataSetPtr xb_dataset =
knowhere::GenDataSet(NB, DIM, xb_data.data());
ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));
// Upload the built index, release the builder object, then load the index
// through a new index object; a disk index can currently only be searched
// after it has been loaded.
auto binary_set = index->Upload();
index.reset();
auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, file_manager_context);
// NOTE(review): the dynamic_cast result is dereferenced below without a
// null check.
auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
// Collect the names of the uploaded index files for the load configuration.
std::vector<std::string> index_files;
for (auto& binary : binary_set.binary_map_) {
index_files.emplace_back(binary.first);
}
auto load_conf = generate_load_conf<bfloat16>(index_type, metric_type, NB);
load_conf["index_files"] = index_files;
vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
EXPECT_EQ(vec_index->Count(), NB);
// Search the loaded index with a query list size of K * 2, using NQ base
// vectors starting at row query_offset as the queries.
int query_offset = 100;
knowhere::DataSetPtr xq_dataset =
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
milvus::SearchInfo search_info;
search_info.topk_ = K;
search_info.metric_type_ = metric_type;
search_info.search_params_ = milvus::Config{
{knowhere::meta::METRIC_TYPE, metric_type},
{milvus::index::DISK_ANN_QUERY_LIST, K * 2},
};
// NOTE(review): only the absence of an exception is asserted; the contents
// of `result` are not inspected.
SearchResult result;
EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
}
#endif #endif
//class IndexTestV2 //class IndexTestV2

View File

@ -108,7 +108,8 @@ generate_build_conf(const milvus::IndexType& index_type,
return knowhere::Json(); return knowhere::Json();
} }
auto template <typename DataType = float>
inline auto
generate_load_conf(const milvus::IndexType& index_type, generate_load_conf(const milvus::IndexType& index_type,
const milvus::MetricType& metric_type, const milvus::MetricType& metric_type,
int64_t nb) { int64_t nb) {
@ -118,7 +119,8 @@ generate_load_conf(const milvus::IndexType& index_type,
{knowhere::meta::DIM, std::to_string(DIM)}, {knowhere::meta::DIM, std::to_string(DIM)},
{milvus::index::DISK_ANN_LOAD_THREAD_NUM, std::to_string(2)}, {milvus::index::DISK_ANN_LOAD_THREAD_NUM, std::to_string(2)},
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET, {milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET,
std::to_string(0.0002)}, std::to_string(0.05 * sizeof(DataType) * nb /
(1024.0 * 1024.0 * 1024.0))},
}; };
} }
return knowhere::Json{ return knowhere::Json{

View File

@ -146,11 +146,10 @@ func CheckCtxValid(ctx context.Context) bool {
func GetVecFieldIDs(schema *schemapb.CollectionSchema) []int64 { func GetVecFieldIDs(schema *schemapb.CollectionSchema) []int64 {
var vecFieldIDs []int64 var vecFieldIDs []int64
for _, field := range schema.Fields { for _, field := range schema.Fields {
if field.DataType == schemapb.DataType_BinaryVector || field.DataType == schemapb.DataType_FloatVector || field.DataType == schemapb.DataType_Float16Vector || field.DataType == schemapb.DataType_BFloat16Vector || field.DataType == schemapb.DataType_SparseFloatVector { if typeutil.IsVectorType(field.DataType) {
vecFieldIDs = append(vecFieldIDs, field.FieldID) vecFieldIDs = append(vecFieldIDs, field.FieldID)
} }
} }
return vecFieldIDs return vecFieldIDs
} }