mirror of https://github.com/milvus-io/milvus.git
fix: fix some fp16/bf16 code miss in segcore. (#31771)
issue:https://github.com/milvus-io/milvus/issues/22837 Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>pull/31957/head
parent
5429c353c5
commit
aba4993c6c
|
@ -189,15 +189,14 @@ IndexFactory::CreateVectorIndex(
|
||||||
return std::make_unique<VectorDiskAnnIndex<float>>(
|
return std::make_unique<VectorDiskAnnIndex<float>>(
|
||||||
index_type, metric_type, version, file_manager_context);
|
index_type, metric_type, version, file_manager_context);
|
||||||
}
|
}
|
||||||
// // Uncomment after adding diskann part
|
case DataType::VECTOR_FLOAT16: {
|
||||||
// case DataType::VECTOR_FLOAT16: {
|
return std::make_unique<VectorDiskAnnIndex<float16>>(
|
||||||
// return std::make_unique<VectorDiskAnnIndex<float16>>(
|
index_type, metric_type, version, file_manager_context);
|
||||||
// index_type, metric_type, version, file_manager_context);
|
}
|
||||||
// }
|
case DataType::VECTOR_BFLOAT16: {
|
||||||
// case DataType::VECTOR_BFLOAT16: {
|
return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
||||||
// return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
index_type, metric_type, version, file_manager_context);
|
||||||
// index_type, metric_type, version, file_manager_context);
|
}
|
||||||
// }
|
|
||||||
default:
|
default:
|
||||||
throw SegcoreError(
|
throw SegcoreError(
|
||||||
DataTypeInvalid,
|
DataTypeInvalid,
|
||||||
|
@ -296,15 +295,22 @@ IndexFactory::CreateVectorIndex(
|
||||||
space,
|
space,
|
||||||
file_manager_context);
|
file_manager_context);
|
||||||
}
|
}
|
||||||
// // Uncomment after adding diskann part
|
case DataType::VECTOR_FLOAT16: {
|
||||||
// case DataType::VECTOR_FLOAT16: {
|
return std::make_unique<VectorDiskAnnIndex<float16>>(
|
||||||
// return std::make_unique<VectorDiskAnnIndex<float16>>(
|
index_type,
|
||||||
// index_type, metric_type, version, file_manager_context);
|
metric_type,
|
||||||
// }
|
version,
|
||||||
// case DataType::VECTOR_BFLOAT16: {
|
space,
|
||||||
// return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
file_manager_context);
|
||||||
// index_type, metric_type, version, file_manager_context);
|
}
|
||||||
// }
|
case DataType::VECTOR_BFLOAT16: {
|
||||||
|
return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
||||||
|
index_type,
|
||||||
|
metric_type,
|
||||||
|
version,
|
||||||
|
space,
|
||||||
|
file_manager_context);
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
throw SegcoreError(
|
throw SegcoreError(
|
||||||
DataTypeInvalid,
|
DataTypeInvalid,
|
||||||
|
|
|
@ -177,7 +177,7 @@ VectorDiskAnnIndex<T>::BuildV2(const Config& config) {
|
||||||
knowhere::Json build_config;
|
knowhere::Json build_config;
|
||||||
build_config.update(config);
|
build_config.update(config);
|
||||||
|
|
||||||
auto local_data_path = file_manager_->CacheRawDataToDisk(space_);
|
auto local_data_path = file_manager_->CacheRawDataToDisk<T>(space_);
|
||||||
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
|
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
|
||||||
|
|
||||||
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
||||||
|
@ -224,7 +224,7 @@ VectorDiskAnnIndex<T>::Build(const Config& config) {
|
||||||
AssertInfo(insert_files.has_value(),
|
AssertInfo(insert_files.has_value(),
|
||||||
"insert file paths is empty when build disk ann index");
|
"insert file paths is empty when build disk ann index");
|
||||||
auto local_data_path =
|
auto local_data_path =
|
||||||
file_manager_->CacheRawDataToDisk(insert_files.value());
|
file_manager_->CacheRawDataToDisk<T>(insert_files.value());
|
||||||
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
|
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
|
||||||
|
|
||||||
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
||||||
|
|
|
@ -557,6 +557,15 @@ MergeDataArray(
|
||||||
auto obj = vector_array->mutable_float_vector();
|
auto obj = vector_array->mutable_float_vector();
|
||||||
obj->mutable_data()->Add(data + src_offset * dim,
|
obj->mutable_data()->Add(data + src_offset * dim,
|
||||||
data + (src_offset + 1) * dim);
|
data + (src_offset + 1) * dim);
|
||||||
|
} else if (field_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
|
||||||
|
auto data = VEC_FIELD_DATA(src_field_data, float16);
|
||||||
|
auto obj = vector_array->mutable_float16_vector();
|
||||||
|
obj->assign(data, dim * sizeof(float16));
|
||||||
|
} else if (field_meta.get_data_type() ==
|
||||||
|
DataType::VECTOR_BFLOAT16) {
|
||||||
|
auto data = VEC_FIELD_DATA(src_field_data, bfloat16);
|
||||||
|
auto obj = vector_array->mutable_bfloat16_vector();
|
||||||
|
obj->assign(data, dim * sizeof(bfloat16));
|
||||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||||
AssertInfo(
|
AssertInfo(
|
||||||
dim % 8 == 0,
|
dim % 8 == 0,
|
||||||
|
|
|
@ -377,6 +377,7 @@ DiskFileManagerImpl::CacheBatchIndexFilesToDiskV2(
|
||||||
}
|
}
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
template <typename DataType>
|
||||||
std::string
|
std::string
|
||||||
DiskFileManagerImpl::CacheRawDataToDisk(
|
DiskFileManagerImpl::CacheRawDataToDisk(
|
||||||
std::shared_ptr<milvus_storage::Space> space) {
|
std::shared_ptr<milvus_storage::Space> space) {
|
||||||
|
@ -413,7 +414,7 @@ DiskFileManagerImpl::CacheRawDataToDisk(
|
||||||
field_data->FillFieldData(col_data);
|
field_data->FillFieldData(col_data);
|
||||||
dim = field_data->get_dim();
|
dim = field_data->get_dim();
|
||||||
auto data_size =
|
auto data_size =
|
||||||
field_data->get_num_rows() * index_meta_.dim * sizeof(float);
|
field_data->get_num_rows() * index_meta_.dim * sizeof(DataType);
|
||||||
local_chunk_manager->Write(local_data_path,
|
local_chunk_manager->Write(local_data_path,
|
||||||
write_offset,
|
write_offset,
|
||||||
const_cast<void*>(field_data->Data()),
|
const_cast<void*>(field_data->Data()),
|
||||||
|
@ -441,7 +442,7 @@ SortByPath(std::vector<std::string>& paths) {
|
||||||
std::stol(b.substr(b.find_last_of("/") + 1));
|
std::stol(b.substr(b.find_last_of("/") + 1));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
template <typename DataType>
|
||||||
std::string
|
std::string
|
||||||
DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) {
|
DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) {
|
||||||
SortByPath(remote_files);
|
SortByPath(remote_files);
|
||||||
|
@ -476,7 +477,8 @@ DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) {
|
||||||
"inconsistent dim value in multi binlogs!");
|
"inconsistent dim value in multi binlogs!");
|
||||||
dim = field_data->get_dim();
|
dim = field_data->get_dim();
|
||||||
|
|
||||||
auto data_size = field_data->get_num_rows() * dim * sizeof(float);
|
auto data_size =
|
||||||
|
field_data->get_num_rows() * dim * sizeof(DataType);
|
||||||
local_chunk_manager->Write(local_data_path,
|
local_chunk_manager->Write(local_data_path,
|
||||||
write_offset,
|
write_offset,
|
||||||
const_cast<void*>(field_data->Data()),
|
const_cast<void*>(field_data->Data()),
|
||||||
|
@ -825,4 +827,23 @@ DiskFileManagerImpl::IsExisted(const std::string& file) noexcept {
|
||||||
return isExist;
|
return isExist;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template std::string
|
||||||
|
DiskFileManagerImpl::CacheRawDataToDisk<float>(
|
||||||
|
std::vector<std::string> remote_files);
|
||||||
|
template std::string
|
||||||
|
DiskFileManagerImpl::CacheRawDataToDisk<float16>(
|
||||||
|
std::vector<std::string> remote_files);
|
||||||
|
template std::string
|
||||||
|
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
|
||||||
|
std::vector<std::string> remote_files);
|
||||||
|
template std::string
|
||||||
|
DiskFileManagerImpl::CacheRawDataToDisk<float>(
|
||||||
|
std::shared_ptr<milvus_storage::Space> space);
|
||||||
|
template std::string
|
||||||
|
DiskFileManagerImpl::CacheRawDataToDisk<float16>(
|
||||||
|
std::shared_ptr<milvus_storage::Space> space);
|
||||||
|
template std::string
|
||||||
|
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
|
||||||
|
std::shared_ptr<milvus_storage::Space> space);
|
||||||
|
|
||||||
} // namespace milvus::storage
|
} // namespace milvus::storage
|
||||||
|
|
|
@ -96,9 +96,11 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
||||||
const std::vector<std::string>& remote_files,
|
const std::vector<std::string>& remote_files,
|
||||||
const std::vector<int64_t>& remote_file_sizes);
|
const std::vector<int64_t>& remote_file_sizes);
|
||||||
|
|
||||||
|
template <typename DataType>
|
||||||
std::string
|
std::string
|
||||||
CacheRawDataToDisk(std::vector<std::string> remote_files);
|
CacheRawDataToDisk(std::vector<std::string> remote_files);
|
||||||
|
|
||||||
|
template <typename DataType>
|
||||||
std::string
|
std::string
|
||||||
CacheRawDataToDisk(std::shared_ptr<milvus_storage::Space> space);
|
CacheRawDataToDisk(std::shared_ptr<milvus_storage::Space> space);
|
||||||
|
|
||||||
|
|
|
@ -415,6 +415,24 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> data,
|
||||||
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
|
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
|
||||||
return array->byte_width() * 8;
|
return array->byte_width() * 8;
|
||||||
}
|
}
|
||||||
|
case DataType::VECTOR_FLOAT16: {
|
||||||
|
AssertInfo(
|
||||||
|
data->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY,
|
||||||
|
"inconsistent data type: {}",
|
||||||
|
data->type_id());
|
||||||
|
auto array =
|
||||||
|
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
|
||||||
|
return array->byte_width() / sizeof(float16);
|
||||||
|
}
|
||||||
|
case DataType::VECTOR_BFLOAT16: {
|
||||||
|
AssertInfo(
|
||||||
|
data->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY,
|
||||||
|
"inconsistent data type: {}",
|
||||||
|
data->type_id());
|
||||||
|
auto array =
|
||||||
|
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
|
||||||
|
return array->byte_width() / sizeof(bfloat16);
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid, "unsupported data type {}", data_type);
|
PanicInfo(DataTypeInvalid, "unsupported data type {}", data_type);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1787,6 +1787,7 @@ TEST(CApiTest, ReduceRemoveDuplicates) {
|
||||||
DeleteSegment(segment);
|
DeleteSegment(segment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename VecType = float>
|
||||||
void
|
void
|
||||||
testReduceSearchWithExpr(int N,
|
testReduceSearchWithExpr(int N,
|
||||||
int topK,
|
int topK,
|
||||||
|
@ -1794,8 +1795,19 @@ testReduceSearchWithExpr(int N,
|
||||||
bool filter_all = false) {
|
bool filter_all = false) {
|
||||||
std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", "
|
std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", "
|
||||||
<< num_queries << ")" << std::endl;
|
<< num_queries << ")" << std::endl;
|
||||||
|
std::function<const char*()> schema_fun;
|
||||||
auto collection = NewCollection(get_default_schema_config());
|
std::function<std::string(int)> query_gen_fun;
|
||||||
|
if constexpr (std::is_same_v<VecType, float>) {
|
||||||
|
schema_fun = get_default_schema_config;
|
||||||
|
query_gen_fun = generate_query_data;
|
||||||
|
} else if constexpr (std::is_same_v<VecType, float16>) {
|
||||||
|
schema_fun = get_float16_schema_config;
|
||||||
|
query_gen_fun = generate_query_data_float16;
|
||||||
|
} else if constexpr (std::is_same_v<VecType, bfloat16>) {
|
||||||
|
schema_fun = get_bfloat16_schema_config;
|
||||||
|
query_gen_fun = generate_query_data_bfloat16;
|
||||||
|
}
|
||||||
|
auto collection = NewCollection(schema_fun());
|
||||||
CSegmentInterface segment;
|
CSegmentInterface segment;
|
||||||
auto status = NewSegment(collection, Growing, -1, &segment);
|
auto status = NewSegment(collection, Growing, -1, &segment);
|
||||||
ASSERT_EQ(status.error_code, Success);
|
ASSERT_EQ(status.error_code, Success);
|
||||||
|
@ -1853,7 +1865,7 @@ testReduceSearchWithExpr(int N,
|
||||||
topK % N;
|
topK % N;
|
||||||
}
|
}
|
||||||
auto serialized_expr_plan = fmt.str();
|
auto serialized_expr_plan = fmt.str();
|
||||||
auto blob = generate_query_data(num_queries);
|
auto blob = query_gen_fun(num_queries);
|
||||||
|
|
||||||
void* plan = nullptr;
|
void* plan = nullptr;
|
||||||
auto binary_plan =
|
auto binary_plan =
|
||||||
|
@ -1942,17 +1954,29 @@ testReduceSearchWithExpr(int N,
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CApiTest, ReduceSearchWithExpr) {
|
TEST(CApiTest, ReduceSearchWithExpr) {
|
||||||
|
//float32
|
||||||
testReduceSearchWithExpr(2, 1, 1);
|
testReduceSearchWithExpr(2, 1, 1);
|
||||||
testReduceSearchWithExpr(2, 10, 10);
|
testReduceSearchWithExpr(2, 10, 10);
|
||||||
testReduceSearchWithExpr(100, 1, 1);
|
testReduceSearchWithExpr(100, 1, 1);
|
||||||
testReduceSearchWithExpr(100, 10, 10);
|
testReduceSearchWithExpr(100, 10, 10);
|
||||||
testReduceSearchWithExpr(10000, 1, 1);
|
testReduceSearchWithExpr(10000, 1, 1);
|
||||||
testReduceSearchWithExpr(10000, 10, 10);
|
testReduceSearchWithExpr(10000, 10, 10);
|
||||||
|
//float16
|
||||||
|
testReduceSearchWithExpr(2, 10, 10, false);
|
||||||
|
testReduceSearchWithExpr(100, 10, 10, false);
|
||||||
|
//bfloat16
|
||||||
|
testReduceSearchWithExpr(2, 10, 10, false);
|
||||||
|
testReduceSearchWithExpr(100, 10, 10, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CApiTest, ReduceSearchWithExprFilterAll) {
|
TEST(CApiTest, ReduceSearchWithExprFilterAll) {
|
||||||
|
//float32
|
||||||
testReduceSearchWithExpr(2, 1, 1, true);
|
testReduceSearchWithExpr(2, 1, 1, true);
|
||||||
testReduceSearchWithExpr(2, 10, 10, true);
|
testReduceSearchWithExpr(2, 10, 10, true);
|
||||||
|
//float16
|
||||||
|
testReduceSearchWithExpr(2, 1, 1, true);
|
||||||
|
//bfloat16
|
||||||
|
testReduceSearchWithExpr(2, 1, 1, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CApiTest, LoadIndexInfo) {
|
TEST(CApiTest, LoadIndexInfo) {
|
||||||
|
|
|
@ -520,6 +520,10 @@ TEST(GroupBY, Reduce) {
|
||||||
auto vec_fid = schema->AddDebugField(
|
auto vec_fid = schema->AddDebugField(
|
||||||
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
|
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
|
||||||
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
|
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
|
||||||
|
auto fp16_fid = schema->AddDebugField(
|
||||||
|
"fakevec_fp16", DataType::VECTOR_FLOAT16, dim, knowhere::metric::L2);
|
||||||
|
auto bf16_fid = schema->AddDebugField(
|
||||||
|
"fakevec_bf16", DataType::VECTOR_BFLOAT16, dim, knowhere::metric::L2);
|
||||||
schema->set_primary_field_id(int64_fid);
|
schema->set_primary_field_id(int64_fid);
|
||||||
auto segment1 = CreateSealedSegment(schema);
|
auto segment1 = CreateSealedSegment(schema);
|
||||||
auto segment2 = CreateSealedSegment(schema);
|
auto segment2 = CreateSealedSegment(schema);
|
||||||
|
|
|
@ -689,7 +689,7 @@ TEST_P(IndexTest, GetVector) {
|
||||||
|
|
||||||
#ifdef BUILD_DISK_ANN
|
#ifdef BUILD_DISK_ANN
|
||||||
TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
||||||
int64_t NB = 10000;
|
int64_t NB = 1000;
|
||||||
IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
|
IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
|
||||||
MetricType metric_type = knowhere::metric::L2;
|
MetricType metric_type = knowhere::metric::L2;
|
||||||
milvus::index::CreateIndexInfo create_index_info;
|
milvus::index::CreateIndexInfo create_index_info;
|
||||||
|
@ -720,8 +720,8 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
||||||
auto build_conf = Config{
|
auto build_conf = Config{
|
||||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||||
{knowhere::meta::DIM, std::to_string(DIM)},
|
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||||
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)},
|
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
|
||||||
{milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(128)},
|
{milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
|
||||||
{milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
|
{milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
|
||||||
{milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
|
{milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
|
||||||
{milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
|
{milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
|
||||||
|
@ -768,85 +768,169 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
||||||
std::runtime_error);
|
std::runtime_error);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TEST(Indexing, SearchDiskAnnWithInvalidParam_Float16) {
|
TEST(Indexing, SearchDiskAnnWithFloat16) {
|
||||||
// int64_t NB = 10000;
|
int64_t NB = 1000;
|
||||||
// IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
|
int64_t NQ = 2;
|
||||||
// MetricType metric_type = knowhere::metric::L2;
|
int64_t K = 4;
|
||||||
// milvus::index::CreateIndexInfo create_index_info;
|
IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
|
||||||
// create_index_info.index_type = index_type;
|
MetricType metric_type = knowhere::metric::L2;
|
||||||
// create_index_info.metric_type = metric_type;
|
milvus::index::CreateIndexInfo create_index_info;
|
||||||
// create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16;
|
create_index_info.index_type = index_type;
|
||||||
// create_index_info.index_engine_version =
|
create_index_info.metric_type = metric_type;
|
||||||
// knowhere::Version::GetCurrentVersion().VersionNumber();
|
create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16;
|
||||||
|
create_index_info.index_engine_version =
|
||||||
|
knowhere::Version::GetCurrentVersion().VersionNumber();
|
||||||
|
|
||||||
// int64_t collection_id = 1;
|
int64_t collection_id = 1;
|
||||||
// int64_t partition_id = 2;
|
int64_t partition_id = 2;
|
||||||
// int64_t segment_id = 3;
|
int64_t segment_id = 3;
|
||||||
// int64_t field_id = 100;
|
int64_t field_id = 100;
|
||||||
// int64_t build_id = 1000;
|
int64_t build_id = 1000;
|
||||||
// int64_t index_version = 1;
|
int64_t index_version = 1;
|
||||||
|
|
||||||
// StorageConfig storage_config = get_default_local_storage_config();
|
StorageConfig storage_config = get_default_local_storage_config();
|
||||||
// milvus::storage::FieldDataMeta field_data_meta{
|
milvus::storage::FieldDataMeta field_data_meta{
|
||||||
// collection_id, partition_id, segment_id, field_id};
|
collection_id, partition_id, segment_id, field_id};
|
||||||
// milvus::storage::IndexMeta index_meta{
|
milvus::storage::IndexMeta index_meta{
|
||||||
// segment_id, field_id, build_id, index_version};
|
segment_id, field_id, build_id, index_version};
|
||||||
// auto chunk_manager = storage::CreateChunkManager(storage_config);
|
auto chunk_manager = storage::CreateChunkManager(storage_config);
|
||||||
// milvus::storage::FileManagerContext file_manager_context(
|
milvus::storage::FileManagerContext file_manager_context(
|
||||||
// field_data_meta, index_meta, chunk_manager);
|
field_data_meta, index_meta, chunk_manager);
|
||||||
// auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||||
// create_index_info, file_manager_context);
|
create_index_info, file_manager_context);
|
||||||
|
|
||||||
// auto build_conf = Config{
|
auto build_conf = Config{
|
||||||
// {knowhere::meta::METRIC_TYPE, metric_type},
|
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||||
// {knowhere::meta::DIM, std::to_string(DIM)},
|
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||||
// {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)},
|
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
|
||||||
// {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(128)},
|
{milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
|
||||||
// {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
|
{milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
|
||||||
// {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
|
{milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
|
||||||
// {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
|
{milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
|
||||||
// };
|
};
|
||||||
|
|
||||||
// // build disk ann index
|
// build disk ann index
|
||||||
// auto dataset = GenDatasetWithDataType(
|
auto dataset = GenDatasetWithDataType(
|
||||||
// NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
||||||
// FixedVector<float16> xb_data =
|
FixedVector<float16> xb_data =
|
||||||
// dataset.get_col<float16>(milvus::FieldId(field_id));
|
dataset.get_col<float16>(milvus::FieldId(field_id));
|
||||||
// knowhere::DataSetPtr xb_dataset =
|
knowhere::DataSetPtr xb_dataset =
|
||||||
// knowhere::GenDataSet(NB, DIM, xb_data.data());
|
knowhere::GenDataSet(NB, DIM, xb_data.data());
|
||||||
// ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));
|
ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));
|
||||||
|
|
||||||
// // serialize and load disk index, disk index can only be search after loading for now
|
// serialize and load disk index, disk index can only be search after loading for now
|
||||||
// auto binary_set = index->Upload();
|
auto binary_set = index->Upload();
|
||||||
// index.reset();
|
index.reset();
|
||||||
|
|
||||||
// auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||||
// create_index_info, file_manager_context);
|
create_index_info, file_manager_context);
|
||||||
// auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
|
auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
|
||||||
// std::vector<std::string> index_files;
|
std::vector<std::string> index_files;
|
||||||
// for (auto& binary : binary_set.binary_map_) {
|
for (auto& binary : binary_set.binary_map_) {
|
||||||
// index_files.emplace_back(binary.first);
|
index_files.emplace_back(binary.first);
|
||||||
// }
|
}
|
||||||
// auto load_conf = generate_load_conf(index_type, metric_type, NB);
|
auto load_conf = generate_load_conf<float16>(index_type, metric_type, NB);
|
||||||
// load_conf["index_files"] = index_files;
|
load_conf["index_files"] = index_files;
|
||||||
// vec_index->Load(load_conf);
|
vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
|
||||||
// EXPECT_EQ(vec_index->Count(), NB);
|
EXPECT_EQ(vec_index->Count(), NB);
|
||||||
|
|
||||||
// // search disk index with search_list == limit
|
// search disk index with search_list == limit
|
||||||
// int query_offset = 100;
|
int query_offset = 100;
|
||||||
// knowhere::DataSetPtr xq_dataset =
|
knowhere::DataSetPtr xq_dataset =
|
||||||
// knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
|
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
|
||||||
|
|
||||||
// milvus::SearchInfo search_info;
|
milvus::SearchInfo search_info;
|
||||||
// search_info.topk_ = K;
|
search_info.topk_ = K;
|
||||||
// search_info.metric_type_ = metric_type;
|
search_info.metric_type_ = metric_type;
|
||||||
// search_info.search_params_ = milvus::Config{
|
search_info.search_params_ = milvus::Config{
|
||||||
// {knowhere::meta::METRIC_TYPE, metric_type},
|
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||||
// {milvus::index::DISK_ANN_QUERY_LIST, K - 1},
|
{milvus::index::DISK_ANN_QUERY_LIST, K * 2},
|
||||||
// };
|
};
|
||||||
// EXPECT_THROW(vec_index->Query(xq_dataset, search_info, nullptr),
|
SearchResult result;
|
||||||
// std::runtime_error);
|
EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
|
||||||
// }
|
}
|
||||||
|
|
||||||
|
TEST(Indexing, SearchDiskAnnWithBFloat16) {
|
||||||
|
int64_t NB = 1000;
|
||||||
|
int64_t NQ = 2;
|
||||||
|
int64_t K = 4;
|
||||||
|
IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
|
||||||
|
MetricType metric_type = knowhere::metric::L2;
|
||||||
|
milvus::index::CreateIndexInfo create_index_info;
|
||||||
|
create_index_info.index_type = index_type;
|
||||||
|
create_index_info.metric_type = metric_type;
|
||||||
|
create_index_info.field_type = milvus::DataType::VECTOR_BFLOAT16;
|
||||||
|
create_index_info.index_engine_version =
|
||||||
|
knowhere::Version::GetCurrentVersion().VersionNumber();
|
||||||
|
|
||||||
|
int64_t collection_id = 1;
|
||||||
|
int64_t partition_id = 2;
|
||||||
|
int64_t segment_id = 3;
|
||||||
|
int64_t field_id = 100;
|
||||||
|
int64_t build_id = 1000;
|
||||||
|
int64_t index_version = 1;
|
||||||
|
|
||||||
|
StorageConfig storage_config = get_default_local_storage_config();
|
||||||
|
milvus::storage::FieldDataMeta field_data_meta{
|
||||||
|
collection_id, partition_id, segment_id, field_id};
|
||||||
|
milvus::storage::IndexMeta index_meta{
|
||||||
|
segment_id, field_id, build_id, index_version};
|
||||||
|
auto chunk_manager = storage::CreateChunkManager(storage_config);
|
||||||
|
milvus::storage::FileManagerContext file_manager_context(
|
||||||
|
field_data_meta, index_meta, chunk_manager);
|
||||||
|
auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||||
|
create_index_info, file_manager_context);
|
||||||
|
|
||||||
|
auto build_conf = Config{
|
||||||
|
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||||
|
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||||
|
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
|
||||||
|
{milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
|
||||||
|
{milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
|
||||||
|
{milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
|
||||||
|
{milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
|
||||||
|
};
|
||||||
|
|
||||||
|
// build disk ann index
|
||||||
|
auto dataset = GenDatasetWithDataType(
|
||||||
|
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
||||||
|
FixedVector<bfloat16> xb_data =
|
||||||
|
dataset.get_col<bfloat16>(milvus::FieldId(field_id));
|
||||||
|
knowhere::DataSetPtr xb_dataset =
|
||||||
|
knowhere::GenDataSet(NB, DIM, xb_data.data());
|
||||||
|
ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));
|
||||||
|
|
||||||
|
// serialize and load disk index, disk index can only be search after loading for now
|
||||||
|
auto binary_set = index->Upload();
|
||||||
|
index.reset();
|
||||||
|
|
||||||
|
auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||||
|
create_index_info, file_manager_context);
|
||||||
|
auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
|
||||||
|
std::vector<std::string> index_files;
|
||||||
|
for (auto& binary : binary_set.binary_map_) {
|
||||||
|
index_files.emplace_back(binary.first);
|
||||||
|
}
|
||||||
|
auto load_conf = generate_load_conf<bfloat16>(index_type, metric_type, NB);
|
||||||
|
load_conf["index_files"] = index_files;
|
||||||
|
vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
|
||||||
|
EXPECT_EQ(vec_index->Count(), NB);
|
||||||
|
|
||||||
|
// search disk index with search_list == limit
|
||||||
|
int query_offset = 100;
|
||||||
|
knowhere::DataSetPtr xq_dataset =
|
||||||
|
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
|
||||||
|
|
||||||
|
milvus::SearchInfo search_info;
|
||||||
|
search_info.topk_ = K;
|
||||||
|
search_info.metric_type_ = metric_type;
|
||||||
|
search_info.search_params_ = milvus::Config{
|
||||||
|
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||||
|
{milvus::index::DISK_ANN_QUERY_LIST, K * 2},
|
||||||
|
};
|
||||||
|
SearchResult result;
|
||||||
|
EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//class IndexTestV2
|
//class IndexTestV2
|
||||||
|
|
|
@ -108,7 +108,8 @@ generate_build_conf(const milvus::IndexType& index_type,
|
||||||
return knowhere::Json();
|
return knowhere::Json();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto
|
template <typename DataType = float>
|
||||||
|
inline auto
|
||||||
generate_load_conf(const milvus::IndexType& index_type,
|
generate_load_conf(const milvus::IndexType& index_type,
|
||||||
const milvus::MetricType& metric_type,
|
const milvus::MetricType& metric_type,
|
||||||
int64_t nb) {
|
int64_t nb) {
|
||||||
|
@ -118,7 +119,8 @@ generate_load_conf(const milvus::IndexType& index_type,
|
||||||
{knowhere::meta::DIM, std::to_string(DIM)},
|
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||||
{milvus::index::DISK_ANN_LOAD_THREAD_NUM, std::to_string(2)},
|
{milvus::index::DISK_ANN_LOAD_THREAD_NUM, std::to_string(2)},
|
||||||
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET,
|
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET,
|
||||||
std::to_string(0.0002)},
|
std::to_string(0.05 * sizeof(DataType) * nb /
|
||||||
|
(1024.0 * 1024.0 * 1024.0))},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
return knowhere::Json{
|
return knowhere::Json{
|
||||||
|
|
|
@ -146,11 +146,10 @@ func CheckCtxValid(ctx context.Context) bool {
|
||||||
func GetVecFieldIDs(schema *schemapb.CollectionSchema) []int64 {
|
func GetVecFieldIDs(schema *schemapb.CollectionSchema) []int64 {
|
||||||
var vecFieldIDs []int64
|
var vecFieldIDs []int64
|
||||||
for _, field := range schema.Fields {
|
for _, field := range schema.Fields {
|
||||||
if field.DataType == schemapb.DataType_BinaryVector || field.DataType == schemapb.DataType_FloatVector || field.DataType == schemapb.DataType_Float16Vector || field.DataType == schemapb.DataType_BFloat16Vector || field.DataType == schemapb.DataType_SparseFloatVector {
|
if typeutil.IsVectorType(field.DataType) {
|
||||||
vecFieldIDs = append(vecFieldIDs, field.FieldID)
|
vecFieldIDs = append(vecFieldIDs, field.FieldID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return vecFieldIDs
|
return vecFieldIDs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue