diff --git a/internal/core/unittest/bench/bench_indexbuilder.cpp b/internal/core/unittest/bench/bench_indexbuilder.cpp index 1b5c91a487..58d20d578c 100644 --- a/internal/core/unittest/bench/bench_indexbuilder.cpp +++ b/internal/core/unittest/bench/bench_indexbuilder.cpp @@ -64,7 +64,10 @@ IndexBuilder_build(benchmark::State& state) { std::to_string(knowhere::Version::GetCurrentVersion().VersionNumber()); auto is_binary = state.range(2); - auto dataset = GenDataset(NB, metric_type, is_binary); + auto dataset = GenFieldData(NB, + metric_type, + is_binary ? milvus::DataType::VECTOR_BINARY + : milvus::DataType::VECTOR_FLOAT); auto xb_data = dataset.get_col(milvus::FieldId(START_USER_FIELDID)); auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data()); @@ -98,7 +101,10 @@ IndexBuilder_build_and_codec(benchmark::State& state) { } auto is_binary = state.range(2); - auto dataset = GenDataset(NB, metric_type, is_binary); + auto dataset = GenFieldData(NB, + metric_type, + is_binary ? milvus::DataType::VECTOR_BINARY + : milvus::DataType::VECTOR_FLOAT); auto xb_data = dataset.get_col(milvus::FieldId(100)); auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data()); diff --git a/internal/core/unittest/test_index_c_api.cpp b/internal/core/unittest/test_index_c_api.cpp index 042255028a..39544c2582 100644 --- a/internal/core/unittest/test_index_c_api.cpp +++ b/internal/core/unittest/test_index_c_api.cpp @@ -35,7 +35,7 @@ TEST(FloatVecIndex, All) { ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); assert(ok); - auto dataset = GenDataset(NB, metric_type, false); + auto dataset = GenFieldData(NB, metric_type); auto xb_data = dataset.get_col(milvus::FieldId(100)); CDataType dtype = FloatVector; @@ -93,8 +93,8 @@ TEST(SparseFloatVecIndex, All) { ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); assert(ok); - auto dataset = GenDatasetWithDataType( - NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT); + auto dataset = + GenFieldData(NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT); auto xb_data = dataset.get_col>( milvus::FieldId(100)); CDataType dtype = SparseFloatVector; @@ -157,8 +157,8 @@ TEST(Float16VecIndex, All) { ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); assert(ok); - auto dataset = GenDatasetWithDataType( - NB, metric_type, milvus::DataType::VECTOR_FLOAT16); + auto dataset = + GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16); auto xb_data = dataset.get_col(milvus::FieldId(100)); CDataType dtype = Float16Vector; @@ -216,8 +216,8 @@ TEST(BFloat16VecIndex, All) { ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); assert(ok); - auto dataset = GenDatasetWithDataType( - NB, metric_type, milvus::DataType::VECTOR_BFLOAT16); + auto dataset = + GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16); auto xb_data = dataset.get_col(milvus::FieldId(100)); CDataType dtype = BFloat16Vector; @@ -276,7 +276,8 @@ TEST(BinaryVecIndex, All) { ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); assert(ok); - auto dataset = GenDataset(NB, metric_type, true); + auto dataset = + GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BINARY); auto xb_data = dataset.get_col(milvus::FieldId(100)); CDataType dtype = BinaryVector; diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index ffef4a8fc2..9adf8fad7e 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -128,13 +128,12 @@ TEST_P(IndexWrapperTest, BuildAndQuery) { vec_field_data_type, config, file_manager_context); knowhere::DataSetPtr xb_dataset; if (vec_field_data_type == DataType::VECTOR_BINARY) { - auto dataset = GenDataset(NB, metric_type, true); + auto dataset = GenFieldData(NB, metric_type, vec_field_data_type); auto bin_vecs = dataset.get_col(milvus::FieldId(100)); xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data()); ASSERT_NO_THROW(index->Build(xb_dataset)); } else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) { - auto dataset = GenDatasetWithDataType( - NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT); + auto dataset = GenFieldData(NB, metric_type, vec_field_data_type); auto sparse_vecs = dataset.get_col>( milvus::FieldId(100)); xb_dataset = @@ -143,7 +142,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) { ASSERT_NO_THROW(index->Build(xb_dataset)); } else { // VECTOR_FLOAT - auto dataset = GenDataset(NB, metric_type, false); + auto dataset = GenFieldData(NB, metric_type); auto f_vecs = dataset.get_col(milvus::FieldId(100)); xb_dataset = knowhere::GenDataSet(NB, DIM, f_vecs.data()); ASSERT_NO_THROW(index->Build(xb_dataset)); @@ -173,14 +172,13 @@ TEST_P(IndexWrapperTest, BuildAndQuery) { std::unique_ptr result; if (vec_field_data_type == DataType::VECTOR_FLOAT) { auto nb_for_nq = NQ + query_offset; - auto dataset = GenDataset(nb_for_nq, metric_type, false); + auto dataset = GenFieldData(nb_for_nq, metric_type); auto xb_data = dataset.get_col(milvus::FieldId(100)); auto xq_dataset = knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset); result = vec_index->Query(xq_dataset, search_info, nullptr); } else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) { - auto dataset = GenDatasetWithDataType( - NQ, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT); + auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type); auto xb_data = dataset.get_col>( milvus::FieldId(100)); auto xq_dataset = @@ -189,7 +187,8 @@ TEST_P(IndexWrapperTest, BuildAndQuery) { result = vec_index->Query(xq_dataset, search_info, nullptr); } else { auto nb_for_nq = NQ + query_offset; - auto dataset = GenDataset(nb_for_nq, metric_type, true); + auto dataset = + GenFieldData(nb_for_nq, metric_type, DataType::VECTOR_BINARY); auto xb_bin_data = dataset.get_col(milvus::FieldId(100)); // offset of binary vector is 8-aligned bit-wise representation. auto xq_dataset = knowhere::GenDataSet( diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 7b4bb72aef..90ce6d2cec 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -338,8 +338,7 @@ class IndexTest : public ::testing::TestWithParam { vec_field_data_type = milvus::DataType::VECTOR_FLOAT; } - auto dataset = - GenDatasetWithDataType(NB, metric_type, vec_field_data_type); + auto dataset = GenFieldData(NB, metric_type, vec_field_data_type); if (is_binary) { // binary vector xb_bin_data = dataset.get_col(milvus::FieldId(100)); @@ -788,7 +787,7 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) { }; // build disk ann index - auto dataset = GenDataset(NB, metric_type, false); + auto dataset = GenFieldData(NB, metric_type); FixedVector xb_data = dataset.get_col(milvus::FieldId(field_id)); knowhere::DataSetPtr xb_dataset = @@ -871,8 +870,8 @@ TEST(Indexing, SearchDiskAnnWithFloat16) { }; // build disk ann index - auto dataset = GenDatasetWithDataType( - NB, metric_type, milvus::DataType::VECTOR_FLOAT16); + auto dataset = + GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16); FixedVector xb_data = dataset.get_col(milvus::FieldId(field_id)); knowhere::DataSetPtr xb_dataset = @@ -954,8 +953,8 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) { }; // build disk ann index - auto dataset = GenDatasetWithDataType( - NB, metric_type, milvus::DataType::VECTOR_BFLOAT16); + auto dataset = + GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16); FixedVector xb_data = dataset.get_col(milvus::FieldId(field_id)); knowhere::DataSetPtr xb_dataset = diff --git a/internal/core/unittest/test_utils/indexbuilder_test_utils.h b/internal/core/unittest/test_utils/indexbuilder_test_utils.h index 527d11bd25..ce67ba8bfa 100644 --- a/internal/core/unittest/test_utils/indexbuilder_test_utils.h +++ b/internal/core/unittest/test_utils/indexbuilder_test_utils.h @@ -218,49 +218,17 @@ generate_params(const knowhere::IndexType& index_type, } auto -GenDataset(int64_t N, - const knowhere::MetricType& metric_type, - bool is_binary, - int64_t dim = DIM) { +GenFieldData(int64_t N, + const knowhere::MetricType& metric_type, + milvus::DataType data_type = milvus::DataType::VECTOR_FLOAT, + int64_t dim = DIM) { auto schema = std::make_shared(); - if (!is_binary) { - schema->AddDebugField( - "fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type); - return milvus::segcore::DataGen(schema, N); - } else { - schema->AddDebugField( - "fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type); - return milvus::segcore::DataGen(schema, N); - } -} - -auto -GenDatasetWithDataType(int64_t N, - const knowhere::MetricType& metric_type, - milvus::DataType data_type, - int64_t dim = DIM) { - auto schema = std::make_shared(); - if (data_type == milvus::DataType::VECTOR_FLOAT16) { - schema->AddDebugField( - "fakevec", milvus::DataType::VECTOR_FLOAT16, dim, metric_type); - return milvus::segcore::DataGen(schema, N); - } else if (data_type == milvus::DataType::VECTOR_BFLOAT16) { - schema->AddDebugField( - "fakevec", milvus::DataType::VECTOR_BFLOAT16, dim, metric_type); - return milvus::segcore::DataGen(schema, N); - } else if (data_type == milvus::DataType::VECTOR_FLOAT) { - schema->AddDebugField( - "fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type); - return milvus::segcore::DataGen(schema, N); - } else if (data_type == milvus::DataType::VECTOR_SPARSE_FLOAT) { - schema->AddDebugField( - "fakevec", milvus::DataType::VECTOR_SPARSE_FLOAT, 0, metric_type); - return milvus::segcore::DataGen(schema, N); - } else { - schema->AddDebugField( - "fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type); - return milvus::segcore::DataGen(schema, N); - } + schema->AddDebugField( + "fakevec", + data_type, + (data_type != milvus::DataType::VECTOR_SPARSE_FLOAT ? dim : 0), + metric_type); + return milvus::segcore::DataGen(schema, N); } using QueryResultPtr = std::unique_ptr;