enhance: Rename API GenDataset to GenFieldData in unittest (#39386)

Issue: #38666

Signed-off-by: Cai Yudong <yudong.cai@zilliz.com>
pull/39396/head
Cai Yudong 2025-01-17 15:55:03 +08:00 committed by GitHub
parent d14bb67c62
commit 64feeb0e2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 40 additions and 67 deletions

View File

@@ -64,7 +64,10 @@ IndexBuilder_build(benchmark::State& state) {
std::to_string(knowhere::Version::GetCurrentVersion().VersionNumber());
auto is_binary = state.range(2);
auto dataset = GenDataset(NB, metric_type, is_binary);
auto dataset = GenFieldData(NB,
metric_type,
is_binary ? milvus::DataType::VECTOR_BINARY
: milvus::DataType::VECTOR_FLOAT);
auto xb_data = dataset.get_col<float>(milvus::FieldId(START_USER_FIELDID));
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
@@ -98,7 +101,10 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
}
auto is_binary = state.range(2);
auto dataset = GenDataset(NB, metric_type, is_binary);
auto dataset = GenFieldData(NB,
metric_type,
is_binary ? milvus::DataType::VECTOR_BINARY
: milvus::DataType::VECTOR_FLOAT);
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());

View File

@@ -35,7 +35,7 @@ TEST(FloatVecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDataset(NB, metric_type, false);
auto dataset = GenFieldData(NB, metric_type);
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
CDataType dtype = FloatVector;
@@ -93,8 +93,8 @@ TEST(SparseFloatVecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
milvus::FieldId(100));
CDataType dtype = SparseFloatVector;
@@ -157,8 +157,8 @@ TEST(Float16VecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
CDataType dtype = Float16Vector;
@@ -216,8 +216,8 @@ TEST(BFloat16VecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
CDataType dtype = BFloat16Vector;
@@ -276,7 +276,8 @@ TEST(BinaryVecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDataset(NB, metric_type, true);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BINARY);
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
CDataType dtype = BinaryVector;

View File

@@ -128,13 +128,12 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
vec_field_data_type, config, file_manager_context);
knowhere::DataSetPtr xb_dataset;
if (vec_field_data_type == DataType::VECTOR_BINARY) {
auto dataset = GenDataset(NB, metric_type, true);
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
ASSERT_NO_THROW(index->Build(xb_dataset));
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<float>>(
milvus::FieldId(100));
xb_dataset =
@@ -143,7 +142,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
ASSERT_NO_THROW(index->Build(xb_dataset));
} else {
// VECTOR_FLOAT
auto dataset = GenDataset(NB, metric_type, false);
auto dataset = GenFieldData(NB, metric_type);
auto f_vecs = dataset.get_col<float>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, f_vecs.data());
ASSERT_NO_THROW(index->Build(xb_dataset));
@@ -173,14 +172,13 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
std::unique_ptr<SearchResult> result;
if (vec_field_data_type == DataType::VECTOR_FLOAT) {
auto nb_for_nq = NQ + query_offset;
auto dataset = GenDataset(nb_for_nq, metric_type, false);
auto dataset = GenFieldData(nb_for_nq, metric_type);
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
auto xq_dataset =
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
result = vec_index->Query(xq_dataset, search_info, nullptr);
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
auto dataset = GenDatasetWithDataType(
NQ, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type);
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
milvus::FieldId(100));
auto xq_dataset =
@@ -189,7 +187,8 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
result = vec_index->Query(xq_dataset, search_info, nullptr);
} else {
auto nb_for_nq = NQ + query_offset;
auto dataset = GenDataset(nb_for_nq, metric_type, true);
auto dataset =
GenFieldData(nb_for_nq, metric_type, DataType::VECTOR_BINARY);
auto xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
// offset of binary vector is 8-aligned bit-wise representation.
auto xq_dataset = knowhere::GenDataSet(

View File

@@ -338,8 +338,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
}
auto dataset =
GenDatasetWithDataType(NB, metric_type, vec_field_data_type);
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
if (is_binary) {
// binary vector
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
@@ -788,7 +787,7 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
};
// build disk ann index
auto dataset = GenDataset(NB, metric_type, false);
auto dataset = GenFieldData(NB, metric_type);
FixedVector<float> xb_data =
dataset.get_col<float>(milvus::FieldId(field_id));
knowhere::DataSetPtr xb_dataset =
@@ -871,8 +870,8 @@ TEST(Indexing, SearchDiskAnnWithFloat16) {
};
// build disk ann index
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
FixedVector<float16> xb_data =
dataset.get_col<float16>(milvus::FieldId(field_id));
knowhere::DataSetPtr xb_dataset =
@@ -954,8 +953,8 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) {
};
// build disk ann index
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
FixedVector<bfloat16> xb_data =
dataset.get_col<bfloat16>(milvus::FieldId(field_id));
knowhere::DataSetPtr xb_dataset =

View File

@@ -218,49 +218,17 @@ generate_params(const knowhere::IndexType& index_type,
}
auto
GenDataset(int64_t N,
const knowhere::MetricType& metric_type,
bool is_binary,
int64_t dim = DIM) {
GenFieldData(int64_t N,
const knowhere::MetricType& metric_type,
milvus::DataType data_type = milvus::DataType::VECTOR_FLOAT,
int64_t dim = DIM) {
auto schema = std::make_shared<milvus::Schema>();
if (!is_binary) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else {
schema->AddDebugField(
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
}
}
auto
GenDatasetWithDataType(int64_t N,
const knowhere::MetricType& metric_type,
milvus::DataType data_type,
int64_t dim = DIM) {
auto schema = std::make_shared<milvus::Schema>();
if (data_type == milvus::DataType::VECTOR_FLOAT16) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_FLOAT16, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else if (data_type == milvus::DataType::VECTOR_BFLOAT16) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_BFLOAT16, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else if (data_type == milvus::DataType::VECTOR_FLOAT) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else if (data_type == milvus::DataType::VECTOR_SPARSE_FLOAT) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_SPARSE_FLOAT, 0, metric_type);
return milvus::segcore::DataGen(schema, N);
} else {
schema->AddDebugField(
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
}
schema->AddDebugField(
"fakevec",
data_type,
(data_type != milvus::DataType::VECTOR_SPARSE_FLOAT ? dim : 0),
metric_type);
return milvus::segcore::DataGen(schema, N);
}
using QueryResultPtr = std::unique_ptr<milvus::SearchResult>;