mirror of https://github.com/milvus-io/milvus.git
enhance: Rename API GenDataset to GenFieldData in unittest (#39386)
Issue: #38666
Signed-off-by: Cai Yudong <yudong.cai@zilliz.com>
pull/39396/head
parent
d14bb67c62
commit
64feeb0e2b
|
@ -64,7 +64,10 @@ IndexBuilder_build(benchmark::State& state) {
|
|||
std::to_string(knowhere::Version::GetCurrentVersion().VersionNumber());
|
||||
|
||||
auto is_binary = state.range(2);
|
||||
auto dataset = GenDataset(NB, metric_type, is_binary);
|
||||
auto dataset = GenFieldData(NB,
|
||||
metric_type,
|
||||
is_binary ? milvus::DataType::VECTOR_BINARY
|
||||
: milvus::DataType::VECTOR_FLOAT);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(START_USER_FIELDID));
|
||||
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
||||
|
||||
|
@ -98,7 +101,10 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
|
|||
}
|
||||
|
||||
auto is_binary = state.range(2);
|
||||
auto dataset = GenDataset(NB, metric_type, is_binary);
|
||||
auto dataset = GenFieldData(NB,
|
||||
metric_type,
|
||||
is_binary ? milvus::DataType::VECTOR_BINARY
|
||||
: milvus::DataType::VECTOR_FLOAT);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ TEST(FloatVecIndex, All) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto dataset = GenFieldData(NB, metric_type);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
|
||||
CDataType dtype = FloatVector;
|
||||
|
@ -93,8 +93,8 @@ TEST(SparseFloatVecIndex, All) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDatasetWithDataType(
|
||||
NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
|
||||
auto dataset =
|
||||
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
|
||||
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
||||
milvus::FieldId(100));
|
||||
CDataType dtype = SparseFloatVector;
|
||||
|
@ -157,8 +157,8 @@ TEST(Float16VecIndex, All) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDatasetWithDataType(
|
||||
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
||||
auto dataset =
|
||||
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
|
||||
CDataType dtype = Float16Vector;
|
||||
|
@ -216,8 +216,8 @@ TEST(BFloat16VecIndex, All) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDatasetWithDataType(
|
||||
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
||||
auto dataset =
|
||||
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
|
||||
CDataType dtype = BFloat16Vector;
|
||||
|
@ -276,7 +276,8 @@ TEST(BinaryVecIndex, All) {
|
|||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenDataset(NB, metric_type, true);
|
||||
auto dataset =
|
||||
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BINARY);
|
||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
|
||||
CDataType dtype = BinaryVector;
|
||||
|
|
|
@ -128,13 +128,12 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
|||
vec_field_data_type, config, file_manager_context);
|
||||
knowhere::DataSetPtr xb_dataset;
|
||||
if (vec_field_data_type == DataType::VECTOR_BINARY) {
|
||||
auto dataset = GenDataset(NB, metric_type, true);
|
||||
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||
auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
|
||||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
|
||||
auto dataset = GenDatasetWithDataType(
|
||||
NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
|
||||
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||
auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
||||
milvus::FieldId(100));
|
||||
xb_dataset =
|
||||
|
@ -143,7 +142,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
|||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||
} else {
|
||||
// VECTOR_FLOAT
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto dataset = GenFieldData(NB, metric_type);
|
||||
auto f_vecs = dataset.get_col<float>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataSet(NB, DIM, f_vecs.data());
|
||||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||
|
@ -173,14 +172,13 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
|||
std::unique_ptr<SearchResult> result;
|
||||
if (vec_field_data_type == DataType::VECTOR_FLOAT) {
|
||||
auto nb_for_nq = NQ + query_offset;
|
||||
auto dataset = GenDataset(nb_for_nq, metric_type, false);
|
||||
auto dataset = GenFieldData(nb_for_nq, metric_type);
|
||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||
auto xq_dataset =
|
||||
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
|
||||
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
|
||||
auto dataset = GenDatasetWithDataType(
|
||||
NQ, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
|
||||
auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type);
|
||||
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
||||
milvus::FieldId(100));
|
||||
auto xq_dataset =
|
||||
|
@ -189,7 +187,8 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
|||
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
} else {
|
||||
auto nb_for_nq = NQ + query_offset;
|
||||
auto dataset = GenDataset(nb_for_nq, metric_type, true);
|
||||
auto dataset =
|
||||
GenFieldData(nb_for_nq, metric_type, DataType::VECTOR_BINARY);
|
||||
auto xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
// offset of binary vector is 8-aligned bit-wise representation.
|
||||
auto xq_dataset = knowhere::GenDataSet(
|
||||
|
|
|
@ -338,8 +338,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
|
|||
vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
|
||||
}
|
||||
|
||||
auto dataset =
|
||||
GenDatasetWithDataType(NB, metric_type, vec_field_data_type);
|
||||
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||
if (is_binary) {
|
||||
// binary vector
|
||||
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
|
@ -788,7 +787,7 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
|||
};
|
||||
|
||||
// build disk ann index
|
||||
auto dataset = GenDataset(NB, metric_type, false);
|
||||
auto dataset = GenFieldData(NB, metric_type);
|
||||
FixedVector<float> xb_data =
|
||||
dataset.get_col<float>(milvus::FieldId(field_id));
|
||||
knowhere::DataSetPtr xb_dataset =
|
||||
|
@ -871,8 +870,8 @@ TEST(Indexing, SearchDiskAnnWithFloat16) {
|
|||
};
|
||||
|
||||
// build disk ann index
|
||||
auto dataset = GenDatasetWithDataType(
|
||||
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
||||
auto dataset =
|
||||
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
||||
FixedVector<float16> xb_data =
|
||||
dataset.get_col<float16>(milvus::FieldId(field_id));
|
||||
knowhere::DataSetPtr xb_dataset =
|
||||
|
@ -954,8 +953,8 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) {
|
|||
};
|
||||
|
||||
// build disk ann index
|
||||
auto dataset = GenDatasetWithDataType(
|
||||
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
||||
auto dataset =
|
||||
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
||||
FixedVector<bfloat16> xb_data =
|
||||
dataset.get_col<bfloat16>(milvus::FieldId(field_id));
|
||||
knowhere::DataSetPtr xb_dataset =
|
||||
|
|
|
@ -218,49 +218,17 @@ generate_params(const knowhere::IndexType& index_type,
|
|||
}
|
||||
|
||||
auto
|
||||
GenDataset(int64_t N,
|
||||
const knowhere::MetricType& metric_type,
|
||||
bool is_binary,
|
||||
int64_t dim = DIM) {
|
||||
GenFieldData(int64_t N,
|
||||
const knowhere::MetricType& metric_type,
|
||||
milvus::DataType data_type = milvus::DataType::VECTOR_FLOAT,
|
||||
int64_t dim = DIM) {
|
||||
auto schema = std::make_shared<milvus::Schema>();
|
||||
if (!is_binary) {
|
||||
schema->AddDebugField(
|
||||
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
} else {
|
||||
schema->AddDebugField(
|
||||
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
}
|
||||
}
|
||||
|
||||
auto
|
||||
GenDatasetWithDataType(int64_t N,
|
||||
const knowhere::MetricType& metric_type,
|
||||
milvus::DataType data_type,
|
||||
int64_t dim = DIM) {
|
||||
auto schema = std::make_shared<milvus::Schema>();
|
||||
if (data_type == milvus::DataType::VECTOR_FLOAT16) {
|
||||
schema->AddDebugField(
|
||||
"fakevec", milvus::DataType::VECTOR_FLOAT16, dim, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
} else if (data_type == milvus::DataType::VECTOR_BFLOAT16) {
|
||||
schema->AddDebugField(
|
||||
"fakevec", milvus::DataType::VECTOR_BFLOAT16, dim, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
} else if (data_type == milvus::DataType::VECTOR_FLOAT) {
|
||||
schema->AddDebugField(
|
||||
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
} else if (data_type == milvus::DataType::VECTOR_SPARSE_FLOAT) {
|
||||
schema->AddDebugField(
|
||||
"fakevec", milvus::DataType::VECTOR_SPARSE_FLOAT, 0, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
} else {
|
||||
schema->AddDebugField(
|
||||
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
}
|
||||
schema->AddDebugField(
|
||||
"fakevec",
|
||||
data_type,
|
||||
(data_type != milvus::DataType::VECTOR_SPARSE_FLOAT ? dim : 0),
|
||||
metric_type);
|
||||
return milvus::segcore::DataGen(schema, N);
|
||||
}
|
||||
|
||||
using QueryResultPtr = std::unique_ptr<milvus::SearchResult>;
|
||||
|
|
Loading…
Reference in New Issue