mirror of https://github.com/milvus-io/milvus.git
enhance: Rename API GenDataset to GenFieldData in unittest (#39386)
Issue: #38666
Signed-off-by: Cai Yudong <yudong.cai@zilliz.com>
pull/39396/head
parent
d14bb67c62
commit
64feeb0e2b
|
@ -64,7 +64,10 @@ IndexBuilder_build(benchmark::State& state) {
|
||||||
std::to_string(knowhere::Version::GetCurrentVersion().VersionNumber());
|
std::to_string(knowhere::Version::GetCurrentVersion().VersionNumber());
|
||||||
|
|
||||||
auto is_binary = state.range(2);
|
auto is_binary = state.range(2);
|
||||||
auto dataset = GenDataset(NB, metric_type, is_binary);
|
auto dataset = GenFieldData(NB,
|
||||||
|
metric_type,
|
||||||
|
is_binary ? milvus::DataType::VECTOR_BINARY
|
||||||
|
: milvus::DataType::VECTOR_FLOAT);
|
||||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(START_USER_FIELDID));
|
auto xb_data = dataset.get_col<float>(milvus::FieldId(START_USER_FIELDID));
|
||||||
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
||||||
|
|
||||||
|
@ -98,7 +101,10 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
|
||||||
}
|
}
|
||||||
|
|
||||||
auto is_binary = state.range(2);
|
auto is_binary = state.range(2);
|
||||||
auto dataset = GenDataset(NB, metric_type, is_binary);
|
auto dataset = GenFieldData(NB,
|
||||||
|
metric_type,
|
||||||
|
is_binary ? milvus::DataType::VECTOR_BINARY
|
||||||
|
: milvus::DataType::VECTOR_FLOAT);
|
||||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||||
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ TEST(FloatVecIndex, All) {
|
||||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||||
&index_params_str);
|
&index_params_str);
|
||||||
assert(ok);
|
assert(ok);
|
||||||
auto dataset = GenDataset(NB, metric_type, false);
|
auto dataset = GenFieldData(NB, metric_type);
|
||||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||||
|
|
||||||
CDataType dtype = FloatVector;
|
CDataType dtype = FloatVector;
|
||||||
|
@ -93,8 +93,8 @@ TEST(SparseFloatVecIndex, All) {
|
||||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||||
&index_params_str);
|
&index_params_str);
|
||||||
assert(ok);
|
assert(ok);
|
||||||
auto dataset = GenDatasetWithDataType(
|
auto dataset =
|
||||||
NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
|
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
|
||||||
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
||||||
milvus::FieldId(100));
|
milvus::FieldId(100));
|
||||||
CDataType dtype = SparseFloatVector;
|
CDataType dtype = SparseFloatVector;
|
||||||
|
@ -157,8 +157,8 @@ TEST(Float16VecIndex, All) {
|
||||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||||
&index_params_str);
|
&index_params_str);
|
||||||
assert(ok);
|
assert(ok);
|
||||||
auto dataset = GenDatasetWithDataType(
|
auto dataset =
|
||||||
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
||||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||||
|
|
||||||
CDataType dtype = Float16Vector;
|
CDataType dtype = Float16Vector;
|
||||||
|
@ -216,8 +216,8 @@ TEST(BFloat16VecIndex, All) {
|
||||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||||
&index_params_str);
|
&index_params_str);
|
||||||
assert(ok);
|
assert(ok);
|
||||||
auto dataset = GenDatasetWithDataType(
|
auto dataset =
|
||||||
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
||||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||||
|
|
||||||
CDataType dtype = BFloat16Vector;
|
CDataType dtype = BFloat16Vector;
|
||||||
|
@ -276,7 +276,8 @@ TEST(BinaryVecIndex, All) {
|
||||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||||
&index_params_str);
|
&index_params_str);
|
||||||
assert(ok);
|
assert(ok);
|
||||||
auto dataset = GenDataset(NB, metric_type, true);
|
auto dataset =
|
||||||
|
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BINARY);
|
||||||
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||||
|
|
||||||
CDataType dtype = BinaryVector;
|
CDataType dtype = BinaryVector;
|
||||||
|
|
|
@ -128,13 +128,12 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||||
vec_field_data_type, config, file_manager_context);
|
vec_field_data_type, config, file_manager_context);
|
||||||
knowhere::DataSetPtr xb_dataset;
|
knowhere::DataSetPtr xb_dataset;
|
||||||
if (vec_field_data_type == DataType::VECTOR_BINARY) {
|
if (vec_field_data_type == DataType::VECTOR_BINARY) {
|
||||||
auto dataset = GenDataset(NB, metric_type, true);
|
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||||
auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||||
xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
|
xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
|
||||||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||||
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
|
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
|
||||||
auto dataset = GenDatasetWithDataType(
|
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||||
NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
|
|
||||||
auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
||||||
milvus::FieldId(100));
|
milvus::FieldId(100));
|
||||||
xb_dataset =
|
xb_dataset =
|
||||||
|
@ -143,7 +142,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||||
} else {
|
} else {
|
||||||
// VECTOR_FLOAT
|
// VECTOR_FLOAT
|
||||||
auto dataset = GenDataset(NB, metric_type, false);
|
auto dataset = GenFieldData(NB, metric_type);
|
||||||
auto f_vecs = dataset.get_col<float>(milvus::FieldId(100));
|
auto f_vecs = dataset.get_col<float>(milvus::FieldId(100));
|
||||||
xb_dataset = knowhere::GenDataSet(NB, DIM, f_vecs.data());
|
xb_dataset = knowhere::GenDataSet(NB, DIM, f_vecs.data());
|
||||||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||||
|
@ -173,14 +172,13 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||||
std::unique_ptr<SearchResult> result;
|
std::unique_ptr<SearchResult> result;
|
||||||
if (vec_field_data_type == DataType::VECTOR_FLOAT) {
|
if (vec_field_data_type == DataType::VECTOR_FLOAT) {
|
||||||
auto nb_for_nq = NQ + query_offset;
|
auto nb_for_nq = NQ + query_offset;
|
||||||
auto dataset = GenDataset(nb_for_nq, metric_type, false);
|
auto dataset = GenFieldData(nb_for_nq, metric_type);
|
||||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
||||||
auto xq_dataset =
|
auto xq_dataset =
|
||||||
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
|
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
|
||||||
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||||
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
|
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
|
||||||
auto dataset = GenDatasetWithDataType(
|
auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type);
|
||||||
NQ, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
|
|
||||||
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
|
||||||
milvus::FieldId(100));
|
milvus::FieldId(100));
|
||||||
auto xq_dataset =
|
auto xq_dataset =
|
||||||
|
@ -189,7 +187,8 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||||
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||||
} else {
|
} else {
|
||||||
auto nb_for_nq = NQ + query_offset;
|
auto nb_for_nq = NQ + query_offset;
|
||||||
auto dataset = GenDataset(nb_for_nq, metric_type, true);
|
auto dataset =
|
||||||
|
GenFieldData(nb_for_nq, metric_type, DataType::VECTOR_BINARY);
|
||||||
auto xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
auto xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||||
// offset of binary vector is 8-aligned bit-wise representation.
|
// offset of binary vector is 8-aligned bit-wise representation.
|
||||||
auto xq_dataset = knowhere::GenDataSet(
|
auto xq_dataset = knowhere::GenDataSet(
|
||||||
|
|
|
@ -338,8 +338,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
|
||||||
vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
|
vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto dataset =
|
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||||
GenDatasetWithDataType(NB, metric_type, vec_field_data_type);
|
|
||||||
if (is_binary) {
|
if (is_binary) {
|
||||||
// binary vector
|
// binary vector
|
||||||
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||||
|
@ -788,7 +787,7 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
||||||
};
|
};
|
||||||
|
|
||||||
// build disk ann index
|
// build disk ann index
|
||||||
auto dataset = GenDataset(NB, metric_type, false);
|
auto dataset = GenFieldData(NB, metric_type);
|
||||||
FixedVector<float> xb_data =
|
FixedVector<float> xb_data =
|
||||||
dataset.get_col<float>(milvus::FieldId(field_id));
|
dataset.get_col<float>(milvus::FieldId(field_id));
|
||||||
knowhere::DataSetPtr xb_dataset =
|
knowhere::DataSetPtr xb_dataset =
|
||||||
|
@ -871,8 +870,8 @@ TEST(Indexing, SearchDiskAnnWithFloat16) {
|
||||||
};
|
};
|
||||||
|
|
||||||
// build disk ann index
|
// build disk ann index
|
||||||
auto dataset = GenDatasetWithDataType(
|
auto dataset =
|
||||||
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
|
||||||
FixedVector<float16> xb_data =
|
FixedVector<float16> xb_data =
|
||||||
dataset.get_col<float16>(milvus::FieldId(field_id));
|
dataset.get_col<float16>(milvus::FieldId(field_id));
|
||||||
knowhere::DataSetPtr xb_dataset =
|
knowhere::DataSetPtr xb_dataset =
|
||||||
|
@ -954,8 +953,8 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) {
|
||||||
};
|
};
|
||||||
|
|
||||||
// build disk ann index
|
// build disk ann index
|
||||||
auto dataset = GenDatasetWithDataType(
|
auto dataset =
|
||||||
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
|
||||||
FixedVector<bfloat16> xb_data =
|
FixedVector<bfloat16> xb_data =
|
||||||
dataset.get_col<bfloat16>(milvus::FieldId(field_id));
|
dataset.get_col<bfloat16>(milvus::FieldId(field_id));
|
||||||
knowhere::DataSetPtr xb_dataset =
|
knowhere::DataSetPtr xb_dataset =
|
||||||
|
|
|
@ -218,49 +218,17 @@ generate_params(const knowhere::IndexType& index_type,
|
||||||
}
|
}
|
||||||
|
|
||||||
auto
|
auto
|
||||||
GenDataset(int64_t N,
|
GenFieldData(int64_t N,
|
||||||
const knowhere::MetricType& metric_type,
|
const knowhere::MetricType& metric_type,
|
||||||
bool is_binary,
|
milvus::DataType data_type = milvus::DataType::VECTOR_FLOAT,
|
||||||
int64_t dim = DIM) {
|
int64_t dim = DIM) {
|
||||||
auto schema = std::make_shared<milvus::Schema>();
|
auto schema = std::make_shared<milvus::Schema>();
|
||||||
if (!is_binary) {
|
schema->AddDebugField(
|
||||||
schema->AddDebugField(
|
"fakevec",
|
||||||
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
|
data_type,
|
||||||
return milvus::segcore::DataGen(schema, N);
|
(data_type != milvus::DataType::VECTOR_SPARSE_FLOAT ? dim : 0),
|
||||||
} else {
|
metric_type);
|
||||||
schema->AddDebugField(
|
return milvus::segcore::DataGen(schema, N);
|
||||||
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
|
|
||||||
return milvus::segcore::DataGen(schema, N);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
auto
|
|
||||||
GenDatasetWithDataType(int64_t N,
|
|
||||||
const knowhere::MetricType& metric_type,
|
|
||||||
milvus::DataType data_type,
|
|
||||||
int64_t dim = DIM) {
|
|
||||||
auto schema = std::make_shared<milvus::Schema>();
|
|
||||||
if (data_type == milvus::DataType::VECTOR_FLOAT16) {
|
|
||||||
schema->AddDebugField(
|
|
||||||
"fakevec", milvus::DataType::VECTOR_FLOAT16, dim, metric_type);
|
|
||||||
return milvus::segcore::DataGen(schema, N);
|
|
||||||
} else if (data_type == milvus::DataType::VECTOR_BFLOAT16) {
|
|
||||||
schema->AddDebugField(
|
|
||||||
"fakevec", milvus::DataType::VECTOR_BFLOAT16, dim, metric_type);
|
|
||||||
return milvus::segcore::DataGen(schema, N);
|
|
||||||
} else if (data_type == milvus::DataType::VECTOR_FLOAT) {
|
|
||||||
schema->AddDebugField(
|
|
||||||
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
|
|
||||||
return milvus::segcore::DataGen(schema, N);
|
|
||||||
} else if (data_type == milvus::DataType::VECTOR_SPARSE_FLOAT) {
|
|
||||||
schema->AddDebugField(
|
|
||||||
"fakevec", milvus::DataType::VECTOR_SPARSE_FLOAT, 0, metric_type);
|
|
||||||
return milvus::segcore::DataGen(schema, N);
|
|
||||||
} else {
|
|
||||||
schema->AddDebugField(
|
|
||||||
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
|
|
||||||
return milvus::segcore::DataGen(schema, N);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
using QueryResultPtr = std::unique_ptr<milvus::SearchResult>;
|
using QueryResultPtr = std::unique_ptr<milvus::SearchResult>;
|
||||||
|
|
Loading…
Reference in New Issue