Add unit tests in segcore (#15960)

Signed-off-by: Letian Jiang <letian.jiang@zilliz.com>
pull/15994/head
Letian Jiang 2022-03-10 16:33:59 +08:00 committed by GitHub
parent c6839bc729
commit 8f52e5b6c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 75 additions and 0 deletions

View File

@ -56,6 +56,7 @@ TEST(SegmentCoreTest, NormalDistributionTest) {
segment->PreDelete(N);
}
// Test insert row-based data
TEST(SegmentCoreTest, MockTest) {
using namespace milvus::segcore;
using namespace milvus::engine;
@ -95,6 +96,80 @@ TEST(SegmentCoreTest, MockTest) {
i++;
}
// Smoke test for inserting column-based data: rows are generated in row-major
// form, ordered, split into one contiguous buffer per field, and then handed
// to a growing segment. The test makes no checks on the segment afterwards.
TEST(SegmentCoreTest, MockTest2) {
    using namespace milvus::segcore;
    using namespace milvus::engine;

    // Schema with two fields: a 16-dim float vector followed by an int32.
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::INT32);

    // Generate the rows. Each row is the raw bytes of the vector immediately
    // followed by the raw bytes of the age. The engine is seeded with a fixed
    // value, so the generated data is reproducible.
    const int row_count = 10000;  // number of records
    std::default_random_engine rng(67);
    std::vector<int64_t> uids;
    std::vector<Timestamp> timestamps;
    std::vector<char> row_data;
    for (int row = 0; row < row_count; ++row) {
        uids.push_back(100000 + row);
        timestamps.push_back(0);

        // Vector components come first (16 draws), then the age (1 draw).
        float vec[16];
        for (auto& component : vec) {
            component = rng() % 2000 * 0.001 - 1.0;
        }
        const char* vec_bytes = reinterpret_cast<const char*>(vec);
        row_data.insert(row_data.end(), vec_bytes, vec_bytes + sizeof(vec));

        int age = rng() % 100;
        const char* age_bytes = reinterpret_cast<const char*>(&age);
        row_data.insert(row_data.end(), age_bytes, age_bytes + sizeof(age));
    }
    const auto row_bytes = sizeof(float) * 16 + sizeof(int);
    assert(row_data.size() == row_bytes * row_count);

    // Build (timestamp, pk, original row index) triples and sort them.
    // std::tuple compares lexicographically, so rows are ordered by timestamp
    // first and by pk second. Here every timestamp is 0 and the pks are
    // ascending, so the sorted order equals the generation order.
    int64_t size = row_count;
    std::vector<std::tuple<Timestamp, idx_t, int64_t>> ordering;
    ordering.reserve(size);
    for (int64_t i = 0; i < size; ++i) {
        ordering.emplace_back(timestamps[i], uids[i], i);
    }
    std::sort(ordering.begin(), ordering.end());

    // Per-field byte sizes and their running sum, i.e. the byte offset of each
    // field inside one row (field_offsets[0] == 0).
    const auto& field_sizes = schema->get_sizeof_infos();
    const auto field_count = schema->size();
    std::vector<int> field_offsets(field_count + 1, 0);
    std::partial_sum(field_sizes.begin(), field_sizes.end(), field_offsets.begin() + 1);

    // One destination buffer per field, large enough for `size` values.
    std::vector<aligned_vector<uint8_t>> entities(field_count);
    for (int fid = 0; fid < field_count; ++fid) {
        entities[fid].resize(field_sizes[fid] * size);
    }

    // Walk the rows in sorted order and scatter each field of the source row
    // into slot `dst_row` of that field's column buffer.
    std::vector<idx_t> sorted_uids(size);
    std::vector<Timestamp> sorted_timestamps(size);
    const char* raw_rows = row_data.data();
    for (int64_t dst_row = 0; dst_row < size; ++dst_row) {
        const auto& [ts, pk, src_row] = ordering[dst_row];
        sorted_timestamps[dst_row] = ts;
        sorted_uids[dst_row] = pk;

        const char* row_begin = raw_rows + src_row * row_bytes;
        for (int fid = 0; fid < field_count; ++fid) {
            auto len = field_sizes[fid];
            memcpy(entities[fid].data() + dst_row * len, row_begin + field_offsets[fid], len);
        }
    }

    // Insert the column-based data into a fresh growing segment.
    ColumnBasedRawData data_chunk{entities, row_count};
    auto segment = CreateGrowingSegment(schema);
    auto reserved_begin = segment->PreInsert(row_count);
    segment->Insert(reserved_begin, size, sorted_uids.data(), sorted_timestamps.data(), data_chunk);
}
TEST(SegmentCoreTest, SmallIndex) {
using namespace milvus::segcore;
using namespace milvus::engine;