Add comments and enhance unit test for retrieve (#8114)

Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>
pull/8116/head
zhenshan.cao 2021-09-16 23:43:49 +08:00 committed by GitHub
parent d6f0c5069f
commit 4c184921f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 7 deletions

View File

@ -495,10 +495,16 @@ SegmentSealedImpl::mask_with_timestamps(boost::dynamic_bitset<>& bitset_chunk, T
// TODO change the
AssertInfo(this->timestamps_.size() == get_row_count(), "Timestamp size not equal to row count");
auto range = timestamp_index_.get_active_range(timestamp);
// range == (size_, size_), where size_ is this->timestamps_.size():
// all of the data is usable, so bitset_chunk does not need to be updated.
// Conceptually this is an AND with an all-1s bitmask, which would be a no-op, so it is skipped.
if (range.first == range.second && range.first == this->timestamps_.size()) {
// just skip
return;
}
// range == (0, 0): none of the data can be used, so set bitset_chunk to all 0s directly.
// Conceptually this is an AND with an all-0s bitmask.
if (range.first == range.second && range.first == 0) {
bitset_chunk.reset();
return;

View File

@ -161,7 +161,8 @@ TEST(Retrieve2, LargeTimestamp) {
int64_t N = 100;
int64_t req_size = 10;
auto choose = [=](int i) { return i * 3 % N; };
int choose_sep = 3;
auto choose = [=](int i) { return i * choose_sep % N; };
uint64_t ts_offset = 100;
auto dataset = DataGen(schema, N, 42, ts_offset + 1);
auto segment = CreateSealedSegment(schema);
@ -181,12 +182,20 @@ TEST(Retrieve2, LargeTimestamp) {
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
plan->field_offsets_ = target_offsets;
auto retrieve_results = segment->Retrieve(plan.get(), ts_offset);
Assert(retrieve_results->fields_data_size() == 2);
auto field0 = retrieve_results->fields_data(0);
auto field1 = retrieve_results->fields_data(1);
Assert(field0.scalars().long_data().data_size() == 0);
Assert(field1.scalars().long_data().data_size() == 0);
std::vector<int> filter_timestamps {-1, 0, 1, 10 ,20};
filter_timestamps.push_back(N / 2);
for (const auto & f_ts: filter_timestamps) {
auto retrieve_results = segment->Retrieve(plan.get(), ts_offset + 1 + f_ts);
Assert(retrieve_results->fields_data_size() == 2);
auto field0 = retrieve_results->fields_data(0);
auto field1 = retrieve_results->fields_data(1);
int target_num = (f_ts + choose_sep ) / choose_sep;
if (target_num >req_size) {
target_num = req_size;
}
Assert(field0.scalars().long_data().data_size() == target_num);
Assert(field1.vectors().float_vector().data_size() == target_num * DIM);
}
}
TEST(GetEntityByIds, PrimaryKey) {