Optimize performance of querying on InsertRecord (#22838)

Signed-off-by: yah01 <yang.cen@zilliz.com>
pull/22846/head
yah01 2023-03-20 10:13:56 +08:00 committed by GitHub
parent 77c9e33e70
commit 65c58b3e41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 24 additions and 28 deletions

View File

@@ -32,10 +32,10 @@ class OffsetMap {
virtual ~OffsetMap() = default; virtual ~OffsetMap() = default;
virtual std::vector<int64_t> virtual std::vector<int64_t>
find(const PkType pk) const = 0; find(const PkType& pk) const = 0;
virtual void virtual void
insert(const PkType pk, int64_t offset) = 0; insert(const PkType& pk, int64_t offset) = 0;
virtual void virtual void
seal() = 0; seal() = 0;
@@ -48,25 +48,25 @@ template <typename T>
class OffsetHashMap : public OffsetMap { class OffsetHashMap : public OffsetMap {
public: public:
std::vector<int64_t> std::vector<int64_t>
find(const PkType pk) const { find(const PkType& pk) const override {
auto offset_vector = map_.find(std::get<T>(pk)); auto offset_vector = map_.find(std::get<T>(pk));
return offset_vector != map_.end() ? offset_vector->second return offset_vector != map_.end() ? offset_vector->second
: std::vector<int64_t>(); : std::vector<int64_t>();
} }
void void
insert(const PkType pk, int64_t offset) { insert(const PkType& pk, int64_t offset) override {
map_[std::get<T>(pk)].emplace_back(offset); map_[std::get<T>(pk)].emplace_back(offset);
} }
void void
seal() { seal() override {
PanicInfo( PanicInfo(
"OffsetHashMap used for growing segment could not be sealed."); "OffsetHashMap used for growing segment could not be sealed.");
} }
bool bool
empty() const { empty() const override {
return map_.empty(); return map_.empty();
} }
@@ -78,45 +78,41 @@ template <typename T>
class OffsetOrderedArray : public OffsetMap { class OffsetOrderedArray : public OffsetMap {
public: public:
std::vector<int64_t> std::vector<int64_t>
find(const PkType pk) const { find(const PkType& pk) const override {
int left = 0, right = array_.size() - 1;
T target = std::get<T>(pk);
if (!is_sealed) if (!is_sealed)
PanicInfo("OffsetOrderedArray could not search before seal"); PanicInfo("OffsetOrderedArray could not search before seal");
while (left < right) { const T& target = std::get<T>(pk);
int mid = (left + right) >> 1; auto it =
if (array_[mid].first < target) std::lower_bound(array_.begin(),
left = mid + 1; array_.end(),
else target,
right = mid; [](const std::pair<T, int64_t>& elem,
} const T& value) { return elem.first < value; });
std::vector<int64_t> offset_vector; std::vector<int64_t> offset_vector;
for (int offset_id = right; offset_id < array_.size(); offset_id++) { for (; it != array_.end() && it->first == target; ++it) {
if (offset_id < 0 || array_[offset_id].first != target) offset_vector.push_back(it->second);
break;
offset_vector.push_back(array_[offset_id].second);
} }
return offset_vector; return offset_vector;
} }
void void
insert(const PkType pk, int64_t offset) { insert(const PkType& pk, int64_t offset) override {
if (is_sealed) if (is_sealed)
PanicInfo("OffsetOrderedArray could not insert after seal"); PanicInfo("OffsetOrderedArray could not insert after seal");
array_.push_back(std::make_pair(std::get<T>(pk), offset)); array_.push_back(std::make_pair(std::get<T>(pk), offset));
} }
void void
seal() { seal() override {
sort(array_.begin(), array_.end()); sort(array_.begin(), array_.end());
is_sealed = true; is_sealed = true;
} }
bool bool
empty() const { empty() const override {
return array_.empty(); return array_.empty();
} }
@@ -229,33 +225,33 @@ struct InsertRecord {
} }
std::vector<SegOffset> std::vector<SegOffset>
search_pk(const PkType pk, Timestamp timestamp) const { search_pk(const PkType& pk, Timestamp timestamp) const {
std::shared_lock lck(shared_mutex_); std::shared_lock lck(shared_mutex_);
std::vector<SegOffset> res_offsets; std::vector<SegOffset> res_offsets;
auto offset_iter = pk2offset_->find(pk); auto offset_iter = pk2offset_->find(pk);
for (auto offset : offset_iter) { for (auto offset : offset_iter) {
if (timestamps_[offset] <= timestamp) { if (timestamps_[offset] <= timestamp) {
res_offsets.push_back(SegOffset(offset)); res_offsets.emplace_back(offset);
} }
} }
return res_offsets; return res_offsets;
} }
std::vector<SegOffset> std::vector<SegOffset>
search_pk(const PkType pk, int64_t insert_barrier) const { search_pk(const PkType& pk, int64_t insert_barrier) const {
std::shared_lock lck(shared_mutex_); std::shared_lock lck(shared_mutex_);
std::vector<SegOffset> res_offsets; std::vector<SegOffset> res_offsets;
auto offset_iter = pk2offset_->find(pk); auto offset_iter = pk2offset_->find(pk);
for (auto offset : offset_iter) { for (auto offset : offset_iter) {
if (offset < insert_barrier) { if (offset < insert_barrier) {
res_offsets.push_back(SegOffset(offset)); res_offsets.emplace_back(offset);
} }
} }
return res_offsets; return res_offsets;
} }
void void
insert_pk(const PkType pk, int64_t offset) { insert_pk(const PkType& pk, int64_t offset) {
std::lock_guard lck(shared_mutex_); std::lock_guard lck(shared_mutex_);
pk2offset_->insert(pk, offset); pk2offset_->insert(pk, offset);
} }