Clean up the debug log for search result duplicate removal (#10769)

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
pull/10783/head
Cai Yudong 2021-10-27 20:02:26 +08:00 committed by GitHub
parent 2f53a57814
commit 5ad4cdda25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 6 deletions

View File

@@ -83,6 +83,7 @@ GetResultData(std::vector<std::vector<int64_t>>& search_records,
search_records[index].push_back(result_pair.offset_++);
}
#else
int64_t skip_dup_cnt = 0;
float prev_dis = MAXFLOAT;
std::unordered_set<int64_t> prev_pk_set;
while (loc_offset - query_offset < topk) {
@@ -111,11 +112,14 @@ GetResultData(std::vector<std::vector<int64_t>>& search_records,
prev_pk_set.insert(curr_pk);
} else {
// the entity with same distance and same primary key must be duplicated
LOG_SEGCORE_DEBUG_ << "skip duplicated search result, primary key " << curr_pk;
skip_dup_cnt++;
}
}
result_pair.offset_++;
}
if (skip_dup_cnt > 0) {
LOG_SEGCORE_DEBUG_ << "skip duplicated search result, count = " << skip_dup_cnt;
}
#endif
}

View File

@@ -1925,6 +1925,7 @@ func reduceSearchResultData(searchResultData []*schemapb.SearchResultData, nq in
//printSearchResultData(sData, strconv.FormatInt(int64(i), 10))
}
var skipDupCnt int64 = 0
var realTopK int64 = -1
for i := int64(0); i < nq; i++ {
offsets := make([]int64, len(searchResultData))
@@ -1967,10 +1968,7 @@ func reduceSearchResultData(searchResultData []*schemapb.SearchResultData, nq in
j++
} else {
// entity with same id and same score must be duplicated
log.Debug("skip duplicated search result",
zap.Int64("id", id),
zap.Float32("score", score),
zap.Float32("prevScore", prevScore))
skipDupCnt++
}
}
offsets[sel]++
@@ -1982,7 +1980,9 @@ func reduceSearchResultData(searchResultData []*schemapb.SearchResultData, nq in
realTopK = j
ret.Results.Topks = append(ret.Results.Topks, realTopK)
}
if skipDupCnt > 0 {
log.Debug("skip duplicated search result", zap.Int64("count", skipDupCnt))
}
ret.Results.TopK = realTopK
if metricType != "IP" {