Enable sub_query_result

Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
pull/4973/head^2
FluorineDog 2021-01-06 12:01:13 +08:00 committed by yefu.chen
parent b06e01f523
commit 5a26f6ef21
37 changed files with 710 additions and 200 deletions

View File

@ -18,7 +18,7 @@
namespace milvus {
inline int
field_sizeof(DataType data_type, int dim = 1) {
datatype_sizeof(DataType data_type, int dim = 1) {
switch (data_type) {
case DataType::BOOL:
return sizeof(bool);
@ -78,7 +78,7 @@ datatype_name(DataType data_type) {
}
inline bool
field_is_vector(DataType datatype) {
datatype_is_vector(DataType datatype) {
return datatype == DataType::VECTOR_BINARY || datatype == DataType::VECTOR_FLOAT;
}
@ -119,9 +119,9 @@ struct FieldMeta {
int
get_sizeof() const {
if (is_vector()) {
return field_sizeof(type_, get_dim());
return datatype_sizeof(type_, get_dim());
} else {
return field_sizeof(type_, 1);
return datatype_sizeof(type_, 1);
}
}

View File

@ -50,7 +50,7 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
schema->primary_key_offset_opt_ = schema->size();
}
if (field_is_vector(data_type)) {
if (datatype_is_vector(data_type)) {
auto type_map = RepeatedKeyValToMap(child.type_params());
auto index_map = RepeatedKeyValToMap(child.index_params());
if (!index_map.count("metric_type")) {

View File

@ -14,13 +14,15 @@
#include <faiss/MetricType.h>
#include <string>
#include <boost/align/aligned_allocator.hpp>
#include <memory>
#include <vector>
namespace milvus {
using Timestamp = uint64_t; // TODO: use TiKV-like timestamp
using engine::DataType;
using engine::FieldElementType;
using engine::QueryResult;
using engine::idx_t;
using MetricType = faiss::MetricType;
MetricType
@ -39,4 +41,33 @@ constexpr std::false_type always_false{};
template <typename T>
using aligned_vector = std::vector<T, boost::alignment::aligned_allocator<T, 512>>;
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Result of a vector search over one segment.
///
/// The per-hit arrays are flat: the constructor sizes `result_distances_` and
/// `internal_seg_offsets_` to `num_queries * topK` entries.
struct QueryResult {
    /// Creates an empty result; all counters are zero and all arrays are empty.
    QueryResult() = default;

    /// Creates a result for `num_queries` queries with `topK` hits each and
    /// pre-sizes the distance and segment-offset arrays accordingly.
    /// `result_offsets_` and `row_data_` are left empty.
    QueryResult(uint64_t num_queries, uint64_t topK) : num_queries_(num_queries), topK_(topK) {
        const auto count = get_row_count();
        result_distances_.resize(count);
        internal_seg_offsets_.resize(count);
    }

    /// Total number of result rows, i.e. `topK_ * num_queries_`.
    [[nodiscard]] uint64_t
    get_row_count() const {
        return topK_ * num_queries_;
    }

 public:
    // Counters are zero-initialized so that a default-constructed result never
    // exposes indeterminate values (e.g. through get_row_count()).
    uint64_t num_queries_ = 0;
    uint64_t topK_ = 0;
    uint64_t seg_id_ = 0;
    std::vector<float> result_distances_;

 public:
    // TODO(gexi): utilize these fields
    std::vector<int64_t> internal_seg_offsets_;
    std::vector<int64_t> result_offsets_;
    std::vector<std::vector<char>> row_data_;
};
using QueryResultPtr = std::shared_ptr<QueryResult>;
} // namespace milvus

View File

@ -4,13 +4,16 @@ set(MILVUS_QUERY_SRCS
generated/PlanNode.cpp
generated/Expr.cpp
visitors/ShowPlanNodeVisitor.cpp
visitors/ExecPlanNodeVisitor.cpp
visitors/ShowExprVisitor.cpp
visitors/ExecPlanNodeVisitor.cpp
visitors/ExecExprVisitor.cpp
visitors/VerifyPlanNodeVisitor.cpp
visitors/VerifyExprVisitor.cpp
Plan.cpp
Search.cpp
SearchOnSealed.cpp
BruteForceSearch.cpp
SearchBruteForce.cpp
SubQueryResult.cpp
)
add_library(milvus_query ${MILVUS_QUERY_SRCS})
target_link_libraries(milvus_query milvus_proto milvus_utils)
target_link_libraries(milvus_query milvus_proto milvus_utils knowhere)

View File

@ -21,6 +21,7 @@
#include <boost/align/aligned_allocator.hpp>
#include <boost/algorithm/string.hpp>
#include <algorithm>
#include "query/generated/VerifyPlanNodeVisitor.h"
namespace milvus::query {
@ -106,7 +107,7 @@ Parser::ParseRangeNode(const Json& out_body) {
auto field_name = out_iter.key();
auto body = out_iter.value();
auto data_type = schema[field_name].get_data_type();
Assert(!field_is_vector(data_type));
Assert(!datatype_is_vector(data_type));
switch (data_type) {
case DataType::BOOL:
@ -138,6 +139,8 @@ Parser::CreatePlanImpl(const std::string& dsl_str) {
if (predicate != nullptr) {
vec_node->predicate_ = std::move(predicate);
}
VerifyPlanNodeVisitor verifier;
vec_node->accept(verifier);
auto plan = std::make_unique<Plan>(schema);
plan->tag2field_ = std::move(tag2field_);
@ -152,7 +155,7 @@ Parser::ParseTermNode(const Json& out_body) {
auto field_name = out_iter.key();
auto body = out_iter.value();
auto data_type = schema[field_name].get_data_type();
Assert(!field_is_vector(data_type));
Assert(!datatype_is_vector(data_type));
switch (data_type) {
case DataType::BOOL: {
return ParseTermNodeImpl<bool>(field_name, body);

View File

@ -16,7 +16,7 @@
#include <faiss/utils/distances.h>
#include "utils/tools.h"
#include "query/BruteForceSearch.h"
#include "query/SearchBruteForce.h"
namespace milvus::query {
@ -34,13 +34,13 @@ create_bitmap_view(std::optional<const BitmapSimple*> bitmaps_opt, int64_t chunk
}
Status
QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
const query::QueryInfo& info,
const float* query_data,
int64_t num_queries,
Timestamp timestamp,
std::optional<const BitmapSimple*> bitmaps_opt,
QueryResult& results) {
FloatSearch(const segcore::SegmentSmallIndex& segment,
const query::QueryInfo& info,
const float* query_data,
int64_t num_queries,
Timestamp timestamp,
std::optional<const BitmapSimple*> bitmaps_opt,
QueryResult& results) {
auto& schema = segment.get_schema();
auto& indexing_record = segment.get_indexing_record();
auto& record = segment.get_insert_record();
@ -75,6 +75,7 @@ QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
const auto& indexing_entry = indexing_record.get_vec_entry(vecfield_offset);
auto search_conf = indexing_entry.get_search_conf(topK);
// TODO: use sub_qr
for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
auto indexing = indexing_entry.get_vec_indexing(chunk_id);
auto dataset = knowhere::GenDataset(num_queries, dim, query_data);
@ -99,10 +100,12 @@ QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
Assert(vec_chunk_size == indexing_entry.get_chunk_size());
auto max_chunk = upper_div(ins_barrier, vec_chunk_size);
// TODO: use sub_qr
for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
std::vector<int64_t> buf_uids(total_count, -1);
std::vector<float> buf_dis(total_count, std::numeric_limits<float>::max());
// should be not visitable
faiss::float_maxheap_array_t buf = {(size_t)num_queries, (size_t)topK, buf_uids.data(), buf_dis.data()};
auto& chunk = vec_ptr->get_chunk(chunk_id);
@ -112,6 +115,7 @@ QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
auto nsize = element_end - element_begin;
auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id);
// TODO: make it wrapped
faiss::knn_L2sqr(query_data, chunk.data(), dim, num_queries, nsize, &buf, bitmap_view);
Assert(buf_uids.size() == total_count);
@ -134,13 +138,13 @@ QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
}
Status
BinaryQueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
const query::QueryInfo& info,
const uint8_t* query_data,
int64_t num_queries,
Timestamp timestamp,
std::optional<const BitmapSimple*> bitmaps_opt,
QueryResult& results) {
BinarySearch(const segcore::SegmentSmallIndex& segment,
const query::QueryInfo& info,
const uint8_t* query_data,
int64_t num_queries,
Timestamp timestamp,
std::optional<const BitmapSimple*> bitmaps_opt,
QueryResult& results) {
auto& schema = segment.get_schema();
auto& indexing_record = segment.get_indexing_record();
auto& record = segment.get_insert_record();
@ -169,8 +173,8 @@ BinaryQueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
auto total_count = topK * num_queries;
// step 3: small indexing search
std::vector<int64_t> final_uids(total_count, -1);
std::vector<float> final_dis(total_count, std::numeric_limits<float>::max());
// TODO: this is too intrusive
// TODO: use QuerySubResult instead
query::dataset::BinaryQueryDataset query_dataset{metric_type, num_queries, topK, code_size, query_data};
using segcore::BinaryVector;
@ -181,30 +185,27 @@ BinaryQueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
auto vec_chunk_size = vec_ptr->get_chunk_size();
auto max_chunk = upper_div(ins_barrier, vec_chunk_size);
SubQueryResult final_result(num_queries, topK, metric_type);
for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) {
std::vector<int64_t> buf_uids(total_count, -1);
std::vector<float> buf_dis(total_count, std::numeric_limits<float>::max());
auto& chunk = vec_ptr->get_chunk(chunk_id);
auto element_begin = chunk_id * vec_chunk_size;
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size);
auto nsize = element_end - element_begin;
auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id);
BinarySearchBruteForce(query_dataset, chunk.data(), nsize, buf_dis.data(), buf_uids.data(), bitmap_view);
auto sub_result = BinarySearchBruteForce(query_dataset, chunk.data(), nsize, bitmap_view);
// convert chunk uid to segment uid
for (auto& x : buf_uids) {
for (auto& x : sub_result.mutable_labels()) {
if (x != -1) {
x += chunk_id * vec_chunk_size;
}
}
segcore::merge_into(num_queries, topK, final_dis.data(), final_uids.data(), buf_dis.data(), buf_uids.data());
final_result.merge(sub_result);
}
results.result_distances_ = std::move(final_dis);
results.internal_seg_offsets_ = std::move(final_uids);
results.result_distances_ = std::move(final_result.mutable_values());
results.internal_seg_offsets_ = std::move(final_result.mutable_labels());
results.topK_ = topK;
results.num_queries_ = num_queries;

View File

@ -14,27 +14,29 @@
#include "segcore/SegmentSmallIndex.h"
#include <deque>
#include <boost/dynamic_bitset.hpp>
#include "query/SubQueryResult.h"
namespace milvus::query {
using BitmapChunk = boost::dynamic_bitset<>;
using BitmapSimple = std::deque<BitmapChunk>;
// TODO: merge these two search into one
// note: C++17 does not support optional references (std::optional<T&>), so a pointer is wrapped instead
Status
QueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
const QueryInfo& info,
const float* query_data,
int64_t num_queries,
Timestamp timestamp,
std::optional<const BitmapSimple*> bitmap_opt,
QueryResult& results);
FloatSearch(const segcore::SegmentSmallIndex& segment,
const QueryInfo& info,
const float* query_data,
int64_t num_queries,
Timestamp timestamp,
std::optional<const BitmapSimple*> bitmap_opt,
QueryResult& results);
Status
BinaryQueryBruteForceImpl(const segcore::SegmentSmallIndex& segment,
const query::QueryInfo& info,
const uint8_t* query_data,
int64_t num_queries,
Timestamp timestamp,
std::optional<const BitmapSimple*> bitmaps_opt,
QueryResult& results);
BinarySearch(const segcore::SegmentSmallIndex& segment,
const query::QueryInfo& info,
const uint8_t* query_data,
int64_t num_queries,
Timestamp timestamp,
std::optional<const BitmapSimple*> bitmaps_opt,
QueryResult& results);
} // namespace milvus::query

View File

@ -9,58 +9,16 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "BruteForceSearch.h"
#include "SearchBruteForce.h"
#include <vector>
#include <common/Types.h>
#include <boost/dynamic_bitset.hpp>
#include <queue>
#include "SubQueryResult.h"
namespace milvus::query {
void
BinarySearchBruteForceNaive(MetricType metric_type,
int64_t code_size,
const uint8_t* binary_chunk,
int64_t chunk_size,
int64_t topk,
int64_t num_queries,
const uint8_t* query_data,
float* result_distances,
idx_t* result_labels,
faiss::ConcurrentBitsetPtr bitset) {
// THIS IS A NAIVE IMPLEMENTATION, ready for optimize
Assert(metric_type == faiss::METRIC_Jaccard);
Assert(code_size % 4 == 0);
using T = std::tuple<float, int>;
for (int64_t q = 0; q < num_queries; ++q) {
auto query_ptr = query_data + code_size * q;
auto query = boost::dynamic_bitset(query_ptr, query_ptr + code_size);
std::vector<T> max_heap(topk + 1, std::make_tuple(std::numeric_limits<float>::max(), -1));
for (int64_t i = 0; i < chunk_size; ++i) {
auto element_ptr = binary_chunk + code_size * i;
auto element = boost::dynamic_bitset(element_ptr, element_ptr + code_size);
auto the_and = (query & element).count();
auto the_or = (query | element).count();
auto distance = the_or ? (float)(the_or - the_and) / the_or : 0;
if (distance < std::get<0>(max_heap[0])) {
max_heap[topk] = std::make_tuple(distance, i);
std::push_heap(max_heap.begin(), max_heap.end());
std::pop_heap(max_heap.begin(), max_heap.end());
}
}
std::sort(max_heap.begin(), max_heap.end());
for (int k = 0; k < topk; ++k) {
auto info = max_heap[k];
result_distances[k + q * topk] = std::get<0>(info);
result_labels[k + q * topk] = std::get<1>(info);
}
}
}
void
SubQueryResult
BinarySearchBruteForceFast(MetricType metric_type,
int64_t code_size,
const uint8_t* binary_chunk,
@ -68,9 +26,11 @@ BinarySearchBruteForceFast(MetricType metric_type,
int64_t topk,
int64_t num_queries,
const uint8_t* query_data,
float* result_distances,
idx_t* result_labels,
faiss::ConcurrentBitsetPtr bitset) {
const faiss::BitsetView& bitset) {
SubQueryResult sub_result(num_queries, topk, metric_type);
float* result_distances = sub_result.get_values();
idx_t* result_labels = sub_result.get_labels();
const idx_t block_size = chunk_size;
bool use_heap = true;
@ -132,18 +92,26 @@ BinarySearchBruteForceFast(MetricType metric_type,
} else {
PanicInfo("Unsupported metric type");
}
return sub_result;
}
void
FloatSearchBruteForceFast(MetricType metric_type,
const float* chunk_data,
int64_t chunk_size,
float* result_distances,
idx_t* result_labels,
const faiss::BitsetView& bitset) {
// TODO
}
SubQueryResult
BinarySearchBruteForce(const dataset::BinaryQueryDataset& query_dataset,
const uint8_t* binary_chunk,
int64_t chunk_size,
float* result_distances,
idx_t* result_labels,
faiss::ConcurrentBitsetPtr bitset) {
const faiss::BitsetView& bitset) {
// TODO: refactor the internal function
BinarySearchBruteForceFast(query_dataset.metric_type, query_dataset.code_size, binary_chunk, chunk_size,
query_dataset.topk, query_dataset.num_queries, query_dataset.query_data,
result_distances, result_labels, bitset);
return BinarySearchBruteForceFast(query_dataset.metric_type, query_dataset.code_size, binary_chunk, chunk_size,
query_dataset.topk, query_dataset.num_queries, query_dataset.query_data, bitset);
}
} // namespace milvus::query

View File

@ -13,6 +13,7 @@
#include <faiss/utils/BinaryDistance.h>
#include "segcore/ConcurrentVector.h"
#include "common/Schema.h"
#include "query/SubQueryResult.h"
namespace milvus::query {
using MetricType = faiss::MetricType;
@ -28,12 +29,10 @@ struct BinaryQueryDataset {
} // namespace dataset
void
SubQueryResult
BinarySearchBruteForce(const dataset::BinaryQueryDataset& query_dataset,
const uint8_t* binary_chunk,
int64_t chunk_size,
float* result_distances,
idx_t* result_labels,
faiss::ConcurrentBitsetPtr bitset = nullptr);
const faiss::BitsetView& bitset = nullptr);
} // namespace milvus::query

View File

@ -0,0 +1,77 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "utils/EasyAssert.h"
#include "query/SubQueryResult.h"
#include "segcore/Reduce.h"
namespace milvus::query {
// Merges `right` into this result in place, one query at a time.
//
// Both operands must have the same number of queries, the same topk and the same
// metric type, and `is_desc` must agree with is_descending(metric_type_); all of
// these are asserted. For every query the two topk-long runs are combined with a
// two-way merge that keeps the first `topk_` picks: the larger value is taken
// first when `is_desc`, the smaller otherwise, and the left entry wins ties.
// NOTE(review): the merge only yields a true top-k if each run is already ordered
// best-first — presumably guaranteed by the producers; confirm.
template <bool is_desc>
void
SubQueryResult::merge_impl(const SubQueryResult& right) {
    Assert(num_queries_ == right.num_queries_);
    Assert(topk_ == right.topk_);
    Assert(metric_type_ == right.metric_type_);
    Assert(is_desc == is_descending(metric_type_));

    // Scratch space for one merged run. Allocated once and reused for every query;
    // each iteration overwrites all `topk_` slots before they are copied out.
    std::vector<float> buf_values(topk_);
    std::vector<int64_t> buf_labels(topk_);

    for (int64_t qn = 0; qn < num_queries_; ++qn) {
        auto offset = qn * topk_;

        int64_t* __restrict__ left_labels = this->get_labels() + offset;
        float* __restrict__ left_values = this->get_values() + offset;

        auto right_labels = right.get_labels() + offset;
        auto right_values = right.get_values() + offset;

        int64_t lit = 0;  // left iter
        int64_t rit = 0;  // right iter

        for (int64_t buf_iter = 0; buf_iter < topk_; ++buf_iter) {
            // lit + rit == buf_iter < topk_, so both reads stay inside their runs.
            auto left_v = left_values[lit];
            auto right_v = right_values[rit];
            // optimize out at compiling
            if (is_desc ? (left_v >= right_v) : (left_v <= right_v)) {
                buf_values[buf_iter] = left_values[lit];
                buf_labels[buf_iter] = left_labels[lit];
                ++lit;
            } else {
                buf_values[buf_iter] = right_values[rit];
                buf_labels[buf_iter] = right_labels[rit];
                ++rit;
            }
        }

        // Publish the merged run back into the left-hand (this) storage.
        std::copy_n(buf_values.data(), topk_, left_values);
        std::copy_n(buf_labels.data(), topk_, left_labels);
    }
}
void
SubQueryResult::merge(const SubQueryResult& sub_result) {
Assert(metric_type_ == sub_result.metric_type_);
if (is_descending(metric_type_)) {
this->merge_impl<true>(sub_result);
} else {
this->merge_impl<false>(sub_result);
}
}
// Non-mutating merge: copies `left`, merges `right` into the copy and returns it.
SubQueryResult
SubQueryResult::merge(const SubQueryResult& left, const SubQueryResult& right) {
    SubQueryResult merged(left);
    merged.merge(right);
    return merged;
}
} // namespace milvus::query

View File

@ -0,0 +1,98 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include "common/Types.h"
#include <limits>
#include <vector>
namespace milvus::query {
// Top-k search result for a batch of queries, stored as two flat arrays of
// `num_queries * topk` entries: `labels_` (ids, initialised to -1) and `values_`
// (scores, initialised to init_value(metric_type)).
class SubQueryResult {
 public:
    // Builds a result in which every slot holds label -1 and the metric's initial value.
    // The initializer list follows the member declaration order (avoids -Wreorder).
    SubQueryResult(int64_t num_queries, int64_t topk, MetricType metric_type)
        : num_queries_(num_queries),
          topk_(topk),
          metric_type_(metric_type),
          labels_(num_queries * topk, -1),
          values_(num_queries * topk, init_value(metric_type)) {
    }

 public:
    // Initial score for an unfilled slot: -FLT_MAX for descending metrics,
    // +FLT_MAX otherwise.
    static constexpr float
    init_value(MetricType metric_type) {
        return (is_descending(metric_type) ? -1 : 1) * std::numeric_limits<float>::max();
    }

    // True when the metric is ordered with larger values first.
    // Currently only METRIC_INNER_PRODUCT is treated as descending.
    static constexpr bool
    is_descending(MetricType metric_type) {
        // TODO
        if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
            return true;
        } else {
            return false;
        }
    }

 public:
    int64_t
    get_num_queries() const {
        return num_queries_;
    }

    int64_t
    get_topk() const {
        return topk_;
    }

    // Raw pointer to the flat label array (read-only).
    const int64_t*
    get_labels() const {
        return labels_.data();
    }

    // Raw pointer to the flat label array (writable).
    int64_t*
    get_labels() {
        return labels_.data();
    }

    // Raw pointer to the flat value array (read-only).
    const float*
    get_values() const {
        return values_.data();
    }

    // Raw pointer to the flat value array (writable).
    float*
    get_values() {
        return values_.data();
    }

    // Direct access to the underlying label vector, e.g. to move it out.
    auto&
    mutable_labels() {
        return labels_;
    }

    // Direct access to the underlying value vector, e.g. to move it out.
    auto&
    mutable_values() {
        return values_;
    }

    // Returns a new result holding the merge of `left` and `right`; neither input is modified.
    static SubQueryResult
    merge(const SubQueryResult& left, const SubQueryResult& right);

    // Merges `sub_result` into this object in place.
    void
    merge(const SubQueryResult& sub_result);

 private:
    // In-place merge specialised at compile time on the ordering direction.
    template <bool is_desc>
    void
    merge_impl(const SubQueryResult& sub_result);

 private:
    int64_t num_queries_;
    int64_t topk_;
    MetricType metric_type_;
    std::vector<int64_t> labels_;
    std::vector<float> values_;
};
} // namespace milvus::query

View File

@ -21,7 +21,7 @@
#include "ExprVisitor.h"
namespace milvus::query {
class ExecExprVisitor : ExprVisitor {
class ExecExprVisitor : public ExprVisitor {
public:
void
visit(BoolUnaryExpr& expr) override;

View File

@ -19,7 +19,7 @@
#include "PlanNodeVisitor.h"
namespace milvus::query {
class ExecPlanNodeVisitor : PlanNodeVisitor {
class ExecPlanNodeVisitor : public PlanNodeVisitor {
public:
void
visit(FloatVectorANNS& node) override;

View File

@ -19,7 +19,7 @@
#include "ExprVisitor.h"
namespace milvus::query {
class ShowExprVisitor : ExprVisitor {
class ShowExprVisitor : public ExprVisitor {
public:
void
visit(BoolUnaryExpr& expr) override;

View File

@ -20,7 +20,7 @@
#include "PlanNodeVisitor.h"
namespace milvus::query {
class ShowPlanNodeVisitor : PlanNodeVisitor {
class ShowPlanNodeVisitor : public PlanNodeVisitor {
public:
void
visit(FloatVectorANNS& node) override;

View File

@ -0,0 +1,36 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#error TODO: copy this file out, and modify the content.
#include "query/generated/VerifyExprVisitor.h"
namespace milvus::query {
// Visitor hook for BoolUnaryExpr nodes.
// Placeholder in the template file: the body is empty, so no verification happens yet.
void
VerifyExprVisitor::visit(BoolUnaryExpr& expr) {
// TODO
}
// Visitor hook for BoolBinaryExpr nodes.
// Placeholder in the template file: the body is empty, so no verification happens yet.
void
VerifyExprVisitor::visit(BoolBinaryExpr& expr) {
// TODO
}
// Visitor hook for TermExpr nodes.
// Placeholder in the template file: the body is empty, so no verification happens yet.
void
VerifyExprVisitor::visit(TermExpr& expr) {
// TODO
}
// Visitor hook for RangeExpr nodes.
// Placeholder in the template file: the body is empty, so no verification happens yet.
void
VerifyExprVisitor::visit(RangeExpr& expr) {
// TODO
}
} // namespace milvus::query

View File

@ -0,0 +1,40 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
// Generated File
// DO NOT EDIT
#include <optional>
#include <boost/dynamic_bitset.hpp>
#include <utility>
#include <deque>
#include "segcore/SegmentSmallIndex.h"
#include "query/ExprImpl.h"
#include "ExprVisitor.h"
namespace milvus::query {
// ExprVisitor subclass that overrides visit() for the four expression node types
// BoolUnaryExpr, BoolBinaryExpr, TermExpr and RangeExpr.
// It declares no data members; the trailing `public:` section is empty.
class VerifyExprVisitor : public ExprVisitor {
public:
void
visit(BoolUnaryExpr& expr) override;
void
visit(BoolBinaryExpr& expr) override;
void
visit(TermExpr& expr) override;
void
visit(RangeExpr& expr) override;
public:
};
} // namespace milvus::query

View File

@ -0,0 +1,26 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#error TODO: copy this file out, and modify the content.
#include "query/generated/VerifyPlanNodeVisitor.h"
namespace milvus::query {
// Visitor hook for FloatVectorANNS plan nodes.
// Placeholder in the template file: the body is empty, so no verification happens yet.
void
VerifyPlanNodeVisitor::visit(FloatVectorANNS& node) {
// TODO
}
// Visitor hook for BinaryVectorANNS plan nodes.
// Placeholder in the template file: the body is empty, so no verification happens yet.
void
VerifyPlanNodeVisitor::visit(BinaryVectorANNS& node) {
// TODO
}
} // namespace milvus::query

View File

@ -0,0 +1,37 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
// Generated File
// DO NOT EDIT
#include "utils/Json.h"
#include "query/PlanImpl.h"
#include "segcore/SegmentBase.h"
#include <utility>
#include "PlanNodeVisitor.h"
namespace milvus::query {
// PlanNodeVisitor subclass that overrides visit() for FloatVectorANNS and
// BinaryVectorANNS plan nodes.
// NOTE(review): std::optional is used below, but <optional> is not among the headers
// this file includes directly; presumably it arrives transitively — confirm.
class VerifyPlanNodeVisitor : public PlanNodeVisitor {
public:
void
visit(FloatVectorANNS& node) override;
void
visit(BinaryVectorANNS& node) override;
public:
// Result type slot shared with the other generated visitors.
using RetType = QueryResult;
VerifyPlanNodeVisitor() = default;
private:
// NOTE(review): not read or written by the visit() implementations visible in this
// change; presumably kept only for generator uniformity — confirm.
std::optional<RetType> ret_;
};
} // namespace milvus::query

View File

@ -79,7 +79,7 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
bitset_pack, ret);
} else {
QueryBruteForceImpl(*segment, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret);
FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret);
}
ret_ = ret;
@ -104,7 +104,7 @@ ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) {
bitset_pack = &bitmap_holder;
}
BinaryQueryBruteForceImpl(*segment, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret);
BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret);
ret_ = ret;
}

View File

@ -112,7 +112,7 @@ TermExtract(const TermExpr& expr_raw) {
void
ShowExprVisitor::visit(TermExpr& expr) {
Assert(!ret_.has_value());
Assert(field_is_vector(expr.data_type_) == false);
Assert(datatype_is_vector(expr.data_type_) == false);
auto terms = [&] {
switch (expr.data_type_) {
case DataType::BOOL:
@ -161,7 +161,7 @@ ConditionExtract(const RangeExpr& expr_raw) {
void
ShowExprVisitor::visit(RangeExpr& expr) {
Assert(!ret_.has_value());
Assert(field_is_vector(expr.data_type_) == false);
Assert(datatype_is_vector(expr.data_type_) == false);
auto conditions = [&] {
switch (expr.data_type_) {
case DataType::BOOL:

View File

@ -0,0 +1,35 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "query/generated/VerifyExprVisitor.h"
namespace milvus::query {
// Visitor hook for BoolUnaryExpr nodes.
// Currently a no-op: the body is empty, so no verification is performed.
void
VerifyExprVisitor::visit(BoolUnaryExpr& expr) {
// TODO
}
// Visitor hook for BoolBinaryExpr nodes.
// Currently a no-op: the body is empty, so no verification is performed.
void
VerifyExprVisitor::visit(BoolBinaryExpr& expr) {
// TODO
}
// Visitor hook for TermExpr nodes.
// Currently a no-op: the body is empty, so no verification is performed.
void
VerifyExprVisitor::visit(TermExpr& expr) {
// TODO
}
// Visitor hook for RangeExpr nodes.
// Currently a no-op: the body is empty, so no verification is performed.
void
VerifyExprVisitor::visit(RangeExpr& expr) {
// TODO
}
} // namespace milvus::query

View File

@ -0,0 +1,85 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "query/generated/VerifyPlanNodeVisitor.h"
#include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "segcore/SegmentSmallIndex.h"
#include "knowhere/index/vector_index/ConfAdapter.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
namespace milvus::query {
#if 1
namespace impl {
// THIS CONTAINS EXTRA BODY FOR VISITOR
// WILL BE USED BY GENERATOR UNDER suvlim/core_gen/
// Extra class body consumed by the visitor generator (see the comment above this class).
// It only contributes the RetType alias, the defaulted constructor and the ret_ member;
// no visit() overrides are declared here.
// NOTE(review): this inherits PlanNodeVisitor privately, whereas the generated header
// declares `public PlanNodeVisitor` — confirm whether the generator depends on this line.
class VerifyPlanNodeVisitor : PlanNodeVisitor {
public:
using RetType = QueryResult;
VerifyPlanNodeVisitor() = default;
private:
std::optional<RetType> ret_;
};
} // namespace impl
#endif
// Guesses the index type from the keys present in `search_params`.
//
// A fixed table maps characteristic search-parameter keys to an index type. The
// params are walked in the JSON object's iteration order and the index type of the
// first key found in the table is returned. If no key matches, failure is reported
// through PanicInfo (presumably this does not return — confirm).
static knowhere::IndexType
InferIndexType(const Json& search_params) {
    // ivf -> nprobe
    // nsg -> search_length
    // hnsw/rhnsw/*pq/*sq -> ef
    // annoy -> search_k
    // ngtpanng / ngtonng -> max_search_edges / epsilon
    static const std::map<std::string, knowhere::IndexType> key_list = [] {
        std::map<std::string, knowhere::IndexType> list;
        namespace ip = knowhere::IndexParams;
        namespace ie = knowhere::IndexEnum;
        list.emplace(ip::nprobe, ie::INDEX_FAISS_IVFFLAT);
        list.emplace(ip::search_length, ie::INDEX_NSG);
        list.emplace(ip::ef, ie::INDEX_HNSW);
        list.emplace(ip::search_k, ie::INDEX_ANNOY);
        list.emplace(ip::max_search_edges, ie::INDEX_NGTONNG);
        list.emplace(ip::epsilon, ie::INDEX_NGTONNG);
        return list;
    }();

    for (auto& kv : search_params.items()) {
        // Single lookup per key instead of count() followed by at().
        auto found = key_list.find(kv.key());
        if (found != key_list.end()) {
            return found->second;
        }
    }
    PanicInfo("failed to infer index type");
}
// Checks the search parameters attached to a float-vector ANNS node.
// The index type is inferred from the parameter keys, the matching knowhere config
// adapter is looked up, and its CheckSearch() is run on a copy of the parameters;
// a failed check trips AssertInfo with "invalid search params".
void
VerifyPlanNodeVisitor::visit(FloatVectorANNS& node) {
    const auto& raw_params = node.query_info_.search_params_;
    const auto index_type = InferIndexType(raw_params);
    auto conf_adapter = knowhere::AdapterMgr::GetInstance().GetAdapter(index_type);

    // mock the api, topk will be passed from placeholder
    auto checked_params = raw_params;
    checked_params[knowhere::meta::TOPK] = 10;

    // NOTE: the second parameter is not checked in knowhere, may be redundant
    const auto passed = conf_adapter->CheckSearch(checked_params, index_type, knowhere::IndexMode::MODE_CPU);
    AssertInfo(passed, "invalid search params");
}
// Visitor hook for BinaryVectorANNS plan nodes.
// Currently a no-op: unlike the FloatVectorANNS overload, no search-parameter
// check is performed here yet.
void
VerifyPlanNodeVisitor::visit(BinaryVectorANNS& node) {
// TODO
}
} // namespace milvus::query

View File

@ -24,5 +24,6 @@ target_link_libraries(milvus_segcore
dl backtrace
milvus_common
milvus_query
milvus_utils
)

View File

@ -13,7 +13,7 @@
#include <vector>
#include "IndexMeta.h"
#include "utils/Types.h"
#include "common/Types.h"
#include "common/Schema.h"
#include <memory>

View File

@ -274,19 +274,14 @@ SegmentNaive::QueryImpl(query::QueryDeprecatedPtr query_info, Timestamp timestam
auto distances = final->Get<float*>(knowhere::meta::DISTANCE);
auto total_num = num_queries * topK;
result.result_ids_.resize(total_num);
result.result_distances_.resize(total_num);
result.num_queries_ = num_queries;
result.topK_ = topK;
std::copy_n(ids, total_num, result.result_ids_.data());
std::copy_n(ids, total_num, result.internal_seg_offsets_.data());
std::copy_n(distances, total_num, result.result_distances_.data());
for (auto& id : result.result_ids_) {
id = record_.uids_[id];
}
return Status::OK();
}
@ -347,7 +342,7 @@ SegmentNaive::QuerySlowImpl(query::QueryDeprecatedPtr query_info, Timestamp time
result.topK_ = topK;
auto row_num = topK * num_queries;
result.result_ids_.resize(row_num);
result.internal_seg_offsets_.resize(row_num);
result.result_distances_.resize(row_num);
for (int q_id = 0; q_id < num_queries; ++q_id) {
@ -356,7 +351,7 @@ SegmentNaive::QuerySlowImpl(query::QueryDeprecatedPtr query_info, Timestamp time
auto dst_id = topK - 1 - i + q_id * topK;
auto [dis, offset] = records[q_id].top();
records[q_id].pop();
result.result_ids_[dst_id] = record_.uids_[offset];
result.internal_seg_offsets_[dst_id] = offset;
result.result_distances_[dst_id] = dis;
}
}

View File

@ -349,19 +349,12 @@ SegmentSmallIndex::FillTargetEntry(const query::Plan* plan, QueryResult& results
Assert(results.result_offsets_.size() == size);
Assert(results.row_data_.size() == 0);
// TODO: deprecate
results.result_ids_.clear();
results.result_ids_.resize(size);
if (plan->schema_.get_is_auto_id()) {
auto& uids = record_.uids_;
for (int64_t i = 0; i < size; ++i) {
auto seg_offset = results.internal_seg_offsets_[i];
auto row_id = seg_offset == -1 ? -1 : uids[seg_offset];
// TODO: deprecate
results.result_ids_[i] = row_id;
std::vector<char> blob(sizeof(row_id));
memcpy(blob.data(), &row_id, sizeof(row_id));
results.row_data_.emplace_back(std::move(blob));
@ -377,9 +370,6 @@ SegmentSmallIndex::FillTargetEntry(const query::Plan* plan, QueryResult& results
auto seg_offset = results.internal_seg_offsets_[i];
auto row_id = seg_offset == -1 ? -1 : uids->operator[](seg_offset);
// TODO: deprecate
results.result_ids_[i] = row_id;
std::vector<char> blob(sizeof(row_id));
memcpy(blob.data(), &row_id, sizeof(row_id));
results.row_data_.emplace_back(std::move(blob));

View File

@ -14,10 +14,10 @@
#include "segcore/reduce_c.h"
#include "segcore/Reduce.h"
#include "utils/Types.h"
#include "common/Types.h"
#include "pb/service_msg.pb.h"
using SearchResult = milvus::engine::QueryResult;
using SearchResult = milvus::QueryResult;
int
MergeInto(int64_t num_queries, int64_t topk, float* distances, int64_t* uids, float* new_distances, int64_t* new_uids) {

View File

@ -165,7 +165,7 @@ CStatus
FillTargetEntry(CSegmentBase c_segment, CPlan c_plan, CQueryResult c_result) {
auto segment = (milvus::segcore::SegmentBase*)c_segment;
auto plan = (milvus::query::Plan*)c_plan;
auto result = (milvus::engine::QueryResult*)c_result;
auto result = (milvus::QueryResult*)c_result;
auto status = CStatus();
try {

View File

@ -136,45 +136,5 @@ struct AttrsData {
IDNumbers id_array_;
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Container for search results, stored as flat per-row arrays.
// A "row" is one (query, rank) pair, so fully populated arrays hold
// num_queries_ * topK_ entries (see get_row_count()).
struct QueryResult {
    // Creates an empty result: zero queries, zero topK, empty arrays.
    QueryResult() = default;

    // Pre-sizes the distance / segment-offset / id arrays for `num_queries` queries
    // with `topK` hits each. result_offsets_ and row_data_ are left empty.
    // The initializer list follows member declaration order (avoids -Wreorder).
    QueryResult(uint64_t num_queries, uint64_t topK) : num_queries_(num_queries), topK_(topK) {
        auto count = get_row_count();
        result_distances_.resize(count);
        internal_seg_offsets_.resize(count);

        // TODO: deprecated
        result_ids_.resize(count);
    }

    // Total number of rows: topK_ * num_queries_.
    [[nodiscard]] uint64_t
    get_row_count() const {
        return topK_ * num_queries_;
    }

    // Scalar members carry default initializers so that a default-constructed
    // QueryResult never exposes indeterminate values (e.g. through get_row_count()).
    uint64_t num_queries_ = 0;
    uint64_t topK_ = 0;
    // uint64_t total_row_count_; // total_row_count_ = topK * num_queries_

    // vector<tuple<Score, SegId, Offset>> data_reduced;

    // vector<tuple<Score, SegId, Offset, RawData>>

    // map<SegId, vector<tuple<DataOffset, ResLoc>>>
    uint64_t seg_id_ = 0;
    std::vector<float> result_distances_;

    // TODO(gexi): utilize these fields
    std::vector<int64_t> internal_seg_offsets_;
    std::vector<int64_t> result_offsets_;
    std::vector<std::vector<char>> row_data_;

    // TODO: deprecated, use row_data directly
    std::vector<idx_t> result_ids_;
};
using QueryResultPtr = std::shared_ptr<QueryResult>;
} // namespace engine
} // namespace milvus

View File

@ -14,6 +14,7 @@ set(MILVUS_TEST_FILES
test_binary.cpp
test_index_wrapper.cpp
test_sealed.cpp
test_reduce.cpp
)
add_executable(all_tests
${MILVUS_TEST_FILES}
@ -24,10 +25,8 @@ target_link_libraries(all_tests
gtest_main
milvus_segcore
milvus_indexbuilder
knowhere
log
pthread
milvus_utils
)
install (TARGETS all_tests DESTINATION unittest)

View File

@ -137,7 +137,7 @@ TEST(CApiTest, SearchTest) {
auto offset = PreInsert(segment, N);
auto ins_res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
assert(ins_res.error_code == Success);
ASSERT_EQ(ins_res.error_code, Success);
const char* dsl_string = R"(
{
@ -176,11 +176,11 @@ TEST(CApiTest, SearchTest) {
void* plan = nullptr;
auto status = CreatePlan(collection, dsl_string, &plan);
assert(status.error_code == Success);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup);
assert(status.error_code == Success);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
@ -189,7 +189,7 @@ TEST(CApiTest, SearchTest) {
CQueryResult search_result;
auto res = Search(segment, plan, placeholderGroups.data(), timestamps.data(), 1, &search_result);
assert(res.error_code == Success);
ASSERT_EQ(res.error_code, Success);
DeletePlan(plan);
DeletePlaceholderGroup(placeholderGroup);

View File

@ -33,7 +33,7 @@
#include "test_utils/Timer.h"
#include "segcore/Reduce.h"
#include "test_utils/DataGen.h"
#include "query/BruteForceSearch.h"
#include "query/SearchBruteForce.h"
using std::cin;
using std::cout;
@ -245,8 +245,6 @@ TEST(Indexing, BinaryBruteForce) {
schema->AddField("vecbin", DataType::VECTOR_BINARY, dim, MetricType::METRIC_Jaccard);
schema->AddField("age", DataType::INT64);
auto dataset = DataGen(schema, N, 10);
vector<float> distances(result_count);
vector<int64_t> ids(result_count);
auto bin_vec = dataset.get_col<uint8_t>(0);
auto line_sizeof = schema->operator[](0).get_sizeof();
auto query_data = 1024 * line_sizeof + bin_vec.data();
@ -258,13 +256,13 @@ TEST(Indexing, BinaryBruteForce) {
query_data //
};
query::BinarySearchBruteForce(query_dataset, bin_vec.data(), N, distances.data(), ids.data());
auto sub_result = query::BinarySearchBruteForce(query_dataset, bin_vec.data(), N);
QueryResult qr;
qr.num_queries_ = num_queries;
qr.topK_ = topk;
qr.internal_seg_offsets_ = ids;
qr.result_distances_ = distances;
qr.internal_seg_offsets_ = std::move(sub_result.mutable_labels());
qr.result_distances_ = std::move(sub_result.mutable_values());
auto json = QueryResultToJson(qr);
auto ref = json::parse(R"(

View File

@ -402,7 +402,7 @@ TEST(Query, FillSegment) {
pb::schema::CollectionSchema proto;
proto.set_name("col");
proto.set_description("asdfhsalkgfhsadg");
proto.set_autoid(true);
proto.set_autoid(false);
{
auto field = proto.add_fields();
@ -425,7 +425,7 @@ TEST(Query, FillSegment) {
field->set_fieldid(101);
field->set_is_primary_key(true);
field->set_description("asdgfsagf");
field->set_data_type(pb::schema::DataType::INT32);
field->set_data_type(pb::schema::DataType::INT64);
}
auto schema = Schema::ParseFrom(proto);
@ -466,18 +466,17 @@ TEST(Query, FillSegment) {
result.result_offsets_.resize(topk * num_queries);
segment->FillTargetEntry(plan.get(), result);
// TODO: deprecated result_ids_
ASSERT_EQ(result.result_ids_, result.internal_seg_offsets_);
auto ans = result.row_data_;
ASSERT_EQ(ans.size(), topk * num_queries);
int64_t std_index = 0;
auto std_vec = dataset.get_col<int64_t>(1);
for (auto& vec : ans) {
ASSERT_EQ(vec.size(), sizeof(int64_t));
int64_t val;
memcpy(&val, vec.data(), sizeof(int64_t));
auto std_val = result.result_ids_[std_index];
ASSERT_EQ(val, std_val);
auto internal_offset = result.internal_seg_offsets_[std_index];
auto std_val = std_vec[internal_offset];
ASSERT_EQ(val, std_val) << "io:" << internal_offset;
++std_index;
}
}

View File

@ -0,0 +1,120 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include "query/SubQueryResult.h"
#include <vector>
#include <queue>
#include <random>
using namespace milvus;
using namespace milvus::query;
// Merges `iteration` randomly generated partial results into one SubQueryResult under
// the L2 metric and checks that, for every query, the merged labels/values equal the
// topk smallest samples tracked by an independent max-heap reference.
TEST(Reduce, SubQueryResult) {
    constexpr int64_t limit = 100000000L;
    int64_t num_queries = 512;
    int64_t topk = 32;
    int64_t iteration = 50;
    auto metric_type = MetricType::METRIC_L2;

    // Reference model: one max-heap per query, pre-filled with topk copies of `limit`.
    // Pushing a sample and then popping the maximum keeps the topk smallest values.
    using queue_type = std::priority_queue<int64_t>;
    std::vector<queue_type> ref_results(num_queries);
    for (auto& heap : ref_results) {
        for (int filled = 0; filled < topk; ++filled) {
            heap.push(limit);
        }
    }

    std::default_random_engine e(42);
    SubQueryResult final_result(num_queries, topk, metric_type);
    for (int round = 0; round < iteration; ++round) {
        std::vector<int64_t> labels;
        std::vector<float> values;
        for (int n = 0; n < num_queries; ++n) {
            auto& heap = ref_results[n];
            for (int k = 0; k < topk; ++k) {
                auto gen_x = e() % limit;
                heap.push(gen_x);
                heap.pop();
                // The same sample serves as both the label and the (float) score.
                labels.push_back(gen_x);
                values.push_back(gen_x);
            }
            // Sort this query's slice ascending before handing it to merge()
            // (presumably the order merge() expects for L2 -- confirm).
            const auto slice_begin = n * topk;
            const auto slice_end = slice_begin + topk;
            std::sort(labels.begin() + slice_begin, labels.begin() + slice_end);
            std::sort(values.begin() + slice_begin, values.begin() + slice_end);
        }
        SubQueryResult sub_result(num_queries, topk, metric_type);
        sub_result.mutable_values() = values;
        sub_result.mutable_labels() = labels;
        final_result.merge(sub_result);
    }

    for (int n = 0; n < num_queries; ++n) {
        ASSERT_EQ(ref_results[n].size(), topk);
        // The max-heap yields the largest kept sample first, so the query's slice of
        // the merged result is walked from its last entry back to its first.
        const auto last = (n + 1) * topk - 1;
        for (int k = 0; k < topk; ++k) {
            auto ref_x = ref_results[n].top();
            ref_results[n].pop();
            auto label = final_result.get_labels()[last - k];
            auto value = final_result.get_values()[last - k];
            ASSERT_EQ(label, ref_x);
            ASSERT_EQ(value, ref_x);
        }
    }
}
// Same scenario as the ascending test, but under the inner-product metric: the merged
// result must hold, for every query, the topk largest samples tracked by an independent
// min-heap reference.
// NOTE(review): scores are stored as float, which represents integers exactly only up
// to 2^24; samples close to `limit` are rounded, so distinct samples could map to the
// same score and make the label order depend on how merge() breaks ties -- confirm.
TEST(Reduce, SubQueryResultDesc) {
    constexpr int64_t limit = 100000000L;
    constexpr int64_t init_value = 0;
    int64_t num_queries = 512;
    int64_t topk = 32;
    int64_t iteration = 50;
    auto metric_type = MetricType::METRIC_INNER_PRODUCT;

    // Reference model: one min-heap per query, pre-filled with topk copies of
    // `init_value`. Pushing a sample and then popping the minimum keeps the topk
    // largest values.
    using queue_type = std::priority_queue<int64_t, std::vector<int64_t>, std::greater<int64_t>>;
    std::vector<queue_type> ref_results(num_queries);
    for (auto& heap : ref_results) {
        for (int filled = 0; filled < topk; ++filled) {
            heap.push(init_value);
        }
    }

    std::default_random_engine e(42);
    SubQueryResult final_result(num_queries, topk, metric_type);
    for (int round = 0; round < iteration; ++round) {
        std::vector<int64_t> labels;
        std::vector<float> values;
        for (int n = 0; n < num_queries; ++n) {
            auto& heap = ref_results[n];
            for (int k = 0; k < topk; ++k) {
                auto gen_x = e() % limit;
                heap.push(gen_x);
                heap.pop();
                // The same sample serves as both the label and the (float) score.
                labels.push_back(gen_x);
                values.push_back(gen_x);
            }
            // Sort this query's slice descending before handing it to merge()
            // (presumably the order merge() expects for inner product -- confirm).
            const auto slice_begin = n * topk;
            const auto slice_end = slice_begin + topk;
            std::sort(labels.begin() + slice_begin, labels.begin() + slice_end, std::greater<int64_t>());
            std::sort(values.begin() + slice_begin, values.begin() + slice_end, std::greater<float>());
        }
        SubQueryResult sub_result(num_queries, topk, metric_type);
        sub_result.mutable_values() = values;
        sub_result.mutable_labels() = labels;
        final_result.merge(sub_result);
    }

    for (int n = 0; n < num_queries; ++n) {
        ASSERT_EQ(ref_results[n].size(), topk);
        // The min-heap yields the smallest kept sample first, so the query's slice of
        // the merged result is walked from its last entry back to its first.
        const auto last = (n + 1) * topk - 1;
        for (int k = 0; k < topk; ++k) {
            auto ref_x = ref_results[n].top();
            ref_results[n].pop();
            auto label = final_result.get_labels()[last - k];
            auto value = final_result.get_values()[last - k];
            ASSERT_EQ(label, ref_x);
            ASSERT_EQ(value, ref_x);
        }
    }
}

View File

@ -58,6 +58,10 @@ if __name__ == "__main__":
'visitor_name': "ExecExprVisitor",
"parameter_name": 'expr',
},
{
'visitor_name': "VerifyExprVisitor",
"parameter_name": 'expr',
},
],
'PlanNode': [
{
@ -68,7 +72,10 @@ if __name__ == "__main__":
'visitor_name': "ExecPlanNodeVisitor",
"parameter_name": 'node',
},
{
'visitor_name': "VerifyPlanNodeVisitor",
"parameter_name": 'node',
},
]
}
extract_extra_body(visitor_info, query_path)

View File

@ -13,7 +13,7 @@
#include "@@base_visitor@@.h"
namespace @@namespace@@ {
class @@visitor_name@@ : @@base_visitor@@ {
class @@visitor_name@@ : public @@base_visitor@@ {
public:
@@body@@