mirror of https://github.com/milvus-io/milvus.git
Support to filter with json expr (#23739)
Signed-off-by: yah01 <yang.cen@zilliz.com>
pull/23825/head
parent
c1e8406c44
commit
62eea5286f
|
@ -33,6 +33,8 @@
|
|||
|
||||
namespace milvus {
|
||||
using document = simdjson::ondemand::document;
|
||||
template <typename T>
|
||||
using value_result = simdjson::simdjson_result<T>;
|
||||
class Json {
|
||||
public:
|
||||
Json() = default;
|
||||
|
@ -41,9 +43,6 @@ class Json {
|
|||
data_ = own_data_.value();
|
||||
}
|
||||
|
||||
explicit Json(simdjson::padded_string_view data) : data_(data) {
|
||||
}
|
||||
|
||||
Json(const char* data, size_t len, size_t cap) : data_(data, len) {
|
||||
AssertInfo(len + simdjson::SIMDJSON_PADDING <= cap,
|
||||
fmt::format("create json without enough memory size for "
|
||||
|
@ -58,16 +57,33 @@ class Json {
|
|||
Json(const char* data, size_t len) : data_(data, len) {
|
||||
}
|
||||
|
||||
Json(Json&& json) = default;
|
||||
Json(const Json& json) {
|
||||
if (json.own_data_.has_value()) {
|
||||
own_data_ = simdjson::padded_string(
|
||||
json.own_data_.value().data(), json.own_data_.value().length());
|
||||
data_ = own_data_.value();
|
||||
} else {
|
||||
data_ = json.data_;
|
||||
}
|
||||
};
|
||||
// Move constructor.
// Takes over the source's owned buffer when there is one and re-points the
// view at the buffer now held by this object; a non-owning source is copied
// as a plain view.
Json(Json&& json) noexcept {
    if (!json.own_data_.has_value()) {
        data_ = json.data_;
        return;
    }

    own_data_ = std::move(json.own_data_);
    data_ = own_data_.value();
}
|
||||
|
||||
// Copy assignment.
// When `json` owns its bytes, this object makes its own padded copy and
// views that copy; otherwise it takes over `json`'s non-owning view.
// Returns *this.
Json&
operator=(const Json& json) {
    if (json.own_data_.has_value()) {
        own_data_ = simdjson::padded_string(
            json.own_data_.value().data(), json.own_data_.value().length());
        // The view must refer to OUR copy, not to json's buffer: re-pointing
        // it at json.data_ afterwards would leave a dangling view once
        // `json` is destroyed.
        data_ = own_data_.value();
    } else {
        data_ = json.data_;
    }

    return *this;
}
|
||||
|
||||
|
@ -75,11 +91,6 @@ class Json {
|
|||
return data_;
|
||||
}
|
||||
|
||||
void
|
||||
parse(simdjson::padded_string_view data) {
|
||||
data_ = data;
|
||||
}
|
||||
|
||||
document
|
||||
doc() const {
|
||||
thread_local simdjson::ondemand::parser parser;
|
||||
|
@ -91,36 +102,33 @@ class Json {
|
|||
parser.iterate(data_, data_.size() + simdjson::SIMDJSON_PADDING)
|
||||
.get(doc);
|
||||
AssertInfo(err == simdjson::SUCCESS,
|
||||
fmt::format("failed to parse the json: {}", err));
|
||||
fmt::format("failed to parse the json {}: {}",
|
||||
data_,
|
||||
simdjson::error_message(err)));
|
||||
return doc;
|
||||
}
|
||||
|
||||
simdjson::ondemand::value
|
||||
operator[](const std::string_view field) const {
|
||||
simdjson::ondemand::value result;
|
||||
auto err = doc().get_value()[field].get(result);
|
||||
AssertInfo(
|
||||
err == simdjson::SUCCESS,
|
||||
fmt::format("failed to access the field {}: {}", field, err));
|
||||
return result;
|
||||
}
|
||||
|
||||
simdjson::ondemand::value
|
||||
operator[](std::vector<std::string> nested_path) const {
|
||||
bool
|
||||
exist(std::vector<std::string> nested_path) const {
|
||||
std::for_each(
|
||||
nested_path.begin(), nested_path.end(), [](std::string& key) {
|
||||
boost::replace_all(key, "~", "~0");
|
||||
boost::replace_all(key, "/", "~1");
|
||||
});
|
||||
auto pointer = boost::algorithm::join(nested_path, "/");
|
||||
simdjson::ondemand::value result;
|
||||
auto err = doc().at_pointer(pointer).get(result);
|
||||
AssertInfo(
|
||||
err == simdjson::SUCCESS,
|
||||
fmt::format("failed to access the field with json pointer {}: {}",
|
||||
pointer,
|
||||
err));
|
||||
return result;
|
||||
auto pointer = "/" + boost::algorithm::join(nested_path, "/");
|
||||
return doc().at_pointer(pointer).error() == simdjson::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
value_result<T>
|
||||
at(std::vector<std::string> nested_path) const {
|
||||
std::for_each(
|
||||
nested_path.begin(), nested_path.end(), [](std::string& key) {
|
||||
boost::replace_all(key, "~", "~0");
|
||||
boost::replace_all(key, "/", "~1");
|
||||
});
|
||||
auto pointer = "/" + boost::algorithm::join(nested_path, "/");
|
||||
return doc().at_pointer(pointer).get<T>();
|
||||
}
|
||||
|
||||
std::string_view
|
||||
|
@ -130,7 +138,7 @@ class Json {
|
|||
|
||||
private:
|
||||
std::optional<simdjson::padded_string>
|
||||
own_data_;  // this could be empty, then the Json will be just a view on bytes
|
||||
simdjson::padded_string_view data_;
|
||||
own_data_{};  // this could be empty, then the Json will be just a view on bytes
|
||||
simdjson::padded_string_view data_{};
|
||||
};
|
||||
} // namespace milvus
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "common/Schema.h"
|
||||
#include "common/Types.h"
|
||||
#include "pb/plan.pb.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
@ -33,6 +34,27 @@ using optype = proto::plan::OpType;
|
|||
|
||||
class ExprVisitor;
|
||||
|
||||
struct ColumnInfo {
|
||||
FieldId field_id;
|
||||
DataType data_type;
|
||||
std::vector<std::string> nested_path;
|
||||
|
||||
ColumnInfo(const proto::plan::ColumnInfo& column_info)
|
||||
: field_id(column_info.field_id()),
|
||||
data_type(static_cast<DataType>(column_info.data_type())),
|
||||
nested_path(column_info.nested_path().begin(),
|
||||
column_info.nested_path().end()) {
|
||||
}
|
||||
|
||||
ColumnInfo(FieldId field_id,
|
||||
DataType data_type,
|
||||
std::vector<std::string> nested_path = {})
|
||||
: field_id(field_id),
|
||||
data_type(data_type),
|
||||
nested_path(std::move(nested_path)) {
|
||||
}
|
||||
};
|
||||
|
||||
// Base of all Exprs
|
||||
struct Expr {
|
||||
public:
|
||||
|
@ -132,8 +154,8 @@ static const std::map<ArithOpType, std::string> mapping_arith_op_ = {
|
|||
};
|
||||
|
||||
struct BinaryArithOpEvalRangeExpr : Expr {
|
||||
const FieldId field_id_;
|
||||
const DataType data_type_;
|
||||
const ColumnInfo column_;
|
||||
const proto::plan::GenericValue::ValCase val_case_;
|
||||
const OpType op_type_;
|
||||
const ArithOpType arith_op_;
|
||||
|
||||
|
@ -141,12 +163,13 @@ struct BinaryArithOpEvalRangeExpr : Expr {
|
|||
// prevent accidental instantiation
|
||||
BinaryArithOpEvalRangeExpr() = delete;
|
||||
|
||||
BinaryArithOpEvalRangeExpr(const FieldId field_id,
|
||||
const DataType data_type,
|
||||
const OpType op_type,
|
||||
const ArithOpType arith_op)
|
||||
: field_id_(field_id),
|
||||
data_type_(data_type),
|
||||
BinaryArithOpEvalRangeExpr(
|
||||
ColumnInfo column,
|
||||
const proto::plan::GenericValue::ValCase val_case,
|
||||
const OpType op_type,
|
||||
const ArithOpType arith_op)
|
||||
: column_(std::move(column)),
|
||||
val_case_(val_case),
|
||||
op_type_(op_type),
|
||||
arith_op_(arith_op) {
|
||||
}
|
||||
|
@ -189,8 +212,8 @@ struct UnaryRangeExpr : Expr {
|
|||
};
|
||||
|
||||
struct BinaryRangeExpr : Expr {
|
||||
const FieldId field_id_;
|
||||
const DataType data_type_;
|
||||
const ColumnInfo column_;
|
||||
const proto::plan::GenericValue::ValCase val_case_;
|
||||
const bool lower_inclusive_;
|
||||
const bool upper_inclusive_;
|
||||
|
||||
|
@ -198,12 +221,12 @@ struct BinaryRangeExpr : Expr {
|
|||
// prevent accidental instantiation
|
||||
BinaryRangeExpr() = delete;
|
||||
|
||||
BinaryRangeExpr(const FieldId field_id,
|
||||
const DataType data_type,
|
||||
BinaryRangeExpr(ColumnInfo column,
|
||||
const proto::plan::GenericValue::ValCase val_case,
|
||||
const bool lower_inclusive,
|
||||
const bool upper_inclusive)
|
||||
: field_id_(field_id),
|
||||
data_type_(data_type),
|
||||
: column_(std::move(column)),
|
||||
val_case_(val_case),
|
||||
lower_inclusive_(lower_inclusive),
|
||||
upper_inclusive_(upper_inclusive) {
|
||||
}
|
||||
|
|
|
@ -17,10 +17,12 @@
|
|||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <boost/container/vector.hpp>
|
||||
|
||||
#include "Expr.h"
|
||||
#include "pb/plan.pb.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
|
@ -40,13 +42,15 @@ struct BinaryArithOpEvalRangeExprImpl : BinaryArithOpEvalRangeExpr {
|
|||
const T right_operand_;
|
||||
const T value_;
|
||||
|
||||
BinaryArithOpEvalRangeExprImpl(const FieldId field_id,
|
||||
const DataType data_type,
|
||||
const ArithOpType arith_op,
|
||||
const T right_operand,
|
||||
const OpType op_type,
|
||||
const T value)
|
||||
: BinaryArithOpEvalRangeExpr(field_id, data_type, op_type, arith_op),
|
||||
BinaryArithOpEvalRangeExprImpl(
|
||||
ColumnInfo column,
|
||||
const proto::plan::GenericValue::ValCase val_case,
|
||||
const ArithOpType arith_op,
|
||||
const T right_operand,
|
||||
const OpType op_type,
|
||||
const T value)
|
||||
: BinaryArithOpEvalRangeExpr(
|
||||
std::forward<ColumnInfo>(column), val_case, op_type, arith_op),
|
||||
right_operand_(right_operand),
|
||||
value_(value) {
|
||||
}
|
||||
|
@ -69,14 +73,16 @@ struct BinaryRangeExprImpl : BinaryRangeExpr {
|
|||
const T lower_value_;
|
||||
const T upper_value_;
|
||||
|
||||
BinaryRangeExprImpl(const FieldId field_id,
|
||||
const DataType data_type,
|
||||
BinaryRangeExprImpl(ColumnInfo column,
|
||||
const proto::plan::GenericValue::ValCase val_case,
|
||||
const bool lower_inclusive,
|
||||
const bool upper_inclusive,
|
||||
const T lower_value,
|
||||
const T upper_value)
|
||||
: BinaryRangeExpr(
|
||||
field_id, data_type, lower_inclusive, upper_inclusive),
|
||||
: BinaryRangeExpr(std::forward<ColumnInfo>(column),
|
||||
val_case,
|
||||
lower_inclusive,
|
||||
upper_inclusive),
|
||||
lower_value_(lower_value),
|
||||
upper_value_(upper_value) {
|
||||
}
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#include "Plan.h"
|
||||
#include "generated/ExtractInfoPlanNodeVisitor.h"
|
||||
#include "generated/VerifyPlanNodeVisitor.h"
|
||||
#include "pb/plan.pb.h"
|
||||
#include "query/Expr.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
|
@ -92,7 +94,7 @@ Parser::ParseRangeNode(const Json& out_body) {
|
|||
Assert(out_body.size() == 1);
|
||||
auto out_iter = out_body.begin();
|
||||
auto field_name = FieldName(out_iter.key());
|
||||
auto body = out_iter.value();
|
||||
auto& body = out_iter.value();
|
||||
auto data_type = schema[field_name].get_data_type();
|
||||
Assert(!datatype_is_vector(data_type));
|
||||
|
||||
|
@ -302,8 +304,9 @@ Parser::ParseRangeNodeImpl(const FieldName& field_name, const Json& body) {
|
|||
}
|
||||
|
||||
return std::make_unique<BinaryArithOpEvalRangeExprImpl<T>>(
|
||||
schema.get_field_id(field_name),
|
||||
schema[field_name].get_data_type(),
|
||||
ColumnInfo(schema.get_field_id(field_name),
|
||||
schema[field_name].get_data_type()),
|
||||
proto::plan::GenericValue::ValCase::VAL_NOT_SET,
|
||||
arith_op_mapping_.at(arith_op_name),
|
||||
right_operand,
|
||||
mapping_.at(op_name),
|
||||
|
@ -366,8 +369,9 @@ Parser::ParseRangeNodeImpl(const FieldName& field_name, const Json& body) {
|
|||
AssertInfo(has_lower_value && has_upper_value,
|
||||
"illegal binary-range node");
|
||||
return std::make_unique<BinaryRangeExprImpl<T>>(
|
||||
schema.get_field_id(field_name),
|
||||
schema[field_name].get_data_type(),
|
||||
ColumnInfo(schema.get_field_id(field_name),
|
||||
schema[field_name].get_data_type()),
|
||||
proto::plan::GenericValue::ValCase::VAL_NOT_SET,
|
||||
lower_inclusive,
|
||||
upper_inclusive,
|
||||
lower_value,
|
||||
|
|
|
@ -13,12 +13,15 @@
|
|||
|
||||
#include <google/protobuf/text_format.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "ExprImpl.h"
|
||||
#include "common/VectorTrait.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "generated/ExtractInfoExprVisitor.h"
|
||||
#include "generated/ExtractInfoPlanNodeVisitor.h"
|
||||
#include "pb/plan.pb.h"
|
||||
|
||||
namespace milvus::query {
|
||||
namespace planpb = milvus::proto::plan;
|
||||
|
@ -107,8 +110,8 @@ ExtractBinaryRangeExprImpl(FieldId field_id,
|
|||
}
|
||||
};
|
||||
return std::make_unique<BinaryRangeExprImpl<T>>(
|
||||
field_id,
|
||||
data_type,
|
||||
expr_proto.column_info(),
|
||||
expr_proto.lower_value().val_case(),
|
||||
expr_proto.lower_inclusive(),
|
||||
expr_proto.upper_inclusive(),
|
||||
getValue(expr_proto.lower_value()),
|
||||
|
@ -137,11 +140,11 @@ ExtractBinaryArithOpEvalRangeExprImpl(
|
|||
}
|
||||
};
|
||||
return std::make_unique<BinaryArithOpEvalRangeExprImpl<T>>(
|
||||
field_id,
|
||||
data_type,
|
||||
static_cast<ArithOpType>(expr_proto.arith_op()),
|
||||
expr_proto.column_info(),
|
||||
expr_proto.value().val_case(),
|
||||
expr_proto.arith_op(),
|
||||
getValue(expr_proto.right_operand()),
|
||||
static_cast<OpType>(expr_proto.op()),
|
||||
expr_proto.op(),
|
||||
getValue(expr_proto.value()));
|
||||
}
|
||||
|
||||
|
@ -342,6 +345,25 @@ ProtoParser::ParseBinaryRangeExpr(const proto::plan::BinaryRangeExpr& expr_pb) {
|
|||
return ExtractBinaryRangeExprImpl<std::string>(
|
||||
field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::JSON: {
|
||||
switch (expr_pb.lower_value().val_case()) {
|
||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||
return ExtractBinaryRangeExprImpl<bool>(
|
||||
field_id, data_type, expr_pb);
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||
return ExtractBinaryRangeExprImpl<int64_t>(
|
||||
field_id, data_type, expr_pb);
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||
return ExtractBinaryRangeExprImpl<double>(
|
||||
field_id, data_type, expr_pb);
|
||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||
return ExtractBinaryRangeExprImpl<std::string>(
|
||||
field_id, data_type, expr_pb);
|
||||
default:
|
||||
PanicInfo("unknown data type in expression");
|
||||
}
|
||||
}
|
||||
|
||||
default: {
|
||||
PanicInfo("unsupported data type");
|
||||
}
|
||||
|
@ -473,6 +495,20 @@ ProtoParser::ParseBinaryArithOpEvalRangeExpr(
|
|||
return ExtractBinaryArithOpEvalRangeExprImpl<double>(
|
||||
field_id, data_type, expr_pb);
|
||||
}
|
||||
case DataType::JSON: {
|
||||
switch (expr_pb.value().val_case()) {
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<int64_t>(
|
||||
field_id, data_type, expr_pb);
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||
return ExtractBinaryArithOpEvalRangeExprImpl<double>(
|
||||
field_id, data_type, expr_pb);
|
||||
default:
|
||||
PanicInfo(fmt::format(
|
||||
"unsupported data type {} in expression",
|
||||
expr_pb.value().val_case()));
|
||||
}
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported data type");
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
// DO NOT EDIT
|
||||
#include <optional>
|
||||
#include <boost/variant.hpp>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <deque>
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
|
@ -78,11 +79,21 @@ class ExecExprVisitor : public ExprVisitor {
|
|||
auto
|
||||
ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> BitsetType;
|
||||
|
||||
template <typename ExprValueType>
|
||||
auto
|
||||
ExecBinaryArithOpEvalRangeVisitorDispatcherJson(
|
||||
BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType;
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
||||
BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType;
|
||||
|
||||
template <typename ExprValueType>
|
||||
auto
|
||||
ExecBinaryRangeVisitorDispatcherJson(BinaryRangeExpr& expr_raw)
|
||||
-> BitsetType;
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw) -> BitsetType;
|
||||
|
|
|
@ -14,13 +14,21 @@
|
|||
#include <boost/variant.hpp>
|
||||
#include <deque>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#include "common/Json.h"
|
||||
#include "common/Types.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "pb/plan.pb.h"
|
||||
#include "query/ExprImpl.h"
|
||||
#include "query/Relational.h"
|
||||
#include "query/Utils.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "simdjson/error.h"
|
||||
|
||||
namespace milvus::query {
|
||||
// THIS CONTAINS EXTRA BODY FOR VISITOR
|
||||
|
@ -194,8 +202,7 @@ ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id,
|
|||
auto chunk = segment_.chunk_data<T>(field_id, chunk_id);
|
||||
const T* data = chunk.data();
|
||||
for (int index = 0; index < this_size; ++index) {
|
||||
auto x = data[index];
|
||||
result[index] = element_func(x);
|
||||
result[index] = element_func(data[index]);
|
||||
}
|
||||
|
||||
results.emplace_back(std::move(result));
|
||||
|
@ -360,6 +367,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
auto right_operand = expr.right_operand_;
|
||||
auto op = expr.op_type_;
|
||||
auto val = expr.value_;
|
||||
auto& nested_path = expr.column_.nested_path;
|
||||
|
||||
switch (op) {
|
||||
case OpType::Equal: {
|
||||
|
@ -370,11 +378,13 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x + right_operand) == val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) {
|
||||
auto elem_func = [val, right_operand, &nested_path](T x) {
|
||||
// visit the nested field
|
||||
// now it must be Json
|
||||
return ((x + right_operand) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Sub: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
|
@ -386,7 +396,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return ((x - right_operand) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mul: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
|
@ -398,7 +408,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return ((x * right_operand) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Div: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
|
@ -410,7 +420,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return ((x / right_operand) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mod: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
|
@ -422,7 +432,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return (static_cast<T>(fmod(x, right_operand)) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported arithmetic operation");
|
||||
|
@ -441,7 +451,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return ((x + right_operand) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Sub: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
|
@ -453,7 +463,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return ((x - right_operand) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mul: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
|
@ -465,7 +475,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return ((x * right_operand) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Div: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
|
@ -477,7 +487,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return ((x / right_operand) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mod: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
|
@ -489,7 +499,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
return (static_cast<T>(fmod(x, right_operand)) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(
|
||||
expr.field_id_, index_func, elem_func);
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported arithmetic operation");
|
||||
|
@ -503,6 +513,177 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
|
|||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
template <typename ExprValueType>
|
||||
auto
|
||||
ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcherJson(
|
||||
BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType {
|
||||
auto& expr =
|
||||
static_cast<BinaryArithOpEvalRangeExprImpl<ExprValueType>&>(expr_raw);
|
||||
using Index = index::ScalarIndex<milvus::Json>;
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
|
||||
auto arith_op = expr.arith_op_;
|
||||
auto right_operand = expr.right_operand_;
|
||||
auto op = expr.op_type_;
|
||||
auto val = expr.value_;
|
||||
auto& nested_path = expr.column_.nested_path;
|
||||
|
||||
switch (op) {
|
||||
case OpType::Equal: {
|
||||
switch (arith_op) {
|
||||
case ArithOpType::Add: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return !x.error() &&
|
||||
((x.value() + right_operand) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Sub: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return !x.error() &&
|
||||
((x.value() - right_operand) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mul: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return !x.error() &&
|
||||
((x.value() * right_operand) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Div: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return !x.error() &&
|
||||
((x.value() / right_operand) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mod: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return !x.error() &&
|
||||
(static_cast<ExprValueType>(
|
||||
fmod(x.value(), right_operand)) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported arithmetic operation");
|
||||
}
|
||||
}
|
||||
}
|
||||
case OpType::NotEqual: {
|
||||
switch (arith_op) {
|
||||
case ArithOpType::Add: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return x.error() ||
|
||||
((x.value() + right_operand) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Sub: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return x.error() ||
|
||||
((x.value() - right_operand) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mul: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return x.error() ||
|
||||
((x.value() * right_operand) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Div: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return x.error() ||
|
||||
((x.value() / right_operand) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mod: {
|
||||
auto index_func = [val, right_operand](Index* index,
|
||||
size_t offset) {
|
||||
return false;
|
||||
};
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(nested_path);
|
||||
return x.error() ||
|
||||
(static_cast<ExprValueType>(
|
||||
fmod(x.value(), right_operand)) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported arithmetic operation");
|
||||
}
|
||||
}
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported range node with arithmetic operation");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma ide diagnostic ignored "Simplify"
|
||||
template <typename T>
|
||||
|
@ -517,28 +698,86 @@ ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw)
|
|||
|
||||
bool lower_inclusive = expr.lower_inclusive_;
|
||||
bool upper_inclusive = expr.upper_inclusive_;
|
||||
IndexInnerType val1 = IndexInnerType(expr.lower_value_);
|
||||
IndexInnerType val2 = IndexInnerType(expr.upper_value_);
|
||||
IndexInnerType val1 = expr.lower_value_;
|
||||
IndexInnerType val2 = expr.upper_value_;
|
||||
|
||||
auto index_func = [=](Index* index) {
|
||||
return index->Range(val1, lower_inclusive, val2, upper_inclusive);
|
||||
};
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 <= x && x <= val2); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
} else if (lower_inclusive && !upper_inclusive) {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 <= x && x < val2); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
} else if (!lower_inclusive && upper_inclusive) {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 < x && x <= val2); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
} else {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 < x && x < val2); };
|
||||
return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
return ExecRangeVisitorImpl<T>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
template <typename ExprValueType>
|
||||
auto
|
||||
ExecExprVisitor::ExecBinaryRangeVisitorDispatcherJson(BinaryRangeExpr& expr_raw)
|
||||
-> BitsetType {
|
||||
using Index = index::ScalarIndex<milvus::Json>;
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
|
||||
auto& expr = static_cast<BinaryRangeExprImpl<ExprValueType>&>(expr_raw);
|
||||
bool lower_inclusive = expr.lower_inclusive_;
|
||||
bool upper_inclusive = expr.upper_inclusive_;
|
||||
ExprValueType val1 = expr.lower_value_;
|
||||
ExprValueType val2 = expr.upper_value_;
|
||||
|
||||
// no json index now
|
||||
auto index_func = [=](Index* index) { return TargetBitmapPtr{}; };
|
||||
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(expr.column_.nested_path);
|
||||
auto value = x.value();
|
||||
return !x.error() && (val1 <= value && value <= val2);
|
||||
};
|
||||
return ExecRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
} else if (lower_inclusive && !upper_inclusive) {
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(expr.column_.nested_path);
|
||||
auto value = x.value();
|
||||
return !x.error() && (val1 <= value && value < val2);
|
||||
};
|
||||
return ExecRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
} else if (!lower_inclusive && upper_inclusive) {
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(expr.column_.nested_path);
|
||||
auto value = x.value();
|
||||
return !x.error() && (val1 < value && value <= val2);
|
||||
};
|
||||
return ExecRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
} else {
|
||||
auto elem_func = [&](const milvus::Json& json) {
|
||||
auto x = json.template at<GetType>(expr.column_.nested_path);
|
||||
auto value = x.value();
|
||||
return !x.error() && (val1 < value && value < val2);
|
||||
};
|
||||
return ExecRangeVisitorImpl<milvus::Json>(
|
||||
expr.column_.field_id, index_func, elem_func);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ExecExprVisitor::visit(UnaryRangeExpr& expr) {
|
||||
auto& field_meta = segment_.get_schema()[expr.field_id_];
|
||||
|
@ -592,11 +831,11 @@ ExecExprVisitor::visit(UnaryRangeExpr& expr) {
|
|||
|
||||
void
|
||||
ExecExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
|
||||
auto& field_meta = segment_.get_schema()[expr.field_id_];
|
||||
AssertInfo(expr.data_type_ == field_meta.get_data_type(),
|
||||
auto& field_meta = segment_.get_schema()[expr.column_.field_id];
|
||||
AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]DataType of expr isn't field_meta data type");
|
||||
BitsetType res;
|
||||
switch (expr.data_type_) {
|
||||
switch (expr.column_.data_type) {
|
||||
case DataType::INT8: {
|
||||
res = ExecBinaryArithOpEvalRangeVisitorDispatcher<int8_t>(expr);
|
||||
break;
|
||||
|
@ -621,6 +860,30 @@ ExecExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
|
|||
res = ExecBinaryArithOpEvalRangeVisitorDispatcher<double>(expr);
|
||||
break;
|
||||
}
|
||||
case DataType::JSON: {
|
||||
switch (expr.val_case_) {
|
||||
case proto::plan::GenericValue::ValCase::kBoolVal: {
|
||||
res = ExecBinaryArithOpEvalRangeVisitorDispatcherJson<bool>(
|
||||
expr);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
||||
res = ExecBinaryArithOpEvalRangeVisitorDispatcherJson<
|
||||
int64_t>(expr);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
||||
res =
|
||||
ExecBinaryArithOpEvalRangeVisitorDispatcherJson<double>(
|
||||
expr);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported value type {} in expression");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
|
@ -631,11 +894,11 @@ ExecExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
|
|||
|
||||
void
|
||||
ExecExprVisitor::visit(BinaryRangeExpr& expr) {
|
||||
auto& field_meta = segment_.get_schema()[expr.field_id_];
|
||||
AssertInfo(expr.data_type_ == field_meta.get_data_type(),
|
||||
auto& field_meta = segment_.get_schema()[expr.column_.field_id];
|
||||
AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]DataType of expr isn't field_meta data type");
|
||||
BitsetType res;
|
||||
switch (expr.data_type_) {
|
||||
switch (expr.column_.data_type) {
|
||||
case DataType::BOOL: {
|
||||
res = ExecBinaryRangeVisitorDispatcher<bool>(expr);
|
||||
break;
|
||||
|
@ -672,6 +935,31 @@ ExecExprVisitor::visit(BinaryRangeExpr& expr) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case DataType::JSON: {
|
||||
switch (expr.val_case_) {
|
||||
case proto::plan::GenericValue::ValCase::kBoolVal: {
|
||||
res = ExecBinaryRangeVisitorDispatcherJson<bool>(expr);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
||||
res = ExecBinaryRangeVisitorDispatcherJson<int64_t>(expr);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
||||
res = ExecBinaryRangeVisitorDispatcherJson<double>(expr);
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::ValCase::kStringVal: {
|
||||
res =
|
||||
ExecBinaryRangeVisitorDispatcherJson<std::string>(expr);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported value type {} in expression");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
|
@ -713,9 +1001,9 @@ ExecExprVisitor::ExecCompareExprDispatcher(CompareExpr& expr, Op op)
|
|||
// check for sealed segment, load either raw field data or index
|
||||
auto left_indexing_barrier = segment_.num_chunk_index(expr.left_field_id_);
|
||||
auto left_data_barrier = segment_.num_chunk_data(expr.left_field_id_);
|
||||
AssertInfo(
|
||||
std::max(left_data_barrier, left_indexing_barrier) == num_chunk,
|
||||
"max(left_data_barrier, left_indexing_barrier) not equal to num_chunk");
|
||||
AssertInfo(std::max(left_data_barrier, left_indexing_barrier) == num_chunk,
|
||||
"max(left_data_barrier, left_indexing_barrier) not equal to "
|
||||
"num_chunk");
|
||||
|
||||
auto right_indexing_barrier =
|
||||
segment_.num_chunk_index(expr.right_field_id_);
|
||||
|
@ -909,12 +1197,12 @@ ExecExprVisitor::visit(CompareExpr& expr) {
|
|||
auto& schema = segment_.get_schema();
|
||||
auto& left_field_meta = schema[expr.left_field_id_];
|
||||
auto& right_field_meta = schema[expr.right_field_id_];
|
||||
AssertInfo(
|
||||
expr.left_data_type_ == left_field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]Left data type not equal to left field meta type");
|
||||
AssertInfo(
|
||||
expr.right_data_type_ == right_field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]right data type not equal to right field meta type");
|
||||
AssertInfo(expr.left_data_type_ == left_field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]Left data type not equal to left "
|
||||
"field meta type");
|
||||
AssertInfo(expr.right_data_type_ == right_field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]right data type not equal to right field "
|
||||
"meta type");
|
||||
|
||||
BitsetType res;
|
||||
switch (expr.op_type_) {
|
||||
|
@ -1084,7 +1372,8 @@ void
|
|||
ExecExprVisitor::visit(TermExpr& expr) {
|
||||
auto& field_meta = segment_.get_schema()[expr.field_id_];
|
||||
AssertInfo(expr.data_type_ == field_meta.get_data_type(),
|
||||
"[ExecExprVisitor]DataType of expr isn't field_meta data type ");
|
||||
"[ExecExprVisitor]DataType of expr isn't field_meta "
|
||||
"data type ");
|
||||
BitsetType res;
|
||||
switch (expr.data_type_) {
|
||||
case DataType::BOOL: {
|
||||
|
|
|
@ -51,7 +51,7 @@ ExtractInfoExprVisitor::visit(UnaryRangeExpr& expr) {
|
|||
|
||||
void
|
||||
ExtractInfoExprVisitor::visit(BinaryRangeExpr& expr) {
|
||||
plan_info_.add_involved_field(expr.field_id_);
|
||||
plan_info_.add_involved_field(expr.column_.field_id);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -62,7 +62,7 @@ ExtractInfoExprVisitor::visit(CompareExpr& expr) {
|
|||
|
||||
void
|
||||
ExtractInfoExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
|
||||
plan_info_.add_involved_field(expr.field_id_);
|
||||
plan_info_.add_involved_field(expr.column_.field_id);
|
||||
}
|
||||
|
||||
} // namespace milvus::query
|
||||
|
|
|
@ -204,8 +204,8 @@ BinaryRangeExtract(const BinaryRangeExpr& expr_raw) {
|
|||
expr,
|
||||
"[ShowExprVisitor]BinaryRangeExpr cast to BinaryRangeExprImpl failed");
|
||||
Json res{{"expr_type", "BinaryRange"},
|
||||
{"field_id", expr->field_id_.get()},
|
||||
{"data_type", datatype_name(expr->data_type_)},
|
||||
{"field_id", expr->column_.field_id.get()},
|
||||
{"data_type", datatype_name(expr->column_.data_type)},
|
||||
{"lower_inclusive", expr->lower_inclusive_},
|
||||
{"upper_inclusive", expr->upper_inclusive_},
|
||||
{"lower_value", expr->lower_value_},
|
||||
|
@ -217,9 +217,9 @@ void
|
|||
ShowExprVisitor::visit(BinaryRangeExpr& expr) {
|
||||
AssertInfo(!json_opt_.has_value(),
|
||||
"[ShowExprVisitor]Ret json already has value before visit");
|
||||
AssertInfo(datatype_is_vector(expr.data_type_) == false,
|
||||
AssertInfo(datatype_is_vector(expr.column_.data_type) == false,
|
||||
"[ShowExprVisitor]Data type of expr isn't vector type");
|
||||
switch (expr.data_type_) {
|
||||
switch (expr.column_.data_type) {
|
||||
case DataType::BOOL:
|
||||
json_opt_ = BinaryRangeExtract<bool>(expr);
|
||||
return;
|
||||
|
@ -277,8 +277,8 @@ BinaryArithOpEvalRangeExtract(const BinaryArithOpEvalRangeExpr& expr_raw) {
|
|||
"BinaryArithOpEvalRangeExprImpl failed");
|
||||
|
||||
Json res{{"expr_type", "BinaryArithOpEvalRange"},
|
||||
{"field_offset", expr->field_id_.get()},
|
||||
{"data_type", datatype_name(expr->data_type_)},
|
||||
{"field_offset", expr->column_.field_id.get()},
|
||||
{"data_type", datatype_name(expr->column_.data_type)},
|
||||
{"arith_op",
|
||||
ArithOpType_Name(static_cast<ArithOpType>(expr->arith_op_))},
|
||||
{"right_operand", expr->right_operand_},
|
||||
|
@ -291,9 +291,9 @@ void
|
|||
ShowExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
|
||||
AssertInfo(!json_opt_.has_value(),
|
||||
"[ShowExprVisitor]Ret json already has value before visit");
|
||||
AssertInfo(datatype_is_vector(expr.data_type_) == false,
|
||||
AssertInfo(datatype_is_vector(expr.column_.data_type) == false,
|
||||
"[ShowExprVisitor]Data type of expr isn't vector type");
|
||||
switch (expr.data_type_) {
|
||||
switch (expr.column_.data_type) {
|
||||
case DataType::INT8:
|
||||
json_opt_ = BinaryArithOpEvalRangeExtract<int8_t>(expr);
|
||||
return;
|
||||
|
|
|
@ -76,10 +76,12 @@ VectorBase::set_data_raw(ssize_t element_offset,
|
|||
}
|
||||
case DataType::JSON: {
|
||||
auto& json_data = FIELD_DATA(data, json);
|
||||
std::vector<Json> data_raw(json_data.size());
|
||||
std::vector<Json> data_raw{};
|
||||
data_raw.reserve(json_data.size());
|
||||
for (auto& json_bytes : json_data) {
|
||||
data_raw.emplace_back(simdjson::padded_string(json_bytes));
|
||||
}
|
||||
|
||||
return set_data_raw(element_offset, data_raw.data(), element_count);
|
||||
}
|
||||
default: {
|
||||
|
|
|
@ -20,11 +20,13 @@
|
|||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/FieldMeta.h"
|
||||
#include "common/Json.h"
|
||||
#include "common/Span.h"
|
||||
#include "common/Types.h"
|
||||
#include "common/Utils.h"
|
||||
|
@ -322,6 +324,7 @@ class ConcurrentVectorImpl : public VectorBase {
|
|||
chunk_num));
|
||||
Chunk& chunk = chunks_[chunk_id];
|
||||
auto ptr = chunk.data();
|
||||
|
||||
std::copy_n(source + source_offset * Dim,
|
||||
element_count * Dim,
|
||||
ptr + chunk_offset * Dim);
|
||||
|
|
|
@ -45,10 +45,6 @@ class SegmentGrowing : public SegmentInternalInterface {
|
|||
|
||||
// virtual Status
|
||||
// Delete(int64_t reserved_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) = 0;
|
||||
|
||||
public:
|
||||
virtual int64_t
|
||||
get_deleted_count() const = 0;
|
||||
};
|
||||
|
||||
using SegmentGrowingPtr = std::unique_ptr<SegmentGrowing>;
|
||||
|
|
|
@ -11,14 +11,22 @@
|
|||
|
||||
#include <boost/format.hpp>
|
||||
#include <gtest/gtest.h>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <regex>
|
||||
#include <vector>
|
||||
|
||||
#include "common/Json.h"
|
||||
#include "common/Types.h"
|
||||
#include "pb/plan.pb.h"
|
||||
#include "query/Expr.h"
|
||||
#include "query/ExprImpl.h"
|
||||
#include "query/Plan.h"
|
||||
#include "query/PlanNode.h"
|
||||
#include "query/generated/ShowPlanNodeVisitor.h"
|
||||
#include "query/generated/ExecExprVisitor.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "simdjson/padded_string.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
#include "index/IndexFactory.h"
|
||||
|
||||
|
@ -343,6 +351,86 @@ TEST(Expr, TestRange) {
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that a binary range predicate evaluated against an int64 value
// nested inside a JSON field (key "int") marks exactly the rows whose value
// lies in the requested interval, for every combination of inclusive and
// exclusive bounds.
TEST(Expr, TestBinaryRangeJSON) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    struct Testcase {
        bool lower_inclusive;
        bool upper_inclusive;
        int64_t lower;
        int64_t upper;
        std::vector<std::string> nested_path;
    };
    const std::vector<Testcase> testcases{
        {true, false, 10, 20, {"int"}},
        {true, true, 20, 30, {"int"}},
        {false, true, 30, 40, {"int"}},
        {false, false, 40, 50, {"int"}},
    };

    auto schema = std::make_shared<Schema>();
    auto i64_fid = schema->AddDebugField("id", DataType::INT64);
    auto json_fid = schema->AddDebugField("json", DataType::JSON);
    schema->set_primary_field_id(i64_fid);

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    const int N = 1000;
    const int num_iters = 100;
    const size_t total_rows = static_cast<size_t>(N) * num_iters;

    // Keep a copy of every generated JSON document so the expected result
    // can be recomputed independently of the segment.
    std::vector<std::string> json_col;
    json_col.reserve(total_rows);
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_json_col = raw_data.get_col<std::string>(json_fid);

        json_col.insert(
            json_col.end(), new_json_col.begin(), new_json_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // Fail cleanly instead of dereferencing a null pointer if the cast fails.
    ASSERT_NE(seg_promote, nullptr);
    ExecExprVisitor visitor(
        *seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);

    for (const auto& testcase : testcases) {
        // Reference oracle: states the interval semantics directly rather
        // than adjusting the bounds.
        auto check = [&](int64_t value) {
            const bool lower_ok = testcase.lower_inclusive
                                      ? testcase.lower <= value
                                      : testcase.lower < value;
            const bool upper_ok = testcase.upper_inclusive
                                      ? value <= testcase.upper
                                      : value < testcase.upper;
            return lower_ok && upper_ok;
        };

        RetrievePlanNode plan;
        plan.predicate_ = std::make_unique<BinaryRangeExprImpl<int64_t>>(
            ColumnInfo(json_fid, DataType::JSON, testcase.nested_path),
            proto::plan::GenericValue::ValCase::kInt64Val,
            testcase.lower_inclusive,
            testcase.upper_inclusive,
            testcase.lower,
            testcase.upper);
        auto final = visitor.call_child(*plan.predicate_.value());
        // Fatal assertion: the loop below indexes `final` by row, so a size
        // mismatch must stop the test rather than read out of range.
        ASSERT_EQ(final.size(), total_rows);

        for (size_t i = 0; i < total_rows; ++i) {
            auto ans = final[i];

            auto val = milvus::Json(simdjson::padded_string(json_col[i]))
                           .template at<int64_t>(testcase.nested_path)
                           .value();
            auto ref = check(val);
            ASSERT_EQ(ans, ref)
                << "row=" << i << " value=" << val
                << " lower_inclusive=" << testcase.lower_inclusive
                << " lower=" << testcase.lower
                << " upper_inclusive=" << testcase.upper_inclusive
                << " upper=" << testcase.upper;
        }
    }
}
|
||||
|
||||
TEST(Expr, TestTerm) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
|
@ -1110,6 +1198,154 @@ TEST(Expr, TestBinaryArithOpEvalRange) {
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that an arithmetic range predicate of the form
// `json["int"] + right_operand (==|!=) value` evaluated against an int64
// value nested inside a JSON field marks exactly the expected rows.
TEST(Expr, TestBinaryArithOpEvalRangeJSON) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    struct Testcase {
        int64_t right_operand;
        int64_t value;
        OpType op;
        std::vector<std::string> nested_path;
    };
    const std::vector<Testcase> testcases{
        {10, 20, OpType::Equal, {"int"}},
        {20, 30, OpType::Equal, {"int"}},
        {30, 40, OpType::NotEqual, {"int"}},
        {40, 50, OpType::NotEqual, {"int"}},
    };

    auto schema = std::make_shared<Schema>();
    auto i64_fid = schema->AddDebugField("id", DataType::INT64);
    auto json_fid = schema->AddDebugField("json", DataType::JSON);
    schema->set_primary_field_id(i64_fid);

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    const int N = 1000;
    const int num_iters = 100;
    const size_t total_rows = static_cast<size_t>(N) * num_iters;

    // Keep a copy of every generated JSON document so the expected result
    // can be recomputed independently of the segment.
    std::vector<std::string> json_col;
    json_col.reserve(total_rows);
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_json_col = raw_data.get_col<std::string>(json_fid);

        json_col.insert(
            json_col.end(), new_json_col.begin(), new_json_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // Fail cleanly instead of dereferencing a null pointer if the cast fails.
    ASSERT_NE(seg_promote, nullptr);
    ExecExprVisitor visitor(
        *seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);

    for (const auto& testcase : testcases) {
        // Reference oracle mirroring the predicate under test.
        auto check = [&](int64_t value) {
            if (testcase.op == OpType::Equal) {
                return value + testcase.right_operand == testcase.value;
            }
            return value + testcase.right_operand != testcase.value;
        };

        RetrievePlanNode plan;
        plan.predicate_ =
            std::make_unique<BinaryArithOpEvalRangeExprImpl<int64_t>>(
                ColumnInfo(json_fid, DataType::JSON, testcase.nested_path),
                proto::plan::GenericValue::ValCase::kInt64Val,
                ArithOpType::Add,
                testcase.right_operand,
                testcase.op,
                testcase.value);
        auto final = visitor.call_child(*plan.predicate_.value());
        // Fatal assertion: the loop below indexes `final` by row, so a size
        // mismatch must stop the test rather than read out of range.
        ASSERT_EQ(final.size(), total_rows);

        for (size_t i = 0; i < total_rows; ++i) {
            auto ans = final[i];

            auto val = milvus::Json(simdjson::padded_string(json_col[i]))
                           .template at<int64_t>(testcase.nested_path)
                           .value();
            auto ref = check(val);
            ASSERT_EQ(ans, ref) << testcase.value << " " << val;
        }
    }
}
|
||||
|
||||
// Checks that an arithmetic range predicate of the form
// `json["double"] + right_operand (==|!=) value` evaluated against a double
// value nested inside a JSON field marks exactly the expected rows.
TEST(Expr, TestBinaryArithOpEvalRangeJSONFloat) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    struct Testcase {
        double right_operand;
        double value;
        OpType op;
        std::vector<std::string> nested_path;
    };
    const std::vector<Testcase> testcases{
        {10, 20, OpType::Equal, {"double"}},
        {20, 30, OpType::Equal, {"double"}},
        {30, 40, OpType::NotEqual, {"double"}},
        {40, 50, OpType::NotEqual, {"double"}},
    };

    auto schema = std::make_shared<Schema>();
    auto i64_fid = schema->AddDebugField("id", DataType::INT64);
    auto json_fid = schema->AddDebugField("json", DataType::JSON);
    schema->set_primary_field_id(i64_fid);

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    const int N = 1000;
    const int num_iters = 100;
    const size_t total_rows = static_cast<size_t>(N) * num_iters;

    // Keep a copy of every generated JSON document so the expected result
    // can be recomputed independently of the segment.
    std::vector<std::string> json_col;
    json_col.reserve(total_rows);
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_json_col = raw_data.get_col<std::string>(json_fid);

        json_col.insert(
            json_col.end(), new_json_col.begin(), new_json_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // Fail cleanly instead of dereferencing a null pointer if the cast fails.
    ASSERT_NE(seg_promote, nullptr);
    ExecExprVisitor visitor(
        *seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);

    for (const auto& testcase : testcases) {
        // Reference oracle mirroring the predicate under test; it uses the
        // same double arithmetic and exact comparison as the original test.
        auto check = [&](double value) {
            if (testcase.op == OpType::Equal) {
                return value + testcase.right_operand == testcase.value;
            }
            return value + testcase.right_operand != testcase.value;
        };

        RetrievePlanNode plan;
        plan.predicate_ =
            std::make_unique<BinaryArithOpEvalRangeExprImpl<double>>(
                ColumnInfo(json_fid, DataType::JSON, testcase.nested_path),
                proto::plan::GenericValue::ValCase::kFloatVal,
                ArithOpType::Add,
                testcase.right_operand,
                testcase.op,
                testcase.value);
        auto final = visitor.call_child(*plan.predicate_.value());
        // Fatal assertion: the loop below indexes `final` by row, so a size
        // mismatch must stop the test rather than read out of range.
        ASSERT_EQ(final.size(), total_rows);

        for (size_t i = 0; i < total_rows; ++i) {
            auto ans = final[i];

            auto val = milvus::Json(simdjson::padded_string(json_col[i]))
                           .template at<double>(testcase.nested_path)
                           .value();
            auto ref = check(val);
            ASSERT_EQ(ans, ref) << testcase.value << " " << val;
        }
    }
}
|
||||
|
||||
TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <google/protobuf/text_format.h>
|
||||
|
||||
#include "Constants.h"
|
||||
|
@ -154,6 +155,13 @@ struct GeneratedData {
|
|||
|
||||
break;
|
||||
}
|
||||
case DataType::JSON: {
|
||||
auto ret_data = reinterpret_cast<std::string*>(ret.data());
|
||||
auto src_data =
|
||||
target_field_data.scalars().json_data().data();
|
||||
std::copy(src_data.begin(), src_data.end(), ret_data);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
|
@ -306,10 +314,10 @@ DataGen(SchemaPtr schema,
|
|||
case DataType::JSON: {
|
||||
vector<std::string> data(N);
|
||||
for (int i = 0; i < N / repeat_count; i++) {
|
||||
auto str = R"({"key":)" + std::to_string(er()) + "}";
|
||||
for (int j = 0; j < repeat_count; j++) {
|
||||
data[i * repeat_count + j] = str;
|
||||
}
|
||||
auto str = R"({"int":)" + std::to_string(er()) +
|
||||
R"(,"double":)" +
|
||||
std::to_string(static_cast<double>(er())) + "}";
|
||||
data[i] = str;
|
||||
}
|
||||
insert_cols(data, N, field_meta);
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue