mirror of https://github.com/milvus-io/milvus.git
enhance: all op(Null) is false in expr (#35527)
issue: #31728

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>

parent 04c306e63f
commit eb3e4583ec

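The rule this commit implements, "any operator applied to a NULL value evaluates to false", shows up throughout the diff as a pair of bitmaps: res records whether a row matched the predicate, and valid_res records whether the row was non-null. A minimal self-contained sketch of that masking pattern, using std::vector<bool> as a stand-in for milvus's TargetBitmapView (illustration only, not the milvus API):

#include <cstddef>
#include <vector>

// For a null row both bitmaps are forced to false, so op(NULL) == false
// and downstream logical operators can tell "false" apart from "was null".
void ApplyNullSemantics(const bool* valid_data,  // nullptr means no nulls
                        size_t size,
                        std::vector<bool>& res,
                        std::vector<bool>& valid_res) {
    if (valid_data == nullptr) {
        return;  // null_count == 0, nothing to mask
    }
    for (size_t i = 0; i < size; ++i) {
        if (!valid_data[i]) {
            res[i] = false;
            valid_res[i] = false;
        }
    }
}
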
@@ -69,7 +69,7 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
     ssize_t byte_count = (element_count + 7) / 8;
     // Note: if 'nullable == true` and valid_data is nullptr
     // means null_count == 0, will fill it with 0xFF
-    if (!valid_data) {
+    if (valid_data == nullptr) {
         valid_data_.assign(byte_count, 0xFF);
     } else {
         std::copy_n(valid_data, byte_count, valid_data_.data());

@@ -19,6 +19,8 @@
 #include <memory>
+#include <string>

 #include "EasyAssert.h"
 #include "Types.h"
+#include "common/FieldData.h"

 namespace milvus {

@@ -50,6 +52,7 @@ class BaseVector {
  protected:
     DataType type_kind_;
     size_t length_;
+    // todo: use null_count to skip some bitset operate
     std::optional<size_t> null_count_;
 };

@@ -65,8 +68,8 @@ class ColumnVector final : public BaseVector {
                  size_t length,
                  std::optional<size_t> null_count = std::nullopt)
         : BaseVector(data_type, length, null_count) {
-        //todo: support null expr
         values_ = InitScalarFieldData(data_type, false, length);
+        valid_values_ = InitScalarFieldData(data_type, false, length);
     }

     // ColumnVector(FixedVector<bool>&& data)

@@ -75,15 +78,25 @@ class ColumnVector final : public BaseVector {
     //     std::make_shared<FieldData<bool>>(DataType::BOOL, std::move(data));
     // }

+    // // the size is the number of bits
+    // ColumnVector(TargetBitmap&& bitmap)
+    //     : BaseVector(DataType::INT8, bitmap.size()) {
+    //     values_ = std::make_shared<FieldDataImpl<uint8_t, false>>(
+    //         bitmap.size(), DataType::INT8, false, std::move(bitmap).into());
+    // }
+
     // the size is the number of bits
-    ColumnVector(TargetBitmap&& bitmap)
+    ColumnVector(TargetBitmap&& bitmap, TargetBitmap&& valid_bitmap)
         : BaseVector(DataType::INT8, bitmap.size()) {
         values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(DataType::INT8,
                                                              std::move(bitmap));
+        valid_values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(
+            DataType::INT8, std::move(valid_bitmap));
     }

     virtual ~ColumnVector() override {
         values_.reset();
+        valid_values_.reset();
     }

     void*

@@ -91,6 +104,11 @@ class ColumnVector final : public BaseVector {
         return values_->Data();
     }

+    void*
+    GetValidRawData() {
+        return valid_values_->Data();
+    }
+
     template <typename As>
     const As*
     RawAsValues() const {

@@ -99,6 +117,7 @@ class ColumnVector final : public BaseVector {

  private:
     FieldDataPtr values_;
+    FieldDataPtr valid_values_;
 };

 using ColumnVectorPtr = std::shared_ptr<ColumnVector>;

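Every expression below now allocates its result as a ColumnVector built from two TargetBitmaps, one for values and one for validity, and reads them back through GetRawData()/GetValidRawData(). A simplified sketch of that two-bitmap shape (BitmapVector is a hypothetical stand-in, not the milvus class):

#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

struct BitmapVector {
    std::vector<bool> values;  // plays the role of values_
    std::vector<bool> valid;   // plays the role of valid_values_

    BitmapVector(std::vector<bool>&& bitmap, std::vector<bool>&& valid_bitmap)
        : values(std::move(bitmap)), valid(std::move(valid_bitmap)) {
    }
};

int main() {
    const size_t batch = 8;
    // mirrors std::make_shared<ColumnVector>(TargetBitmap(n), TargetBitmap(n))
    auto vec = std::make_shared<BitmapVector>(std::vector<bool>(batch),
                                              std::vector<bool>(batch));
    return vec->values.size() == vec->valid.size() ? 0 : 1;
}
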
@@ -25,16 +25,19 @@ PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
                               ? active_count_ - current_pos_
                               : batch_size_;

+    // always true no need to skip null
     if (real_batch_size == 0) {
        result = nullptr;
        return;
    }

-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);

     res.set();
+    valid_res.set();

     result = res_vec;
     current_pos_ += real_batch_size;

@@ -113,9 +113,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
     if (real_batch_size == 0) {
         return nullptr;
     }
-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+    valid_res.set();

     auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
     auto op_type = expr_->op_type_;

@@ -129,6 +131,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
 #define BinaryArithRangeJSONCompare(cmp)                                   \
     do {                                                                   \
         for (size_t i = 0; i < size; ++i) {                                \
+            if (valid_data != nullptr && !valid_data[i]) {                 \
+                res[i] = false;                                            \
+                valid_res[i] = false;                                      \
+                continue;                                                  \
+            }                                                              \
             auto x = data[i].template at<GetType>(pointer);                \
             if (x.error()) {                                               \
                 if constexpr (std::is_same_v<GetType, int64_t>) {          \

@@ -146,6 +153,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
 #define BinaryArithRangeJSONCompareNotEqual(cmp)                           \
     do {                                                                   \
         for (size_t i = 0; i < size; ++i) {                                \
+            if (valid_data != nullptr && !valid_data[i]) {                 \
+                res[i] = false;                                            \
+                valid_res[i] = false;                                      \
+                continue;                                                  \
+            }                                                              \
             auto x = data[i].template at<GetType>(pointer);                \
             if (x.error()) {                                               \
                 if constexpr (std::is_same_v<GetType, int64_t>) {          \

@@ -161,8 +173,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
     } while (false)

     auto execute_sub_batch = [op_type, arith_type](const milvus::Json* data,
+                                                   const bool* valid_data,
                                                    const int size,
                                                    TargetBitmapView res,
+                                                   TargetBitmapView valid_res,
                                                    ValueType val,
                                                    ValueType right_operand,
                                                    const std::string& pointer) {

@@ -197,6 +211,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = false;
+                        valid_res[i] = false;
+                        continue;
+                    }
                     int array_length = 0;
                     auto doc = data[i].doc();
                     auto array = doc.at_pointer(pointer).get_array();

@@ -246,6 +265,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = false;
+                        valid_res[i] = false;
+                        continue;
+                    }
                     int array_length = 0;
                     auto doc = data[i].doc();
                     auto array = doc.at_pointer(pointer).get_array();

@@ -295,6 +319,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = false;
+                        valid_res[i] = false;
+                        continue;
+                    }
                     int array_length = 0;
                     auto doc = data[i].doc();
                     auto array = doc.at_pointer(pointer).get_array();

@@ -344,6 +373,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = false;
+                        valid_res[i] = false;
+                        continue;
+                    }
                     int array_length = 0;
                     auto doc = data[i].doc();
                     auto array = doc.at_pointer(pointer).get_array();

@@ -393,6 +427,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = false;
+                        valid_res[i] = false;
+                        continue;
+                    }
                     int array_length = 0;
                     auto doc = data[i].doc();
                     auto array = doc.at_pointer(pointer).get_array();

@@ -442,6 +481,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = false;
+                        valid_res[i] = false;
+                        continue;
+                    }
                     int array_length = 0;
                     auto doc = data[i].doc();
                     auto array = doc.at_pointer(pointer).get_array();

@@ -471,6 +515,7 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
     int64_t processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
                                                              std::nullptr_t{},
                                                              res,
+                                                             valid_res,
                                                              value,
                                                              right_operand,
                                                              pointer);

@@ -492,9 +537,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
     if (real_batch_size == 0) {
         return nullptr;
     }
-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+    valid_res.set();

     int index = -1;
     if (expr_->column_.nested_path_.size() > 0) {

@@ -511,6 +558,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
 #define BinaryArithRangeArrayCompare(cmp)                                  \
     do {                                                                   \
         for (size_t i = 0; i < size; ++i) {                                \
+            if (valid_data != nullptr && !valid_data[i]) {                 \
+                res[i] = false;                                            \
+                valid_res[i] = false;                                      \
+                continue;                                                  \
+            }                                                              \
             if (index >= data[i].length()) {                               \
                 res[i] = false;                                            \
                 continue;                                                  \

@@ -521,8 +573,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
     } while (false)

     auto execute_sub_batch = [op_type, arith_type](const ArrayView* data,
+                                                   const bool* valid_data,
                                                    const int size,
                                                    TargetBitmapView res,
+                                                   TargetBitmapView valid_res,
                                                    ValueType val,
                                                    ValueType right_operand,
                                                    int index) {

@@ -558,6 +612,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = valid_res[i] = false;
+                        continue;
+                    }
                     res[i] = data[i].length() == val;
                 }
                 break;

@@ -601,6 +659,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = valid_res[i] = false;
+                        continue;
+                    }
                     res[i] = data[i].length() != val;
                 }
                 break;

@@ -644,6 +706,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = valid_res[i] = false;
+                        continue;
+                    }
                     res[i] = data[i].length() > val;
                 }
                 break;

@@ -687,6 +753,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = valid_res[i] = false;
+                        continue;
+                    }
                     res[i] = data[i].length() >= val;
                 }
                 break;

@@ -730,6 +800,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = valid_res[i] = false;
+                        continue;
+                    }
                     res[i] = data[i].length() < val;
                 }
                 break;

@@ -773,6 +847,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
             }
             case proto::plan::ArithOpType::ArrayLength: {
                 for (size_t i = 0; i < size; ++i) {
+                    if (valid_data != nullptr && !valid_data[i]) {
+                        res[i] = valid_res[i] = false;
+                        continue;
+                    }
                     res[i] = data[i].length() <= val;
                 }
                 break;

@@ -794,8 +872,14 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
         }
     };

-    int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
-        execute_sub_batch, std::nullptr_t{}, res, value, right_operand, index);
+    int64_t processed_size =
+        ProcessDataChunks<milvus::ArrayView>(execute_sub_batch,
+                                             std::nullptr_t{},
+                                             res,
+                                             valid_res,
+                                             value,
+                                             right_operand,
+                                             index);
     AssertInfo(processed_size == real_batch_size,
                "internal error: expr processed rows {} not equal "
                "expect batch size {}",

@@ -1185,12 +1269,13 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() {
         return res;
     };
     auto res = ProcessIndexChunks<T>(execute_sub_batch, value, right_operand);
-    AssertInfo(res.size() == real_batch_size,
+    AssertInfo(res->size() == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
-              res.size(),
+              res->size(),
               real_batch_size);
-    return std::make_shared<ColumnVector>(std::move(res));
+    // return std::make_shared<ColumnVector>(std::move(res));
+    return res;
 }

 template <typename T>

@@ -1209,16 +1294,20 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() {
     auto value = GetValueFromProto<HighPrecisionType>(expr_->value_);
     auto right_operand =
         GetValueFromProto<HighPrecisionType>(expr_->right_operand_);
-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+    valid_res.set();

     auto op_type = expr_->op_type_;
     auto arith_type = expr_->arith_op_type_;
     auto execute_sub_batch = [op_type, arith_type](
                                  const T* data,
+                                 const bool* valid_data,
                                  const int size,
                                  TargetBitmapView res,
+                                 TargetBitmapView valid_res,
                                  HighPrecisionType value,
                                  HighPrecisionType right_operand) {
         switch (op_type) {

@@ -1534,9 +1623,23 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() {
                           "arithmetic eval expr: {}",
                           op_type);
         }
+        // there is a batch operation in ArithOpElementFunc,
+        // so not divide data again for the reason that it may reduce performance if the null distribution is scattered
+        // but to mask res with valid_data after the batch operation.
+        if (valid_data != nullptr) {
+            for (int i = 0; i < size; i++) {
+                if (!valid_data[i]) {
+                    res[i] = valid_res[i] = false;
+                }
+            }
+        }
     };
-    int64_t processed_size = ProcessDataChunks<T>(
-        execute_sub_batch, std::nullptr_t{}, res, value, right_operand);
+    int64_t processed_size = ProcessDataChunks<T>(execute_sub_batch,
+                                                  std::nullptr_t{},
+                                                  res,
+                                                  valid_res,
+                                                  value,
+                                                  right_operand);
     AssertInfo(processed_size == real_batch_size,
                "internal error: expr processed rows {} not equal "
                "expect batch size {}",

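The comment added in the last hunk above describes a deliberate trade-off: ArithOpElementFunc runs as one branch-free batch operation, so rather than splitting the batch around null rows (which hurts when nulls are scattered), the result is masked with valid_data in a second pass. A compilable sketch of that strategy under simplified types (plain arrays instead of TargetBitmapView):

#include <cstddef>
#include <cstdint>

void EvalAddEquals(const int64_t* data, const bool* valid_data, size_t size,
                   int64_t right_operand, int64_t val,
                   bool* res, bool* valid_res) {
    // batch arithmetic first: no null branch inside the hot loop
    for (size_t i = 0; i < size; ++i) {
        res[i] = (data[i] + right_operand) == val;
    }
    // then a single masking pass clears the null rows
    if (valid_data != nullptr) {
        for (size_t i = 0; i < size; ++i) {
            if (!valid_data[i]) {
                res[i] = false;
                valid_res[i] = false;
            }
        }
    }
}
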
@@ -239,7 +239,6 @@ struct ArithOpElementFunc {
         }
     }
     */
-
        if constexpr (!std::is_same_v<decltype(CmpOpHelper<cmp_op>::op),
                                      void>) {
            constexpr auto cmp_op_cvt = CmpOpHelper<cmp_op>::op;

@@ -282,22 +281,26 @@ struct ArithOpIndexFunc {
                HighPrecisonType right_operand) {
         TargetBitmap res(size);
         for (size_t i = 0; i < size; ++i) {
+            auto raw = index->Reverse_Lookup(i);
+            if (!raw.has_value()) {
+                res[i] = false;
+                continue;
+            }
             if constexpr (cmp_op == proto::plan::OpType::Equal) {
                 if constexpr (arith_op == proto::plan::ArithOpType::Add) {
-                    res[i] = (index->Reverse_Lookup(i) + right_operand) == val;
+                    res[i] = (raw.value() + right_operand) == val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Sub) {
-                    res[i] = (index->Reverse_Lookup(i) - right_operand) == val;
+                    res[i] = (raw.value() - right_operand) == val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mul) {
-                    res[i] = (index->Reverse_Lookup(i) * right_operand) == val;
+                    res[i] = (raw.value() * right_operand) == val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Div) {
-                    res[i] = (index->Reverse_Lookup(i) / right_operand) == val;
+                    res[i] = (raw.value() / right_operand) == val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mod) {
-                    res[i] =
-                        (fmod(index->Reverse_Lookup(i), right_operand)) == val;
+                    res[i] = (fmod(raw.value(), right_operand)) == val;
                 } else {
                     PanicInfo(
                         OpTypeInvalid,

@@ -307,20 +310,19 @@ struct ArithOpIndexFunc {
                 }
             } else if constexpr (cmp_op == proto::plan::OpType::NotEqual) {
                 if constexpr (arith_op == proto::plan::ArithOpType::Add) {
-                    res[i] = (index->Reverse_Lookup(i) + right_operand) != val;
+                    res[i] = (raw.value() + right_operand) != val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Sub) {
-                    res[i] = (index->Reverse_Lookup(i) - right_operand) != val;
+                    res[i] = (raw.value() - right_operand) != val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mul) {
-                    res[i] = (index->Reverse_Lookup(i) * right_operand) != val;
+                    res[i] = (raw.value() * right_operand) != val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Div) {
-                    res[i] = (index->Reverse_Lookup(i) / right_operand) != val;
+                    res[i] = (raw.value() / right_operand) != val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mod) {
-                    res[i] =
-                        (fmod(index->Reverse_Lookup(i), right_operand)) != val;
+                    res[i] = (fmod(raw.value(), right_operand)) != val;
                 } else {
                     PanicInfo(
                         OpTypeInvalid,

@@ -330,20 +332,19 @@ struct ArithOpIndexFunc {
                 }
             } else if constexpr (cmp_op == proto::plan::OpType::GreaterThan) {
                 if constexpr (arith_op == proto::plan::ArithOpType::Add) {
-                    res[i] = (index->Reverse_Lookup(i) + right_operand) > val;
+                    res[i] = (raw.value() + right_operand) > val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Sub) {
-                    res[i] = (index->Reverse_Lookup(i) - right_operand) > val;
+                    res[i] = (raw.value() - right_operand) > val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mul) {
-                    res[i] = (index->Reverse_Lookup(i) * right_operand) > val;
+                    res[i] = (raw.value() * right_operand) > val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Div) {
-                    res[i] = (index->Reverse_Lookup(i) / right_operand) > val;
+                    res[i] = (raw.value() / right_operand) > val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mod) {
-                    res[i] =
-                        (fmod(index->Reverse_Lookup(i), right_operand)) > val;
+                    res[i] = (fmod(raw.value(), right_operand)) > val;
                 } else {
                     PanicInfo(
                         OpTypeInvalid,

@@ -353,20 +354,19 @@ struct ArithOpIndexFunc {
                 }
             } else if constexpr (cmp_op == proto::plan::OpType::GreaterEqual) {
                 if constexpr (arith_op == proto::plan::ArithOpType::Add) {
-                    res[i] = (index->Reverse_Lookup(i) + right_operand) >= val;
+                    res[i] = (raw.value() + right_operand) >= val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Sub) {
-                    res[i] = (index->Reverse_Lookup(i) - right_operand) >= val;
+                    res[i] = (raw.value() - right_operand) >= val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mul) {
-                    res[i] = (index->Reverse_Lookup(i) * right_operand) >= val;
+                    res[i] = (raw.value() * right_operand) >= val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Div) {
-                    res[i] = (index->Reverse_Lookup(i) / right_operand) >= val;
+                    res[i] = (raw.value() / right_operand) >= val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mod) {
-                    res[i] =
-                        (fmod(index->Reverse_Lookup(i), right_operand)) >= val;
+                    res[i] = (fmod(raw.value(), right_operand)) >= val;
                 } else {
                     PanicInfo(
                         OpTypeInvalid,

@@ -376,20 +376,19 @@ struct ArithOpIndexFunc {
                 }
             } else if constexpr (cmp_op == proto::plan::OpType::LessThan) {
                 if constexpr (arith_op == proto::plan::ArithOpType::Add) {
-                    res[i] = (index->Reverse_Lookup(i) + right_operand) < val;
+                    res[i] = (raw.value() + right_operand) < val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Sub) {
-                    res[i] = (index->Reverse_Lookup(i) - right_operand) < val;
+                    res[i] = (raw.value() - right_operand) < val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mul) {
-                    res[i] = (index->Reverse_Lookup(i) * right_operand) < val;
+                    res[i] = (raw.value() * right_operand) < val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Div) {
-                    res[i] = (index->Reverse_Lookup(i) / right_operand) < val;
+                    res[i] = (raw.value() / right_operand) < val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mod) {
-                    res[i] =
-                        (fmod(index->Reverse_Lookup(i), right_operand)) < val;
+                    res[i] = (fmod(raw.value(), right_operand)) < val;
                 } else {
                     PanicInfo(
                         OpTypeInvalid,

@@ -399,20 +398,19 @@ struct ArithOpIndexFunc {
                 }
             } else if constexpr (cmp_op == proto::plan::OpType::LessEqual) {
                 if constexpr (arith_op == proto::plan::ArithOpType::Add) {
-                    res[i] = (index->Reverse_Lookup(i) + right_operand) <= val;
+                    res[i] = (raw.value() + right_operand) <= val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Sub) {
-                    res[i] = (index->Reverse_Lookup(i) - right_operand) <= val;
+                    res[i] = (raw.value() - right_operand) <= val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mul) {
-                    res[i] = (index->Reverse_Lookup(i) * right_operand) <= val;
+                    res[i] = (raw.value() * right_operand) <= val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Div) {
-                    res[i] = (index->Reverse_Lookup(i) / right_operand) <= val;
+                    res[i] = (raw.value() / right_operand) <= val;
                 } else if constexpr (arith_op ==
                                      proto::plan::ArithOpType::Mod) {
-                    res[i] =
-                        (fmod(index->Reverse_Lookup(i), right_operand)) <= val;
+                    res[i] = (fmod(raw.value(), right_operand)) <= val;
                 } else {
                     PanicInfo(
                         OpTypeInvalid,

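The ArithOpIndexFunc hunks above all follow one mechanical rewrite: Reverse_Lookup now reports missing (null) entries, so each row caches the lookup once, short-circuits to false on a null, and reuses raw.value() in place of five repeated Reverse_Lookup calls. A sketch with a hypothetical stub standing in for the milvus index:

#include <cstddef>
#include <cstdint>
#include <optional>

// stand-in for index->Reverse_Lookup(i); nullopt marks a null row
std::optional<int64_t>
ReverseLookupStub(size_t i) {
    if (i % 2 == 1) {
        return std::nullopt;  // pretend odd rows are null
    }
    return static_cast<int64_t>(i);
}

bool
EvalModEquals(size_t i, int64_t right_operand, int64_t val) {
    auto raw = ReverseLookupStub(i);  // one lookup per row, cached
    if (!raw.has_value()) {
        return false;  // op(NULL) is false
    }
    return (raw.value() % right_operand) == val;
}
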
@@ -15,6 +15,7 @@
 // limitations under the License.

 #include "BinaryRangeExpr.h"
+#include <utility>

 #include "query/Utils.h"

@@ -150,8 +151,12 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
             cached_overflow_res_->size() == batch_size) {
             return cached_overflow_res_;
         }
-        auto res = std::make_shared<ColumnVector>(TargetBitmap(batch_size));
-        return res;
+        auto valid_res = ProcessChunksForValid<T>(is_index_mode_);
+        auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
+                                                      std::move(valid_res));
+        cached_overflow_res_ = res_vec;
+
+        return res_vec;
     };

     if constexpr (std::is_integral_v<T> && !std::is_same_v<bool, T>) {

@@ -207,12 +212,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
             func(index_ptr, val1, val2, lower_inclusive, upper_inclusive));
     };
     auto res = ProcessIndexChunks<T>(execute_sub_batch, val1, val2);
-    AssertInfo(res.size() == real_batch_size,
+    AssertInfo(res->size() == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
-              res.size(),
+              res->size(),
               real_batch_size);
-    return std::make_shared<ColumnVector>(std::move(res));
+    return res;
 }

 template <typename T>

@@ -240,14 +245,18 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
         PreCheckOverflow<T>(val1, val2, lower_inclusive, upper_inclusive)) {
         return res;
     }
-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+    valid_res.set();

     auto execute_sub_batch = [lower_inclusive, upper_inclusive](
                                  const T* data,
+                                 const bool* valid_data,
                                  const int size,
                                  TargetBitmapView res,
+                                 TargetBitmapView valid_res,
                                  HighPrecisionType val1,
                                  HighPrecisionType val2) {
         if (lower_inclusive && upper_inclusive) {

@@ -263,6 +272,16 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
             BinaryRangeElementFunc<T, false, false> func;
             func(val1, val2, data, size, res);
         }
+        // there is a batch operation in BinaryRangeElementFunc,
+        // so not divide data again for the reason that it may reduce performance if the null distribution is scattered
+        // but to mask res with valid_data after the batch operation.
+        if (valid_data != nullptr) {
+            for (int i = 0; i < size; i++) {
+                if (!valid_data[i]) {
+                    res[i] = valid_res[i] = false;
+                }
+            }
+        }
     };
     auto skip_index_func =
         [val1, val2, lower_inclusive, upper_inclusive](

@@ -282,7 +301,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
         }
     };
     int64_t processed_size = ProcessDataChunks<T>(
-        execute_sub_batch, skip_index_func, res, val1, val2);
+        execute_sub_batch, skip_index_func, res, valid_res, val1, val2);
     AssertInfo(processed_size == real_batch_size,
                "internal error: expr processed rows {} not equal "
                "expect batch size {}",

@@ -301,9 +320,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
     if (real_batch_size == 0) {
         return nullptr;
     }
-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+    valid_res.set();

     bool lower_inclusive = expr_->lower_inclusive_;
     bool upper_inclusive = expr_->upper_inclusive_;

@@ -313,26 +334,28 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {

     auto execute_sub_batch = [lower_inclusive, upper_inclusive, pointer](
                                  const milvus::Json* data,
+                                 const bool* valid_data,
                                  const int size,
                                  TargetBitmapView res,
+                                 TargetBitmapView valid_res,
                                  ValueType val1,
                                  ValueType val2) {
         if (lower_inclusive && upper_inclusive) {
             BinaryRangeElementFuncForJson<ValueType, true, true> func;
-            func(val1, val2, pointer, data, size, res);
+            func(val1, val2, pointer, data, valid_data, size, res, valid_res);
         } else if (lower_inclusive && !upper_inclusive) {
             BinaryRangeElementFuncForJson<ValueType, true, false> func;
-            func(val1, val2, pointer, data, size, res);
+            func(val1, val2, pointer, data, valid_data, size, res, valid_res);
         } else if (!lower_inclusive && upper_inclusive) {
             BinaryRangeElementFuncForJson<ValueType, false, true> func;
-            func(val1, val2, pointer, data, size, res);
+            func(val1, val2, pointer, data, valid_data, size, res, valid_res);
         } else {
             BinaryRangeElementFuncForJson<ValueType, false, false> func;
-            func(val1, val2, pointer, data, size, res);
+            func(val1, val2, pointer, data, valid_data, size, res, valid_res);
         }
     };
     int64_t processed_size = ProcessDataChunks<milvus::Json>(
-        execute_sub_batch, std::nullptr_t{}, res, val1, val2);
+        execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2);
     AssertInfo(processed_size == real_batch_size,
                "internal error: expr processed rows {} not equal "
                "expect batch size {}",

@@ -351,9 +374,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
     if (real_batch_size == 0) {
         return nullptr;
     }
-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+    valid_res.set();

     bool lower_inclusive = expr_->lower_inclusive_;
     bool upper_inclusive = expr_->upper_inclusive_;

@@ -366,27 +391,29 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {

     auto execute_sub_batch = [lower_inclusive, upper_inclusive](
                                  const milvus::ArrayView* data,
+                                 const bool* valid_data,
                                  const int size,
                                  TargetBitmapView res,
+                                 TargetBitmapView valid_res,
                                  ValueType val1,
                                  ValueType val2,
                                  int index) {
         if (lower_inclusive && upper_inclusive) {
             BinaryRangeElementFuncForArray<ValueType, true, true> func;
-            func(val1, val2, index, data, size, res);
+            func(val1, val2, index, data, valid_data, size, res, valid_res);
         } else if (lower_inclusive && !upper_inclusive) {
             BinaryRangeElementFuncForArray<ValueType, true, false> func;
-            func(val1, val2, index, data, size, res);
+            func(val1, val2, index, data, valid_data, size, res, valid_res);
         } else if (!lower_inclusive && upper_inclusive) {
             BinaryRangeElementFuncForArray<ValueType, false, true> func;
-            func(val1, val2, index, data, size, res);
+            func(val1, val2, index, data, valid_data, size, res, valid_res);
         } else {
             BinaryRangeElementFuncForArray<ValueType, false, false> func;
-            func(val1, val2, index, data, size, res);
+            func(val1, val2, index, data, valid_data, size, res, valid_res);
         }
     };
     int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
-        execute_sub_batch, std::nullptr_t{}, res, val1, val2, index);
+        execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2, index);
     AssertInfo(processed_size == real_batch_size,
                "internal error: expr processed rows {} not equal "
                "expect batch size {}",

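One detail worth noting in the header changes that follow: the null check inserted into BinaryRangeJSONCompare ends with break, not continue, because the macro body is wrapped in do { ... } while (false) and expanded once per row inside the caller's for-loop, so break only leaves the do-while and the loop proceeds to the next row. A small demonstration of that idiom (toy macro, not the milvus one):

#include <cstdio>

#define CHECK_ROW(cond)                 \
    do {                                \
        if (!(cond)) {                  \
            break; /* skips this row */ \
        }                               \
        printf("row %d ok\n", i);       \
    } while (false)

int main() {
    for (int i = 0; i < 3; ++i) {
        CHECK_ROW(i != 1);  // row 1 is skipped, rows 0 and 2 print
    }
    return 0;
}
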
@@ -54,6 +54,10 @@ struct BinaryRangeElementFunc {

 #define BinaryRangeJSONCompare(cmp)                                        \
     do {                                                                   \
+        if (valid_data != nullptr && !valid_data[i]) {                     \
+            res[i] = valid_res[i] = false;                                 \
+            break;                                                         \
+        }                                                                  \
         auto x = src[i].template at<GetType>(pointer);                     \
         if (x.error()) {                                                   \
             if constexpr (std::is_same_v<GetType, int64_t>) {              \

@@ -81,8 +85,10 @@ struct BinaryRangeElementFuncForJson {
                ValueType val2,
                const std::string& pointer,
                const milvus::Json* src,
+               const bool* valid_data,
                size_t n,
-               TargetBitmapView res) {
+               TargetBitmapView res,
+               TargetBitmapView valid_res) {
         for (size_t i = 0; i < n; ++i) {
             if constexpr (lower_inclusive && upper_inclusive) {
                 BinaryRangeJSONCompare(val1 <= value && value <= val2);

@@ -107,9 +113,15 @@ struct BinaryRangeElementFuncForArray {
                ValueType val2,
                int index,
                const milvus::ArrayView* src,
+               const bool* valid_data,
                size_t n,
-               TargetBitmapView res) {
+               TargetBitmapView res,
+               TargetBitmapView valid_res) {
         for (size_t i = 0; i < n; ++i) {
+            if (valid_data != nullptr && !valid_data[i]) {
+                res[i] = valid_res[i] = false;
+                continue;
+            }
             if constexpr (lower_inclusive && upper_inclusive) {
                 if (index >= src[i].length()) {
                     res[i] = false;

@@ -16,6 +16,7 @@

 #include "CompareExpr.h"
 #include "common/type_c.h"
+#include <optional>
 #include "query/Relational.h"

 namespace milvus {

@@ -58,12 +59,19 @@ PhyCompareFilterExpr::GetChunkData(FieldId field_id,
                     segment_->chunk_scalar_index<T>(field_id,
                                                     current_chunk_id));
             }
-            return indexing.Reverse_Lookup(current_chunk_pos++);
+            auto raw = indexing.Reverse_Lookup(current_chunk_pos);
+            current_chunk_pos++;
+            if (!raw.has_value()) {
+                return std::nullopt;
+            }
+            return raw.value();
         };
     }
 }
 auto chunk_data =
     segment_->chunk_data<T>(field_id, current_chunk_id).data();
+auto chunk_valid_data =
+    segment_->chunk_data<T>(field_id, current_chunk_id).valid_data();
 auto current_chunk_size = segment_->chunk_size(field_id, current_chunk_id);
 return
     [=, &current_chunk_id, &current_chunk_pos]() mutable -> const number {

@@ -72,10 +80,16 @@ PhyCompareFilterExpr::GetChunkData(FieldId field_id,
             current_chunk_pos = 0;
             chunk_data =
                 segment_->chunk_data<T>(field_id, current_chunk_id).data();
+            chunk_valid_data =
+                segment_->chunk_data<T>(field_id, current_chunk_id)
+                    .valid_data();
             current_chunk_size =
                 segment_->chunk_size(field_id, current_chunk_id);
         }

+        if (chunk_valid_data && !chunk_valid_data[current_chunk_pos]) {
+            current_chunk_pos++;
+            return std::nullopt;
+        }
         return chunk_data[current_chunk_pos++];
     };
 }

@@ -103,7 +117,12 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
                     segment_->chunk_scalar_index<std::string>(
                         field_id, current_chunk_id));
             }
-            return indexing.Reverse_Lookup(current_chunk_pos++);
+            auto raw = indexing.Reverse_Lookup(current_chunk_pos);
+            current_chunk_pos++;
+            if (!raw.has_value()) {
+                return std::nullopt;
+            }
+            return raw.value();
         };
     }
 }

@@ -114,6 +133,9 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
         auto chunk_data =
             segment_->chunk_data<std::string>(field_id, current_chunk_id)
                 .data();
+        auto chunk_valid_data =
+            segment_->chunk_data<std::string>(field_id, current_chunk_id)
+                .valid_data();
         auto current_chunk_size =
             segment_->chunk_size(field_id, current_chunk_id);
         return [=,

@@ -126,16 +148,26 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
                     segment_
                         ->chunk_data<std::string>(field_id, current_chunk_id)
                         .data();
+                chunk_valid_data =
+                    segment_
+                        ->chunk_data<std::string>(field_id, current_chunk_id)
+                        .valid_data();
                 current_chunk_size =
                     segment_->chunk_size(field_id, current_chunk_id);
             }

+            if (chunk_valid_data && !chunk_valid_data[current_chunk_pos]) {
+                current_chunk_pos++;
+                return std::nullopt;
+            }
             return chunk_data[current_chunk_pos++];
         };
     } else {
         auto chunk_data =
             segment_->chunk_view<std::string_view>(field_id, current_chunk_id)
                 .first.data();
+        auto chunk_valid_data =
+            segment_->chunk_data<std::string_view>(field_id, current_chunk_id)
+                .valid_data();
         auto current_chunk_size =
             segment_->chunk_size(field_id, current_chunk_id);
         return [=,

@@ -148,9 +180,17 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
                                  ->chunk_view<std::string_view>(
                                      field_id, current_chunk_id)
                                  .first.data();
+                chunk_valid_data = segment_
+                                       ->chunk_data<std::string_view>(
+                                           field_id, current_chunk_id)
+                                       .valid_data();
                 current_chunk_size =
                     segment_->chunk_size(field_id, current_chunk_id);
             }
+            if (chunk_valid_data && !chunk_valid_data[current_chunk_pos]) {
+                current_chunk_pos++;
+                return std::nullopt;
+            }

             return std::string(chunk_data[current_chunk_pos++]);
         };

@@ -203,9 +243,11 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
             return nullptr;
         }

-        auto res_vec =
-            std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+        auto res_vec = std::make_shared<ColumnVector>(
+            TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
         TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+        TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+        valid_res.set();

         auto left = GetChunkData(expr_->left_data_type_,
                                  expr_->left_field_id_,

@@ -218,8 +260,15 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
                                   right_current_chunk_id_,
                                   right_current_chunk_pos_);
         for (int i = 0; i < real_batch_size; ++i) {
-            res[i] = boost::apply_visitor(
-                milvus::query::Relational<decltype(op)>{}, left(), right());
+            if (!left().has_value() || !right().has_value()) {
+                res[i] = false;
+                valid_res[i] = false;
+                continue;
+            }
+            res[i] =
+                boost::apply_visitor(milvus::query::Relational<decltype(op)>{},
+                                     left().value(),
+                                     right().value());
         }
         return res_vec;
     } else {

@@ -228,9 +277,11 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
             return nullptr;
         }

-        auto res_vec =
-            std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+        auto res_vec = std::make_shared<ColumnVector>(
+            TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
         TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+        TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+        valid_res.set();

         auto left_data_barrier =
             segment_->num_chunk_data(expr_->left_field_id_);

@@ -255,10 +306,16 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
             for (int i = chunk_id == current_chunk_id_ ? current_chunk_pos_ : 0;
                  i < chunk_size;
                  ++i) {
-                res[processed_rows++] = boost::apply_visitor(
-                    milvus::query::Relational<decltype(op)>{},
-                    left(i),
-                    right(i));
+                if (!left(i).has_value() || !right(i).has_value()) {
+                    res[processed_rows] = false;
+                    valid_res[processed_rows] = false;
+                } else {
+                    res[processed_rows] = boost::apply_visitor(
+                        milvus::query::Relational<decltype(op)>{},
+                        left(i).value(),
+                        right(i).value());
+                }
+                processed_rows++;

                 if (processed_rows >= batch_size_) {
                     current_chunk_id_ = chunk_id;

@@ -280,12 +337,23 @@ PhyCompareFilterExpr::GetChunkData(FieldId field_id,
         auto& indexing = segment_->chunk_scalar_index<T>(field_id, chunk_id);
         if (indexing.HasRawData()) {
             return [&indexing](int i) -> const number {
-                return indexing.Reverse_Lookup(i);
+                auto raw = indexing.Reverse_Lookup(i);
+                if (!raw.has_value()) {
+                    return std::nullopt;
+                }
+                return raw.value();
             };
         }
     }
     auto chunk_data = segment_->chunk_data<T>(field_id, chunk_id).data();
-    return [chunk_data](int i) -> const number { return chunk_data[i]; };
+    auto chunk_valid_data =
+        segment_->chunk_data<T>(field_id, chunk_id).valid_data();
+    return [chunk_data, chunk_valid_data](int i) -> const number {
+        if (chunk_valid_data && !chunk_valid_data[i]) {
+            return std::nullopt;
+        }
+        return chunk_data[i];
+    };
 }

 template <>

@@ -297,8 +365,12 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
         auto& indexing =
             segment_->chunk_scalar_index<std::string>(field_id, chunk_id);
         if (indexing.HasRawData()) {
-            return [&indexing](int i) -> const std::string {
-                return indexing.Reverse_Lookup(i);
+            return [&indexing](int i) -> const number {
+                auto raw = indexing.Reverse_Lookup(i);
+                if (!raw.has_value()) {
+                    return std::nullopt;
+                }
+                return raw.value();
             };
         }
     }

@@ -308,12 +380,23 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
             .growing_enable_mmap) {
         auto chunk_data =
             segment_->chunk_data<std::string>(field_id, chunk_id).data();
-        return [chunk_data](int i) -> const number { return chunk_data[i]; };
+        auto chunk_valid_data =
+            segment_->chunk_data<std::string>(field_id, chunk_id).valid_data();
+        return [chunk_data, chunk_valid_data](int i) -> const number {
+            if (chunk_valid_data && !chunk_valid_data[i]) {
+                return std::nullopt;
+            }
+            return chunk_data[i];
+        };
     } else {
-        auto chunk_data =
-            segment_->chunk_view<std::string_view>(field_id, chunk_id)
-                .first.data();
-        return [chunk_data](int i) -> const number {
+        auto chunk_info =
+            segment_->chunk_view<std::string_view>(field_id, chunk_id);
+        auto chunk_data = chunk_info.first.data();
+        auto chunk_valid_data = chunk_info.second.data();
+        return [chunk_data, chunk_valid_data](int i) -> const number {
+            if (chunk_valid_data && !chunk_valid_data[i]) {
+                return std::nullopt;
+            }
             return std::string(chunk_data[i]);
         };
     }

@@ -450,9 +533,11 @@ PhyCompareFilterExpr::ExecCompareRightType() {
         return nullptr;
     }

-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+    valid_res.set();

     auto expr_type = expr_->op_type_;
     auto execute_sub_batch = [expr_type](const T* left,

@@ -491,15 +576,14 @@ PhyCompareFilterExpr::ExecCompareRightType() {
                 break;
             }
             default:
-                PanicInfo(
-                    OpTypeInvalid,
-                    fmt::format(
-                        "unsupported operator type for compare column expr: {}",
-                        expr_type));
+                PanicInfo(OpTypeInvalid,
+                          fmt::format("unsupported operator type for "
+                                      "compare column expr: {}",
+                                      expr_type));
         }
     };
     int64_t processed_size =
-        ProcessBothDataChunks<T, U>(execute_sub_batch, res);
+        ProcessBothDataChunks<T, U>(execute_sub_batch, res, valid_res);
     AssertInfo(processed_size == real_batch_size,
                "internal error: expr processed rows {} not equal "
                "expect batch size {}",

@@ -18,6 +18,7 @@

 #include <fmt/core.h>
 #include <boost/variant.hpp>
+#include <optional>

 #include "common/EasyAssert.h"
 #include "common/Types.h"

@@ -29,14 +30,17 @@
 namespace milvus {
 namespace exec {

-using number = boost::variant<bool,
-                              int8_t,
-                              int16_t,
-                              int32_t,
-                              int64_t,
-                              float,
-                              double,
-                              std::string>;
+using number_type = boost::variant<bool,
+                                   int8_t,
+                                   int16_t,
+                                   int32_t,
+                                   int64_t,
+                                   float,
+                                   double,
+                                   std::string>;
+
+using number = std::optional<number_type>;

 using ChunkDataAccessor = std::function<const number(int)>;
 using MultipleChunkDataAccessor = std::function<const number()>;

@@ -264,16 +268,19 @@ class PhyCompareFilterExpr : public Expr {

     template <typename T, typename U, typename FUNC, typename... ValTypes>
     int64_t
-    ProcessBothDataChunks(FUNC func, TargetBitmapView res, ValTypes... values) {
+    ProcessBothDataChunks(FUNC func,
+                          TargetBitmapView res,
+                          TargetBitmapView valid_res,
+                          ValTypes... values) {
         if (segment_->is_chunked()) {
             return ProcessBothDataChunksForMultipleChunk<T,
                                                          U,
                                                          FUNC,
                                                          ValTypes...>(
-                func, res, values...);
+                func, res, valid_res, values...);
         } else {
             return ProcessBothDataChunksForSingleChunk<T, U, FUNC, ValTypes...>(
-                func, res, values...);
+                func, res, valid_res, values...);
         }
     }

@@ -281,6 +288,7 @@ class PhyCompareFilterExpr : public Expr {
     int64_t
     ProcessBothDataChunksForSingleChunk(FUNC func,
                                         TargetBitmapView res,
+                                        TargetBitmapView valid_res,
                                         ValTypes... values) {
         int64_t processed_size = 0;

@@ -304,6 +312,20 @@ class PhyCompareFilterExpr : public Expr {
             const T* left_data = left_chunk.data() + data_pos;
             const U* right_data = right_chunk.data() + data_pos;
             func(left_data, right_data, size, res + processed_size, values...);
+            const bool* left_valid_data = left_chunk.valid_data();
+            const bool* right_valid_data = right_chunk.valid_data();
+            // mask with valid_data
+            for (int i = 0; i < size; ++i) {
+                if (left_valid_data && !left_valid_data[i + data_pos]) {
+                    res[processed_size + i] = false;
+                    valid_res[processed_size + i] = false;
+                    continue;
+                }
+                if (right_valid_data && !right_valid_data[i + data_pos]) {
+                    res[processed_size + i] = false;
+                    valid_res[processed_size + i] = false;
+                }
+            }
             processed_size += size;

             if (processed_size >= batch_size_) {

@@ -320,6 +342,7 @@ class PhyCompareFilterExpr : public Expr {
     int64_t
     ProcessBothDataChunksForMultipleChunk(FUNC func,
                                           TargetBitmapView res,
+                                          TargetBitmapView valid_res,
                                           ValTypes... values) {
         int64_t processed_size = 0;

@@ -347,6 +370,20 @@ class PhyCompareFilterExpr : public Expr {
             const T* left_data = left_chunk.data() + data_pos;
             const U* right_data = right_chunk.data() + data_pos;
             func(left_data, right_data, size, res + processed_size, values...);
+            const bool* left_valid_data = left_chunk.valid_data();
+            const bool* right_valid_data = right_chunk.valid_data();
+            // mask with valid_data
+            for (int i = 0; i < size; ++i) {
+                if (left_valid_data && !left_valid_data[i + data_pos]) {
+                    res[processed_size + i] = false;
+                    valid_res[processed_size + i] = false;
+                    continue;
+                }
+                if (right_valid_data && !right_valid_data[i + data_pos]) {
+                    res[processed_size + i] = false;
+                    valid_res[processed_size + i] = false;
+                }
+            }
             processed_size += size;

             if (processed_size >= batch_size_) {

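The type change above is the keystone of the compare path: number used to be the bare variant, and is now optional, so a chunk accessor can yield std::nullopt for a null row and the dispatcher turns that into res = valid_res = false. A sketch using std::variant as a stand-in for boost::variant (an assumption made for self-containedness):

#include <cstdint>
#include <optional>
#include <string>
#include <variant>

using number_type = std::variant<bool, int8_t, int16_t, int32_t, int64_t,
                                 float, double, std::string>;
using number = std::optional<number_type>;

// shape of the per-chunk accessor after the change
number GetRow(const int64_t* data, const bool* valid, int i) {
    if (valid != nullptr && !valid[i]) {
        return std::nullopt;  // null row
    }
    return number_type(data[i]);
}
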
@@ -44,22 +44,30 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment() {
     if (real_batch_size == 0) {
         return nullptr;
     }
-    auto res_vec =
-        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
+    auto res_vec = std::make_shared<ColumnVector>(
+        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
     TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
+    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
+    valid_res.set();

     auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
     auto execute_sub_batch = [](const milvus::Json* data,
+                                const bool* valid_data,
                                 const int size,
                                 TargetBitmapView res,
+                                TargetBitmapView valid_res,
                                 const std::string& pointer) {
         for (int i = 0; i < size; ++i) {
+            if (valid_data != nullptr && !valid_data[i]) {
+                res[i] = valid_res[i] = false;
+                continue;
+            }
             res[i] = data[i].exist(pointer);
         }
     };

     int64_t processed_size = ProcessDataChunks<Json>(
-        execute_sub_batch, std::nullptr_t{}, res, pointer);
+        execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer);
     AssertInfo(processed_size == real_batch_size,
                "internal error: expr processed rows {} not equal "
                "expect batch size {}",

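The exists filter above is the smallest full example of the reshaped execute_sub_batch contract: every callback now takes the chunk's validity array plus an output validity view, and masks null rows before doing any real work. A compilable sketch with plain pointers standing in for TargetBitmapView and a toy "exist" test standing in for milvus::Json::exist:

#include <string>

void ExistsSubBatch(const std::string* data,  // stand-in for milvus::Json*
                    const bool* valid_data,   // nullptr: no nulls in chunk
                    int size,
                    bool* res,
                    bool* valid_res,
                    const std::string& pointer) {
    for (int i = 0; i < size; ++i) {
        if (valid_data != nullptr && !valid_data[i]) {
            res[i] = valid_res[i] = false;  // op(NULL) is false
            continue;
        }
        res[i] = data[i].find(pointer) != std::string::npos;
    }
}
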
@ -16,6 +16,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
|
@ -248,6 +249,7 @@ class SegmentExpr : public Expr {
|
|||
FUNC func,
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
// For sealed segment, only single chunk
|
||||
Assert(num_data_chunk_ == 1);
|
||||
|
@ -256,13 +258,16 @@ class SegmentExpr : public Expr {
|
|||
|
||||
auto& skip_index = segment_->GetSkipIndex();
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||
auto data_vec =
|
||||
segment_
|
||||
->get_batch_views<T>(
|
||||
field_id_, 0, current_data_chunk_pos_, need_size)
|
||||
.first;
|
||||
|
||||
func(data_vec.data(), need_size, res, values...);
|
||||
auto views_info = segment_->get_batch_views<T>(
|
||||
field_id_, 0, current_data_chunk_pos_, need_size);
|
||||
// first is the raw data, second is valid_data
|
||||
// use valid_data to see if raw data is null
|
||||
func(views_info.first.data(),
|
||||
views_info.second.data(),
|
||||
need_size,
|
||||
res,
|
||||
valid_res,
|
||||
values...);
|
||||
}
|
||||
current_data_chunk_pos_ += need_size;
|
||||
return need_size;
|
||||
|
@ -274,6 +279,7 @@ class SegmentExpr : public Expr {
|
|||
FUNC func,
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
int64_t processed_size = 0;
|
||||
|
||||
|
@ -281,7 +287,7 @@ class SegmentExpr : public Expr {
|
|||
std::is_same_v<T, Json>) {
|
||||
if (segment_->type() == SegmentType::Sealed) {
|
||||
return ProcessChunkForSealedSeg<T>(
|
||||
func, skip_func, res, values...);
|
||||
func, skip_func, res, valid_res, values...);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -303,7 +309,16 @@ class SegmentExpr : public Expr {
|
|||
if (!skip_func || !skip_func(skip_index, field_id_, i)) {
|
||||
auto chunk = segment_->chunk_data<T>(field_id_, i);
|
||||
const T* data = chunk.data() + data_pos;
|
||||
func(data, size, res + processed_size, values...);
|
||||
const bool* valid_data = chunk.valid_data();
|
||||
if (valid_data != nullptr) {
|
||||
valid_data += data_pos;
|
||||
}
|
||||
func(data,
|
||||
valid_data,
|
||||
size,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
values...);
|
||||
}
|
||||
|
||||
processed_size += size;
|
||||
|
@ -322,6 +337,7 @@ class SegmentExpr : public Expr {
|
|||
FUNC func,
|
||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||
TargetBitmapView res,
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
int64_t processed_size = 0;
|
||||
|
||||
|
@ -356,13 +372,21 @@ class SegmentExpr : public Expr {
|
|||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json>) {
|
||||
if (segment_->type() == SegmentType::Sealed) {
|
||||
// first is the raw data, second is valid_data
|
||||
// use valid_data to see if raw data is null
|
||||
auto data_vec = segment_
|
||||
->get_batch_views<T>(
|
||||
field_id_, i, data_pos, size)
|
||||
.first;
|
||||
auto valid_data = segment_
|
||||
->get_batch_views<T>(
|
||||
field_id_, i, data_pos, size)
|
||||
.second;
|
||||
func(data_vec.data(),
|
||||
valid_data.data(),
|
||||
size,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
values...);
|
||||
is_seal = true;
|
||||
}
|
||||
|
@ -370,7 +394,16 @@ class SegmentExpr : public Expr {
|
|||
if (!is_seal) {
|
||||
auto chunk = segment_->chunk_data<T>(field_id_, i);
|
||||
const T* data = chunk.data() + data_pos;
|
||||
func(data, size, res + processed_size, values...);
|
||||
const bool* valid_data = chunk.valid_data();
|
||||
if (valid_data != nullptr) {
|
||||
valid_data += data_pos;
|
||||
}
|
||||
func(data,
|
||||
valid_data,
|
||||
size,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
values...);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -403,8 +436,10 @@ class SegmentExpr : public Expr {
|
|||
|
||||
int
|
||||
ProcessIndexOneChunk(TargetBitmap& result,
|
||||
TargetBitmap& valid_result,
|
||||
size_t chunk_id,
|
||||
const TargetBitmap& chunk_res,
|
||||
const TargetBitmap& chunk_valid_res,
|
||||
int processed_rows) {
|
||||
auto data_pos =
|
||||
chunk_id == current_index_chunk_ ? current_index_chunk_pos_ : 0;
|
||||
|
@ -416,33 +451,41 @@ class SegmentExpr : public Expr {
|
|||
// chunk_res.begin() + data_pos,
|
||||
// chunk_res.begin() + data_pos + size);
|
||||
result.append(chunk_res, data_pos, size);
|
||||
valid_result.append(chunk_valid_res, data_pos, size);
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename T, typename FUNC, typename... ValTypes>
|
||||
TargetBitmap
|
||||
VectorPtr
|
||||
ProcessIndexChunks(FUNC func, ValTypes... values) {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
IndexInnerType;
|
||||
using Index = index::ScalarIndex<IndexInnerType>;
|
||||
TargetBitmap result;
|
||||
TargetBitmap valid_result;
|
||||
int processed_rows = 0;
|
||||
|
||||
for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
|
||||
// This cache result help getting result for every batch loop.
|
||||
// It avoids indexing execute for evevy batch because indexing
|
||||
// It avoids indexing execute for every batch because indexing
|
||||
// executing costs quite much time.
|
||||
if (cached_index_chunk_id_ != i) {
|
||||
const Index& index =
|
||||
segment_->chunk_scalar_index<IndexInnerType>(field_id_, i);
|
||||
auto* index_ptr = const_cast<Index*>(&index);
|
||||
cached_index_chunk_res_ = std::move(func(index_ptr, values...));
|
||||
auto valid_result = index_ptr->IsNotNull();
|
||||
cached_index_chunk_valid_res_ = std::move(valid_result);
|
||||
cached_index_chunk_id_ = i;
|
||||
}
|
||||
|
||||
auto size = ProcessIndexOneChunk(
|
||||
result, i, cached_index_chunk_res_, processed_rows);
|
||||
auto size = ProcessIndexOneChunk(result,
|
||||
valid_result,
|
||||
i,
|
||||
cached_index_chunk_res_,
|
||||
cached_index_chunk_valid_res_,
|
||||
processed_rows);
|
||||
|
||||
if (processed_rows + size >= batch_size_) {
|
||||
current_index_chunk_ = i;
|
||||
|
@ -454,23 +497,136 @@ class SegmentExpr : public Expr {
|
|||
processed_rows += size;
|
||||
}
|
||||
|
||||
return result;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
std::move(valid_result));
|
||||
}

    template <typename T>
    TargetBitmap
    ProcessChunksForValid(bool use_index) {
        if (use_index) {
            return ProcessIndexChunksForValid<T>();
        } else {
            return ProcessDataChunksForValid<T>();
        }
    }

    template <typename T>
    TargetBitmap
    ProcessDataChunksForValid() {
        TargetBitmap valid_result(batch_size_);
        valid_result.set();
        int64_t processed_size = 0;
        for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) {
            auto data_pos =
                (i == current_data_chunk_) ? current_data_chunk_pos_ : 0;
            auto size =
                (i == (num_data_chunk_ - 1))
                    ? (segment_->type() == SegmentType::Growing
                           ? (active_count_ % size_per_chunk_ == 0
                                  ? size_per_chunk_ - data_pos
                                  : active_count_ % size_per_chunk_ - data_pos)
                           : active_count_ - data_pos)
                    : size_per_chunk_ - data_pos;

            size = std::min(size, batch_size_ - processed_size);

            auto chunk = segment_->chunk_data<T>(field_id_, i);
            const bool* valid_data = chunk.valid_data();
            if (valid_data == nullptr) {
                return valid_result;
            }
            valid_data += data_pos;
            for (int i = 0; i < size; i++) {
                if (!valid_data[i]) {
                    valid_result[i + data_pos] = false;
                }
            }
            processed_size += size;
            if (processed_size >= batch_size_) {
                current_data_chunk_ = i;
                current_data_chunk_pos_ = data_pos + size;
                break;
            }
        }
        return valid_result;
    }
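The core of the chunk scan above reduces to a small, self-contained shape. A hedged sketch, assuming a std::vector<bool> bitmap in place of TargetBitmap and a free function in place of the member: start from all-valid and clear the rows the chunk reports as null; a missing validity array means the column has no nulls.

#include <cstddef>
#include <vector>

std::vector<bool> collect_valid(const bool* valid_data,
                                std::size_t data_pos,
                                std::size_t size) {
    std::vector<bool> out(size, true);
    if (valid_data == nullptr) {
        return out;  // no validity array: every row is non-null
    }
    for (std::size_t i = 0; i < size; ++i) {
        if (!valid_data[data_pos + i]) {
            out[i] = false;  // chunk says this row is null
        }
    }
    return out;
}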

    int
    ProcessIndexOneChunkForValid(TargetBitmap& valid_result,
                                 size_t chunk_id,
                                 const TargetBitmap& chunk_valid_res,
                                 int processed_rows) {
        auto data_pos =
            chunk_id == current_index_chunk_ ? current_index_chunk_pos_ : 0;
        auto size = std::min(
            std::min(size_per_chunk_ - data_pos, batch_size_ - processed_rows),
            int64_t(chunk_valid_res.size()));

        valid_result.append(chunk_valid_res, data_pos, size);
        return size;
    }

    template <typename T>
    TargetBitmap
    ProcessIndexChunksForValid() {
        typedef std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
                IndexInnerType;
        using Index = index::ScalarIndex<IndexInnerType>;
        int processed_rows = 0;
        TargetBitmap valid_result;
        valid_result.set();

        for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
            // This cached result helps get the result for every batch loop.
            // It avoids indexing execute for every batch because indexing
            // executing costs quite much time.
            if (cached_index_chunk_id_ != i) {
                const Index& index =
                    segment_->chunk_scalar_index<IndexInnerType>(field_id_, i);
                auto* index_ptr = const_cast<Index*>(&index);
                auto execute_sub_batch = [](Index* index_ptr) {
                    TargetBitmap res = index_ptr->IsNotNull();
                    return res;
                };
                cached_index_chunk_valid_res_ = execute_sub_batch(index_ptr);
                cached_index_chunk_id_ = i;
            }

            auto size = ProcessIndexOneChunkForValid(
                valid_result, i, cached_index_chunk_valid_res_, processed_rows);

            if (processed_rows + size >= batch_size_) {
                current_index_chunk_ = i;
                current_index_chunk_pos_ = i == current_index_chunk_
                                               ? current_index_chunk_pos_ + size
                                               : size;
                break;
            }
            processed_rows += size;
        }
        return valid_result;
    }
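The caching here follows a plain memoization shape: recompute IsNotNull() only when the chunk id changes. A hedged standalone sketch of that pattern (std::vector<bool> again stands in for TargetBitmap; the struct and names are illustrative):

#include <cstdint>
#include <vector>

struct ChunkValidCache {
    int64_t cached_id = -1;
    std::vector<bool> cached_valid;

    // Recompute only on a chunk-id change, mirroring
    // cached_index_chunk_id_ / cached_index_chunk_valid_res_ above.
    template <typename ComputeFn>
    const std::vector<bool>& Get(int64_t chunk_id, ComputeFn compute) {
        if (cached_id != chunk_id) {
            cached_valid = compute(chunk_id);  // e.g. index->IsNotNull()
            cached_id = chunk_id;
        }
        return cached_valid;
    }
};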

    template <typename FUNC, typename... ValTypes>
    TargetBitmap
    VectorPtr
    ProcessTextMatchIndex(FUNC func, ValTypes... values) {
        TargetBitmap result;
        TargetBitmap valid_result;

        if (cached_match_res_ == nullptr) {
            auto index = segment_->GetTextIndex(field_id_);
            auto res = std::move(func(index, values...));
            auto valid_res = index->IsNotNull();
            cached_match_res_ = std::make_shared<TargetBitmap>(std::move(res));
            cached_index_chunk_valid_res_ = std::move(valid_res);
            if (cached_match_res_->size() < active_count_) {
                // some entities are not visible in the inverted index.
                // this only happens on growing segments.
                TargetBitmap tail(active_count_ - cached_match_res_->size());
                cached_match_res_->append(tail);
                cached_index_chunk_valid_res_.append(tail);
            }
        }

@ -481,9 +637,13 @@ class SegmentExpr : public Expr {
            : batch_size_;
        result.append(
            *cached_match_res_, current_data_chunk_pos_, real_batch_size);
        valid_result.append(cached_index_chunk_valid_res_,
                            current_data_chunk_pos_,
                            real_batch_size);
        current_data_chunk_pos_ += real_batch_size;

        return result;
        return std::make_shared<ColumnVector>(std::move(result),
                                              std::move(valid_result));
    }
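A small sketch of the tail padding above, under the assumption that both bitmaps share the same row numbering (std::vector<bool> stands in for TargetBitmap): rows not yet visible to the text index, which can happen on growing segments, are appended as zeros in both bitmaps, so they neither match nor count as valid.

#include <cstddef>
#include <vector>

void PadToActiveCount(std::vector<bool>& match,
                      std::vector<bool>& valid,
                      std::size_t active_count) {
    if (match.size() < active_count) {
        match.resize(active_count, false);  // unseen rows do not match
        valid.resize(active_count, false);  // and are not counted as valid
    }
}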

    template <typename T, typename FUNC, typename... ValTypes>

@ -581,6 +741,8 @@ class SegmentExpr : public Expr {
    // Cache for index scan to avoid search index every batch
    int64_t cached_index_chunk_id_{-1};
    TargetBitmap cached_index_chunk_res_{};
    // Cache for chunk valid res.
    TargetBitmap cached_index_chunk_valid_res_{};

    // Cache for text match.
    std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};

@ -15,6 +15,7 @@
// limitations under the License.

#include "JsonContainsExpr.h"
#include <utility>
#include "common/Types.h"

namespace milvus {

@ -173,17 +174,21 @@ PhyJsonContainsFilterExpr::ExecArrayContains() {
    AssertInfo(expr_->column_.nested_path_.size() == 0,
               "[ExecArrayContains]nested path must be null");

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    std::unordered_set<GetType> elements;
    for (auto const& element : expr_->vals_) {
        elements.insert(GetValueFromProto<GetType>(element));
    }
    auto execute_sub_batch = [](const milvus::ArrayView* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                const std::unordered_set<GetType>& elements) {
        auto executor = [&](size_t i) {
            const auto& array = data[i];

@ -195,12 +200,16 @@ PhyJsonContainsFilterExpr::ExecArrayContains() {
            return false;
        };
        for (int i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            res[i] = executor(i);
        }
    };
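This per-row guard recurs in every execute_sub_batch in this commit, so it is worth spelling out once. A standalone restatement, with std::function and std::vector<bool> as illustrative stand-ins for the Milvus types:

#include <functional>
#include <vector>

void EvalWithNullGuard(const bool* valid_data,
                       int size,
                       std::vector<bool>& res,
                       std::vector<bool>& valid_res,
                       const std::function<bool(int)>& executor) {
    for (int i = 0; i < size; ++i) {
        // A null row yields false and is also flagged invalid, so later
        // NOT/AND/OR stages can still tell "null" apart from a plain false.
        if (valid_data != nullptr && !valid_data[i]) {
            res[i] = valid_res[i] = false;
            continue;
        }
        res[i] = executor(i);
    }
}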

    int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
        execute_sub_batch, std::nullptr_t{}, res, elements);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, elements);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -221,9 +230,11 @@ PhyJsonContainsFilterExpr::ExecJsonContains() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    std::unordered_set<GetType> elements;
    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

@ -231,8 +242,10 @@ PhyJsonContainsFilterExpr::ExecJsonContains() {
        elements.insert(GetValueFromProto<GetType>(element));
    }
    auto execute_sub_batch = [](const milvus::Json* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                const std::string& pointer,
                                const std::unordered_set<GetType>& elements) {
        auto executor = [&](size_t i) {

@ -253,12 +266,16 @@ PhyJsonContainsFilterExpr::ExecJsonContains() {
            return false;
        };
        for (size_t i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            res[i] = executor(i);
        }
    };

    int64_t processed_size = ProcessDataChunks<Json>(
        execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -274,9 +291,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
    std::vector<proto::plan::Array> elements;

@ -285,8 +304,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() {
    }
    auto execute_sub_batch =
        [](const milvus::Json* data,
           const bool* valid_data,
           const int size,
           TargetBitmapView res,
           TargetBitmapView valid_res,
           const std::string& pointer,
           const std::vector<proto::plan::Array>& elements) {
            auto executor = [&](size_t i) -> bool {

@ -316,12 +337,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() {
                return false;
            };
            for (size_t i = 0; i < size; ++i) {
                if (valid_data != nullptr && !valid_data[i]) {
                    res[i] = valid_res[i] = false;
                    continue;
                }
                res[i] = executor(i);
            }
        };

    int64_t processed_size = ProcessDataChunks<milvus::Json>(
        execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -344,9 +369,11 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    std::unordered_set<GetType> elements;
    for (auto const& element : expr_->vals_) {

@ -354,8 +381,10 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() {
    }

    auto execute_sub_batch = [](const milvus::ArrayView* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                const std::unordered_set<GetType>& elements) {
        auto executor = [&](size_t i) {
            std::unordered_set<GetType> tmp_elements(elements);

@ -369,12 +398,16 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() {
            return tmp_elements.size() == 0;
        };
        for (int i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            res[i] = executor(i);
        }
    };

    int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
        execute_sub_batch, std::nullptr_t{}, res, elements);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, elements);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -395,9 +428,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
    std::unordered_set<GetType> elements;

@ -406,8 +441,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() {
    }

    auto execute_sub_batch = [](const milvus::Json* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                const std::string& pointer,
                                const std::unordered_set<GetType>& elements) {
        auto executor = [&](const size_t i) -> bool {

@ -431,12 +468,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() {
            return tmp_elements.size() == 0;
        };
        for (size_t i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            res[i] = executor(i);
        }
    };

    int64_t processed_size = ProcessDataChunks<Json>(
        execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -451,9 +492,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() {
    if (real_batch_size == 0) {
        return nullptr;
    }
    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

@ -467,8 +510,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() {

    auto execute_sub_batch =
        [](const milvus::Json* data,
           const bool* valid_data,
           const int size,
           TargetBitmapView res,
           TargetBitmapView valid_res,
           const std::string& pointer,
           const std::vector<proto::plan::GenericValue>& elements,
           const std::unordered_set<int> elements_index) {

@ -553,6 +598,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() {
                return tmp_elements_index.size() == 0;
            };
            for (size_t i = 0; i < size; ++i) {
                if (valid_data != nullptr && !valid_data[i]) {
                    res[i] = valid_res[i] = false;
                    continue;
                }
                res[i] = executor(i);
            }
        };

@ -560,6 +609,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() {
    int64_t processed_size = ProcessDataChunks<Json>(execute_sub_batch,
                                                     std::nullptr_t{},
                                                     res,
                                                     valid_res,
                                                     pointer,
                                                     elements,
                                                     elements_index);

@ -578,9 +628,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

@ -590,8 +642,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() {
    }
    auto execute_sub_batch =
        [](const milvus::Json* data,
           const bool* valid_data,
           const int size,
           TargetBitmapView res,
           TargetBitmapView valid_res,
           const std::string& pointer,
           const std::vector<proto::plan::Array>& elements) {
            auto executor = [&](const size_t i) {

@ -625,12 +679,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() {
                return exist_elements_index.size() == elements.size();
            };
            for (size_t i = 0; i < size; ++i) {
                if (valid_data != nullptr && !valid_data[i]) {
                    res[i] = valid_res[i] = false;
                    continue;
                }
                res[i] = executor(i);
            }
        };

    int64_t processed_size = ProcessDataChunks<Json>(
        execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -646,9 +704,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

@ -662,8 +722,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() {

    auto execute_sub_batch =
        [](const milvus::Json* data,
           const bool* valid_data,
           const int size,
           TargetBitmapView res,
           TargetBitmapView valid_res,
           const std::string& pointer,
           const std::vector<proto::plan::GenericValue>& elements) {
            auto executor = [&](const size_t i) {

@ -739,12 +801,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() {
                return false;
            };
            for (size_t i = 0; i < size; ++i) {
                if (valid_data != nullptr && !valid_data[i]) {
                    res[i] = valid_res[i] = false;
                    continue;
                }
                res[i] = executor(i);
            }
        };

    int64_t processed_size = ProcessDataChunks<Json>(
        execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -832,12 +898,12 @@ PhyJsonContainsFilterExpr::ExecArrayContainsForIndexSegmentImpl() {
        }
    };
    auto res = ProcessIndexChunks<GetType>(execute_sub_batch, elems);
    AssertInfo(res.size() == real_batch_size,
    AssertInfo(res->size() == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
               res.size(),
               res->size(),
               real_batch_size);
    return std::make_shared<ColumnVector>(std::move(res));
    return res;
}

} //namespace exec

@ -45,6 +45,10 @@ PhyLogicalBinaryExpr::Eval(EvalCtx& context, VectorPtr& result) {
                  "unsupported logical operator: {}",
                  expr_->GetOpTypeString());
    }
    TargetBitmapView lvalid_view(lflat->GetValidRawData(), size);
    TargetBitmapView rvalid_view(rflat->GetValidRawData(), size);
    LogicalElementFunc<LogicalOpType::Or> func;
    func(lvalid_view, rvalid_view, size);
    result = std::move(left);
}
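One plausible reading of the Or merge above, sketched standalone (std::vector<bool> is a stand-in for the bitmap views): since null operands were already coerced to false by the per-row guards upstream, the combined row is treated as non-null whenever either side was valid. This is the simplified "null is false" model, not SQL three-valued logic.

#include <cstddef>
#include <vector>

void MergeValid(std::vector<bool>& lvalid, const std::vector<bool>& rvalid) {
    for (std::size_t i = 0; i < lvalid.size(); ++i) {
        // A row counts as non-null in the combined result if either input
        // carried a real (non-null) value.
        lvalid[i] = lvalid[i] || rvalid[i];
    }
}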

@ -30,6 +30,9 @@ PhyLogicalUnaryExpr::Eval(EvalCtx& context, VectorPtr& result) {
        auto flat_vec = GetColumnVector(result);
        TargetBitmapView data(flat_vec->GetRawData(), flat_vec->size());
        data.flip();
        TargetBitmapView valid_data(flat_vec->GetValidRawData(),
                                    flat_vec->size());
        data &= valid_data;
    }
}
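A standalone version of the NOT handling above (std::vector<bool> stands in for the bitmap views): flip the data bits, then clear the rows whose input was null, so that NOT(null) stays false instead of turning into true.

#include <cstddef>
#include <vector>

void LogicalNot(std::vector<bool>& data, const std::vector<bool>& valid) {
    for (std::size_t i = 0; i < data.size(); ++i) {
        // !data[i] would promote a null (false) row to true; masking with
        // valid[i] keeps every null row false.
        data[i] = !data[i] && valid[i];
    }
}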

@ -15,6 +15,8 @@
// limitations under the License.

#include "TermExpr.h"
#include <memory>
#include <utility>
#include "query/Utils.h"
namespace milvus {
namespace exec {

@ -199,9 +201,12 @@ PhyTermFilterExpr::ExecPkTermImpl() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    // pk valid_bitmap is always all true
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    for (size_t i = 0; i < real_batch_size; ++i) {
        res[i] = cached_bits_[current_data_chunk_pos_++];

@ -241,17 +246,21 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    AssertInfo(expr_->vals_.size() == 1,
               "element length in json array must be one");
    ValueType target_val = GetValueFromProto<ValueType>(expr_->vals_[0]);

    auto execute_sub_batch = [](const ArrayView* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                const ValueType& target_val) {
        auto executor = [&](size_t i) {
            for (int i = 0; i < data[i].length(); i++) {

@ -263,12 +272,16 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
            return false;
        };
        for (int i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            executor(i);
        }
    };

    int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
        execute_sub_batch, std::nullptr_t{}, res, target_val);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -289,9 +302,11 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    int index = -1;
    if (expr_->column_.nested_path_.size() > 0) {

@ -309,12 +324,18 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
    }

    auto execute_sub_batch = [](const ArrayView* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                int index,
                                const std::unordered_set<ValueType>& term_set) {
        for (int i = 0; i < size; ++i) {
            if (index >= data[i].length()) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            if (term_set.empty() || index >= data[i].length()) {
                res[i] = false;
                continue;
            }

@ -324,7 +345,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
    };

    int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
        execute_sub_batch, std::nullptr_t{}, res, index, term_set);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, index, term_set);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -344,9 +365,11 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    AssertInfo(expr_->vals_.size() == 1,
               "element length in json array must be one");

@ -354,8 +377,10 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

    auto execute_sub_batch = [](const Json* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                const std::string pointer,
                                const ValueType& target_val) {
        auto executor = [&](size_t i) {

@ -375,11 +400,15 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
            return false;
        };
        for (size_t i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            res[i] = executor(i);
        }
    };
    int64_t processed_size = ProcessDataChunks<milvus::Json>(
        execute_sub_batch, std::nullptr_t{}, res, pointer, val);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -399,9 +428,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
    std::unordered_set<ValueType> term_set;

@ -416,8 +447,10 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
    }

    auto execute_sub_batch = [](const Json* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                const std::string pointer,
                                const std::unordered_set<ValueType>& terms) {
        auto executor = [&](size_t i) {

@ -439,11 +472,19 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
            return terms.find(ValueType(x.value())) != terms.end();
        };
        for (size_t i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            if (terms.empty()) {
                res[i] = false;
                continue;
            }
            res[i] = executor(i);
        }
    };
    int64_t processed_size = ProcessDataChunks<milvus::Json>(
        execute_sub_batch, std::nullptr_t{}, res, pointer, term_set);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, term_set);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -489,12 +530,12 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() {
        return func(index_ptr, vals.size(), vals.data());
    };
    auto res = ProcessIndexChunks<T>(execute_sub_batch, vals);
    AssertInfo(res.size() == real_batch_size,
    AssertInfo(res->size() == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
               res.size(),
               res->size(),
               real_batch_size);
    return std::make_shared<ColumnVector>(std::move(res));
    return res;
}

template <>

@ -516,7 +557,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
        return std::move(func(index_ptr, vals.size(), (bool*)vals.data()));
    };
    auto res = ProcessIndexChunks<bool>(execute_sub_batch, vals);
    return std::make_shared<ColumnVector>(std::move(res));
    return res;
}

template <typename T>

@ -527,9 +568,11 @@ PhyTermFilterExpr::ExecVisitorImplForData() {
        return nullptr;
    }

    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    std::vector<T> vals;
    for (auto& val : expr_->vals_) {

@ -542,16 +585,22 @@ PhyTermFilterExpr::ExecVisitorImplForData() {
    }
    std::unordered_set<T> vals_set(vals.begin(), vals.end());
    auto execute_sub_batch = [](const T* data,
                                const bool* valid_data,
                                const int size,
                                TargetBitmapView res,
                                TargetBitmapView valid_res,
                                const std::unordered_set<T>& vals) {
        TermElementFuncSet<T> func;
        for (size_t i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            res[i] = func(vals, data[i]);
        }
    };
    int64_t processed_size = ProcessDataChunks<T>(
        execute_sub_batch, std::nullptr_t{}, res, vals_set);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -15,6 +15,7 @@
// limitations under the License.

#include "UnaryExpr.h"
#include <optional>
#include "common/Json.h"

namespace milvus {

@ -260,9 +261,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
    if (real_batch_size == 0) {
        return nullptr;
    }
    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();

    ValueType val = GetValueFromProto<ValueType>(expr_->val_);
    auto op_type = expr_->op_type_;

@ -271,48 +274,50 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
        index = std::stoi(expr_->column_.nested_path_[0]);
    }
    auto execute_sub_batch = [op_type](const milvus::ArrayView* data,
                                       const bool* valid_data,
                                       const int size,
                                       TargetBitmapView res,
                                       TargetBitmapView valid_res,
                                       ValueType val,
                                       int index) {
        switch (op_type) {
            case proto::plan::GreaterThan: {
                UnaryElementFuncForArray<ValueType, proto::plan::GreaterThan>
                    func;
                func(data, size, val, index, res);
                func(data, valid_data, size, val, index, res, valid_res);
                break;
            }
            case proto::plan::GreaterEqual: {
                UnaryElementFuncForArray<ValueType, proto::plan::GreaterEqual>
                    func;
                func(data, size, val, index, res);
                func(data, valid_data, size, val, index, res, valid_res);
                break;
            }
            case proto::plan::LessThan: {
                UnaryElementFuncForArray<ValueType, proto::plan::LessThan> func;
                func(data, size, val, index, res);
                func(data, valid_data, size, val, index, res, valid_res);
                break;
            }
            case proto::plan::LessEqual: {
                UnaryElementFuncForArray<ValueType, proto::plan::LessEqual>
                    func;
                func(data, size, val, index, res);
                func(data, valid_data, size, val, index, res, valid_res);
                break;
            }
            case proto::plan::Equal: {
                UnaryElementFuncForArray<ValueType, proto::plan::Equal> func;
                func(data, size, val, index, res);
                func(data, valid_data, size, val, index, res, valid_res);
                break;
            }
            case proto::plan::NotEqual: {
                UnaryElementFuncForArray<ValueType, proto::plan::NotEqual> func;
                func(data, size, val, index, res);
                func(data, valid_data, size, val, index, res, valid_res);
                break;
            }
            case proto::plan::PrefixMatch: {
                UnaryElementFuncForArray<ValueType, proto::plan::PrefixMatch>
                    func;
                func(data, size, val, index, res);
                func(data, valid_data, size, val, index, res, valid_res);
                break;
            }
            default:

@ -323,7 +328,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
        }
    };
    int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
        execute_sub_batch, std::nullptr_t{}, res, val, index);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -432,14 +437,14 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
        }
        return res;
    });
    AssertInfo(batch_res.size() == real_batch_size,
    AssertInfo(batch_res->size() == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
               batch_res.size(),
               batch_res->size(),
               real_batch_size);

    // return the result.
    return std::make_shared<ColumnVector>(std::move(batch_res));
    return batch_res;
}

template <typename ExprValueType>

@ -455,9 +460,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
    }

    ExprValueType val = GetValueFromProto<ExprValueType>(expr_->val_);
    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();
    auto op_type = expr_->op_type_;
    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

@ -492,12 +499,18 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
    } while (false)

    auto execute_sub_batch = [op_type, pointer](const milvus::Json* data,
                                                const bool* valid_data,
                                                const int size,
                                                TargetBitmapView res,
                                                TargetBitmapView valid_res,
                                                ExprValueType val) {
        switch (op_type) {
            case proto::plan::GreaterThan: {
                for (size_t i = 0; i < size; ++i) {
                    if (valid_data != nullptr && !valid_data[i]) {
                        res[i] = valid_res[i] = false;
                        continue;
                    }
                    if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                        res[i] = false;
                    } else {

@ -508,6 +521,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
            }
            case proto::plan::GreaterEqual: {
                for (size_t i = 0; i < size; ++i) {
                    if (valid_data != nullptr && !valid_data[i]) {
                        res[i] = valid_res[i] = false;
                        continue;
                    }
                    if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                        res[i] = false;
                    } else {

@ -518,6 +535,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
            }
            case proto::plan::LessThan: {
                for (size_t i = 0; i < size; ++i) {
                    if (valid_data != nullptr && !valid_data[i]) {
                        res[i] = valid_res[i] = false;
                        continue;
                    }
                    if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                        res[i] = false;
                    } else {

@ -528,6 +549,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
            }
            case proto::plan::LessEqual: {
                for (size_t i = 0; i < size; ++i) {
                    if (valid_data != nullptr && !valid_data[i]) {
                        res[i] = valid_res[i] = false;
                        continue;
                    }
                    if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                        res[i] = false;
                    } else {

@ -538,6 +563,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
            }
            case proto::plan::Equal: {
                for (size_t i = 0; i < size; ++i) {
                    if (valid_data != nullptr && !valid_data[i]) {
                        res[i] = valid_res[i] = false;
                        continue;
                    }
                    if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                        auto doc = data[i].doc();
                        auto array = doc.at_pointer(pointer).get_array();

@ -554,6 +583,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
            }
            case proto::plan::NotEqual: {
                for (size_t i = 0; i < size; ++i) {
                    if (valid_data != nullptr && !valid_data[i]) {
                        res[i] = valid_res[i] = false;
                        continue;
                    }
                    if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                        auto doc = data[i].doc();
                        auto array = doc.at_pointer(pointer).get_array();

@ -570,6 +603,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
            }
            case proto::plan::PrefixMatch: {
                for (size_t i = 0; i < size; ++i) {
                    if (valid_data != nullptr && !valid_data[i]) {
                        res[i] = valid_res[i] = false;
                        continue;
                    }
                    if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                        res[i] = false;
                    } else {

@ -584,6 +621,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
                auto regex_pattern = translator(val);
                RegexMatcher matcher(regex_pattern);
                for (size_t i = 0; i < size; ++i) {
                    if (valid_data != nullptr && !valid_data[i]) {
                        res[i] = valid_res[i] = false;
                        continue;
                    }
                    if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                        res[i] = false;
                    } else {

@ -601,7 +642,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
        }
    };
    int64_t processed_size = ProcessDataChunks<milvus::Json>(
        execute_sub_batch, std::nullptr_t{}, res, val);
        execute_sub_batch, std::nullptr_t{}, res, valid_res, val);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",

@ -693,12 +734,12 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
    };
    auto val = GetValueFromProto<IndexInnerType>(expr_->val_);
    auto res = ProcessIndexChunks<T>(execute_sub_batch, val);
    AssertInfo(res.size() == real_batch_size,
    AssertInfo(res->size() == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
               res.size(),
               res->size(),
               real_batch_size);
    return std::make_shared<ColumnVector>(std::move(res));
    return res;
}

template <typename T>

@ -720,10 +761,11 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() {
    switch (expr_->op_type_) {
        case proto::plan::GreaterThan:
        case proto::plan::GreaterEqual: {
            auto valid_res = ProcessChunksForValid<T>(CanUseIndex<T>());
            auto res_vec = std::make_shared<ColumnVector>(
                TargetBitmap(batch_size));
            cached_overflow_res_ = res_vec;
                TargetBitmap(batch_size), std::move(valid_res));
            TargetBitmapView res(res_vec->GetRawData(), batch_size);
            cached_overflow_res_ = res_vec;

            if (milvus::query::lt_lb<T>(val)) {
                res.set();

@ -733,10 +775,11 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() {
        }
        case proto::plan::LessThan:
        case proto::plan::LessEqual: {
            auto valid_res = ProcessChunksForValid<T>(CanUseIndex<T>());
            auto res_vec = std::make_shared<ColumnVector>(
                TargetBitmap(batch_size));
            cached_overflow_res_ = res_vec;
                TargetBitmap(batch_size), std::move(valid_res));
            TargetBitmapView res(res_vec->GetRawData(), batch_size);
            cached_overflow_res_ = res_vec;

            if (milvus::query::gt_ub<T>(val)) {
                res.set();

@ -745,19 +788,21 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() {
            return res_vec;
        }
        case proto::plan::Equal: {
            auto valid_res = ProcessChunksForValid<T>(CanUseIndex<T>());
            auto res_vec = std::make_shared<ColumnVector>(
                TargetBitmap(batch_size));
            cached_overflow_res_ = res_vec;
                TargetBitmap(batch_size), std::move(valid_res));
            TargetBitmapView res(res_vec->GetRawData(), batch_size);
            cached_overflow_res_ = res_vec;

            res.reset();
            return res_vec;
        }
        case proto::plan::NotEqual: {
            auto valid_res = ProcessChunksForValid<T>(CanUseIndex<T>());
            auto res_vec = std::make_shared<ColumnVector>(
                TargetBitmap(batch_size));
            cached_overflow_res_ = res_vec;
                TargetBitmap(batch_size), std::move(valid_res));
            TargetBitmapView res(res_vec->GetRawData(), batch_size);
            cached_overflow_res_ = res_vec;

            res.set();
            return res_vec;

@ -788,13 +833,17 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
        return nullptr;
    }
    IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
    auto res_vec = std::make_shared<ColumnVector>(
        TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
    valid_res.set();
    auto expr_type = expr_->op_type_;
    auto execute_sub_batch = [expr_type](const T* data,
                                         const bool* valid_data,
                                         const int size,
                                         TargetBitmapView res,
                                         TargetBitmapView valid_res,
                                         IndexInnerType val) {
        switch (expr_type) {
            case proto::plan::GreaterThan: {

@ -843,6 +892,16 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
                    fmt::format("unsupported operator type for unary expr: {}",
                                expr_type));
        }
        // There is a batch operation in BinaryRangeElementFunc, so we do not
        // divide the data again (splitting may reduce performance when the
        // null distribution is scattered); instead we mask res with
        // valid_data after the batch operation.
        if (valid_data != nullptr) {
            for (int i = 0; i < size; i++) {
                if (!valid_data[i]) {
                    res[i] = valid_res[i] = false;
                }
            }
        }
    };
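The comment above describes a batch-then-mask strategy; a hedged standalone sketch of the same idea, with int data and std::vector<bool> bitmaps as stand-ins: run the vectorized comparison over the whole batch first, then make one cheap pass that zeroes null rows, instead of fragmenting the batch around scattered nulls.

#include <cstddef>
#include <vector>

void CompareThenMask(const int* data,
                     const bool* valid_data,
                     std::size_t size,
                     int val,
                     std::vector<bool>& res,
                     std::vector<bool>& valid_res) {
    for (std::size_t i = 0; i < size; ++i) {
        res[i] = data[i] > val;  // batch compare, easily vectorized
    }
    if (valid_data != nullptr) {
        for (std::size_t i = 0; i < size; ++i) {
            if (!valid_data[i]) {
                res[i] = valid_res[i] = false;  // single null-mask pass
            }
        }
    }
}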

    auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
                                            FieldId field_id,

@ -850,8 +909,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
        return skip_index.CanSkipUnaryRange<T>(
            field_id, chunk_id, expr_type, val);
    };
    int64_t processed_size =
        ProcessDataChunks<T>(execute_sub_batch, skip_index_func, res, val);
    int64_t processed_size = ProcessDataChunks<T>(
        execute_sub_batch, skip_index_func, res, valid_res, val);
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}, related params[active_count:{}, "

@ -881,7 +940,7 @@ PhyUnaryRangeFilterExpr::ExecTextMatch() {
        return index->MatchQuery(query);
    };
    auto res = ProcessTextMatchIndex(func, query);
    return std::make_shared<ColumnVector>(std::move(res));
    return res;
};

} // namespace exec

@ -148,11 +148,17 @@ struct UnaryElementFuncForArray {
                                       ValueType>;
    void
    operator()(const ArrayView* src,
               const bool* valid_data,
               size_t size,
               ValueType val,
               int index,
               TargetBitmapView res) {
               TargetBitmapView res,
               TargetBitmapView valid_res) {
        for (int i = 0; i < size; ++i) {
            if (valid_data != nullptr && !valid_data[i]) {
                res[i] = valid_res[i] = false;
                continue;
            }
            if constexpr (op == proto::plan::OpType::Equal) {
                if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
                    res[i] = src[i].is_same_array(val);

@ -224,7 +230,11 @@ struct UnaryIndexFuncForMatch {
        RegexMatcher matcher(regex_pattern);
        for (int64_t i = 0; i < cnt; i++) {
            auto raw = index->Reverse_Lookup(i);
            res[i] = matcher(raw);
            if (!raw.has_value()) {
                res[i] = false;
                continue;
            }
            res[i] = matcher(raw.value());
        }
        return res;
    }
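With Reverse_Lookup now returning std::optional, callers follow the shape just shown: a null row has no stored value, so the regex match is simply false for it. A standalone illustration (std::function as a stand-in for the matcher type):

#include <functional>
#include <optional>
#include <string>

bool MatchRow(const std::optional<std::string>& raw,
              const std::function<bool(const std::string&)>& matcher) {
    // nullopt means the row was null in the index; it cannot match.
    return raw.has_value() && matcher(raw.value());
}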

@ -68,6 +68,7 @@ PhyFilterBitsNode::GetOutput() {
        operator_context_->get_exec_context(), exprs_.get(), input_.get());

    TargetBitmap bitset;
    TargetBitmap valid_bitset;
    while (num_processed_rows_ < need_process_rows_) {
        exprs_->Eval(0, 1, true, eval_ctx, results_);

@ -79,13 +80,17 @@ PhyFilterBitsNode::GetOutput() {
        auto col_vec_size = col_vec->size();
        TargetBitmapView view(col_vec->GetRawData(), col_vec_size);
        bitset.append(view);
        TargetBitmapView valid_view(col_vec->GetValidRawData(), col_vec_size);
        valid_bitset.append(valid_view);
        num_processed_rows_ += col_vec_size;
    }
    bitset.flip();
    Assert(bitset.size() == need_process_rows_);
    Assert(valid_bitset.size() == need_process_rows_);
    // num_processed_rows_ = need_process_rows_;
    std::vector<VectorPtr> col_res;
    col_res.push_back(std::make_shared<ColumnVector>(std::move(bitset)));
    col_res.push_back(std::make_shared<ColumnVector>(std::move(bitset),
                                                     std::move(valid_bitset)));
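A minimal sketch of the accumulation loop above (std::vector<bool> stands in for TargetBitmap): each Eval round appends both the result view and the validity view, keeping the two bitsets the same length for the size asserts above.

#include <vector>

void AppendBatch(std::vector<bool>& bitset,
                 std::vector<bool>& valid_bitset,
                 const std::vector<bool>& view,
                 const std::vector<bool>& valid_view) {
    // Result bits and validity bits grow in lockstep, batch by batch.
    bitset.insert(bitset.end(), view.begin(), view.end());
    valid_bitset.insert(
        valid_bitset.end(), valid_view.begin(), valid_view.end());
}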
    std::chrono::high_resolution_clock::time_point scalar_end =
        std::chrono::high_resolution_clock::now();
    double scalar_cost =

@ -51,13 +51,15 @@ PhyMvccNode::GetOutput() {
        is_finished_ = true;
        return nullptr;
    }

    auto col_input =
        is_source_node_
            ? std::make_shared<ColumnVector>(TargetBitmap(active_count_))
            : GetColumnVector(input_);
    // the first bitmap is the filtering result and the second is a valid
    // bitmap; if valid_bitset[i] == false, result[i] is null
    auto col_input = is_source_node_ ? std::make_shared<ColumnVector>(
                                           TargetBitmap(active_count_),
                                           TargetBitmap(active_count_))
                                     : GetColumnVector(input_);

    TargetBitmapView data(col_input->GetRawData(), col_input->size());
    // need to expose null?
    segment_->mask_with_timestamps(data, query_timestamp_);
    segment_->mask_with_delete(data, active_count_, query_timestamp_);
    is_finished_ = true;

@ -100,7 +100,9 @@ class SealedDataGetter : public DataGetter<T> {
        }
        return field_data_->operator[](idx);
    } else {
        return (*field_index_).Reverse_Lookup(idx);
        auto raw = (*field_index_).Reverse_Lookup(idx);
        AssertInfo(raw.has_value(), "field data not found");
        return raw.value();
    }
}
};

@ -80,7 +80,7 @@ BitmapIndex<T>::Build(const Config& config) {

template <typename T>
void
BitmapIndex<T>::Build(size_t n, const T* data) {
BitmapIndex<T>::Build(size_t n, const T* data, const bool* valid_data) {
    if (is_built_) {
        return;
    }

@ -89,12 +89,14 @@ BitmapIndex<T>::Build(size_t n, const T* data) {
    }

    total_num_rows_ = n;
    valid_bitset = TargetBitmap(total_num_rows_, false);
    valid_bitset_ = TargetBitmap(total_num_rows_, false);

    T* p = const_cast<T*>(data);
    for (int i = 0; i < n; ++i, ++p) {
        data_[*p].add(i);
        valid_bitset.set(i);
        if (valid_data == nullptr || valid_data[i]) {
            data_[*p].add(i);
            valid_bitset_.set(i);
        }
    }

    if (data_.size() < DEFAULT_BITMAP_INDEX_BUILD_MODE_BOUND) {

@ -120,7 +122,7 @@ BitmapIndex<T>::BuildPrimitiveField(
        if (data->is_valid(i)) {
            auto val = reinterpret_cast<const T*>(data->RawValue(i));
            data_[*val].add(offset);
            valid_bitset.set(offset);
            valid_bitset_.set(offset);
        }
        offset++;
    }

@ -139,7 +141,7 @@ BitmapIndex<T>::BuildWithFieldData(
        PanicInfo(DataIsEmpty, "scalar bitmap index can not build null values");
    }
    total_num_rows_ = total_num_rows;
    valid_bitset = TargetBitmap(total_num_rows_, false);
    valid_bitset_ = TargetBitmap(total_num_rows_, false);

    switch (schema_.data_type()) {
        case proto::schema::DataType::Bool:

@ -184,7 +186,7 @@ BitmapIndex<T>::BuildArrayField(const std::vector<FieldDataPtr>& field_datas) {
                auto val = array->template get_data<T>(j);
                data_[val].add(offset);
            }
            valid_bitset.set(offset);
            valid_bitset_.set(offset);
        }
        offset++;
    }

@ -359,7 +361,7 @@ BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
            data_[key] = value;
        }
        for (const auto& v : value) {
            valid_bitset.set(v);
            valid_bitset_.set(v);
        }
    }
}

@ -422,7 +424,7 @@ BitmapIndex<std::string>::DeserializeIndexData(const uint8_t* data_ptr,
            data_[key] = value;
        }
        for (const auto& v : value) {
            valid_bitset.set(v);
            valid_bitset_.set(v);
        }
    }
}

@ -516,7 +518,7 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
                                    index_meta_buffer->size);
    auto index_length = index_meta.first;
    total_num_rows_ = index_meta.second;
    valid_bitset = TargetBitmap(total_num_rows_, false);
    valid_bitset_ = TargetBitmap(total_num_rows_, false);

    auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA);

@ -645,7 +647,7 @@ BitmapIndex<T>::NotIn(const size_t n, const T* values) {
            }
        }
        // NotIn(null) and In(null) are both false; mask with the IsNotNull bitmap
        res &= valid_bitset;
        res &= valid_bitset_;
        return res;
    } else {
        TargetBitmap res(total_num_rows_, false);

@ -657,7 +659,7 @@ BitmapIndex<T>::NotIn(const size_t n, const T* values) {
        }
        res.flip();
        // NotIn(null) and In(null) are both false; mask with the IsNotNull bitmap
        res &= valid_bitset;
        res &= valid_bitset_;
        return res;
    }
}

@ -667,7 +669,7 @@ const TargetBitmap
BitmapIndex<T>::IsNull() {
    AssertInfo(is_built_, "index has not been built");
    TargetBitmap res(total_num_rows_, true);
    res &= valid_bitset;
    res &= valid_bitset_;
    res.flip();
    return res;
}

@ -677,7 +679,7 @@ const TargetBitmap
BitmapIndex<T>::IsNotNull() {
    AssertInfo(is_built_, "index has not been built");
    TargetBitmap res(total_num_rows_, true);
    res &= valid_bitset;
    res &= valid_bitset_;
    return res;
}

@ -1086,11 +1088,15 @@ BitmapIndex<T>::Reverse_Lookup_InCache(size_t idx) const {
}

template <typename T>
T
std::optional<T>
BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
    AssertInfo(is_built_, "index has not been built");
    AssertInfo(idx < total_num_rows_, "out of range of total count");

    if (!valid_bitset_[idx]) {
        return std::nullopt;
    }

    if (use_offset_cache_) {
        return Reverse_Lookup_InCache(idx);
    }

@ -1125,6 +1131,7 @@ BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
        fmt::format(
            "scalar bitmap index can not lookup target value of index {}",
            idx));
    return std::nullopt;
}
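The new lookup contract is easy to state in isolation. A hedged sketch, with generic template parameters standing in for the concrete index internals: consult the validity bitset first, so a null offset short-circuits to nullopt before any cache lookup or bitmap scan happens.

#include <cstddef>
#include <optional>

template <typename T, typename Bitset, typename ScanFn>
std::optional<T>
ReverseLookup(const Bitset& valid, std::size_t idx, ScanFn scan) {
    if (!valid[idx]) {
        return std::nullopt;  // null rows have no stored value
    }
    return scan(idx);  // e.g. cached-offset path or full bitmap scan
}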
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -77,7 +77,7 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
}
|
||||
|
||||
void
|
||||
Build(size_t n, const T* values) override;
|
||||
Build(size_t n, const T* values, const bool* valid_data = nullptr) override;
|
||||
|
||||
void
|
||||
Build(const Config& config = {}) override;
|
||||
|
@ -106,7 +106,7 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
T upper_bound_value,
|
||||
bool ub_inclusive) override;
|
||||
|
||||
T
|
||||
std::optional<T>
|
||||
Reverse_Lookup(size_t offset) const override;
|
||||
|
||||
int64_t
|
||||
|
@ -267,7 +267,7 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||
|
||||
// generate valid_bitset to speed up NotIn and IsNull and IsNotNull operate
|
||||
TargetBitmap valid_bitset;
|
||||
TargetBitmap valid_bitset_;
|
||||
};
|
||||
|
||||
} // namespace index
|
||||
|
|
|
@ -67,10 +67,12 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
}
|
||||
|
||||
void
|
||||
Build(size_t n, const T* values) override {
|
||||
Build(size_t n,
|
||||
const T* values,
|
||||
const bool* valid_data = nullptr) override {
|
||||
SelectIndexBuildType(n, values);
|
||||
auto index = GetInternalIndex();
|
||||
index->Build(n, values);
|
||||
index->Build(n, values, valid_data);
|
||||
is_built_ = true;
|
||||
}
|
||||
|
||||
|
@ -133,7 +135,7 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||
lower_bound_value, lb_inclusive, upper_bound_value, ub_inclusive);
|
||||
}
|
||||
|
||||
T
|
||||
std::optional<T>
|
||||
Reverse_Lookup(size_t offset) const override {
|
||||
return internal_index_->Reverse_Lookup(offset);
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
* deprecated, only used in small chunk index.
|
||||
*/
|
||||
void
|
||||
Build(size_t n, const T* values) override {
|
||||
Build(size_t n, const T* values, const bool* valid_data) override {
|
||||
PanicInfo(ErrorCode::NotImplemented, "Build should not be called");
|
||||
}
|
||||
|
||||
|
@ -136,7 +136,7 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||
return false;
|
||||
}
|
||||
|
||||
T
|
||||
std::optional<T>
|
||||
Reverse_Lookup(size_t offset) const override {
|
||||
PanicInfo(ErrorCode::NotImplemented,
|
||||
"Reverse_Lookup should not be handled by inverted index");
|
||||
|
|
|
@ -80,7 +80,7 @@ class ScalarIndex : public IndexBase {
|
|||
GetIndexType() const = 0;
|
||||
|
||||
virtual void
|
||||
Build(size_t n, const T* values) = 0;
|
||||
Build(size_t n, const T* values, const bool* valid_data = nullptr) = 0;
|
||||
|
||||
virtual const TargetBitmap
|
||||
In(size_t n, const T* values) = 0;
|
||||
|
@ -117,7 +117,7 @@ class ScalarIndex : public IndexBase {
|
|||
T upper_bound_value,
|
||||
bool ub_inclusive) = 0;
|
||||
|
||||
virtual T
|
||||
virtual std::optional<T>
|
||||
Reverse_Lookup(size_t offset) const = 0;
|
||||
|
||||
virtual const TargetBitmap
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <pb/schema.pb.h>
|
||||
#include <vector>
|
||||
|
@ -61,7 +62,7 @@ ScalarIndexSort<T>::Build(const Config& config) {
|
|||
|
||||
template <typename T>
|
||||
void
|
||||
ScalarIndexSort<T>::Build(size_t n, const T* values) {
|
||||
ScalarIndexSort<T>::Build(size_t n, const T* values, const bool* valid_data) {
|
||||
if (is_built_)
|
||||
return;
|
||||
if (n == 0) {
|
||||
|
@ -69,13 +70,17 @@ ScalarIndexSort<T>::Build(size_t n, const T* values) {
|
|||
}
|
||||
data_.reserve(n);
|
||||
total_num_rows_ = n;
|
||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
||||
valid_bitset_ = TargetBitmap(total_num_rows_, false);
|
||||
idx_to_offsets_.resize(n);
|
||||
|
||||
T* p = const_cast<T*>(values);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
data_.emplace_back(IndexStructure(*p++, i));
|
||||
valid_bitset.set(i);
|
||||
for (size_t i = 0; i < n; ++i, ++p) {
|
||||
if (!valid_data || valid_data[i]) {
|
||||
data_.emplace_back(IndexStructure(*p, i));
|
||||
valid_bitset_.set(i);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(data_.begin(), data_.end());
|
||||
for (size_t i = 0; i < data_.size(); ++i) {
|
||||
idx_to_offsets_[data_[i].idx_] = i;
|
||||
|

@ -97,7 +102,7 @@ ScalarIndexSort<T>::BuildWithFieldData(
    }

    data_.reserve(length);
    valid_bitset = TargetBitmap(total_num_rows_, false);
    valid_bitset_ = TargetBitmap(total_num_rows_, false);
    int64_t offset = 0;
    for (const auto& data : field_datas) {
        auto slice_num = data->get_num_rows();

@ -105,7 +110,7 @@ ScalarIndexSort<T>::BuildWithFieldData(
            if (data->is_valid(i)) {
                auto value = reinterpret_cast<const T*>(data->RawValue(i));
                data_.emplace_back(IndexStructure(*value, offset));
                valid_bitset.set(offset);
                valid_bitset_.set(offset);
            }
            offset++;
        }

@ -175,11 +180,11 @@ ScalarIndexSort<T>::LoadWithoutAssemble(const BinarySet& index_binary,
           index_num_rows->data.get(),
           (size_t)index_num_rows->size);
    idx_to_offsets_.resize(total_num_rows_);
    valid_bitset = TargetBitmap(total_num_rows_, false);
    valid_bitset_ = TargetBitmap(total_num_rows_, false);
    memcpy(data_.data(), index_data->data.get(), (size_t)index_data->size);
    for (size_t i = 0; i < data_.size(); ++i) {
        idx_to_offsets_[data_[i].idx_] = i;
        valid_bitset.set(data_[i].idx_);
        valid_bitset_.set(data_[i].idx_);
    }

    is_built_ = true;

@ -256,7 +261,7 @@ ScalarIndexSort<T>::NotIn(const size_t n, const T* values) {
        }
    }
    // NotIn(null) and In(null) are both false, so mask with the IsNotNull bitmap
    bitset &= valid_bitset;
    bitset &= valid_bitset_;
    return bitset;
}
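
The masking step matters because NotIn is computed by negating the In hits, and a plain negation would turn every null row into a NotIn match. ANDing with the valid bitmap keeps both In(null) and NotIn(null) false. A toy illustration with std::bitset (not the patch's TargetBitmap):

#include <bitset>
#include <iostream>

int main() {
    // 4 rows; row 2 is null (its valid bit is unset).
    std::bitset<4> in_hits("0001");  // rows matching In(...); null rows never set
    std::bitset<4> valid("1011");    // 1 = row has a value

    std::bitset<4> not_in = ~in_hits;  // a plain NOT would claim the null row
    not_in &= valid;                   // mask so NotIn(null) is also false

    std::cout << "In:    " << in_hits << "\n"
              << "NotIn: " << not_in << "\n";  // row 2 is 0 in both bitmaps
    return 0;
}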

@ -265,7 +270,7 @@ const TargetBitmap
ScalarIndexSort<T>::IsNull() {
    AssertInfo(is_built_, "index has not been built");
    TargetBitmap bitset(total_num_rows_, true);
    bitset &= valid_bitset;
    bitset &= valid_bitset_;
    bitset.flip();
    return bitset;
}

@ -275,7 +280,7 @@ const TargetBitmap
ScalarIndexSort<T>::IsNotNull() {
    AssertInfo(is_built_, "index has not been built");
    TargetBitmap bitset(total_num_rows_, true);
    bitset &= valid_bitset;
    bitset &= valid_bitset_;
    return bitset;
}

@ -355,11 +360,14 @@ ScalarIndexSort<T>::Range(T lower_bound_value,
}

template <typename T>
T
std::optional<T>
ScalarIndexSort<T>::Reverse_Lookup(size_t idx) const {
    AssertInfo(idx < idx_to_offsets_.size(), "out of range of total count");
    AssertInfo(is_built_, "index has not been built");

    if (!valid_bitset_[idx]) {
        return std::nullopt;
    }
    auto offset = idx_to_offsets_[idx];
    return data_[offset].a_;
}
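
The validity check has to come before the idx_to_offsets_ indirection: for a null row the mapping entry was never written and still holds its zero-initialized default, so following it would silently return the value stored at sorted position 0. A compact sketch of the two-level lookup under that assumption:

#include <cstdio>
#include <optional>
#include <string>
#include <utility>
#include <vector>

int main() {
    // Three rows with values {30, null, 10}; only valid rows enter the sorted data.
    std::vector<std::pair<int, size_t>> sorted = {{10, 2}, {30, 0}};  // (value, row)
    std::vector<size_t> idx_to_offsets(3, 0);  // row -> position in `sorted`
    std::vector<bool> valid = {true, false, true};
    for (size_t pos = 0; pos < sorted.size(); ++pos) {
        idx_to_offsets[sorted[pos].second] = pos;
    }

    auto reverse_lookup = [&](size_t row) -> std::optional<int> {
        if (!valid[row]) {
            return std::nullopt;  // row 1 still maps to offset 0 -- never follow it
        }
        return sorted[idx_to_offsets[row]].first;
    };

    for (size_t row = 0; row < 3; ++row) {
        auto v = reverse_lookup(row);
        std::printf("row %zu -> %s\n",
                    row,
                    v ? std::to_string(*v).c_str() : "null");
    }
    return 0;
}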

@ -56,7 +56,7 @@ class ScalarIndexSort : public ScalarIndex<T> {
    }

    void
    Build(size_t n, const T* values) override;
    Build(size_t n, const T* values, const bool* valid_data = nullptr) override;

    void
    Build(const Config& config = {}) override;

@ -82,7 +82,7 @@ class ScalarIndexSort : public ScalarIndex<T> {
          T upper_bound_value,
          bool ub_inclusive) override;

    T
    std::optional<T>
    Reverse_Lookup(size_t offset) const override;

    int64_t

@ -127,8 +127,8 @@ class ScalarIndexSort : public ScalarIndex<T> {
    std::vector<IndexStructure<T>> data_;
    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
    size_t total_num_rows_{0};
    // valid_bitset speeds up the NotIn, IsNull, and IsNotNull operations
    TargetBitmap valid_bitset;
    // valid_bitset_ speeds up the NotIn, IsNull, and IsNotNull operations
    TargetBitmap valid_bitset_;
};

template <typename T>

@ -19,6 +19,7 @@
#include <boost/uuid/uuid_generators.hpp>
#include <cstring>
#include <memory>
#include <optional>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>

@ -118,7 +119,9 @@ StringIndexMarisa::BuildWithFieldData(
}

void
StringIndexMarisa::Build(size_t n, const std::string* values) {
StringIndexMarisa::Build(size_t n,
                         const std::string* values,
                         const bool* valid_data) {
    if (built_) {
        PanicInfo(IndexAlreadyBuild, "index has been built");
    }

@ -127,12 +130,14 @@ StringIndexMarisa::Build(size_t n, const std::string* values) {
    {
        // fill key set.
        for (size_t i = 0; i < n; i++) {
            keyset.push_back(values[i].c_str());
            if (valid_data == nullptr || valid_data[i]) {
                keyset.push_back(values[i].c_str());
            }
        }
    }

    trie_.build(keyset, MARISA_LABEL_ORDER);
    fill_str_ids(n, values);
    fill_str_ids(n, values, valid_data);
    fill_offsets();

    built_ = true;

@ -213,7 +218,7 @@ StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set,

    auto str_ids = set.GetByName(MARISA_STR_IDS);
    auto str_ids_len = str_ids->size;
    str_ids_.resize(str_ids_len / sizeof(size_t));
    str_ids_.resize(str_ids_len / sizeof(size_t), MARISA_NULL_KEY_ID);
    memcpy(str_ids_.data(), str_ids->data.get(), str_ids_len);

    fill_offsets();

@ -491,9 +496,14 @@ StringIndexMarisa::PrefixMatch(std::string_view prefix) {
}

void
StringIndexMarisa::fill_str_ids(size_t n, const std::string* values) {
    str_ids_.resize(n);
StringIndexMarisa::fill_str_ids(size_t n,
                                const std::string* values,
                                const bool* valid_data) {
    str_ids_.resize(n, MARISA_NULL_KEY_ID);
    for (size_t i = 0; i < n; i++) {
        if (valid_data != nullptr && !valid_data[i]) {
            continue;
        }
        auto str = values[i];
        auto str_id = lookup(str);
        AssertInfo(valid_str_id(str_id), "invalid marisa key");
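
Since a marisa trie has no notion of a missing key, null rows are tracked out of band: str_ids_ is pre-filled with a sentinel and only valid rows are overwritten with a real key id. A small sketch of that sentinel pattern (the constant's value, -1, is an assumption for illustration):

#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative sentinel; signed so `id < 0` can flag "no key for this row".
constexpr int64_t kNullKeyId = -1;

int main() {
    const size_t n = 4;
    const bool valid[] = {true, false, true, false};

    // Pre-fill every slot with the sentinel, then overwrite valid rows only.
    std::vector<int64_t> str_ids(n, kNullKeyId);
    int64_t next_id = 0;
    for (size_t i = 0; i < n; ++i) {
        if (valid[i]) {
            str_ids[i] = next_id++;  // would be a trie lookup in the real index
        }
    }

    for (size_t i = 0; i < n; ++i) {
        if (str_ids[i] < 0) {
            std::cout << "row " << i << ": null\n";
        } else {
            std::cout << "row " << i << ": key id " << str_ids[i] << "\n";
        }
    }
    return 0;
}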

@ -534,11 +544,13 @@ StringIndexMarisa::prefix_match(const std::string_view prefix) {
    }
    return ret;
}

std::string
std::optional<std::string>
StringIndexMarisa::Reverse_Lookup(size_t offset) const {
    AssertInfo(offset < str_ids_.size(), "out of range of total count");
    marisa::Agent agent;
    if (str_ids_[offset] < 0) {
        return std::nullopt;
    }
    agent.set_query(str_ids_[offset]);
    trie_.reverse_lookup(agent);
    return std::string(agent.key().ptr(), agent.key().length());

@ -55,7 +55,9 @@ class StringIndexMarisa : public StringIndex {
    }

    void
    Build(size_t n, const std::string* values) override;
    Build(size_t n,
          const std::string* values,
          const bool* valid_data = nullptr) override;

    void
    Build(const Config& config = {}) override;

@ -87,7 +89,7 @@ class StringIndexMarisa : public StringIndex {
    const TargetBitmap
    PrefixMatch(const std::string_view prefix) override;

    std::string
    std::optional<std::string>
    Reverse_Lookup(size_t offset) const override;

    BinarySet

@ -100,7 +102,7 @@ class StringIndexMarisa : public StringIndex {

 private:
    void
    fill_str_ids(size_t n, const std::string* values);
    fill_str_ids(size_t n, const std::string* values, const bool* valid_data);

    void
    fill_offsets();

@ -122,7 +124,7 @@ class StringIndexMarisa : public StringIndex {
 private:
    Config config_;
    marisa::Trie trie_;
    std::vector<size_t> str_ids_;   // used to retrieve.
    std::vector<int64_t> str_ids_;  // used to retrieve; negative means null row.
    std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
    bool built_ = false;
    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;

@ -26,7 +26,7 @@ template <typename T>
inline index::ScalarIndexPtr<T>
generate_scalar_index(Span<T> data) {
    auto indexing = std::make_unique<index::ScalarIndexSort<T>>();
    indexing->Build(data.row_count(), data.data());
    indexing->Build(data.row_count(), data.data(), data.valid_data());
    return indexing;
}

@ -34,7 +34,7 @@ template <>
inline index::ScalarIndexPtr<std::string>
generate_scalar_index(Span<std::string> data) {
    auto indexing = index::CreateStringIndexSort();
    indexing->Build(data.row_count(), data.data());
    indexing->Build(data.row_count(), data.data(), data.valid_data());
    return indexing;
}

@ -196,8 +196,9 @@ ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
            if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
                int64_index->HasRawData()) {
                for (int i = 0; i < row_count; ++i) {
                    insert_record_.insert_pk(int64_index->Reverse_Lookup(i),
                                             i);
                    auto raw = int64_index->Reverse_Lookup(i);
                    AssertInfo(raw.has_value(), "pk not found");
                    insert_record_.insert_pk(raw.value(), i);
                }
                insert_record_.seal_pks();
            }

@ -210,8 +211,9 @@ ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
            if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
                string_index->HasRawData()) {
                for (int i = 0; i < row_count; ++i) {
                    insert_record_.insert_pk(
                        string_index->Reverse_Lookup(i), i);
                    auto raw = string_index->Reverse_Lookup(i);
                    AssertInfo(raw.has_value(), "pk not found");
                    insert_record_.insert_pk(raw.value(), i);
                }
                insert_record_.seal_pks();
            }

@ -1630,7 +1632,11 @@ ChunkedSegmentSealedImpl::CreateTextIndex(FieldId field_id) {
                   "converted to string index");
        auto n = impl->Size();
        for (size_t i = 0; i < n; i++) {
            index->AddText(impl->Reverse_Lookup(i), i);
            auto raw = impl->Reverse_Lookup(i);
            if (!raw.has_value()) {
                continue;
            }
            index->AddText(raw.value(), i);
        }
    }
}
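
The segment-loading code above shows the two reasonable policies for consuming the optional: primary-key backfill asserts, because a PK column can never be null, while text-index construction simply skips null rows. A condensed sketch of both policies (simplified types, not the patch's classes):

#include <cassert>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

int main() {
    std::vector<std::optional<std::string>> rows = {"alpha", std::nullopt, "gamma"};

    // Policy 1: assert -- appropriate when the schema forbids nulls (e.g. PKs).
    auto must_have = [](const std::optional<std::string>& v) -> const std::string& {
        assert(v.has_value() && "pk not found");
        return v.value();
    };
    std::cout << "first pk: " << must_have(rows[0]) << "\n";

    // Policy 2: skip -- appropriate for nullable fields feeding a text index.
    for (size_t i = 0; i < rows.size(); ++i) {
        if (!rows[i].has_value()) {
            continue;  // null rows contribute nothing to the index
        }
        std::cout << "indexing row " << i << ": " << *rows[i] << "\n";
    }
    return 0;
}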

@ -299,6 +299,7 @@ ScalarFieldIndexing<T>::BuildIndexRange(int64_t ack_beg,
    for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
        auto chunk_data = source->get_chunk_data(chunk_id);
        // build index for chunk
        // seemingly unused path, so valid_data is not passed here
        // TODO
        if constexpr (std::is_same_v<T, std::string>) {
            auto indexing = index::CreateStringIndexSort();

@ -198,8 +198,9 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
            if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
                int64_index->HasRawData()) {
                for (int i = 0; i < row_count; ++i) {
                    insert_record_.insert_pk(int64_index->Reverse_Lookup(i),
                                             i);
                    auto raw = int64_index->Reverse_Lookup(i);
                    AssertInfo(raw.has_value(), "Primary key not found");
                    insert_record_.insert_pk(raw.value(), i);
                }
                insert_record_.seal_pks();
            }

@ -212,8 +213,9 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
            if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
                string_index->HasRawData()) {
                for (int i = 0; i < row_count; ++i) {
                    insert_record_.insert_pk(
                        string_index->Reverse_Lookup(i), i);
                    auto raw = string_index->Reverse_Lookup(i);
                    AssertInfo(raw.has_value(), "Primary key not found");
                    insert_record_.insert_pk(raw.value(), i);
                }
                insert_record_.seal_pks();
            }

@ -2108,7 +2110,11 @@ SegmentSealedImpl::CreateTextIndex(FieldId field_id) {
                   "converted to string index");
        auto n = impl->Size();
        for (size_t i = 0; i < n; i++) {
            index->AddText(impl->Reverse_Lookup(i), i);
            auto raw = impl->Reverse_Lookup(i);
            if (!raw.has_value()) {
                continue;
            }
            index->AddText(raw.value(), i);
        }
    }
}

@ -683,6 +683,11 @@ ReverseDataFromIndex(const index::IndexBase* index,
    data_array->set_field_id(field_meta.get_id().get());
    data_array->set_type(static_cast<milvus::proto::schema::DataType>(
        field_meta.get_data_type()));
    auto nullable = field_meta.is_nullable();
    std::vector<bool> valid_data;
    if (nullable) {
        valid_data.resize(count);
    }

    auto scalar_array = data_array->mutable_scalars();
    switch (data_type) {

@ -691,7 +696,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<bool> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
                auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
                // no value implies nullable is true; no need to re-check nullable here
                if (!raw.has_value()) {
                    valid_data[i] = false;
                    continue;
                }
                if (nullable) {
                    valid_data[i] = true;
                }
                raw_data[i] = raw.value();
            }
            auto obj = scalar_array->mutable_bool_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};

@ -702,7 +716,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<int8_t> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
                auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
                // no value implies nullable is true; no need to re-check nullable here
                if (!raw.has_value()) {
                    valid_data[i] = false;
                    continue;
                }
                if (nullable) {
                    valid_data[i] = true;
                }
                raw_data[i] = raw.value();
            }
            auto obj = scalar_array->mutable_int_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};

@ -713,7 +736,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<int16_t> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
                auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
                // no value implies nullable is true; no need to re-check nullable here
                if (!raw.has_value()) {
                    valid_data[i] = false;
                    continue;
                }
                if (nullable) {
                    valid_data[i] = true;
                }
                raw_data[i] = raw.value();
            }
            auto obj = scalar_array->mutable_int_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};

@ -724,7 +756,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<int32_t> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
                auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
                // no value implies nullable is true; no need to re-check nullable here
                if (!raw.has_value()) {
                    valid_data[i] = false;
                    continue;
                }
                if (nullable) {
                    valid_data[i] = true;
                }
                raw_data[i] = raw.value();
            }
            auto obj = scalar_array->mutable_int_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};

@ -735,7 +776,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<int64_t> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
                auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
                // no value implies nullable is true; no need to re-check nullable here
                if (!raw.has_value()) {
                    valid_data[i] = false;
                    continue;
                }
                if (nullable) {
                    valid_data[i] = true;
                }
                raw_data[i] = raw.value();
            }
            auto obj = scalar_array->mutable_long_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};

@ -746,7 +796,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<float> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
                auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
                // no value implies nullable is true; no need to re-check nullable here
                if (!raw.has_value()) {
                    valid_data[i] = false;
                    continue;
                }
                if (nullable) {
                    valid_data[i] = true;
                }
                raw_data[i] = raw.value();
            }
            auto obj = scalar_array->mutable_float_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};

@ -757,7 +816,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<double> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
                auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
                // no value implies nullable is true; no need to re-check nullable here
                if (!raw.has_value()) {
                    valid_data[i] = false;
                    continue;
                }
                if (nullable) {
                    valid_data[i] = true;
                }
                raw_data[i] = raw.value();
            }
            auto obj = scalar_array->mutable_double_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};

@ -768,7 +836,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<std::string> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
                auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
                // no value implies nullable is true; no need to re-check nullable here
                if (!raw.has_value()) {
                    valid_data[i] = false;
                    continue;
                }
                if (nullable) {
                    valid_data[i] = true;
                }
                raw_data[i] = raw.value();
            }
            auto obj = scalar_array->mutable_string_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};

@ -780,6 +857,11 @@ ReverseDataFromIndex(const index::IndexBase* index,
        }
    }

    if (nullable) {
        *(data_array->mutable_valid_data()) = {valid_data.begin(),
                                               valid_data.end()};
    }

    return data_array;
}
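
All eight scalar branches of ReverseDataFromIndex repeat the same lookup-or-mark-invalid loop; only the element type and the protobuf setter differ. A hedged sketch of how the shared loop could be factored into one template helper (the optional-returning lookup mirrors the patch; everything else is illustrative):

#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Fill raw values and a validity mask from an optional-returning lookup.
// Rows without a value keep a default-constructed placeholder in `raw`.
template <typename T, typename Lookup>
void FillFromIndex(const std::vector<int64_t>& seg_offsets,
                   Lookup reverse_lookup,
                   bool nullable,
                   std::vector<T>& raw,
                   std::vector<bool>& valid) {
    const size_t count = seg_offsets.size();
    raw.assign(count, T{});
    if (nullable) valid.assign(count, false);
    for (size_t i = 0; i < count; ++i) {
        std::optional<T> v = reverse_lookup(seg_offsets[i]);
        if (!v.has_value()) continue;  // leave valid[i] == false
        if (nullable) valid[i] = true;
        raw[i] = *v;
    }
}

int main() {
    // Toy "index": even offsets have values, odd offsets are null.
    auto lookup = [](int64_t off) -> std::optional<int> {
        if (off % 2 != 0) return std::nullopt;
        return static_cast<int>(off * 10);
    };
    std::vector<int64_t> offsets = {0, 1, 2, 3};
    std::vector<int> raw;
    std::vector<bool> valid;
    FillFromIndex<int>(offsets, lookup, /*nullable=*/true, raw, valid);
    for (size_t i = 0; i < raw.size(); ++i) {
        std::cout << i << ": "
                  << (valid[i] ? std::to_string(raw[i]) : "null") << "\n";
    }
    return 0;
}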

File diff suppressed because it is too large

@ -166,7 +166,8 @@ GenAlwaysFalseExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
}

auto
GenAlwaysTrueExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
GenAlwaysTrueExprIfValid(const FieldMeta& fvec_meta,
                         const FieldMeta& str_meta) {
    auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
    auto not_expr = GenNotExpr();
    not_expr->set_allocated_child(always_false_expr);

@ -196,7 +197,7 @@ GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {

auto
GenAlwaysTruePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
    auto always_true_expr = GenAlwaysTrueExpr(fvec_meta, str_meta);
    auto always_true_expr = GenAlwaysTrueExprIfValid(fvec_meta, str_meta);
    proto::plan::VectorType vector_type;
    if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT) {
        vector_type = proto::plan::VectorType::FloatVector;

@ -299,6 +300,82 @@ TEST(StringExpr, Term) {
    }
}

TEST(StringExpr, TermNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto vec_2k_3k = []() -> std::vector<std::string> {
        std::vector<std::string> ret;
        for (int i = 2000; i < 3000; i++) {
            ret.push_back(std::to_string(i));
        }
        return ret;
    }();

    std::map<int, std::vector<std::string>> terms = {
        {0, {"2000", "3000"}},
        {1, {"2000"}},
        {2, {"3000"}},
        {3, {}},
        {4, {vec_2k_3k}},
    };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    std::vector<std::string> str_col;
    FixedVector<bool> valid_data;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = FIELD_DATA(new_str_col, string).begin();
        auto end = FIELD_DATA(new_str_col, string).end();
        str_col.insert(str_col.end(), begin, end);
        auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    for (const auto& [_, term] : terms) {
        auto plan_proto = GenTermPlan(fvec_meta, str_meta, term);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        EXPECT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                ASSERT_EQ(ans, false);
                continue;
            }
            auto val = str_col[i];
            auto ref = std::find(term.begin(), term.end(), val) != term.end();
            ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
        }
    }
}

TEST(StringExpr, Compare) {
    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));

@ -395,6 +472,7 @@ TEST(StringExpr, Compare) {
    for (const auto& [op, ref_func] : testcases) {
        auto plan_proto = gen_compare_plan(op);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],

@ -414,6 +492,269 @@ TEST(StringExpr, Compare) {
    }
}

TEST(StringExpr, CompareNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));
    const auto& another_str_meta = schema->operator[](FieldName("another_str"));

    auto gen_compare_plan =
        [&, fvec_meta, str_meta, another_str_meta](
            proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
        auto str_col_info =
            test::GenColumnInfo(str_meta.get_id().get(),
                                proto::schema::DataType::VarChar,
                                false,
                                false);
        auto another_str_col_info =
            test::GenColumnInfo(another_str_meta.get_id().get(),
                                proto::schema::DataType::VarChar,
                                false,
                                false);

        auto compare_expr = GenCompareExpr(op);
        compare_expr->set_allocated_left_column_info(str_col_info);
        compare_expr->set_allocated_right_column_info(another_str_col_info);

        auto expr = test::GenExpr().release();
        expr->set_allocated_compare_expr(compare_expr);

        proto::plan::VectorType vector_type;
        if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT) {
            vector_type = proto::plan::VectorType::FloatVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) {
            vector_type = proto::plan::VectorType::BinaryVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
            vector_type = proto::plan::VectorType::Float16Vector;
        }
        auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    std::vector<std::tuple<proto::plan::OpType,
                           std::function<bool(std::string&, std::string&)>>>
        testcases{
            {proto::plan::OpType::GreaterThan,
             [](std::string& v1, std::string& v2) { return v1 > v2; }},
            {proto::plan::OpType::GreaterEqual,
             [](std::string& v1, std::string& v2) { return v1 >= v2; }},
            {proto::plan::OpType::LessThan,
             [](std::string& v1, std::string& v2) { return v1 < v2; }},
            {proto::plan::OpType::LessEqual,
             [](std::string& v1, std::string& v2) { return v1 <= v2; }},
            {proto::plan::OpType::Equal,
             [](std::string& v1, std::string& v2) { return v1 == v2; }},
            {proto::plan::OpType::NotEqual,
             [](std::string& v1, std::string& v2) { return v1 != v2; }},
            {proto::plan::OpType::PrefixMatch,
             [](std::string& v1, std::string& v2) {
                 return PrefixMatch(v1, v2);
             }},
        };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    std::vector<std::string> str_col;
    std::vector<std::string> another_str_col;
    FixedVector<bool> valid_data;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        auto reserve_col = [&, raw_data](const FieldMeta& field_meta,
                                         std::vector<std::string>& str_col) {
            auto new_str_col = raw_data.get_col(field_meta.get_id());
            auto begin = FIELD_DATA(new_str_col, string).begin();
            auto end = FIELD_DATA(new_str_col, string).end();
            str_col.insert(str_col.end(), begin, end);
        };

        auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());

        reserve_col(str_meta, str_col);
        reserve_col(another_str_meta, another_str_col);

        {
            seg->PreInsert(N);
            seg->Insert(iter * N,
                        N,
                        raw_data.row_ids_.data(),
                        raw_data.timestamps_.data(),
                        raw_data.raw_);
        }
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    for (const auto& [op, ref_func] : testcases) {
        auto plan_proto = gen_compare_plan(op);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        EXPECT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                ASSERT_EQ(ans, false);
                continue;
            }
            auto val = str_col[i];
            auto another_val = another_str_col[i];
            auto ref = ref_func(val, another_val);
            ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
        }
    }
}

TEST(StringExpr, CompareNullable2) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR);
    schema->AddDebugField("another_str", DataType::VARCHAR, true);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));
    const auto& another_str_meta = schema->operator[](FieldName("another_str"));

    auto gen_compare_plan =
        [&, fvec_meta, str_meta, another_str_meta](
            proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
        auto str_col_info =
            test::GenColumnInfo(str_meta.get_id().get(),
                                proto::schema::DataType::VarChar,
                                false,
                                false);
        auto another_str_col_info =
            test::GenColumnInfo(another_str_meta.get_id().get(),
                                proto::schema::DataType::VarChar,
                                false,
                                false);

        auto compare_expr = GenCompareExpr(op);
        compare_expr->set_allocated_left_column_info(str_col_info);
        compare_expr->set_allocated_right_column_info(another_str_col_info);

        auto expr = test::GenExpr().release();
        expr->set_allocated_compare_expr(compare_expr);

        proto::plan::VectorType vector_type;
        if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT) {
            vector_type = proto::plan::VectorType::FloatVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) {
            vector_type = proto::plan::VectorType::BinaryVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
            vector_type = proto::plan::VectorType::Float16Vector;
        }
        auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    std::vector<std::tuple<proto::plan::OpType,
                           std::function<bool(std::string&, std::string&)>>>
        testcases{
            {proto::plan::OpType::GreaterThan,
             [](std::string& v1, std::string& v2) { return v1 > v2; }},
            {proto::plan::OpType::GreaterEqual,
             [](std::string& v1, std::string& v2) { return v1 >= v2; }},
            {proto::plan::OpType::LessThan,
             [](std::string& v1, std::string& v2) { return v1 < v2; }},
            {proto::plan::OpType::LessEqual,
             [](std::string& v1, std::string& v2) { return v1 <= v2; }},
            {proto::plan::OpType::Equal,
             [](std::string& v1, std::string& v2) { return v1 == v2; }},
            {proto::plan::OpType::NotEqual,
             [](std::string& v1, std::string& v2) { return v1 != v2; }},
            {proto::plan::OpType::PrefixMatch,
             [](std::string& v1, std::string& v2) {
                 return PrefixMatch(v1, v2);
             }},
        };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    std::vector<std::string> str_col;
    std::vector<std::string> another_str_col;
    FixedVector<bool> valid_data;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        auto reserve_col = [&, raw_data](const FieldMeta& field_meta,
                                         std::vector<std::string>& str_col) {
            auto new_str_col = raw_data.get_col(field_meta.get_id());
            auto begin = FIELD_DATA(new_str_col, string).begin();
            auto end = FIELD_DATA(new_str_col, string).end();
            str_col.insert(str_col.end(), begin, end);
        };

        auto new_str_valid_col =
            raw_data.get_col_valid(another_str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());

        reserve_col(str_meta, str_col);
        reserve_col(another_str_meta, another_str_col);

        {
            seg->PreInsert(N);
            seg->Insert(iter * N,
                        N,
                        raw_data.row_ids_.data(),
                        raw_data.timestamps_.data(),
                        raw_data.raw_);
        }
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    for (const auto& [op, ref_func] : testcases) {
        auto plan_proto = gen_compare_plan(op);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        EXPECT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                ASSERT_EQ(ans, false);
                continue;
            }
            auto val = str_col[i];
            auto another_val = another_str_col[i];
            auto ref = ref_func(val, another_val);
            ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
        }
    }
}

TEST(StringExpr, UnaryRange) {
    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));

@ -510,6 +851,116 @@ TEST(StringExpr, UnaryRange) {
    }
}

TEST(StringExpr, UnaryRangeNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto gen_unary_range_plan =
        [&, fvec_meta, str_meta](
            proto::plan::OpType op,
            std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = test::GenColumnInfo(str_meta.get_id().get(),
                                               proto::schema::DataType::VarChar,
                                               false,
                                               false);
        auto unary_range_expr = test::GenUnaryRangeExpr(op, value);
        unary_range_expr->set_allocated_column_info(column_info);

        auto expr = test::GenExpr().release();
        expr->set_allocated_unary_range_expr(unary_range_expr);

        proto::plan::VectorType vector_type;
        if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT) {
            vector_type = proto::plan::VectorType::FloatVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) {
            vector_type = proto::plan::VectorType::BinaryVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
            vector_type = proto::plan::VectorType::Float16Vector;
        }
        auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    std::vector<std::tuple<proto::plan::OpType,
                           std::string,
                           std::function<bool(std::string&)>>>
        testcases{
            {proto::plan::OpType::GreaterThan,
             "2000",
             [](std::string& val) { return val > "2000"; }},
            {proto::plan::OpType::GreaterEqual,
             "2000",
             [](std::string& val) { return val >= "2000"; }},
            {proto::plan::OpType::LessThan,
             "3000",
             [](std::string& val) { return val < "3000"; }},
            {proto::plan::OpType::LessEqual,
             "3000",
             [](std::string& val) { return val <= "3000"; }},
            {proto::plan::OpType::PrefixMatch,
             "a",
             [](std::string& val) { return PrefixMatch(val, "a"); }},
        };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    std::vector<std::string> str_col;
    FixedVector<bool> valid_data;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = FIELD_DATA(new_str_col, string).begin();
        auto end = FIELD_DATA(new_str_col, string).end();
        str_col.insert(str_col.end(), begin, end);
        auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    for (const auto& [op, value, ref_func] : testcases) {
        auto plan_proto = gen_unary_range_plan(op, value);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        EXPECT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                ASSERT_EQ(ans, false);
                continue;
            }
            auto val = str_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref)
                << "@" << op << "@" << value << "@" << i << "!!" << val;
        }
    }
}

TEST(StringExpr, BinaryRange) {
    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));

@ -625,6 +1076,136 @@ TEST(StringExpr, BinaryRange) {
    }
}

TEST(StringExpr, BinaryRangeNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto gen_binary_range_plan =
        [&, fvec_meta, str_meta](
            bool lb_inclusive,
            bool ub_inclusive,
            std::string lb,
            std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = test::GenColumnInfo(str_meta.get_id().get(),
                                               proto::schema::DataType::VarChar,
                                               false,
                                               false);
        auto binary_range_expr =
            GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
        binary_range_expr->set_allocated_column_info(column_info);

        auto expr = test::GenExpr().release();
        expr->set_allocated_binary_range_expr(binary_range_expr);

        proto::plan::VectorType vector_type;
        if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT) {
            vector_type = proto::plan::VectorType::FloatVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) {
            vector_type = proto::plan::VectorType::BinaryVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
            vector_type = proto::plan::VectorType::Float16Vector;
        }
        auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // bool lb_inclusive, bool ub_inclusive, std::string lb, std::string ub
    std::vector<std::tuple<bool,
                           bool,
                           std::string,
                           std::string,
                           std::function<bool(std::string&)>>>
        testcases{
            {false,
             false,
             "2000",
             "3000",
             [](std::string& val) { return val > "2000" && val < "3000"; }},
            {false,
             true,
             "2000",
             "3000",
             [](std::string& val) { return val > "2000" && val <= "3000"; }},
            {true,
             false,
             "2000",
             "3000",
             [](std::string& val) { return val >= "2000" && val < "3000"; }},
            {true,
             true,
             "2000",
             "3000",
             [](std::string& val) { return val >= "2000" && val <= "3000"; }},
            {true,
             true,
             "2000",
             "1000",
             [](std::string& val) { return false; }},
        };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    std::vector<std::string> str_col;
    FixedVector<bool> valid_data;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = FIELD_DATA(new_str_col, string).begin();
        auto end = FIELD_DATA(new_str_col, string).end();
        str_col.insert(str_col.end(), begin, end);
        auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] :
         testcases) {
        auto plan_proto =
            gen_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        EXPECT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                ASSERT_EQ(ans, false);
                continue;
            }
            auto val = str_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref)
                << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
                << "@" << ub << "@" << i << "!!" << val;
        }
    }
}

TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
    auto schema = GenStrPKSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));

@ -718,7 +1299,7 @@ TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
                    dataset.timestamps_.data(),
                    dataset.raw_);

    auto expr_proto = GenAlwaysTrueExpr(fvec_meta, str_meta);
    auto expr_proto = GenAlwaysTrueExprIfValid(fvec_meta, str_meta);
    auto plan_proto = GenPlanNode();
    plan_proto->mutable_query()->set_allocated_predicates(expr_proto);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});

@ -733,4 +1314,47 @@ TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
    ASSERT_EQ(retrieved->fields_data().size(), 1);
    ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(),
              N);
    ASSERT_EQ(retrieved->fields_data(0).valid_data_size(), 0);
}

TEST(AlwaysTrueStringPlan, QueryWithOutputFieldsNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto N = 10000;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
    auto str_col =
        dataset.get_col(str_meta.get_id())->scalars().string_data().data();
    auto valid_data = dataset.get_col_valid(str_meta.get_id());
    auto segment = CreateGrowingSegment(schema, empty_index_meta);
    segment->PreInsert(N);
    segment->Insert(0,
                    N,
                    dataset.row_ids_.data(),
                    dataset.timestamps_.data(),
                    dataset.raw_);

    auto expr_proto = GenAlwaysTrueExprIfValid(fvec_meta, str_meta);
    auto plan_proto = GenPlanNode();
    plan_proto->mutable_query()->set_allocated_predicates(expr_proto);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});
    auto plan = ProtoParser(*schema).CreateRetrievePlan(*plan_proto);

    Timestamp time = MAX_TIMESTAMP;

    auto retrieved = segment->Retrieve(
        nullptr, plan.get(), time, DEFAULT_MAX_OUTPUT_SIZE, false);
    ASSERT_EQ(retrieved->offset().size(), N / 2);
    ASSERT_EQ(retrieved->fields_data().size(), 1);
    ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(),
              N / 2);
    ASSERT_EQ(retrieved->fields_data(0).valid_data().size(), N / 2);
}

@ -139,7 +139,9 @@ template <typename T>
inline void
assert_reverse(ScalarIndex<T>* index, const std::vector<T>& arr) {
    for (size_t offset = 0; offset < arr.size(); ++offset) {
        ASSERT_EQ(index->Reverse_Lookup(offset), arr[offset]);
        auto raw = index->Reverse_Lookup(offset);
        ASSERT_TRUE(raw.has_value());
        ASSERT_EQ(raw.value(), arr[offset]);
    }
}

@ -147,7 +149,9 @@ template <>
inline void
assert_reverse(ScalarIndex<float>* index, const std::vector<float>& arr) {
    for (size_t offset = 0; offset < arr.size(); ++offset) {
        ASSERT_TRUE(compare_float(index->Reverse_Lookup(offset), arr[offset]));
        auto raw = index->Reverse_Lookup(offset);
        ASSERT_TRUE(raw.has_value());
        ASSERT_TRUE(compare_float(raw.value(), arr[offset]));
    }
}

@ -155,7 +159,9 @@ template <>
inline void
assert_reverse(ScalarIndex<double>* index, const std::vector<double>& arr) {
    for (size_t offset = 0; offset < arr.size(); ++offset) {
        ASSERT_TRUE(compare_double(index->Reverse_Lookup(offset), arr[offset]));
        auto raw = index->Reverse_Lookup(offset);
        ASSERT_TRUE(raw.has_value());
        ASSERT_TRUE(compare_double(raw.value(), arr[offset]));
    }
}

@ -164,7 +170,9 @@ inline void
assert_reverse(ScalarIndex<std::string>* index,
               const std::vector<std::string>& arr) {
    for (size_t offset = 0; offset < arr.size(); ++offset) {
        ASSERT_TRUE(arr[offset].compare(index->Reverse_Lookup(offset)) == 0);
        auto raw = index->Reverse_Lookup(offset);
        ASSERT_TRUE(raw.has_value());
        ASSERT_TRUE(arr[offset].compare(raw.value()) == 0);
    }
}

@ -667,8 +667,14 @@ DataGenForJsonArray(SchemaPtr schema,
    auto insert_data = std::make_unique<InsertRecordProto>();
    auto insert_cols = [&insert_data](
                           auto& data, int64_t count, auto& field_meta) {
        FixedVector<bool> valid_data(count);
        if (field_meta.is_nullable()) {
            for (int i = 0; i < count; ++i) {
                valid_data[i] = i % 2 == 0 ? true : false;
            }
        }
        auto array = milvus::segcore::CreateDataArrayFrom(
            data.data(), nullptr, count, field_meta);
            data.data(), valid_data.data(), count, field_meta);
        insert_data->mutable_fields_data()->AddAllocated(array.release());
    };
    for (auto field_id : schema->get_field_ids()) {
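
The generator marks every odd row of a nullable field as null, which is why the nullable retrieval test earlier expects exactly N / 2 offsets and N / 2 output values. A trivial sanity check of that arithmetic:

#include <cassert>
#include <cstddef>

int main() {
    const size_t N = 10000;
    size_t valid_rows = 0;
    for (size_t i = 0; i < N; ++i) {
        if (i % 2 == 0) {  // DataGen-style alternating validity
            ++valid_rows;
        }
    }
    assert(valid_rows == N / 2);
    return 0;
}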

@ -13019,7 +13019,6 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.tags(CaseLabel.GPU)
    @pytest.mark.skip(reason="issue #36184")
    def test_search_after_different_index_with_params_none_default_data(self, varchar_scalar_index, numeric_scalar_index,
                                                                        null_data_percent, _async):
        """