fix: Fix the bug where null data cannot be filtered by a null expr (#41135)

issue: #41063 
pr: #41124

Signed-off-by: sunby <sunbingyi1992@gmail.com>
pull/41149/head
Bingyi Sun 2025-04-08 00:26:26 +08:00 committed by GitHub
parent 281a4b0300
commit 9eb74d7418
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 27 additions and 5 deletions

View File

@ -85,6 +85,16 @@ class EvalCtx {
bitmap_input_.clear();
}
// Sets whether PhyLogicalUnaryExpr::Eval ANDs its flipped result bitmap with
// the validity bitmap. The backing member defaults to true;
// PhyExistsFilterExpr::Eval passes false.
// NOTE(review): the value stays on this EvalCtx until it is set again, and no
// reset back to true is visible here -- confirm the context is not reused by
// an unrelated NOT expression after an exists expression has been evaluated.
void
set_apply_valid_data_after_flip(bool apply_valid_data_after_flip) {
apply_valid_data_after_flip_ = apply_valid_data_after_flip;
}
// Returns the flag that PhyLogicalUnaryExpr::Eval checks before masking its
// flipped bitmap with the validity bitmap (true unless a caller changed it
// through set_apply_valid_data_after_flip).
bool
get_apply_valid_data_after_flip() const {
return apply_valid_data_after_flip_;
}
private:
ExecContext* exec_ctx_ = nullptr;
ExprSet* expr_set_ = nullptr;
@ -94,6 +104,9 @@ class EvalCtx {
// used for expr pre-filtering, which avoids unnecessary execution on filtered data
TargetBitmap bitmap_input_;
// for some exprs (e.g. exists), we do not need to apply valid data after flip
bool apply_valid_data_after_flip_ = true;
};
} // namespace exec

View File

@ -24,6 +24,7 @@ namespace exec {
void
PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
context.set_apply_valid_data_after_flip(false);
auto input = context.get_offset_input();
SetHasOffsetInput((input != nullptr));
switch (expr_->column_.data_type_) {

View File

@ -30,9 +30,11 @@ PhyLogicalUnaryExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto flat_vec = GetColumnVector(result);
TargetBitmapView data(flat_vec->GetRawData(), flat_vec->size());
data.flip();
TargetBitmapView valid_data(flat_vec->GetValidRawData(),
flat_vec->size());
data &= valid_data;
if (context.get_apply_valid_data_after_flip()) {
TargetBitmapView valid_data(flat_vec->GetValidRawData(),
flat_vec->size());
data &= valid_data;
}
}
}

View File

@ -9,6 +9,7 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <algorithm>
#include <boost/format.hpp>
#include <fstream>
#include <gtest/gtest.h>
@ -16744,6 +16745,8 @@ TEST_P(JsonIndexExistsTest, TestExistsExpr) {
test_cases = {
{{"a"}, true, 0b1111111000000100},
{{"a", "b"}, true, 0b0000100000000000},
{{"a"}, false, 0b0000000111111011},
{{"a", "b"}, false, 0b1111011111111111},
};
auto json_index_path = GetParam();
@ -16752,7 +16755,7 @@ TEST_P(JsonIndexExistsTest, TestExistsExpr) {
auto vec_fid = schema->AddDebugField(
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
auto json_fid = schema->AddDebugField("json", DataType::JSON);
auto json_fid = schema->AddDebugField("json", DataType::JSON, true);
schema->set_primary_field_id(i64_fid);
auto seg = CreateSealedSegment(schema);
@ -16774,12 +16777,15 @@ TEST_P(JsonIndexExistsTest, TestExistsExpr) {
static_cast<json_index_type*>(inv_index.release()));
auto json_field =
std::make_shared<FieldData<milvus::Json>>(DataType::JSON, false);
std::make_shared<FieldData<milvus::Json>>(DataType::JSON, true);
std::vector<milvus::Json> jsons;
for (auto& json_str : json_strs) {
jsons.push_back(milvus::Json(simdjson::padded_string(json_str)));
}
json_field->add_json_data(jsons);
auto json_valid_data = json_field->ValidData();
json_valid_data[0] = 0xFF;
json_valid_data[1] = 0xFE;
json_index->BuildWithFieldData({json_field});
json_index->finish();