mirror of https://github.com/milvus-io/milvus.git
fix: Align brute force search with json index for exists expr (#41056)
issue: #35528 pr: #41004 Signed-off-by: sunby <sunbingyi1992@gmail.com>pull/41071/head
parent
5017e9ab8c
commit
240f766511
|
@ -201,7 +201,9 @@ class Json {
|
|||
|
||||
bool
|
||||
exist(std::string_view pointer) const {
|
||||
return doc().at_pointer(pointer).error() == simdjson::SUCCESS;
|
||||
auto doc = this->doc();
|
||||
auto res = doc.at_pointer(pointer);
|
||||
return res.error() == simdjson::SUCCESS && !res.is_null();
|
||||
}
|
||||
|
||||
// construct JSON pointer with provided path
|
||||
|
|
|
@ -115,7 +115,8 @@ JsonInvertedIndex<T>::build_index_for_json(
|
|||
|
||||
auto exists = path_exists(json_column->dom_doc(), tokens);
|
||||
if (!exists ||
|
||||
json_column->doc().at_pointer(nested_path_).is_null()) {
|
||||
nested_path_ != "" &&
|
||||
json_column->doc().at_pointer(nested_path_).is_null()) {
|
||||
error_recorder_.Record(
|
||||
*json_column, nested_path_, simdjson::NO_SUCH_FIELD);
|
||||
this->null_offset_.push_back(offset);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <regex>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
|
@ -16712,22 +16713,40 @@ TEST(JsonIndexTest, TestJsonNotEqualExpr) {
|
|||
EXPECT_EQ(final.count(), 2 * json_strs.size() - 4);
|
||||
}
|
||||
|
||||
TEST(JsonIndexTest, TestExistsExpr) {
|
||||
std::unordered_map<std::string, bool> json_strs_match = {
|
||||
{R"({"a": 1.0})", true},
|
||||
{R"({"a": "abc"})", true},
|
||||
{R"({"a": 3.0})", true},
|
||||
{R"({"a": true})", true},
|
||||
{R"({"a": {"b": 1}})", true},
|
||||
{R"({"a": []})", true},
|
||||
{R"({"a": ["a", "b"]})", true},
|
||||
{R"({"a": null})", false}, // exists null
|
||||
{R"(1)", false},
|
||||
{R"("abc")", false},
|
||||
{R"(1.0)", false},
|
||||
{R"(true)", false},
|
||||
{R"([1, 2, 3])", false},
|
||||
{R"({"a": 1, "b": 2})", true}};
|
||||
class JsonIndexExistsTest : public ::testing::TestWithParam<std::string> {};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(JsonIndexExistsTestParams,
|
||||
JsonIndexExistsTest,
|
||||
::testing::Values("/a", ""));
|
||||
|
||||
TEST_P(JsonIndexExistsTest, TestExistsExpr) {
|
||||
std::vector<std::string> json_strs = {
|
||||
R"({"a": 1.0})",
|
||||
R"({"a": "abc"})",
|
||||
R"({"a": 3.0})",
|
||||
R"({"a": true})",
|
||||
R"({"a": {"b": 1}})",
|
||||
R"({"a": []})",
|
||||
R"({"a": ["a", "b"]})",
|
||||
R"({"a": null})", // exists null
|
||||
R"(1)",
|
||||
R"("abc")",
|
||||
R"(1.0)",
|
||||
R"(true)",
|
||||
R"([1, 2, 3])",
|
||||
R"({"a": 1, "b": 2})",
|
||||
R"({})",
|
||||
R"(null)",
|
||||
};
|
||||
|
||||
// bool: exists or not
|
||||
std::vector<std::tuple<std::vector<std::string>, bool, uint32_t>>
|
||||
test_cases = {
|
||||
{{"a"}, true, 0b1111111000000100},
|
||||
{{"a", "b"}, true, 0b0000100000000000},
|
||||
};
|
||||
|
||||
auto json_index_path = GetParam();
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
|
@ -16747,7 +16766,7 @@ TEST(JsonIndexTest, TestExistsExpr) {
|
|||
auto inv_index = index::IndexFactory::GetInstance().CreateJsonIndex(
|
||||
index::INVERTED_INDEX_TYPE,
|
||||
JsonCastType::DOUBLE,
|
||||
"/a",
|
||||
json_index_path,
|
||||
file_manager_ctx);
|
||||
|
||||
using json_index_type = index::JsonInvertedIndex<double>;
|
||||
|
@ -16757,13 +16776,8 @@ TEST(JsonIndexTest, TestExistsExpr) {
|
|||
auto json_field =
|
||||
std::make_shared<FieldData<milvus::Json>>(DataType::JSON, false);
|
||||
std::vector<milvus::Json> jsons;
|
||||
BitsetType expect;
|
||||
expect.resize(json_strs_match.size());
|
||||
int i = 0;
|
||||
for (auto& [json_str, match] : json_strs_match) {
|
||||
for (auto& json_str : json_strs) {
|
||||
jsons.push_back(milvus::Json(simdjson::padded_string(json_str)));
|
||||
expect.set(i, match);
|
||||
i++;
|
||||
}
|
||||
json_field->add_json_data(jsons);
|
||||
|
||||
|
@ -16774,14 +16788,36 @@ TEST(JsonIndexTest, TestExistsExpr) {
|
|||
load_index_info.field_id = json_fid.get();
|
||||
load_index_info.field_type = DataType::JSON;
|
||||
load_index_info.index = std::move(json_index);
|
||||
load_index_info.index_params = {{JSON_PATH, "/a"}};
|
||||
load_index_info.index_params = {{JSON_PATH, json_index_path}};
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
auto exists_expr = std::make_shared<expr::ExistsExpr>(
|
||||
expr::ColumnInfo(json_fid, DataType::JSON, {"a"}, true));
|
||||
auto plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID,
|
||||
exists_expr);
|
||||
auto result = ExecuteQueryExpr(
|
||||
plan, seg.get(), json_strs_match.size(), MAX_TIMESTAMP);
|
||||
EXPECT_TRUE(result == expect);
|
||||
auto json_field_data_info =
|
||||
FieldDataInfo(json_fid.get(), json_strs.size(), {json_field});
|
||||
seg->LoadFieldData(json_fid, json_field_data_info);
|
||||
|
||||
for (auto& [nested_path, exists, expect] : test_cases) {
|
||||
BitsetType expect_res;
|
||||
expect_res.resize(json_strs.size());
|
||||
for (int i = json_strs.size() - 1; expect > 0; i--) {
|
||||
expect_res.set(i, (expect & 1) != 0);
|
||||
expect >>= 1;
|
||||
}
|
||||
|
||||
std::shared_ptr<expr::ITypeFilterExpr> exists_expr;
|
||||
if (exists) {
|
||||
exists_expr = std::make_shared<expr::ExistsExpr>(
|
||||
expr::ColumnInfo(json_fid, DataType::JSON, nested_path, true));
|
||||
} else {
|
||||
auto child_expr = std::make_shared<expr::ExistsExpr>(
|
||||
expr::ColumnInfo(json_fid, DataType::JSON, nested_path, true));
|
||||
exists_expr = std::make_shared<expr::LogicalUnaryExpr>(
|
||||
expr::LogicalUnaryExpr::OpType::LogicalNot, child_expr);
|
||||
}
|
||||
auto plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID,
|
||||
exists_expr);
|
||||
auto result =
|
||||
ExecuteQueryExpr(plan, seg.get(), json_strs.size(), MAX_TIMESTAMP);
|
||||
|
||||
EXPECT_TRUE(result == expect_res);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,39 +19,6 @@
|
|||
using namespace milvus;
|
||||
using namespace milvus::index;
|
||||
|
||||
TEST(JsonIndexTest, TestBuildNonExistJsonPath) {
|
||||
std::string json_path = "/hello";
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto json_fid = schema->AddDebugField("json", DataType::JSON);
|
||||
auto file_manager_ctx = storage::FileManagerContext();
|
||||
file_manager_ctx.fieldDataMeta.field_schema.set_data_type(
|
||||
milvus::proto::schema::JSON);
|
||||
file_manager_ctx.fieldDataMeta.field_schema.set_fieldid(json_fid.get());
|
||||
file_manager_ctx.fieldDataMeta.field_id = json_fid.get();
|
||||
auto inv_index = index::IndexFactory::GetInstance().CreateJsonIndex(
|
||||
index::INVERTED_INDEX_TYPE,
|
||||
JsonCastType::DOUBLE,
|
||||
json_path,
|
||||
file_manager_ctx);
|
||||
auto json_index = std::unique_ptr<JsonInvertedIndex<int32_t>>(
|
||||
static_cast<JsonInvertedIndex<int32_t>*>(inv_index.release()));
|
||||
|
||||
std::vector<std::string> json_raw_data = {R"({"hello": 1})",
|
||||
R"({"world": 2})"};
|
||||
|
||||
std::vector<milvus::Json> jsons;
|
||||
for (auto& json : json_raw_data) {
|
||||
jsons.push_back(milvus::Json(simdjson::padded_string(json)));
|
||||
}
|
||||
|
||||
auto json_field =
|
||||
std::make_shared<FieldData<milvus::Json>>(DataType::JSON, false);
|
||||
json_field->add_json_data(jsons);
|
||||
json_index->BuildWithFieldData({json_field});
|
||||
json_index->finish();
|
||||
json_index->create_reader();
|
||||
}
|
||||
|
||||
TEST(JsonIndexTest, TestJSONErrRecorder) {
|
||||
std::vector<std::string> json_raw_data = {
|
||||
R"(1)",
|
||||
|
|
Loading…
Reference in New Issue