// Copyright (C) 2019-2020 Zilliz. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License #include "common/Consts.h" #include "common/JsonCastType.h" #include "common/Schema.h" #include "common/Types.h" #include "expr/ITypeExpr.h" #include "index/IndexFactory.h" #include "index/JsonInvertedIndex.h" #include "mmap/Types.h" #include "pb/plan.pb.h" #include "plan/PlanNode.h" #include "query/ExecPlanNodeVisitor.h" #include "segcore/ChunkedSegmentSealedImpl.h" #include "segcore/Types.h" #include "storage/RemoteChunkManagerSingleton.h" #include "storage/Util.h" #include "test_cachinglayer/cachinglayer_test_utils.h" #include "test_utils/storage_test_utils.h" #include #include using namespace milvus; using namespace milvus::index; TEST(JsonIndexTest, TestJSONErrRecorder) { std::vector json_raw_data = { R"(1)", R"({"a": true})", R"({"a": 1.0})", R"({"a": 1})", R"({"a": null})", R"({"a": [1,2,3]})", R"({"a": [1.0,2,3]})", R"({"a": {"b": 1}})", R"({"a": "1"})", R"({"a": 1, "a": 1.0})", }; std::string json_path = "/a"; auto schema = std::make_shared(); auto json_fid = schema->AddDebugField("json", DataType::JSON); auto file_manager_ctx = storage::FileManagerContext(); file_manager_ctx.fieldDataMeta.field_schema.set_data_type( milvus::proto::schema::JSON); file_manager_ctx.fieldDataMeta.field_schema.set_fieldid(json_fid.get()); file_manager_ctx.fieldDataMeta.field_id = json_fid.get(); auto inv_index = index::IndexFactory::GetInstance().CreateJsonIndex( index::INVERTED_INDEX_TYPE, JsonCastType::FromString("DOUBLE"), json_path, file_manager_ctx); auto json_index = std::unique_ptr>( static_cast*>(inv_index.release())); std::vector jsons; for (auto& json : json_raw_data) { jsons.push_back(milvus::Json(simdjson::padded_string(json))); } auto json_field = std::make_shared>(DataType::JSON, false); json_field->add_json_data(jsons); json_index->BuildWithFieldData({json_field}); auto error_map = json_index->GetErrorRecorder().GetErrorMap(); EXPECT_EQ(error_map.size(), 2); EXPECT_EQ(error_map[simdjson::error_code::INCORRECT_TYPE].count, 5); EXPECT_EQ(error_map[simdjson::error_code::NO_SUCH_FIELD].count, 2); } TEST(JsonIndexTest, TestJsonContains) { std::vector json_raw_data = { R"(1)", R"("a simple string")", R"(null)", R"(true)", R"([])", R"({})", R"([1, 2, 3])", R"([1.0, 2.0, 3.0])", R"([true, false, null])", R"(["hello", "world"])", R"([{"nested": true}, {"nested": false}])", R"({"a": true})", R"({"a": 1.0})", R"({"a": 1})", R"({"a": null})", R"({"a": "hello"})", R"({"a": {"nested": true}})", R"({"a": [1, 2, 3]})", R"({"a": [1.0, 2, 3]})", R"({"a": [true, false]})", R"({"a": ["x", "y"]})", R"({"a": [{"nested": true}, {"nested": false}]})", R"({"a": []})", }; auto json_path = "/a"; auto schema = std::make_shared(); auto json_fid = schema->AddDebugField("json", DataType::JSON); auto file_manager_ctx = storage::FileManagerContext(); file_manager_ctx.fieldDataMeta.field_schema.set_data_type( milvus::proto::schema::JSON); file_manager_ctx.fieldDataMeta.field_schema.set_fieldid(json_fid.get()); file_manager_ctx.fieldDataMeta.field_id = json_fid.get(); auto inv_index = index::IndexFactory::GetInstance().CreateJsonIndex( index::INVERTED_INDEX_TYPE, JsonCastType::FromString("ARRAY_DOUBLE"), json_path, file_manager_ctx); auto json_index = std::unique_ptr>( static_cast*>(inv_index.release())); std::vector jsons; for (auto& json : json_raw_data) { jsons.push_back(milvus::Json(simdjson::padded_string(json))); } auto json_field = std::make_shared>(DataType::JSON, false); json_field->add_json_data(jsons); json_index->BuildWithFieldData({json_field}); json_index->finish(); json_index->create_reader(milvus::index::SetBitsetSealed); auto segment = segcore::CreateSealedSegment(schema); segcore::LoadIndexInfo load_index_info; load_index_info.field_id = json_fid.get(); load_index_info.field_type = DataType::JSON; load_index_info.index_params = {{JSON_PATH, json_path}, {JSON_CAST_TYPE, "ARRAY_DOUBLE"}}; load_index_info.cache_index = CreateTestCacheIndex("test", std::move(json_index)); segment->LoadIndex(load_index_info); auto cm = milvus::storage::RemoteChunkManagerSingleton::GetInstance() .GetRemoteChunkManager(); auto load_info = PrepareSingleFieldInsertBinlog( 0, 0, 0, json_fid.get(), {json_field}, cm); segment->LoadFieldData(load_info); std::vector>> test_cases; proto::plan::GenericValue value; value.set_int64_val(1); test_cases.push_back(std::make_tuple(value, std::vector{17, 18})); // proto::plan::GenericValue value2; // value2.set_bool_val(true); // test_cases.push_back(std::make_tuple(value2, std::vector{8})); // proto::plan::GenericValue value3; // value3.set_string_val("x"); // test_cases.push_back(std::make_tuple(value3, std::vector{9})); for (auto& test_case : test_cases) { auto expr = std::make_shared( expr::ColumnInfo(json_fid, DataType::JSON, {"a"}, true), proto::plan::JSONContainsExpr_JSONOp:: JSONContainsExpr_JSONOp_Contains, true, std::vector{value}); auto plan = std::make_shared(DEFAULT_PLANNODE_ID, expr); auto result = query::ExecuteQueryExpr( plan, segment.get(), json_raw_data.size(), MAX_TIMESTAMP); auto expect_result = std::get<1>(test_case); EXPECT_EQ(result.count(), expect_result.size()); for (auto& id : expect_result) { EXPECT_TRUE(result[id]); } } } TEST(JsonIndexTest, TestJsonCast) { std::vector json_raw_data = { R"(1)", R"({"a": 1.0})", R"({"a": 1})", R"({"a": "1.0"})", R"({"a": true})", R"({"a": [1, 2, 3]})", R"({"a": {"b": 1}})", }; auto json_path = "/a"; auto schema = std::make_shared(); auto json_fid = schema->AddDebugField("json", DataType::JSON); auto file_manager_ctx = storage::FileManagerContext(); file_manager_ctx.fieldDataMeta.field_schema.set_data_type( milvus::proto::schema::JSON); file_manager_ctx.fieldDataMeta.field_schema.set_fieldid(json_fid.get()); file_manager_ctx.fieldDataMeta.field_id = json_fid.get(); auto inv_index = index::IndexFactory::GetInstance().CreateJsonIndex( index::INVERTED_INDEX_TYPE, JsonCastType::FromString("DOUBLE"), json_path, file_manager_ctx, "STRING_TO_DOUBLE"); auto json_index = std::unique_ptr>( static_cast*>(inv_index.release())); std::vector jsons; for (auto& json : json_raw_data) { jsons.push_back(milvus::Json(simdjson::padded_string(json))); } auto json_field = std::make_shared>(DataType::JSON, false); json_field->add_json_data(jsons); json_index->BuildWithFieldData({json_field}); json_index->finish(); json_index->create_reader(milvus::index::SetBitsetSealed); auto segment = segcore::CreateSealedSegment(schema); segcore::LoadIndexInfo load_index_info; load_index_info.field_id = json_fid.get(); load_index_info.field_type = DataType::JSON; load_index_info.cache_index = CreateTestCacheIndex("", std::move(json_index)); load_index_info.index_params = {{JSON_PATH, json_path}, {JSON_CAST_TYPE, "DOUBLE"}}; segment->LoadIndex(load_index_info); auto cm = milvus::storage::RemoteChunkManagerSingleton::GetInstance() .GetRemoteChunkManager(); auto load_info = PrepareSingleFieldInsertBinlog( 0, 0, 0, json_fid.get(), {json_field}, cm); segment->LoadFieldData(load_info); std::vector>> test_cases; proto::plan::GenericValue value; value.set_int64_val(1); test_cases.push_back(std::make_tuple(value, std::vector{1, 2, 3})); for (auto& test_case : test_cases) { auto expr = std::make_shared( expr::ColumnInfo(json_fid, DataType::JSON, {"a"}, true), proto::plan::OpType::Equal, value, std::vector{}); auto plan = std::make_shared(DEFAULT_PLANNODE_ID, expr); auto result = query::ExecuteQueryExpr( plan, segment.get(), json_raw_data.size(), MAX_TIMESTAMP); auto expect_result = std::get<1>(test_case); EXPECT_EQ(result.count(), expect_result.size()); for (auto& id : expect_result) { EXPECT_TRUE(result[id]); } } }