fix: fix format error for JSON (#41031)

#41026

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
pull/41058/head
zhagnlu 2025-04-02 14:32:32 +08:00 committed by GitHub
parent 7945ff8784
commit 0d3bd3131c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 127 additions and 4 deletions

View File

@ -228,6 +228,22 @@ GetCommonPrefix(const std::string& str1, const std::string& str2) {
return str1.substr(0, i);
}
// Returns a copy of `input` in which every '{' is doubled to "{{" and
// every '}' is doubled to "}}"; all other characters are copied unchanged.
// Intended for text (e.g. a dumped JSON string) that may later be used as
// a fmt::format format string, where single braces would otherwise be
// parsed as replacement fields.
inline std::string
EscapeBraces(const std::string& input) {
    std::string escaped;
    for (const char c : input) {
        switch (c) {
            case '{':
                escaped.append("{{");
                break;
            case '}':
                escaped.append("}}");
                break;
            default:
                escaped.push_back(c);
                break;
        }
    }
    return escaped;
}
inline knowhere::sparse::SparseRow<float>
CopyAndWrapSparseRow(const void* data,
size_t size,

View File

@ -180,7 +180,7 @@ Driver::Next(std::shared_ptr<BlockingState>& blocking_state) {
"Operator::{} failed for [Operator:{}, plan node id: " \
"{}] : {}", \
method_name, \
operator->get_operator_type(), \
operator->ToString(), \
operator->get_plannode_id(), \
e.what()); \
LOG_ERROR(err_msg); \

View File

@ -25,6 +25,7 @@
#include <unordered_map>
#include <unordered_set>
#include "common/Common.h"
#include "common/Tracer.h"
#include "common/Types.h"
#include "common/type_c.h"
@ -439,8 +440,8 @@ VectorMemIndex<T>::Query(const DatasetPtr dataset,
PanicInfo(
ErrorCode::UnexpectedError,
// escape json brace in case of using message as format
"failed to search: config={{{}}} {}: {}",
search_conf.dump(),
"failed to search: config={} {}: {}",
milvus::EscapeBraces(search_conf.dump()),
KnowhereStatusString(res.error()),
res.what());
}

View File

@ -151,6 +151,88 @@ TEST(Sealed, without_predicate) {
EXPECT_EQ(sr->get_total_result_count(), 0);
}
// Searches a sealed segment whose vector field is served by an HNSW index,
// using a plan whose search params set ef=10 while topk=100. The search is
// expected to throw, and the exception text must contain
// "ef(10) should be larger than k(100)".
TEST(Sealed, without_search_ef_less_than_limit) {
    auto schema = std::make_shared<Schema>();
    auto dim = 16;
    auto metric_type = knowhere::metric::L2;
    auto fake_id = schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
    // The "age" field is only registered in the schema; its id is not needed.
    schema->AddDebugField("age", DataType::FLOAT);
    auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
    schema->set_primary_field_id(i64_fid);

    // topk (100) is deliberately larger than the ef search param (10).
    const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 100
round_decimal: 3
metric_type: "L2"
search_params: "{\"ef\": 10}"
>
placeholder_tag: "$0"
>)";

    auto N = ROW_COUNT;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col<float>(fake_id);
    auto query_ptr = vec_col.data() + BIAS * dim;

    auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
    auto plan =
        CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size());
    auto num_queries = 5;
    // Use `dim` rather than a repeated literal so the query vectors always
    // match the dimension of the vector field declared above.
    auto ph_group_raw =
        CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr);
    auto ph_group =
        ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp timestamp = 1000000;

    // Build an HNSW index over the generated vectors.
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.field_type = DataType::VECTOR_FLOAT;
    create_index_info.metric_type = knowhere::metric::L2;
    create_index_info.index_type = knowhere::IndexEnum::INDEX_HNSW;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();
    auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, milvus::storage::FileManagerContext());
    auto build_conf =
        knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
                       {knowhere::indexparam::M, "16"},
                       {knowhere::indexparam::EF, "10"}};
    auto database = knowhere::GenDataSet(N, dim, vec_col.data());
    indexing->BuildWithDataset(database, build_conf);

    LoadIndexInfo load_info;
    load_info.field_id = fake_id.get();
    load_info.index = std::move(indexing);
    load_info.index_params["metric_type"] = "L2";

    // load index for vec field, load raw data for scalar field
    auto sealed_segment = SealedCreator(schema, dataset);
    sealed_segment->DropFieldData(fake_id);
    sealed_segment->LoadIndex(load_info);

    // The search must fail because ef (10) is smaller than topk (100).
    // FAIL() is a fatal failure that returns from the test body, so reaching
    // the end of this function already implies the exception was thrown; no
    // separate "thrown" flag is needed.
    try {
        sealed_segment->Search(plan.get(), ph_group.get(), timestamp);
        FAIL() << "Expected exception for invalid ef parameter";
    } catch (const std::exception& e) {
        const std::string error_msg = e.what();
        ASSERT_TRUE(error_msg.find("ef(10) should be larger than k(100)") !=
                    std::string::npos)
            << "Unexpected error message: " << error_msg;
    }
}
TEST(Sealed, with_predicate) {
auto schema = std::make_shared<Schema>();
auto dim = 16;

View File

@ -288,7 +288,7 @@ if [[ ${RUN_CPPLINT} == "ON" ]]; then
echo "clang-format check passed!"
else
# compile and build
make -j ${jobs} install || exit 1
make -j 7 install || exit 1
fi
if command -v ccache &> /dev/null

View File

@ -1249,6 +1249,30 @@ class TestCollectionSearchInvalid(TestcaseBase):
"err_msg": f"metric type {metric} not found or not supported, "
"supported: [HAMMING JACCARD]"})
@pytest.mark.tags(CaseLabel.L2)
def test_search_ef_less_than_limit(self):
    """
    target: test the scenario which search with ef less than limit
    method: 1. create collection
            2. create an HNSW index, flush and load the collection
            3. search with ef (10) less than limit (100)
    expected: raise exception and report the error
    """
    collection_w = self.init_collection_general(prefix, True, 2000, 0, is_index=False)[0]
    index_hnsw = {
        "index_type": "HNSW",
        "metric_type": "L2",
        "params": {"M": 8, "efConstruction": 256},
    }
    collection_w.create_index(ct.default_float_vec_field_name, index_params=index_hnsw)
    collection_w.flush()
    collection_w.load()
    search_params = {"metric_type": "L2", "params": {"ef": 10}}
    # The result is checked through check_task/check_items, so the return
    # value of search() is intentionally not bound.
    collection_w.search(vectors, ct.default_float_vec_field_name,
                        search_params, limit=100,
                        check_task=CheckTasks.err_res,
                        check_items={"err_code": 65535,
                                     "err_msg": "query failed: N6milvus21ExecOperatorExceptionE :Operator::GetOutput failed"})
@pytest.mark.tags(CaseLabel.L1)
def test_search_dynamic_compare_two_fields(self):
"""