feat: Support for multiple forms of JSON (#31052)

issue: #31051

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
pull/31182/head
cai.zhang 2024-03-11 19:55:02 +08:00 committed by GitHub
parent 937f2440ab
commit 6a83f16871
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 45 additions and 64 deletions

View File

@ -133,6 +133,9 @@ class Json {
// construct JSON pointer with provided path
static std::string
pointer(std::vector<std::string> nested_path) {
if (nested_path.empty()) {
return "";
}
std::for_each(
nested_path.begin(), nested_path.end(), [](std::string& key) {
boost::replace_all(key, "~", "~0");
@ -145,6 +148,19 @@ class Json {
template <typename T>
value_result<T>
at(std::string_view pointer) const {
if (pointer == "") {
if constexpr (std::is_same_v<std::string_view, T> ||
std::is_same_v<std::string, T>) {
return doc().get_string(false);
} else if constexpr (std::is_same_v<bool, T>) {
return doc().get_bool();
} else if constexpr (std::is_same_v<int64_t, T>) {
return doc().get_int64();
} else if constexpr (std::is_same_v<double, T>) {
return doc().get_double();
}
}
return doc().at_pointer(pointer).get<T>();
}

View File

@ -36,9 +36,9 @@ func (v *ParserVisitor) translateIdentifier(identifier string) (*ExprWithType, e
if identifier != field.Name {
nestedPath = append(nestedPath, identifier)
}
if typeutil.IsJSONType(field.DataType) && len(nestedPath) == 0 {
return nil, fmt.Errorf("can not comparisons jsonField directly")
}
//if typeutil.IsJSONType(field.DataType) && len(nestedPath) == 0 {
// return nil, fmt.Errorf("can not comparisons jsonField directly")
//}
return &ExprWithType{
expr: &planpb.Expr{
Expr: &planpb.Expr_ColumnExpr{
@ -1071,7 +1071,12 @@ func (v *ParserVisitor) VisitExists(ctx *parser.ExistsContext) interface{} {
if columnInfo.GetDataType() != schemapb.DataType_JSON {
return fmt.Errorf(
"exists oerations are only supportted on json field, got:%s", columnInfo.GetDataType())
"exists operations are only supportted on json field, got:%s", columnInfo.GetDataType())
}
if len(columnInfo.GetNestedPath()) == 0 {
return fmt.Errorf(
"exists operations are only supportted on json key")
}
return &ExprWithType{

View File

@ -200,13 +200,13 @@ func TestExpr_BinaryRange(t *testing.T) {
`"str16" > VarCharField > "str15"`,
`18 > DoubleField > 17`,
`100 > B > 14`,
`1 < JSONField < 3`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
invalidExprs := []string{
`1 < JSONField < 3`,
`1 < ArrayField < 3`,
`1 < A+B < 3`,
}
@ -235,6 +235,8 @@ func TestExpr_BinaryArith(t *testing.T) {
`A * 15 > 16`,
`JSONField['A'] / 17 >= 18`,
`ArrayField[0] % 19 >= 20`,
`JSONField + 15 == 16`,
`15 + JSONField == 16`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
@ -242,8 +244,6 @@ func TestExpr_BinaryArith(t *testing.T) {
// TODO: enable these after execution backend is ready.
unsupported := []string{
`JSONField + 15 == 16`,
`15 + JSONField == 16`,
`ArrayField + 15 == 16`,
`15 + ArrayField == 16`,
}
@ -465,8 +465,9 @@ func TestExpr_Invalid(t *testing.T) {
`StringField % VarCharField`,
`StringField * 2`,
`2 / StringField`,
`JSONField / 2 == 1`,
//`JSONField / 2 == 1`,
`2 % JSONField == 1`,
`2 % Int64Field == 1`,
`ArrayField / 2 == 1`,
`2 / ArrayField == 1`,
// ----------------------- ==/!= -------------------------
@ -483,8 +484,8 @@ func TestExpr_Invalid(t *testing.T) {
`"str" >= false`,
`VarCharField < FloatField`,
`FloatField > VarCharField`,
`JSONField > 1`,
`1 < JSONField`,
//`JSONField > 1`,
//`1 < JSONField`,
`ArrayField > 2`,
`2 < ArrayField`,
// ------------------------ like ------------------------
@ -702,15 +703,15 @@ func Test_InvalidExprOnJSONField(t *testing.T) {
`exists $meta`,
`exists JSONField`,
`exists ArrayField`,
`$meta > 0`,
`JSONField == 0`,
`$meta < 100`,
`0 < $meta < 100`,
`20 > $meta > 0`,
`$meta + 5 > 0`,
`$meta > 2 + 5`,
//`$meta > 0`,
//`JSONField == 0`,
//`$meta < 100`,
//`0 < $meta < 100`,
//`20 > $meta > 0`,
//`$meta + 5 > 0`,
//`$meta > 2 + 5`,
`exists $meta["A"] > 10 `,
`exists Int64Field `,
`exists Int64Field`,
`A[[""B""]] > 10`,
`A["[""B""]"] > 10`,
`A[[""B""]] > 10`,
@ -881,7 +882,7 @@ func Test_InvalidJSONContains(t *testing.T) {
`json_contains([1,2,3], 1)`,
`json_contains([1,2,3], [1,2,3])`,
`json_contains([1,2,3], [1,2])`,
`json_contains($meta, 1)`,
//`json_contains($meta, 1)`,
`json_contains(A, B)`,
`not json_contains(A, B)`,
`json_contains(A, B > 5)`,
@ -889,9 +890,9 @@ func Test_InvalidJSONContains(t *testing.T) {
`json_contains(A, StringField > 5)`,
`json_contains(A)`,
`json_contains(A, 5, C)`,
`json_contains(JSONField, 5)`,
`json_Contains(JSONField, 5)`,
`JSON_contains(JSONField, 5)`,
//`json_contains(JSONField, 5)`,
//`json_Contains(JSONField, 5)`,
//`JSON_contains(JSONField, 5)`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{

View File

@ -1,7 +1,6 @@
package proxy
import (
"encoding/json"
"fmt"
"math"
"reflect"
@ -370,18 +369,6 @@ func (v *validateUtil) checkJSONFieldData(field *schemapb.FieldData, fieldSchema
}
}
var jsonMap map[string]interface{}
for _, data := range jsonArray {
err := json.Unmarshal(data, &jsonMap)
if err != nil {
log.Warn("insert invalid JSON data, milvus only support json map without nesting",
zap.ByteString("data", data),
zap.Error(err),
)
return merr.WrapErrIoFailedReason(err.Error())
}
}
return nil
}

View File

@ -3422,27 +3422,4 @@ func Test_validateUtil_checkJSONData(t *testing.T) {
err := v.checkJSONFieldData(data, f)
assert.Error(t, err)
})
t.Run("invalid_JSON_data", func(t *testing.T) {
v := newValidateUtil(withOverflowCheck(), withMaxLenCheck())
jsonData := "hello"
f := &schemapb.FieldSchema{
DataType: schemapb.DataType_JSON,
}
data := &schemapb.FieldData{
FieldName: "json",
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_JsonData{
JsonData: &schemapb.JSONArray{
Data: [][]byte{[]byte(jsonData)},
},
},
},
},
}
err := v.checkJSONFieldData(data, f)
assert.Error(t, err)
})
}

View File

@ -365,9 +365,6 @@ func (s *JSONExprSuite) TestJSON_DynamicSchemaWithJSON() {
}
s.doSearch(collectionName, []string{integration.JSONField}, expr, dim, checkFunc)
log.Info("nested path expression run successfully")
expr = `jsonField == ""`
s.doSearchWithInvalidExpr(collectionName, []string{integration.JSONField}, expr, dim)
}
func (s *JSONExprSuite) checkSearch(collectionName, fieldName string, dim int) {
@ -722,9 +719,6 @@ func (s *JSONExprSuite) checkSearch(collectionName, fieldName string, dim int) {
expr = `1+5 <= A+1 < 5+10`
s.doSearchWithInvalidExpr(collectionName, []string{fieldName}, expr, dim)
expr = `$meta == ""`
s.doSearchWithInvalidExpr(collectionName, []string{fieldName}, expr, dim)
}
func (s *JSONExprSuite) insertFlushIndexLoad(ctx context.Context, dbName, collectionName string, rowNum int, dim int, fieldData []*schemapb.FieldData) {

View File

@ -9326,6 +9326,7 @@ class TestCollectionSearchJSON(TestcaseBase):
# The followings are invalid base cases
******************************************************************
"""
@pytest.mark.skip("Supported json like: 1, \"abc\", [1,2,3,4]")
@pytest.mark.tags(CaseLabel.L1)
def test_search_json_expression_object(self):
"""