mirror of https://github.com/milvus-io/milvus.git
fix: [2.5] Decode unicode for json key in expression (#38653)
issue: #38626 master pr: #38651 Signed-off-by: Cai Zhang <cai.zhang@zilliz.com> pull/38663/head
parent
7d46a8f17e
commit
bb3d993da5
|
@ -28,6 +28,7 @@ func (v *ParserVisitor) VisitParens(ctx *parser.ParensContext) interface{} {
|
|||
}
|
||||
|
||||
func (v *ParserVisitor) translateIdentifier(identifier string) (*ExprWithType, error) {
|
||||
identifier = decodeUnicode(identifier)
|
||||
field, err := v.schema.GetFieldFromNameDefaultJSON(identifier)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -1005,6 +1006,7 @@ func (v *ParserVisitor) VisitBitOr(ctx *parser.BitOrContext) interface{} {
|
|||
*/
|
||||
// For more tests, refer to plan_parser_v2_test.go::Test_JSONExpr.
|
||||
func (v *ParserVisitor) getColumnInfoFromJSONIdentifier(identifier string) (*planpb.ColumnInfo, error) {
|
||||
identifier = decodeUnicode(identifier)
|
||||
fieldName := strings.Split(identifier, "[")[0]
|
||||
nestedPath := make([]string, 0)
|
||||
field, err := v.schema.GetFieldFromNameDefaultJSON(fieldName)
|
||||
|
|
|
@ -2,9 +2,7 @@ package planparserv2
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/antlr4-go/antlr/v4"
|
||||
"github.com/hashicorp/golang-lru/v2/expirable"
|
||||
|
@ -153,36 +151,6 @@ func CreateRetrievePlan(schema *typeutil.SchemaHelper, exprStr string, exprTempl
|
|||
return planNode, nil
|
||||
}
|
||||
|
||||
func convertHanToASCII(s string) string {
|
||||
var builder strings.Builder
|
||||
builder.Grow(len(s) * 6)
|
||||
skipCur := false
|
||||
n := len(s)
|
||||
for i, r := range s {
|
||||
if skipCur {
|
||||
builder.WriteRune(r)
|
||||
skipCur = false
|
||||
continue
|
||||
}
|
||||
if r == '\\' {
|
||||
if i+1 < n && !isEscapeCh(s[i+1]) {
|
||||
return s
|
||||
}
|
||||
skipCur = true
|
||||
builder.WriteRune(r)
|
||||
continue
|
||||
}
|
||||
|
||||
if unicode.Is(unicode.Han, r) {
|
||||
builder.WriteString(formatUnicode(uint32(r)))
|
||||
} else {
|
||||
builder.WriteRune(r)
|
||||
}
|
||||
}
|
||||
|
||||
return builder.String()
|
||||
}
|
||||
|
||||
func CreateSearchPlan(schema *typeutil.SchemaHelper, exprStr string, vectorFieldName string, queryInfo *planpb.QueryInfo, exprTemplateValues map[string]*schemapb.TemplateValue) (*planpb.PlanNode, error) {
|
||||
parse := func() (*planpb.Expr, error) {
|
||||
if len(exprStr) <= 0 {
|
||||
|
|
|
@ -1473,3 +1473,36 @@ func BenchmarkTemplateWithString(b *testing.B) {
|
|||
assert.NotNil(b, plan)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNestedPathWithChinese(t *testing.T) {
|
||||
schema := newTestSchemaHelper(t)
|
||||
|
||||
expr := `A["姓名"] == "小明"`
|
||||
plan, err := CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
|
||||
Topk: 0,
|
||||
MetricType: "",
|
||||
SearchParams: "",
|
||||
RoundDecimal: 0,
|
||||
}, nil)
|
||||
assert.NoError(t, err, expr)
|
||||
paths := plan.GetVectorAnns().GetPredicates().GetUnaryRangeExpr().GetColumnInfo().GetNestedPath()
|
||||
assert.NotNil(t, paths)
|
||||
assert.Equal(t, 2, len(paths))
|
||||
assert.Equal(t, "A", paths[0])
|
||||
assert.Equal(t, "姓名", paths[1])
|
||||
|
||||
expr = `A["年份"]["月份"] == "九月"`
|
||||
plan, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
|
||||
Topk: 0,
|
||||
MetricType: "",
|
||||
SearchParams: "",
|
||||
RoundDecimal: 0,
|
||||
}, nil)
|
||||
assert.NoError(t, err, expr)
|
||||
paths = plan.GetVectorAnns().GetPredicates().GetUnaryRangeExpr().GetColumnInfo().GetNestedPath()
|
||||
assert.NotNil(t, paths)
|
||||
assert.Equal(t, 3, len(paths))
|
||||
assert.Equal(t, "A", paths[0])
|
||||
assert.Equal(t, "年份", paths[1])
|
||||
assert.Equal(t, "月份", paths[2])
|
||||
}
|
||||
|
|
|
@ -2,8 +2,10 @@ package planparserv2
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
"github.com/milvus-io/milvus/internal/json"
|
||||
|
@ -730,3 +732,41 @@ func parseJSONValue(value interface{}) (*planpb.GenericValue, schemapb.DataType,
|
|||
return nil, schemapb.DataType_None, fmt.Errorf("%v is of unknown type: %T\n", value, v)
|
||||
}
|
||||
}
|
||||
|
||||
func convertHanToASCII(s string) string {
|
||||
var builder strings.Builder
|
||||
builder.Grow(len(s) * 6)
|
||||
skipCur := false
|
||||
n := len(s)
|
||||
for i, r := range s {
|
||||
if skipCur {
|
||||
builder.WriteRune(r)
|
||||
skipCur = false
|
||||
continue
|
||||
}
|
||||
if r == '\\' {
|
||||
if i+1 < n && !isEscapeCh(s[i+1]) {
|
||||
return s
|
||||
}
|
||||
skipCur = true
|
||||
builder.WriteRune(r)
|
||||
continue
|
||||
}
|
||||
|
||||
if unicode.Is(unicode.Han, r) {
|
||||
builder.WriteString(formatUnicode(uint32(r)))
|
||||
} else {
|
||||
builder.WriteRune(r)
|
||||
}
|
||||
}
|
||||
|
||||
return builder.String()
|
||||
}
|
||||
|
||||
// unicodeEscapeRunRe matches one or more consecutive `\uXXXX` escapes. It is
// compiled once at package scope instead of on every decodeUnicode call.
// Matching whole runs lets a UTF-16 surrogate pair be seen in a single match.
var unicodeEscapeRunRe = regexp.MustCompile(`(?:\\u[0-9a-fA-F]{4})+`)

// decodeUnicode replaces every `\uXXXX` escape in input with the character it
// denotes and leaves all other text untouched.
//
// Each escape is treated as one UTF-16 code unit. A high surrogate directly
// followed by a low surrogate (e.g. `\ud83d\ude00`) is combined into the single
// code point it encodes; an unpaired surrogate decodes to U+FFFD, the Unicode
// replacement character.
func decodeUnicode(input string) string {
	const (
		escapeLen = 6 // len(`\uXXXX`)
		highStart = 0xD800
		lowStart  = 0xDC00
		lowEnd    = 0xE000 // exclusive
	)

	// unitAt parses the four hex digits of the escape starting at offset i.
	unitAt := func(run string, i int) rune {
		v, err := strconv.ParseUint(run[i+2:i+escapeLen], 16, 16)
		if err != nil {
			// Unreachable: the regexp guarantees exactly four hex digits.
			return 0xFFFD
		}
		return rune(v)
	}

	return unicodeEscapeRunRe.ReplaceAllStringFunc(input, func(run string) string {
		var out strings.Builder
		// A 6-byte escape yields at most 3 bytes of UTF-8; a 12-byte pair yields 4.
		out.Grow(len(run) / 2)

		for i := 0; i < len(run); i += escapeLen {
			unit := unitAt(run, i)
			if unit >= highStart && unit < lowStart && i+2*escapeLen <= len(run) {
				if low := unitAt(run, i+escapeLen); low >= lowStart && low < lowEnd {
					out.WriteRune(0x10000 + (unit-highStart)<<10 + (low - lowStart))
					i += escapeLen // the low surrogate has been consumed too
					continue
				}
			}
			// WriteRune emits U+FFFD for an unpaired surrogate.
			out.WriteRune(unit)
		}
		return out.String()
	})
}
|
||||
|
|
|
@ -328,3 +328,10 @@ func Test_getArrayElementType(t *testing.T) {
|
|||
assert.Equal(t, schemapb.DataType_None, getArrayElementType(expr))
|
||||
})
|
||||
}
|
||||
|
||||
func Test_decodeUnicode(t *testing.T) {
|
||||
s1 := "A[\"\\u5e74\\u4efd\"][\"\\u6708\\u4efd\"]"
|
||||
|
||||
assert.NotEqual(t, `A["年份"]["月份"]`, s1)
|
||||
assert.Equal(t, `A["年份"]["月份"]`, decodeUnicode(s1))
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue