mirror of https://github.com/milvus-io/milvus.git
Support input empty string (#19111)
Signed-off-by: yhmo <yihua.mo@zilliz.com> Signed-off-by: yhmo <yihua.mo@zilliz.com>pull/19173/head
parent
6e8d959f76
commit
36e333d062
|
@ -556,14 +556,6 @@ func generateVarCharArray(numRows int, maxLen int) []string {
|
|||
ret := make([]string, numRows)
|
||||
for i := 0; i < numRows; i++ {
|
||||
ret[i] = funcutil.RandomString(rand.Intn(maxLen))
|
||||
|
||||
// In the pr #18881 we make a temp fix to avoid user to insert empty strings for varchar field.
|
||||
// The insertTask.checkVarcharFieldData() will check empty strings for insert request.
|
||||
// So, we don't allow the random string to be empty in unittest.
|
||||
// TODO: once we can support empty string, remove this line.
|
||||
if len(ret[i]) == 0 {
|
||||
ret[i] = " "
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
|
|
|
@ -163,30 +163,6 @@ func (it *insertTask) checkPrimaryFieldData() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// currently we don't support inserting empty string(not sure the string index can handle it).
|
||||
// this method check each row of each varchar field, return error if found any empty string.
|
||||
// this is not a good solution, the correct way is:
|
||||
// allow user to insert empty string, store empty string in parquet, ignore empty string in string index
|
||||
// TODO: once we support empty string, delete this method
|
||||
func (it *insertTask) checkVarcharFieldData() error {
|
||||
fieldsData := it.InsertRequest.GetFieldsData()
|
||||
for _, fd := range fieldsData {
|
||||
if fd.GetType() != schemapb.DataType_VarChar {
|
||||
continue
|
||||
}
|
||||
|
||||
values := fd.GetScalars().GetStringData().GetData()
|
||||
for _, str := range values {
|
||||
if len(str) == 0 {
|
||||
log.Error("value of varchar field cannot be empty string", zap.String("field name", fd.GetFieldName()))
|
||||
return fmt.Errorf("value of varchar field '%s' cannot be empty string", fd.GetFieldName())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (it *insertTask) PreExecute(ctx context.Context) error {
|
||||
sp, ctx := trace.StartSpanFromContextWithOperationName(it.ctx, "Proxy-Insert-PreExecute")
|
||||
defer sp.Finish()
|
||||
|
@ -255,13 +231,6 @@ func (it *insertTask) PreExecute(ctx context.Context) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// check empty string for varchar field. currently we don't support empty string
|
||||
// TODO: once we support empty string, delete this line and the method checkVarcharFieldData()
|
||||
err = it.checkVarcharFieldData()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// set field ID to insert field data
|
||||
err = fillFieldIDBySchema(it.GetFieldsData(), collSchema)
|
||||
if err != nil {
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package proxy
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
||||
|
@ -347,98 +346,3 @@ func TestInsertTask_CheckAligned(t *testing.T) {
|
|||
err = case2.CheckAligned()
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestInsertTask_checkVarcharFieldData(t *testing.T) {
|
||||
ids := make([]int64, 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
ids[i] = int64(i)
|
||||
}
|
||||
|
||||
values := make([]string, 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
values[i] = strconv.Itoa(i)
|
||||
}
|
||||
|
||||
it := insertTask{
|
||||
schema: &schemapb.CollectionSchema{
|
||||
Name: "TestInsertTask_checkVarcharFieldData",
|
||||
Description: "TestInsertTask_checkVarcharFieldData",
|
||||
AutoID: false,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
AutoID: true,
|
||||
DataType: schemapb.DataType_Int64,
|
||||
},
|
||||
{
|
||||
AutoID: false,
|
||||
DataType: schemapb.DataType_VarChar,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
it.FieldsData = []*schemapb.FieldData{
|
||||
{
|
||||
Type: schemapb.DataType_Int64,
|
||||
FieldName: "aaa",
|
||||
Field: &schemapb.FieldData_Scalars{
|
||||
Scalars: &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_LongData{
|
||||
LongData: &schemapb.LongArray{
|
||||
Data: ids,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: schemapb.DataType_VarChar,
|
||||
FieldName: "bbb",
|
||||
Field: &schemapb.FieldData_Scalars{
|
||||
Scalars: &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_StringData{
|
||||
StringData: &schemapb.StringArray{
|
||||
Data: values,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err := it.checkVarcharFieldData()
|
||||
assert.Nil(t, err)
|
||||
|
||||
values = append(values, "")
|
||||
it.FieldsData = []*schemapb.FieldData{
|
||||
{
|
||||
Type: schemapb.DataType_Int64,
|
||||
FieldName: "aaa",
|
||||
Field: &schemapb.FieldData_Scalars{
|
||||
Scalars: &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_LongData{
|
||||
LongData: &schemapb.LongArray{
|
||||
Data: ids,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: schemapb.DataType_VarChar,
|
||||
FieldName: "bbb",
|
||||
Field: &schemapb.FieldData_Scalars{
|
||||
Scalars: &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_StringData{
|
||||
StringData: &schemapb.StringArray{
|
||||
Data: values,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err = it.checkVarcharFieldData()
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
|
|
@ -273,14 +273,11 @@ func (w *PayloadWriter) AddDoubleToPayload(msgs []float64) error {
|
|||
|
||||
func (w *PayloadWriter) AddOneStringToPayload(msg string) error {
|
||||
length := len(msg)
|
||||
if length == 0 {
|
||||
return errors.New("can't add empty string into payload")
|
||||
}
|
||||
|
||||
cmsg := C.CString(msg)
|
||||
clength := C.int(length)
|
||||
defer C.free(unsafe.Pointer(cmsg))
|
||||
|
||||
// the C.AddOneStringToPayload can handle empty string
|
||||
status := C.AddOneStringToPayload(w.payloadWriterPtr, cmsg, clength)
|
||||
return HandleCStatus(&status, "AddOneStringToPayload failed")
|
||||
}
|
||||
|
|
|
@ -567,7 +567,7 @@ func TestPayload_CGO_ReaderandWriter(t *testing.T) {
|
|||
assert.NotNil(t, err)
|
||||
|
||||
err = w.AddOneStringToPayload("")
|
||||
assert.NotNil(t, err)
|
||||
assert.Nil(t, err)
|
||||
err = w.FinishPayloadWriter()
|
||||
assert.Nil(t, err)
|
||||
err = w.AddOneStringToPayload("c")
|
||||
|
|
|
@ -567,7 +567,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
|
|||
assert.NotNil(t, err)
|
||||
|
||||
err = w.AddOneStringToPayload("")
|
||||
assert.NotNil(t, err)
|
||||
assert.Nil(t, err)
|
||||
err = w.FinishPayloadWriter()
|
||||
assert.Nil(t, err)
|
||||
err = w.AddOneStringToPayload("c")
|
||||
|
|
|
@ -1202,22 +1202,6 @@ class TestInsertString(TestcaseBase):
|
|||
error = {ct.err_code: 0, ct.err_msg: 'Data type is not support.'}
|
||||
collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_insert_string_field_empty(self):
|
||||
"""
|
||||
target: test create collection with string field
|
||||
method: 1.create a collection
|
||||
2.Insert string field with empty data
|
||||
expected: raise an error
|
||||
"""
|
||||
c_name = cf.gen_unique_str(prefix)
|
||||
collection_w = self.init_collection_wrap(name=c_name)
|
||||
nb = 1000
|
||||
data = cf.gen_default_list_data(nb)
|
||||
data[2] = [""for _ in range(nb)]
|
||||
error = {ct.err_code: 1, ct.err_msg: f'value of varchar field {ct.default_string_field_name} cannot be empty string'}
|
||||
collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_insert_string_field_space(self):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue