Support input empty string (#19111)

Signed-off-by: yhmo <yihua.mo@zilliz.com>

Signed-off-by: yhmo <yihua.mo@zilliz.com>
pull/19173/head
groot 2022-09-09 18:28:38 +08:00 committed by GitHub
parent 6e8d959f76
commit 36e333d062
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 3 additions and 157 deletions

View File

@ -556,14 +556,6 @@ func generateVarCharArray(numRows int, maxLen int) []string {
ret := make([]string, numRows)
for i := 0; i < numRows; i++ {
ret[i] = funcutil.RandomString(rand.Intn(maxLen))
// In the pr #18881 we make a temp fix to avoid user to insert empty strings for varchar field.
// The insertTask.checkVarcharFieldData() will check empty strings for insert request.
// So, we don't allow the random string to be empty in unittest.
// TODO: once we can support empty string, remove this line.
if len(ret[i]) == 0 {
ret[i] = " "
}
}
return ret

View File

@ -163,30 +163,6 @@ func (it *insertTask) checkPrimaryFieldData() error {
return nil
}
// currently we don't support inserting empty string(not sure the string index can handle it).
// this method check each row of each varchar field, return error if found any empty string.
// this is not a good solution, the correct way is:
// allow user to insert empty string, store empty string in parquet, ignore empty string in string index
// TODO: once we support empty string, delete this method
func (it *insertTask) checkVarcharFieldData() error {
fieldsData := it.InsertRequest.GetFieldsData()
for _, fd := range fieldsData {
if fd.GetType() != schemapb.DataType_VarChar {
continue
}
values := fd.GetScalars().GetStringData().GetData()
for _, str := range values {
if len(str) == 0 {
log.Error("value of varchar field cannot be empty string", zap.String("field name", fd.GetFieldName()))
return fmt.Errorf("value of varchar field '%s' cannot be empty string", fd.GetFieldName())
}
}
}
return nil
}
func (it *insertTask) PreExecute(ctx context.Context) error {
sp, ctx := trace.StartSpanFromContextWithOperationName(it.ctx, "Proxy-Insert-PreExecute")
defer sp.Finish()
@ -255,13 +231,6 @@ func (it *insertTask) PreExecute(ctx context.Context) error {
return err
}
// check empty string for varchar field. currently we don't support empty string
// TODO: once we support empty string, delete this line and the method checkVarcharFieldData()
err = it.checkVarcharFieldData()
if err != nil {
return err
}
// set field ID to insert field data
err = fillFieldIDBySchema(it.GetFieldsData(), collSchema)
if err != nil {

View File

@ -1,7 +1,6 @@
package proxy
import (
"strconv"
"testing"
"github.com/milvus-io/milvus/internal/proto/commonpb"
@ -347,98 +346,3 @@ func TestInsertTask_CheckAligned(t *testing.T) {
err = case2.CheckAligned()
assert.NoError(t, err)
}
func TestInsertTask_checkVarcharFieldData(t *testing.T) {
ids := make([]int64, 10)
for i := 0; i < 10; i++ {
ids[i] = int64(i)
}
values := make([]string, 10)
for i := 0; i < 10; i++ {
values[i] = strconv.Itoa(i)
}
it := insertTask{
schema: &schemapb.CollectionSchema{
Name: "TestInsertTask_checkVarcharFieldData",
Description: "TestInsertTask_checkVarcharFieldData",
AutoID: false,
Fields: []*schemapb.FieldSchema{
{
AutoID: true,
DataType: schemapb.DataType_Int64,
},
{
AutoID: false,
DataType: schemapb.DataType_VarChar,
},
},
},
}
it.FieldsData = []*schemapb.FieldData{
{
Type: schemapb.DataType_Int64,
FieldName: "aaa",
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
LongData: &schemapb.LongArray{
Data: ids,
},
},
},
},
},
{
Type: schemapb.DataType_VarChar,
FieldName: "bbb",
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: values,
},
},
},
},
},
}
err := it.checkVarcharFieldData()
assert.Nil(t, err)
values = append(values, "")
it.FieldsData = []*schemapb.FieldData{
{
Type: schemapb.DataType_Int64,
FieldName: "aaa",
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
LongData: &schemapb.LongArray{
Data: ids,
},
},
},
},
},
{
Type: schemapb.DataType_VarChar,
FieldName: "bbb",
Field: &schemapb.FieldData_Scalars{
Scalars: &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: values,
},
},
},
},
},
}
err = it.checkVarcharFieldData()
assert.Error(t, err)
}

View File

@ -273,14 +273,11 @@ func (w *PayloadWriter) AddDoubleToPayload(msgs []float64) error {
func (w *PayloadWriter) AddOneStringToPayload(msg string) error {
length := len(msg)
if length == 0 {
return errors.New("can't add empty string into payload")
}
cmsg := C.CString(msg)
clength := C.int(length)
defer C.free(unsafe.Pointer(cmsg))
// the C.AddOneStringToPayload can handle empty string
status := C.AddOneStringToPayload(w.payloadWriterPtr, cmsg, clength)
return HandleCStatus(&status, "AddOneStringToPayload failed")
}

View File

@ -567,7 +567,7 @@ func TestPayload_CGO_ReaderandWriter(t *testing.T) {
assert.NotNil(t, err)
err = w.AddOneStringToPayload("")
assert.NotNil(t, err)
assert.Nil(t, err)
err = w.FinishPayloadWriter()
assert.Nil(t, err)
err = w.AddOneStringToPayload("c")

View File

@ -567,7 +567,7 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.NotNil(t, err)
err = w.AddOneStringToPayload("")
assert.NotNil(t, err)
assert.Nil(t, err)
err = w.FinishPayloadWriter()
assert.Nil(t, err)
err = w.AddOneStringToPayload("c")

View File

@ -1202,22 +1202,6 @@ class TestInsertString(TestcaseBase):
error = {ct.err_code: 0, ct.err_msg: 'Data type is not support.'}
collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_insert_string_field_empty(self):
"""
target: test create collection with string field
method: 1.create a collection
2.Insert string field with empty data
expected: raise an error
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
nb = 1000
data = cf.gen_default_list_data(nb)
data[2] = [""for _ in range(nb)]
error = {ct.err_code: 1, ct.err_msg: f'value of varchar field {ct.default_string_field_name} cannot be empty string'}
collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_insert_string_field_space(self):
"""