Hide dynamic field for describing collection (#24312)

Signed-off-by: cai.zhang <cai.zhang@zilliz.com>
pull/24326/head
cai.zhang 2023-05-23 14:27:25 +08:00 committed by GitHub
parent 97cd0409e4
commit 7819a5733f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 148 additions and 25 deletions

View File

@ -322,6 +322,15 @@ func (coord *RootCoordMock) CreateCollection(ctx context.Context, req *milvuspb.
for i := range schema.Fields {
schema.Fields[i].FieldID = int64(common.StartOfUserFieldID + i)
}
if schema.EnableDynamicField {
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
FieldID: int64(common.StartOfUserFieldID + len(schema.Fields)),
Name: common.MetaFieldName,
Description: "$meta",
DataType: schemapb.DataType_JSON,
IsDynamic: true,
})
}
collID := typeutil.UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
coord.collName2ID[req.CollectionName] = collID

View File

@ -489,6 +489,9 @@ func (dct *describeCollectionTask) Execute(ctx context.Context) error {
dct.result.Aliases = result.Aliases
dct.result.Properties = result.Properties
for _, field := range result.Schema.Fields {
if field.IsDynamic {
continue
}
if field.FieldID >= common.StartOfUserFieldID {
dct.result.Schema.Fields = append(dct.result.Schema.Fields, &schemapb.FieldSchema{
FieldID: field.FieldID,

View File

@ -300,22 +300,17 @@ func (it *insertTask) verifyDynamicFieldData() error {
}
// checkDynamicFieldData makes sure the insert message carries a column for
// the dynamic field. When a supplied column is recognised as the dynamic one,
// the result of verifyDynamicFieldData is returned; otherwise a column holding
// one empty JSON object ("{}") per row is generated with
// autoGenDynamicFieldData and appended to it.insertMsg.FieldsData.
//
// NOTE(review): this listing comes from a rendered diff and appears to show
// removed and added lines together (the dataNameSet lookup next to the
// data.IsDynamic check, and both forms of the autoGenDynamicFieldData call) —
// confirm against the committed file before relying on the exact statements.
func (it *insertTask) checkDynamicFieldData() error {
var dataNameSet = typeutil.NewSet[string]()
for _, data := range it.insertMsg.FieldsData {
dataNameSet.Insert(data.GetFieldName())
}
if _, ok := dataNameSet[common.MetaFieldName]; ok {
return it.verifyDynamicFieldData()
if data.IsDynamic {
data.FieldName = common.MetaFieldName
return it.verifyDynamicFieldData()
}
}
// No dynamic column was supplied: synthesize one empty JSON object per row.
defaultData := make([][]byte, it.insertMsg.NRows())
for i := range defaultData {
defaultData[i] = []byte("{}")
}
dynamicData, err := autoGenDynamicFieldData(defaultData)
if err != nil {
return err
}
dynamicData := autoGenDynamicFieldData(defaultData)
it.insertMsg.FieldsData = append(it.insertMsg.FieldsData, dynamicData)
return nil
}

View File

@ -279,14 +279,15 @@ func TestInsertTask_checkDynamicFieldData(t *testing.T) {
jsonBytes, err := json.MarshalIndent(data, "", " ")
assert.NoError(t, err)
jsonData = append(jsonData, jsonBytes)
jsonFieldData, err := autoGenDynamicFieldData(jsonData)
assert.NoError(t, err)
jsonFieldData := autoGenDynamicFieldData(jsonData)
it := insertTask{
ctx: context.Background(),
insertMsg: &msgstream.InsertMsg{
InsertRequest: msgpb.InsertRequest{
CollectionName: "collectionName",
FieldsData: []*schemapb.FieldData{jsonFieldData},
NumRows: 1,
Version: msgpb.InsertDataVersion_ColumnBased,
},
},
schema: newTestSchema(),
@ -310,14 +311,15 @@ func TestInsertTask_checkDynamicFieldData(t *testing.T) {
jsonBytes, err := json.MarshalIndent(data, "", " ")
assert.NoError(t, err)
jsonData = append(jsonData, jsonBytes)
jsonFieldData, err := autoGenDynamicFieldData(jsonData)
assert.NoError(t, err)
jsonFieldData := autoGenDynamicFieldData(jsonData)
it := insertTask{
ctx: context.Background(),
insertMsg: &msgstream.InsertMsg{
InsertRequest: msgpb.InsertRequest{
CollectionName: "collectionName",
FieldsData: []*schemapb.FieldData{jsonFieldData},
NumRows: 1,
Version: msgpb.InsertDataVersion_ColumnBased,
},
},
schema: newTestSchema(),
@ -340,14 +342,15 @@ func TestInsertTask_checkDynamicFieldData(t *testing.T) {
jsonBytes, err := json.MarshalIndent(data, "", " ")
assert.NoError(t, err)
jsonData = append(jsonData, jsonBytes)
jsonFieldData, err := autoGenDynamicFieldData(jsonData)
assert.NoError(t, err)
jsonFieldData := autoGenDynamicFieldData(jsonData)
it := insertTask{
ctx: context.Background(),
insertMsg: &msgstream.InsertMsg{
InsertRequest: msgpb.InsertRequest{
CollectionName: "collectionName",
FieldsData: []*schemapb.FieldData{jsonFieldData},
NumRows: 1,
Version: msgpb.InsertDataVersion_ColumnBased,
},
},
schema: newTestSchema(),
@ -358,19 +361,20 @@ func TestInsertTask_checkDynamicFieldData(t *testing.T) {
})
t.Run("json data is string", func(t *testing.T) {
data := "abcdefg"
jsonFieldData, err := autoGenDynamicFieldData([][]byte{[]byte(data)})
assert.NoError(t, err)
jsonFieldData := autoGenDynamicFieldData([][]byte{[]byte(data)})
it := insertTask{
ctx: context.Background(),
insertMsg: &msgstream.InsertMsg{
InsertRequest: msgpb.InsertRequest{
CollectionName: "collectionName",
FieldsData: []*schemapb.FieldData{jsonFieldData},
NumRows: 1,
Version: msgpb.InsertDataVersion_ColumnBased,
},
},
schema: newTestSchema(),
}
err = it.checkDynamicFieldData()
err := it.checkDynamicFieldData()
assert.Error(t, err)
})
t.Run("no json data", func(t *testing.T) {
@ -380,6 +384,8 @@ func TestInsertTask_checkDynamicFieldData(t *testing.T) {
InsertRequest: msgpb.InsertRequest{
CollectionName: "collectionName",
FieldsData: []*schemapb.FieldData{},
NumRows: 1,
Version: msgpb.InsertDataVersion_ColumnBased,
},
},
schema: newTestSchema(),

View File

@ -108,6 +108,49 @@ func constructCollectionSchema(
}
}
// constructCollectionSchemaEnableDynamicSchema returns a test collection
// schema named collectionName with EnableDynamicField set to true.
//
// The schema holds exactly two fields, in this order:
//   - an Int64 primary key named int64Field with AutoID enabled;
//   - a FloatVector field named floatVecField whose common.DimKey type
//     parameter is the decimal string form of dim.
//
// Both field IDs are left at zero.
func constructCollectionSchemaEnableDynamicSchema(
	int64Field, floatVecField string,
	dim int,
	collectionName string,
) *schemapb.CollectionSchema {
	// The vector dimension travels as a string-valued type parameter.
	vecTypeParams := []*commonpb.KeyValuePair{
		{Key: common.DimKey, Value: strconv.Itoa(dim)},
	}

	fields := []*schemapb.FieldSchema{
		{
			FieldID:      0,
			Name:         int64Field,
			IsPrimaryKey: true,
			Description:  "",
			DataType:     schemapb.DataType_Int64,
			TypeParams:   nil,
			IndexParams:  nil,
			AutoID:       true,
		},
		{
			FieldID:      0,
			Name:         floatVecField,
			IsPrimaryKey: false,
			Description:  "",
			DataType:     schemapb.DataType_FloatVector,
			TypeParams:   vecTypeParams,
			IndexParams:  nil,
			AutoID:       false,
		},
	}

	return &schemapb.CollectionSchema{
		Name:               collectionName,
		Description:        "",
		AutoID:             false,
		EnableDynamicField: true,
		Fields:             fields,
	}
}
func constructCollectionSchemaByDataType(collectionName string, fieldName2DataType map[string]schemapb.DataType, primaryFieldName string, autoID bool) *schemapb.CollectionSchema {
fieldsSchema := make([]*schemapb.FieldSchema, 0)
@ -1021,6 +1064,71 @@ func TestDescribeCollectionTask_ShardsNum1(t *testing.T) {
assert.Equal(t, collectionName, task.result.GetCollectionName())
}
// TestDescribeCollectionTask_EnableDynamicSchema creates a collection whose
// schema has EnableDynamicField set and declares two user fields, then runs a
// describeCollectionTask against it. The described schema must contain only
// those two fields: the dynamic field that the root coordinator mock appends
// on creation is expected to be filtered out of the result.
func TestDescribeCollectionTask_EnableDynamicSchema(t *testing.T) {
	rc := NewRootCoordMock()
	rc.Start()
	defer rc.Stop()
	qc := getQueryCoord()
	qc.Start()
	defer qc.Stop()
	ctx := context.Background()
	mgr := newShardClientMgr()
	InitMetaCache(ctx, rc, qc, mgr)

	prefix := "TestDescribeCollectionTask"
	dbName := ""
	collectionName := prefix + funcutil.GenRandomStr()
	shardsNum := common.DefaultShardsNum
	int64Field := "int64"
	floatVecField := "fvec"
	dim := 128

	schema := constructCollectionSchemaEnableDynamicSchema(int64Field, floatVecField, dim, collectionName)
	marshaledSchema, err := proto.Marshal(schema)
	assert.NoError(t, err)

	// Create the collection directly on the root coordinator mock.
	createColReq := &milvuspb.CreateCollectionRequest{
		Base: &commonpb.MsgBase{
			// The request creates a collection, so tag it accordingly.
			MsgType:   commonpb.MsgType_CreateCollection,
			MsgID:     100,
			Timestamp: 100,
		},
		DbName:         dbName,
		CollectionName: collectionName,
		Schema:         marshaledSchema,
		ShardsNum:      shardsNum,
	}
	// Fail here, at the setup step, instead of at a later assertion if the
	// collection could not be created.
	status, err := rc.CreateCollection(ctx, createColReq)
	assert.NoError(t, err)
	assert.Equal(t, commonpb.ErrorCode_Success, status.GetErrorCode())

	// Look the collection up through the global meta cache before describing it.
	_, err = globalMetaCache.GetCollectionID(ctx, collectionName)
	assert.NoError(t, err)

	// Describe the collection through the proxy task.
	task := &describeCollectionTask{
		Condition: NewTaskCondition(ctx),
		DescribeCollectionRequest: &milvuspb.DescribeCollectionRequest{
			Base: &commonpb.MsgBase{
				MsgType:   commonpb.MsgType_DescribeCollection,
				MsgID:     100,
				Timestamp: 100,
			},
			DbName:         dbName,
			CollectionName: collectionName,
		},
		ctx:       ctx,
		rootCoord: rc,
		result:    nil,
	}
	err = task.PreExecute(ctx)
	assert.NoError(t, err)

	err = task.Execute(ctx)
	assert.NoError(t, err)
	assert.Equal(t, commonpb.ErrorCode_Success, task.result.Status.ErrorCode)
	assert.Equal(t, shardsNum, task.result.ShardsNum)
	assert.Equal(t, collectionName, task.result.GetCollectionName())
	// Only the primary key and the vector field are visible; the dynamic
	// field must not be reported.
	assert.Len(t, task.result.Schema.Fields, 2)
}
func TestDescribeCollectionTask_ShardsNum2(t *testing.T) {
rc := NewRootCoordMock()
rc.Start()

View File

@ -560,8 +560,8 @@ func autoGenPrimaryFieldData(fieldSchema *schemapb.FieldSchema, data interface{}
return &fieldData, nil
}
func autoGenDynamicFieldData(data [][]byte) (*schemapb.FieldData, error) {
fieldData := &schemapb.FieldData{
func autoGenDynamicFieldData(data [][]byte) *schemapb.FieldData {
return &schemapb.FieldData{
FieldName: common.MetaFieldName,
Type: schemapb.DataType_JSON,
Field: &schemapb.FieldData_Scalars{
@ -573,8 +573,8 @@ func autoGenDynamicFieldData(data [][]byte) (*schemapb.FieldData, error) {
},
},
},
IsDynamic: true,
}
return fieldData, nil
}
// fillFieldIDBySchema set fieldID to fieldData according FieldSchemas

View File

@ -114,10 +114,11 @@ func (s *JSONExprSuite) TestJsonEnableDynamicSchema() {
describeCollectionResp, err := c.proxy.DescribeCollection(ctx, &milvuspb.DescribeCollectionRequest{CollectionName: collectionName})
s.NoError(err)
s.True(describeCollectionResp.Schema.EnableDynamicField)
s.Equal(3, len(describeCollectionResp.GetSchema().GetFields()))
s.Equal(2, len(describeCollectionResp.GetSchema().GetFields()))
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
jsonData := newJSONData(common.MetaFieldName, rowNum)
jsonData.IsDynamic = true
s.insertFlushIndexLoad(ctx, c, dbName, collectionName, rowNum, dim, []*schemapb.FieldData{fVecColumn, jsonData})
s.checkSearch(c, collectionName, common.MetaFieldName, dim)
@ -196,7 +197,7 @@ func (s *JSONExprSuite) TestJSON_InsertWithoutDynamicData() {
describeCollectionResp, err := c.proxy.DescribeCollection(ctx, &milvuspb.DescribeCollectionRequest{CollectionName: collectionName})
s.NoError(err)
s.True(describeCollectionResp.Schema.EnableDynamicField)
s.Equal(3, len(describeCollectionResp.GetSchema().GetFields()))
s.Equal(2, len(describeCollectionResp.GetSchema().GetFields()))
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
s.insertFlushIndexLoad(ctx, c, dbName, collectionName, rowNum, dim, []*schemapb.FieldData{fVecColumn})
@ -290,11 +291,12 @@ func (s *JSONExprSuite) TestJSON_DynamicSchemaWithJSON() {
describeCollectionResp, err := c.proxy.DescribeCollection(ctx, &milvuspb.DescribeCollectionRequest{CollectionName: collectionName})
s.NoError(err)
s.True(describeCollectionResp.Schema.EnableDynamicField)
s.Equal(4, len(describeCollectionResp.GetSchema().GetFields()))
s.Equal(3, len(describeCollectionResp.GetSchema().GetFields()))
fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim)
jsonData := newJSONData(jsonField, rowNum)
dynamicData := newJSONData(common.MetaFieldName, rowNum)
dynamicData.IsDynamic = true
s.insertFlushIndexLoad(ctx, c, dbName, collectionName, rowNum, dim, []*schemapb.FieldData{fVecColumn, jsonData, dynamicData})
s.checkSearch(c, collectionName, common.MetaFieldName, dim)