mirror of https://github.com/milvus-io/milvus.git
fix: Use pk from binlog during import (#32118)
During binlog import, even if the primary key's autoID is set to true, the primary key from the binlog should be used instead of being reassigned.

issue: https://github.com/milvus-io/milvus/discussions/31943, https://github.com/milvus-io/milvus/issues/28521

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Branch: pull/32307/head

parent e19d17076f
commit 558feed5ed
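The diff below is small, so the idea behind it can be shown with a standalone sketch first. Everything in this sketch is hypothetical illustration, not code from this commit (the pkField struct, assignPKs, and alloc are made-up names): while the schema still reports autoID = true an importer allocates fresh primary keys, whereas clearing the flag, as this commit does for backup/binlog imports, makes it keep the keys already present in the binlog.

package main

import "fmt"

// pkField is a hypothetical stand-in for the primary-key field of a schema.
type pkField struct {
	Name   string
	AutoID bool
}

// assignPKs mimics the decision the fix is about: with AutoID still set the
// importer allocates new keys; otherwise it keeps the keys from the binlog.
func assignPKs(field pkField, fromBinlog []int64, alloc func(n int) []int64) []int64 {
	if field.AutoID {
		return alloc(len(fromBinlog)) // keys reassigned, original PKs lost
	}
	return fromBinlog // AutoID unset (what UnsetAutoID does): PKs preserved
}

func main() {
	next := int64(1000)
	alloc := func(n int) []int64 {
		out := make([]int64, n)
		for i := range out {
			out[i] = next
			next++
		}
		return out
	}
	fmt.Println(assignPKs(pkField{Name: "pk", AutoID: true}, []int64{1, 2, 3}, alloc))  // [1000 1001 1002]
	fmt.Println(assignPKs(pkField{Name: "pk", AutoID: false}, []int64{1, 2, 3}, alloc)) // [1 2 3]
}

Run as-is, the first call prints freshly allocated keys and the second prints the original ones, which is the behavior difference this fix is after.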
@@ -27,6 +27,7 @@ import (
 	"github.com/milvus-io/milvus/internal/datanode/metacache"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
 	"github.com/milvus-io/milvus/internal/proto/internalpb"
+	"github.com/milvus-io/milvus/internal/util/importutilv2"
 	"github.com/milvus-io/milvus/pkg/util/typeutil"
 )
 
@@ -161,6 +162,11 @@ func NewPreImportTask(req *datapb.PreImportRequest) Task {
 		}
 	})
 	ctx, cancel := context.WithCancel(context.Background())
+	// During binlog import, even if the primary key's autoID is set to true,
+	// the primary key from the binlog should be used instead of being reassigned.
+	if importutilv2.IsBackup(req.GetOptions()) {
+		UnsetAutoID(req.GetSchema())
+	}
 	return &PreImportTask{
 		PreImportTask: &datapb.PreImportTask{
 			JobID: req.GetJobID(),
@@ -230,6 +236,11 @@ type ImportTask struct {
 
 func NewImportTask(req *datapb.ImportRequest) Task {
 	ctx, cancel := context.WithCancel(context.Background())
+	// During binlog import, even if the primary key's autoID is set to true,
+	// the primary key from the binlog should be used instead of being reassigned.
+	if importutilv2.IsBackup(req.GetOptions()) {
+		UnsetAutoID(req.GetSchema())
+	}
 	task := &ImportTask{
 		ImportTaskV2: &datapb.ImportTaskV2{
 			JobID: req.GetJobID(),
@@ -217,3 +217,12 @@ func LogStats(manager TaskManager) {
 	tasks = manager.GetBy(WithType(ImportTaskType))
 	logFunc(tasks, ImportTaskType)
 }
+
+func UnsetAutoID(schema *schemapb.CollectionSchema) {
+	for _, field := range schema.GetFields() {
+		if field.GetIsPrimaryKey() && field.GetAutoID() {
+			field.AutoID = false
+			return
+		}
+	}
+}
@@ -89,3 +89,27 @@ func Test_AppendSystemFieldsData(t *testing.T) {
 	assert.Equal(t, count, insertData.Data[common.RowIDField].RowNum())
 	assert.Equal(t, count, insertData.Data[common.TimeStampField].RowNum())
 }
+
+func Test_UnsetAutoID(t *testing.T) {
+	pkField := &schemapb.FieldSchema{
+		FieldID:      100,
+		Name:         "pk",
+		DataType:     schemapb.DataType_Int64,
+		IsPrimaryKey: true,
+		AutoID:       true,
+	}
+	vecField := &schemapb.FieldSchema{
+		FieldID:  101,
+		Name:     "vec",
+		DataType: schemapb.DataType_FloatVector,
+	}
+
+	schema := &schemapb.CollectionSchema{}
+	schema.Fields = []*schemapb.FieldSchema{pkField, vecField}
+	UnsetAutoID(schema)
+	for _, field := range schema.GetFields() {
+		if field.GetIsPrimaryKey() {
+			assert.False(t, schema.GetFields()[0].GetAutoID())
+		}
+	}
+}
@@ -22,6 +22,7 @@ import (
 	"time"
 
 	"github.com/golang/protobuf/proto"
+	"github.com/samber/lo"
 	"go.uber.org/zap"
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
@@ -37,7 +38,7 @@ import (
 	"github.com/milvus-io/milvus/tests/integration"
 )
 
-func (s *BulkInsertSuite) PrepareCollectionA() (int64, int64) {
+func (s *BulkInsertSuite) PrepareCollectionA() (int64, int64, *schemapb.IDs) {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 	c := s.Cluster
@@ -86,6 +87,7 @@ func (s *BulkInsertSuite) PrepareCollectionA() (int64, int64) {
 	})
 	s.NoError(err)
 	s.Equal(int32(0), insertResult.GetStatus().GetCode())
+	insertedIDs := insertResult.GetIDs()
 
 	// flush
 	flushResp, err := c.Proxy.Flush(ctx, &milvuspb.FlushRequest{
@@ -148,7 +150,7 @@ func (s *BulkInsertSuite) PrepareCollectionA() (int64, int64) {
 	// get collectionID and partitionID
 	collectionID := showCollectionsResp.GetCollectionIds()[0]
 	partitionID := showPartitionsResp.GetPartitionIDs()[0]
-	return collectionID, partitionID
+	return collectionID, partitionID, insertedIDs
 }
 
 func (s *BulkInsertSuite) TestBinlogImport() {
@@ -157,7 +159,7 @@ func (s *BulkInsertSuite) TestBinlogImport() {
 		endTs = "548373346338803234"
 	)
 
-	collectionID, partitionID := s.PrepareCollectionA()
+	collectionID, partitionID, insertedIDs := s.PrepareCollectionA()
 
 	c := s.Cluster
 	ctx, cancel := context.WithTimeout(c.GetContext(), 60*time.Second)
@@ -252,4 +254,13 @@ func (s *BulkInsertSuite) TestBinlogImport() {
 	err = merr.CheckRPCCall(searchResult, err)
 	s.NoError(err)
 	s.Equal(nq*topk, len(searchResult.GetResults().GetScores()))
+	// check ids from collectionA, because during binlog import, even if the primary key's autoID is set to true,
+	// the primary key from the binlog should be used instead of being reassigned.
+	insertedIDsMap := lo.SliceToMap(insertedIDs.GetIntId().GetData(), func(id int64) (int64, struct{}) {
+		return id, struct{}{}
+	})
+	for _, id := range searchResult.GetResults().GetIds().GetIntId().GetData() {
+		_, ok := insertedIDsMap[id]
+		s.True(ok)
+	}
 }
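The assertion added at the end of TestBinlogImport reduces to a set-membership check. The same pattern is shown standalone below; the slices and their literal values are made up for illustration, only the lo.SliceToMap idiom is taken from the diff. A pass means every primary key returned by the post-import search was one of the keys originally inserted, i.e. nothing was reassigned.

package main

import (
	"fmt"

	"github.com/samber/lo"
)

func main() {
	// IDs returned by the original inserts and IDs coming back from a search
	// after the binlog import; the values here are invented for the example.
	insertedIDs := []int64{11, 12, 13, 14}
	returnedIDs := []int64{12, 14}

	// Build a set of the inserted primary keys, as the integration test does.
	insertedSet := lo.SliceToMap(insertedIDs, func(id int64) (int64, struct{}) {
		return id, struct{}{}
	})

	// Every primary key returned by the search must have been inserted originally.
	for _, id := range returnedIDs {
		_, ok := insertedSet[id]
		fmt.Println(id, ok)
	}
}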