mirror of https://github.com/milvus-io/milvus.git
parent 120c876122
commit de318c2e7c
@@ -986,6 +986,7 @@ func (node *DataNode) Import(ctx context.Context, req *datapb.ImportTaskRequest)
 		AutoIds:  make([]int64, 0),
 		RowCount: 0,
 	}
+	importResult.Infos = append(importResult.Infos, &commonpb.KeyValuePair{Key: importutil.ProgressPercent, Value: "0"})

 	// Spawn a new context to ignore cancellation from parental context.
 	newCtx, cancel := context.WithTimeout(context.TODO(), ImportCallTimeout)
@@ -607,8 +607,9 @@ func (m *importManager) updateTaskInfo(ir *rootcoordpb.ImportResult) (*datapb.Im
 			if kv.GetKey() == importutil.FailedReason {
 				toPersistImportTaskInfo.State.ErrorMessage = kv.GetValue()
 				break
-			} else if kv.GetKey() == importutil.PersistTimeCost {
-				toPersistImportTaskInfo.Infos = append(toPersistImportTaskInfo.Infos, kv)
+			} else if kv.GetKey() == importutil.PersistTimeCost ||
+				kv.GetKey() == importutil.ProgressPercent {
+				importutil.UpdateKVInfo(&toPersistImportTaskInfo.Infos, kv.GetKey(), kv.GetValue())
 			}
 		}
 		log.Info("importManager update task info", zap.Any("toPersistImportTaskInfo", toPersistImportTaskInfo))

@@ -667,6 +668,9 @@ func (m *importManager) setImportTaskStateAndReason(taskID int64, targetState co
 		// Meta persist should be done before memory objs change.
 		toPersistImportTaskInfo := cloneImportTaskInfo(t)
 		toPersistImportTaskInfo.State.StateCode = targetState
+		if targetState == commonpb.ImportState_ImportCompleted {
+			importutil.UpdateKVInfo(&toPersistImportTaskInfo.Infos, importutil.ProgressPercent, "100")
+		}
 		tryUpdateErrMsg(errReason, toPersistImportTaskInfo)
 		// Update task in task store.
 		if err := m.persistTaskInfo(toPersistImportTaskInfo); err != nil {

@@ -684,6 +688,9 @@ func (m *importManager) setImportTaskStateAndReason(taskID int64, targetState co
 		// Meta persist should be done before memory objs change.
 		toPersistImportTaskInfo := cloneImportTaskInfo(v)
 		toPersistImportTaskInfo.State.StateCode = targetState
+		if targetState == commonpb.ImportState_ImportCompleted {
+			importutil.UpdateKVInfo(&toPersistImportTaskInfo.Infos, importutil.ProgressPercent, "100")
+		}
 		tryUpdateErrMsg(errReason, toPersistImportTaskInfo)
 		// Update task in task store.
 		if err := m.persistTaskInfo(toPersistImportTaskInfo); err != nil {

@@ -704,6 +711,9 @@ func (m *importManager) setImportTaskStateAndReason(taskID int64, targetState co
 	} else {
 		toPersistImportTaskInfo := cloneImportTaskInfo(ti)
 		toPersistImportTaskInfo.State.StateCode = targetState
+		if targetState == commonpb.ImportState_ImportCompleted {
+			importutil.UpdateKVInfo(&toPersistImportTaskInfo.Infos, importutil.ProgressPercent, "100")
+		}
 		tryUpdateErrMsg(errReason, toPersistImportTaskInfo)
 		// Update task in task store.
 		if err := m.persistTaskInfo(toPersistImportTaskInfo); err != nil {
@@ -31,12 +31,13 @@ import (
 )

 type BinlogParser struct {
 	ctx                context.Context            // for canceling parse process
 	collectionSchema   *schemapb.CollectionSchema // collection schema
 	shardNum           int32                      // sharding number of the collection
 	blockSize          int64                      // maximum size of a read block(unit:byte)
 	chunkManager       storage.ChunkManager       // storage interfaces to browse/read the files
 	callFlushFunc      ImportFlushFunc            // call back function to flush segment
+	updateProgressFunc func(percent int64)        // update working progress percent value

 	// a timestamp to define the start time point of restore, data before this time point will be ignored
 	// set this value to 0, all the data will be imported

@@ -57,6 +58,7 @@ func NewBinlogParser(ctx context.Context,
 	blockSize int64,
 	chunkManager storage.ChunkManager,
 	flushFunc ImportFlushFunc,
+	updateProgressFunc func(percent int64),
 	tsStartPoint uint64,
 	tsEndPoint uint64) (*BinlogParser, error) {
 	if collectionSchema == nil {

@@ -248,12 +250,23 @@ func (p *BinlogParser) Parse(filePaths []string) error {
 		return err
 	}

-	for _, segmentHolder := range segmentHolders {
+	updateProgress := func(readBatch int) {
+		if p.updateProgressFunc != nil && len(segmentHolders) != 0 {
+			percent := (readBatch * ProgressValueForPersist) / len(segmentHolders)
+			log.Debug("Binlog parser: working progress", zap.Int("readBatch", readBatch),
+				zap.Int("totalBatchCount", len(segmentHolders)), zap.Int("percent", percent))
+			p.updateProgressFunc(int64(percent))
+		}
+	}
+
+	for i, segmentHolder := range segmentHolders {
 		err = p.parseSegmentFiles(segmentHolder)
 		if err != nil {
 			return err
 		}
+
+		updateProgress(i + 1)
+
 		// trigger gb after each segment finished
 		triggerGC()
 	}
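The updateProgress closure added above converts the number of finished segment batches into a percent that tops out at ProgressValueForPersist (90) rather than 100, leaving the final step for the completion state set by rootcoord. A standalone sketch of that integer arithmetic, with an illustrative helper that is not code from the repository:

package main

import "fmt"

// progressValueForPersist mirrors the ProgressValueForPersist constant (90):
// the persist stage never reports more than 90%.
const progressValueForPersist = 90

// batchPercent reproduces the arithmetic in the updateProgress closure:
// readBatch * 90 / totalBatches, with integer truncation.
func batchPercent(readBatch, totalBatches int) int {
	if totalBatches == 0 {
		return 0
	}
	return (readBatch * progressValueForPersist) / totalBatches
}

func main() {
	total := 7
	for batch := 1; batch <= total; batch++ {
		fmt.Printf("batch %d/%d -> %d%%\n", batch, total, batchPercent(batch, total))
	}
	// The final batch reports exactly 90%; 100% is only set when the task is marked completed.
}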
@@ -32,17 +32,17 @@ func Test_NewBinlogParser(t *testing.T) {
 	ctx := context.Background()

 	// nil schema
-	parser, err := NewBinlogParser(ctx, nil, 2, 1024, nil, nil, 0, math.MaxUint64)
+	parser, err := NewBinlogParser(ctx, nil, 2, 1024, nil, nil, nil, 0, math.MaxUint64)
 	assert.Nil(t, parser)
 	assert.NotNil(t, err)

 	// nil chunkmanager
-	parser, err = NewBinlogParser(ctx, sampleSchema(), 2, 1024, nil, nil, 0, math.MaxUint64)
+	parser, err = NewBinlogParser(ctx, sampleSchema(), 2, 1024, nil, nil, nil, 0, math.MaxUint64)
 	assert.Nil(t, parser)
 	assert.NotNil(t, err)

 	// nil flushfunc
-	parser, err = NewBinlogParser(ctx, sampleSchema(), 2, 1024, &MockChunkManager{}, nil, 0, math.MaxUint64)
+	parser, err = NewBinlogParser(ctx, sampleSchema(), 2, 1024, &MockChunkManager{}, nil, nil, 0, math.MaxUint64)
 	assert.Nil(t, parser)
 	assert.NotNil(t, err)

@@ -50,12 +50,12 @@ func Test_NewBinlogParser(t *testing.T) {
 	flushFunc := func(fields map[storage.FieldID]storage.FieldData, shardID int) error {
 		return nil
 	}
-	parser, err = NewBinlogParser(ctx, sampleSchema(), 2, 1024, &MockChunkManager{}, flushFunc, 0, math.MaxUint64)
+	parser, err = NewBinlogParser(ctx, sampleSchema(), 2, 1024, &MockChunkManager{}, flushFunc, nil, 0, math.MaxUint64)
 	assert.NotNil(t, parser)
 	assert.Nil(t, err)

 	// tsStartPoint larger than tsEndPoint
-	parser, err = NewBinlogParser(ctx, sampleSchema(), 2, 1024, &MockChunkManager{}, flushFunc, 2, 1)
+	parser, err = NewBinlogParser(ctx, sampleSchema(), 2, 1024, &MockChunkManager{}, flushFunc, nil, 2, 1)
 	assert.Nil(t, parser)
 	assert.NotNil(t, err)
 }

@@ -127,7 +127,7 @@ func Test_BinlogParserConstructHolders(t *testing.T) {
 		"backup/bak1/data/delta_log/435978159196147009/435978159196147010/435978159261483009/434574382554415105",
 	}

-	parser, err := NewBinlogParser(ctx, sampleSchema(), 2, 1024, chunkManager, flushFunc, 0, math.MaxUint64)
+	parser, err := NewBinlogParser(ctx, sampleSchema(), 2, 1024, chunkManager, flushFunc, nil, 0, math.MaxUint64)
 	assert.NotNil(t, parser)
 	assert.Nil(t, err)

@@ -187,7 +187,7 @@ func Test_BinlogParserConstructHoldersFailed(t *testing.T) {
 		listResult: make(map[string][]string),
 	}

-	parser, err := NewBinlogParser(ctx, sampleSchema(), 2, 1024, chunkManager, flushFunc, 0, math.MaxUint64)
+	parser, err := NewBinlogParser(ctx, sampleSchema(), 2, 1024, chunkManager, flushFunc, nil, 0, math.MaxUint64)
 	assert.NotNil(t, parser)
 	assert.Nil(t, err)

@@ -233,7 +233,7 @@ func Test_BinlogParserParseFilesFailed(t *testing.T) {
 		return nil
 	}

-	parser, err := NewBinlogParser(ctx, sampleSchema(), 2, 1024, &MockChunkManager{}, flushFunc, 0, math.MaxUint64)
+	parser, err := NewBinlogParser(ctx, sampleSchema(), 2, 1024, &MockChunkManager{}, flushFunc, nil, 0, math.MaxUint64)
 	assert.NotNil(t, parser)
 	assert.Nil(t, err)

@@ -268,7 +268,10 @@ func Test_BinlogParserParse(t *testing.T) {
 		},
 	}

-	parser, err := NewBinlogParser(ctx, schema, 2, 1024, chunkManager, flushFunc, 0, math.MaxUint64)
+	updateProgress := func(percent int64) {
+		assert.Greater(t, percent, int64(0))
+	}
+	parser, err := NewBinlogParser(ctx, schema, 2, 1024, chunkManager, flushFunc, updateProgress, 0, math.MaxUint64)
 	assert.NotNil(t, parser)
 	assert.Nil(t, err)
@@ -30,6 +30,7 @@ import (
 	"go.uber.org/zap"
 	"go.uber.org/zap/zapcore"

+	"github.com/milvus-io/milvus-proto/go-api/commonpb"
 	"github.com/milvus-io/milvus-proto/go-api/schemapb"
 	"github.com/milvus-io/milvus/internal/common"
 	"github.com/milvus-io/milvus/internal/log"

@@ -539,3 +540,22 @@ func pkToShard(pk interface{}, shardNum uint32) (uint32, error) {

 	return shard, nil
 }
+
+func UpdateKVInfo(infos *[]*commonpb.KeyValuePair, k string, v string) error {
+	if infos == nil {
+		return errors.New("Import util: kv array pointer is nil")
+	}
+
+	found := false
+	for _, kv := range *infos {
+		if kv.GetKey() == k {
+			kv.Value = v
+			found = true
+		}
+	}
+	if !found {
+		*infos = append(*infos, &commonpb.KeyValuePair{Key: k, Value: v})
+	}
+
+	return nil
+}
@@ -640,3 +640,24 @@ func Test_PkToShard(t *testing.T) {
 	hash, _ = typeutil.Hash32Int64(pk)
 	assert.Equal(t, hash%shardNum, shard)
 }
+
+func Test_UpdateKVInfo(t *testing.T) {
+	err := UpdateKVInfo(nil, "a", "1")
+	assert.Error(t, err)
+
+	infos := make([]*commonpb.KeyValuePair, 0)
+
+	err = UpdateKVInfo(&infos, "a", "1")
+	assert.NoError(t, err)
+	assert.Equal(t, 1, len(infos))
+	assert.Equal(t, "1", infos[0].Value)
+
+	err = UpdateKVInfo(&infos, "a", "2")
+	assert.NoError(t, err)
+	assert.Equal(t, "2", infos[0].Value)
+
+	err = UpdateKVInfo(&infos, "b", "5")
+	assert.NoError(t, err)
+	assert.Equal(t, 2, len(infos))
+	assert.Equal(t, "5", infos[1].Value)
+}
@@ -58,12 +58,16 @@ const (
 	// the total memory size might cause OOM.
 	MaxTotalSizeInMemory = 2 * 1024 * 1024 * 1024 // 2GB

+	// progress percent value of persist state
+	ProgressValueForPersist = 90
+
 	// keywords of import task informations
 	FailedReason    = "failed_reason"
 	Files           = "files"
 	CollectionName  = "collection"
 	PartitionName   = "partition"
 	PersistTimeCost = "persist_cost"
+	ProgressPercent = "progress_percent"
 )

 // ReportImportAttempts is the maximum # of attempts to retry when import fails.
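With these constants in place, the percent value follows a fixed lifecycle across the commit: the DataNode seeds progress_percent to "0" when the task is accepted, the parsers raise it up to ProgressValueForPersist (90) while blocks are persisted, and rootcoord overwrites it with "100" once the task reaches ImportCompleted. A hedged sketch of how a consumer might read the value back out of the reported key-value list; kvPair is a simplified stand-in for commonpb.KeyValuePair, not the real message type:

package main

import (
	"fmt"
	"strconv"
)

// kvPair is a simplified stand-in for commonpb.KeyValuePair.
type kvPair struct {
	Key   string
	Value string
}

// progressOf scans a task's info list for the progress_percent key and
// parses it; a missing or malformed value falls back to 0.
func progressOf(infos []kvPair) int64 {
	for _, kv := range infos {
		if kv.Key == "progress_percent" {
			if v, err := strconv.ParseInt(kv.Value, 10, 64); err == nil {
				return v
			}
		}
	}
	return 0
}

func main() {
	infos := []kvPair{
		{Key: "persist_cost", Value: "1.2"},
		{Key: "progress_percent", Value: "45"},
	}
	fmt.Println(progressOf(infos)) // 45
}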
@@ -102,6 +106,7 @@ type ImportWrapper struct {
 	reportImportAttempts uint // attempts count if report function get error

 	workingSegments map[int]*WorkingSegment // a map shard id to working segments
+	progressPercent int64                   // working progress percent
 }

 func NewImportWrapper(ctx context.Context, collectionSchema *schemapb.CollectionSchema, shardNum int32, segmentSize int64,

@@ -291,7 +296,8 @@ func (p *ImportWrapper) Import(filePaths []string, options ImportOptions) error
 			printFieldsDataInfo(fields, "import wrapper: prepare to flush binlog data", filePaths)
 			return p.flushFunc(fields, shardID)
 		}
-		parser, err := NewNumpyParser(p.ctx, p.collectionSchema, p.rowIDAllocator, p.shardNum, SingleBlockSize, p.chunkManager, flushFunc)
+		parser, err := NewNumpyParser(p.ctx, p.collectionSchema, p.rowIDAllocator, p.shardNum, SingleBlockSize,
+			p.chunkManager, flushFunc, p.updateProgressPercent)
 		if err != nil {
 			return err
 		}

@@ -326,6 +332,9 @@ func (p *ImportWrapper) reportPersisted(reportAttempts uint, tr *timerecord.Time

 	// report file process state
 	p.importResult.State = commonpb.ImportState_ImportPersisted
+	progressValue := strconv.Itoa(ProgressValueForPersist)
+	UpdateKVInfo(&p.importResult.Infos, ProgressPercent, progressValue)
+
 	log.Info("import wrapper: report import result", zap.Any("importResult", p.importResult))
 	// persist state task is valuable, retry more times in case fail this task only because of network error
 	reportErr := retry.Do(p.ctx, func() error {

@@ -390,7 +399,7 @@ func (p *ImportWrapper) doBinlogImport(filePaths []string, tsStartPoint uint64,
 		return p.flushFunc(fields, shardID)
 	}
 	parser, err := NewBinlogParser(p.ctx, p.collectionSchema, p.shardNum, SingleBlockSize, p.chunkManager, flushFunc,
-		tsStartPoint, tsEndPoint)
+		p.updateProgressPercent, tsStartPoint, tsEndPoint)
 	if err != nil {
 		return err
 	}

@@ -415,9 +424,14 @@ func (p *ImportWrapper) parseRowBasedJSON(filePath string, onlyValidate bool) er
 	}
 	defer file.Close()

+	size, err := p.chunkManager.Size(p.ctx, filePath)
+	if err != nil {
+		return err
+	}
+
 	// parse file
 	reader := bufio.NewReader(file)
-	parser := NewJSONParser(p.ctx, p.collectionSchema)
+	parser := NewJSONParser(p.ctx, p.collectionSchema, p.updateProgressPercent)

 	// if only validate, we input a empty flushFunc so that the consumer do nothing but only validation.
 	var flushFunc ImportFlushFunc

@@ -438,7 +452,7 @@ func (p *ImportWrapper) parseRowBasedJSON(filePath string, onlyValidate bool) er
 		return err
 	}

-	err = parser.ParseRows(reader, consumer)
+	err = parser.ParseRows(&IOReader{r: reader, fileSize: size}, consumer)
 	if err != nil {
 		return err
 	}

@@ -516,6 +530,17 @@ func (p *ImportWrapper) flushFunc(fields map[storage.FieldID]storage.FieldData,
 	segment.rowCount += int64(rowNum)
 	segment.memSize += memSize

+	// report working progress percent value to rootcoord
+	// if failed to report, ignore the error, the percent value might be improper but the task can be succeed
+	progressValue := strconv.Itoa(int(p.progressPercent))
+	UpdateKVInfo(&p.importResult.Infos, ProgressPercent, progressValue)
+	reportErr := retry.Do(p.ctx, func() error {
+		return p.reportFunc(p.importResult)
+	}, retry.Attempts(p.reportImportAttempts))
+	if reportErr != nil {
+		log.Warn("import wrapper: fail to report working progress percent value to RootCoord", zap.Error(reportErr))
+	}
+
 	return nil
 }
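The block added to flushFunc is deliberately best effort: it refreshes progress_percent in importResult.Infos, pushes the result to RootCoord with a bounded number of retry attempts, and only logs a warning on failure, because a stale percent value must not fail the import itself. A minimal sketch of that report-and-ignore pattern, using a plain retry loop in place of the repository's retry.Do helper:

package main

import (
	"errors"
	"fmt"
	"log"
	"time"
)

// reportWithRetry tries fn up to attempts times and returns the last error.
// It is an illustrative stand-in for retry.Do(..., retry.Attempts(n)).
func reportWithRetry(attempts int, fn func() error) error {
	var err error
	for i := 0; i < attempts; i++ {
		if err = fn(); err == nil {
			return nil
		}
		time.Sleep(100 * time.Millisecond)
	}
	return err
}

func main() {
	calls := 0
	report := func() error {
		calls++
		if calls < 3 {
			return errors.New("transient network error")
		}
		return nil
	}

	// Best-effort semantics: a failure is logged, not returned to the caller.
	if err := reportWithRetry(5, report); err != nil {
		log.Printf("fail to report working progress, ignore: %v", err)
	}
	fmt.Println("import continues regardless; report calls:", calls)
}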
@@ -555,3 +580,11 @@ func (p *ImportWrapper) closeAllWorkingSegments() error {

 	return nil
 }
+
+func (p *ImportWrapper) updateProgressPercent(percent int64) {
+	// ignore illegal percent value
+	if percent < 0 || percent > 100 {
+		return
+	}
+	p.progressPercent = percent
+}
@@ -969,3 +969,20 @@ func Test_ImportWrapperReportPersisted(t *testing.T) {
 	err = wrapper.reportPersisted(2, tr)
 	assert.Error(t, err)
 }
+
+func Test_ImportWrapperUpdateProgressPercent(t *testing.T) {
+	ctx := context.Background()
+
+	wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, nil, nil, nil, nil)
+	assert.NotNil(t, wrapper)
+	assert.Equal(t, int64(0), wrapper.progressPercent)
+
+	wrapper.updateProgressPercent(5)
+	assert.Equal(t, int64(5), wrapper.progressPercent)
+
+	wrapper.updateProgressPercent(200)
+	assert.Equal(t, int64(5), wrapper.progressPercent)
+
+	wrapper.updateProgressPercent(100)
+	assert.Equal(t, int64(100), wrapper.progressPercent)
+}
@@ -37,15 +37,21 @@ const (
 	RowRootNode = "rows"
 )

+type IOReader struct {
+	r        io.Reader
+	fileSize int64
+}
+
 type JSONParser struct {
 	ctx          context.Context  // for canceling parse process
 	bufRowCount  int              // max rows in a buffer
 	fields       map[string]int64 // fields need to be parsed
 	name2FieldID map[string]storage.FieldID
+	updateProgressFunc func(percent int64) // update working progress percent value
 }

 // NewJSONParser helper function to create a JSONParser
-func NewJSONParser(ctx context.Context, collectionSchema *schemapb.CollectionSchema) *JSONParser {
+func NewJSONParser(ctx context.Context, collectionSchema *schemapb.CollectionSchema, updateProgressFunc func(percent int64)) *JSONParser {
 	fields := make(map[string]int64)
 	name2FieldID := make(map[string]storage.FieldID)
 	for i := 0; i < len(collectionSchema.Fields); i++ {

@@ -64,10 +70,11 @@ func NewJSONParser(ctx context.Context, collectionSchema *schemapb.CollectionSch
 	}

 	parser := &JSONParser{
 		ctx:          ctx,
 		bufRowCount:  1024,
 		fields:       fields,
 		name2FieldID: name2FieldID,
+		updateProgressFunc: updateProgressFunc,
 	}
 	adjustBufSize(parser, collectionSchema)
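IOReader pairs the stream with its total size because a bare io.Reader cannot tell the parser how much input remains, and the progress computation needs a denominator; parseRowBasedJSON obtains that size from chunkManager.Size before building the wrapper. A hedged sketch of the same idea against a local file, using os.Stat instead of a chunk manager; the file name and helper are illustrative only:

package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
)

// ioReader mirrors the new IOReader: a stream plus the total number of bytes
// the stream will deliver, since an io.Reader alone cannot report that.
type ioReader struct {
	r        io.Reader
	fileSize int64
}

// openWithSize fetches the size first, then hands both the reader and the
// size to whoever needs to report progress against it.
func openWithSize(path string) (*ioReader, func() error, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, nil, err
	}
	st, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, nil, err
	}
	return &ioReader{r: bufio.NewReader(f), fileSize: st.Size()}, f.Close, nil
}

func main() {
	rd, closeFn, err := openWithSize("rows.json") // hypothetical input file
	if err != nil {
		fmt.Println("open failed:", err)
		return
	}
	defer closeFn()
	fmt.Println("total bytes to parse:", rd.fileSize)
}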
@@ -132,19 +139,31 @@ func (p *JSONParser) verifyRow(raw interface{}) (map[storage.FieldID]interface{}
 	return row, nil
 }

-func (p *JSONParser) ParseRows(r io.Reader, handler JSONRowHandler) error {
-	if handler == nil {
+func (p *JSONParser) ParseRows(reader *IOReader, handler JSONRowHandler) error {
+	if handler == nil || reader == nil {
 		log.Error("JSON parse handler is nil")
 		return errors.New("JSON parse handler is nil")
 	}

-	dec := json.NewDecoder(r)
+	dec := json.NewDecoder(reader.r)

+	oldPercent := int64(0)
+	updateProgress := func() {
+		if p.updateProgressFunc != nil && reader.fileSize > 0 {
+			percent := (dec.InputOffset() * ProgressValueForPersist) / reader.fileSize
+			if percent > oldPercent { // avoid too many log
+				log.Debug("JSON parser: working progress", zap.Int64("offset", dec.InputOffset()),
+					zap.Int64("fileSize", reader.fileSize), zap.Int64("percent", percent))
+			}
+			oldPercent = percent
+			p.updateProgressFunc(percent)
+		}
+	}
+
 	// treat number value as a string instead of a float64.
 	// by default, json lib treat all number values as float64, but if an int64 value
 	// has more than 15 digits, the value would be incorrect after converting from float64
 	dec.UseNumber()

 	t, err := dec.Token()
 	if err != nil {
 		log.Error("JSON parser: failed to decode the JSON file", zap.Error(err))
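The updateProgress closure leans on json.Decoder.InputOffset, which reports how many bytes of the underlying stream the decoder has consumed (available since Go 1.14); dividing that offset by the known file size yields the percent. A standalone sketch of the same offset-based computation outside the parser:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	payload := `{"rows": [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}]}`
	fileSize := int64(len(payload))

	dec := json.NewDecoder(strings.NewReader(payload))
	dec.UseNumber()

	// Consume tokens one by one and report how far the decoder has read,
	// scaled to 90 just like ProgressValueForPersist in the parser.
	for {
		if _, err := dec.Token(); err != nil {
			break // io.EOF ends the stream
		}
		percent := (dec.InputOffset() * 90) / fileSize
		fmt.Printf("offset %d/%d -> %d%%\n", dec.InputOffset(), fileSize, percent)
	}
}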
@@ -166,7 +185,6 @@ func (p *JSONParser) ParseRows(r io.Reader, handler JSONRowHandler) error {
 	}
 	key := t.(string)
 	keyLower := strings.ToLower(key)
-
 	// the root key should be RowRootNode
 	if keyLower != RowRootNode {
 		log.Error("JSON parser: invalid JSON format, the root key is not found", zap.String("RowRootNode", RowRootNode), zap.String("key", key))

@@ -199,6 +217,8 @@ func (p *JSONParser) ParseRows(r io.Reader, handler JSONRowHandler) error {
 				return err
 			}

+			updateProgress()
+
 			buf = append(buf, row)
 			if len(buf) >= p.bufRowCount {
 				isEmpty = false

@@ -249,6 +269,8 @@ func (p *JSONParser) ParseRows(r io.Reader, handler JSONRowHandler) error {
 		return errors.New("row count is 0")
 	}

+	updateProgress()
+
 	// send nil to notify the handler all have done
 	return handler.Handle(nil)
 }
@@ -55,7 +55,7 @@ func Test_AdjustBufSize(t *testing.T) {

 	// small row
 	schema := sampleSchema()
-	parser := NewJSONParser(ctx, schema)
+	parser := NewJSONParser(ctx, schema, nil)
 	assert.NotNil(t, parser)
 	assert.Greater(t, parser.bufRowCount, 0)

@@ -63,7 +63,7 @@ func Test_AdjustBufSize(t *testing.T) {
 	schema.Fields[9].TypeParams = []*commonpb.KeyValuePair{
 		{Key: "dim", Value: "32768"},
 	}
-	parser = NewJSONParser(ctx, schema)
+	parser = NewJSONParser(ctx, schema, nil)
 	assert.NotNil(t, parser)
 	assert.Greater(t, parser.bufRowCount, 0)

@@ -74,7 +74,7 @@ func Test_AdjustBufSize(t *testing.T) {
 		AutoID: true,
 		Fields: []*schemapb.FieldSchema{},
 	}
-	parser = NewJSONParser(ctx, schema)
+	parser = NewJSONParser(ctx, schema, nil)
 	assert.NotNil(t, parser)
 	assert.Greater(t, parser.bufRowCount, 0)
 }

@@ -84,7 +84,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 	defer cancel()

 	schema := sampleSchema()
-	parser := NewJSONParser(ctx, schema)
+	parser := NewJSONParser(ctx, schema, nil)
 	assert.NotNil(t, parser)

 	// prepare test data

@@ -121,7 +121,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 	t.Run("parse success", func(t *testing.T) {
 		// set bufRowCount = 4, means call handle() after reading 4 rows
 		parser.bufRowCount = 4
-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(len(strContent))}, consumer)
 		assert.Nil(t, err)
 		assert.Equal(t, len(content.Rows), len(consumer.rows))
 		for i := 0; i < len(consumer.rows); i++ {

@@ -188,7 +188,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {

 	t.Run("error cases", func(t *testing.T) {
 		// handler is nil
-		err = parser.ParseRows(reader, nil)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(0)}, nil)
 		assert.NotNil(t, err)

 		// not a row-based format

@@ -196,7 +196,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 			"dummy":[]
 		}`)

-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(10)}, consumer)
 		assert.NotNil(t, err)

 		// rows is not a list

@@ -204,7 +204,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 			"rows":
 		}`)

-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(5)}, consumer)
 		assert.NotNil(t, err)

 		// typo

@@ -212,7 +212,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 			"rows": [}
 		}`)

-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(6)}, consumer)
 		assert.NotNil(t, err)

 		// rows is not a list

@@ -220,7 +220,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 			"rows": {}
 		}`)

-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(8)}, consumer)
 		assert.NotNil(t, err)

 		// rows is not a list of list

@@ -228,25 +228,25 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 			"rows": [[]]
 		}`)

-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(10)}, consumer)
 		assert.NotNil(t, err)

 		// not valid json format
 		reader = strings.NewReader(`[]`)

-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(2)}, consumer)
 		assert.NotNil(t, err)

 		// empty content
 		reader = strings.NewReader(`{}`)

-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(2)}, consumer)
 		assert.NotNil(t, err)

 		// empty content
 		reader = strings.NewReader(``)

-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(0)}, consumer)
 		assert.NotNil(t, err)

 		// redundant field

@@ -255,7 +255,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 			{"dummy": 1, "FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]}
 		]
 		}`)
-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
 		assert.NotNil(t, err)

 		// field missed

@@ -264,7 +264,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 			{"FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]}
 		]
 		}`)
-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
 		assert.NotNil(t, err)

 		// handle() error

@@ -278,26 +278,26 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
 		consumer.handleErr = errors.New("error")
 		reader = strings.NewReader(content)
 		parser.bufRowCount = 2
-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
 		assert.NotNil(t, err)

 		reader = strings.NewReader(content)
 		parser.bufRowCount = 5
-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
 		assert.NotNil(t, err)

 		// row count is 0
 		reader = strings.NewReader(`{
 			"rows":[]
 		}`)
-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
 		assert.NotNil(t, err)

 		// canceled
 		consumer.handleErr = nil
 		cancel()
 		reader = strings.NewReader(content)
-		err = parser.ParseRows(reader, consumer)
+		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
 		assert.NotNil(t, err)
 	})
 }

@@ -307,7 +307,10 @@ func Test_JSONParserParseRows_StrPK(t *testing.T) {
 	defer cancel()

 	schema := strKeySchema()
-	parser := NewJSONParser(ctx, schema)
+	updateProgress := func(percent int64) {
+		assert.Greater(t, percent, int64(0))
+	}
+	parser := NewJSONParser(ctx, schema, updateProgress)
 	assert.NotNil(t, parser)

 	// prepare test data

@@ -337,7 +340,7 @@ func Test_JSONParserParseRows_StrPK(t *testing.T) {
 		handleCount: 0,
 	}

-	err = parser.ParseRows(reader, consumer)
+	err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(len(binContent))}, consumer)
 	assert.Nil(t, err)
 	assert.Equal(t, len(content.Rows), len(consumer.rows))
 	for i := 0; i < len(consumer.rows); i++ {
@@ -53,14 +53,15 @@ func closeReaders(columnReaders []*NumpyColumnReader) {
 }

 type NumpyParser struct {
 	ctx                context.Context            // for canceling parse process
 	collectionSchema   *schemapb.CollectionSchema // collection schema
 	rowIDAllocator     *allocator.IDAllocator     // autoid allocator
 	shardNum           int32                      // sharding number of the collection
 	blockSize          int64                      // maximum size of a read block(unit:byte)
 	chunkManager       storage.ChunkManager       // storage interfaces to browse/read the files
 	autoIDRange        []int64                    // auto-generated id range, for example: [1, 10, 20, 25] means id from 1 to 10 and 20 to 25
 	callFlushFunc      ImportFlushFunc            // call back function to flush segment
+	updateProgressFunc func(percent int64)        // update working progress percent value
 }

 // NewNumpyParser is helper function to create a NumpyParser

@@ -70,7 +71,8 @@ func NewNumpyParser(ctx context.Context,
 	shardNum int32,
 	blockSize int64,
 	chunkManager storage.ChunkManager,
-	flushFunc ImportFlushFunc) (*NumpyParser, error) {
+	flushFunc ImportFlushFunc,
+	updateProgressFunc func(percent int64)) (*NumpyParser, error) {
 	if collectionSchema == nil {
 		log.Error("Numper parser: collection schema is nil")
 		return nil, errors.New("collection schema is nil")

@@ -92,14 +94,15 @@ func NewNumpyParser(ctx context.Context,
 	}

 	parser := &NumpyParser{
 		ctx:              ctx,
 		collectionSchema: collectionSchema,
 		rowIDAllocator:   idAlloc,
 		shardNum:         shardNum,
 		blockSize:        blockSize,
 		chunkManager:     chunkManager,
 		autoIDRange:      make([]int64, 0),
 		callFlushFunc:    flushFunc,
+		updateProgressFunc: updateProgressFunc,
 	}

 	return parser, nil

@@ -362,12 +365,21 @@ func (p *NumpyParser) consume(columnReaders []*NumpyColumnReader) error {
 		return err
 	}

+	updateProgress := func(readRowCount int) {
+		if p.updateProgressFunc != nil && len(columnReaders) != 0 && columnReaders[0].rowCount > 0 {
+			percent := (readRowCount * ProgressValueForPersist) / columnReaders[0].rowCount
+			log.Debug("Numper parser: working progress", zap.Int("readRowCount", readRowCount),
+				zap.Int("totalRowCount", columnReaders[0].rowCount), zap.Int("percent", percent))
+			p.updateProgressFunc(int64(percent))
+		}
+	}
+
 	// prepare shards
 	shards := make([]map[storage.FieldID]storage.FieldData, 0, p.shardNum)
 	for i := 0; i < int(p.shardNum); i++ {
 		segmentData := initSegmentData(p.collectionSchema)
 		if segmentData == nil {
-			log.Error("import wrapper: failed to initialize FieldData list")
+			log.Error("Numper parser: failed to initialize FieldData list")
 			return fmt.Errorf("failed to initialize FieldData list")
 		}
 		shards = append(shards, segmentData)

@@ -399,6 +411,7 @@ func (p *NumpyParser) consume(columnReaders []*NumpyColumnReader) error {
 		if readRowCount == 0 {
 			break
 		}
+		updateProgress(readRowCount)
 		tr.Record("readData")
 		// split data to shards
 		err = p.splitFieldsData(segmentData, shards)
@@ -52,7 +52,7 @@ func createNumpyParser(t *testing.T) *NumpyParser {
 		return nil
 	}

-	parser, err := NewNumpyParser(ctx, schema, idAllocator, 2, 100, cm, flushFunc)
+	parser, err := NewNumpyParser(ctx, schema, idAllocator, 2, 100, cm, flushFunc, nil)
 	assert.NoError(t, err)
 	assert.NotNil(t, parser)
 	return parser

@@ -71,30 +71,30 @@ func findSchema(schema *schemapb.CollectionSchema, dt schemapb.DataType) *schema
 func Test_NewNumpyParser(t *testing.T) {
 	ctx := context.Background()

-	parser, err := NewNumpyParser(ctx, nil, nil, 2, 100, nil, nil)
+	parser, err := NewNumpyParser(ctx, nil, nil, 2, 100, nil, nil, nil)
 	assert.Error(t, err)
 	assert.Nil(t, parser)

 	schema := sampleSchema()
-	parser, err = NewNumpyParser(ctx, schema, nil, 2, 100, nil, nil)
+	parser, err = NewNumpyParser(ctx, schema, nil, 2, 100, nil, nil, nil)
 	assert.Error(t, err)
 	assert.Nil(t, parser)

 	idAllocator := newIDAllocator(ctx, t, nil)
-	parser, err = NewNumpyParser(ctx, schema, idAllocator, 2, 100, nil, nil)
+	parser, err = NewNumpyParser(ctx, schema, idAllocator, 2, 100, nil, nil, nil)
 	assert.Error(t, err)
 	assert.Nil(t, parser)

 	cm := createLocalChunkManager(t)

-	parser, err = NewNumpyParser(ctx, schema, idAllocator, 2, 100, cm, nil)
+	parser, err = NewNumpyParser(ctx, schema, idAllocator, 2, 100, cm, nil, nil)
 	assert.Error(t, err)
 	assert.Nil(t, parser)

 	flushFunc := func(fields map[storage.FieldID]storage.FieldData, shardID int) error {
 		return nil
 	}
-	parser, err = NewNumpyParser(ctx, schema, idAllocator, 2, 100, cm, flushFunc)
+	parser, err = NewNumpyParser(ctx, schema, idAllocator, 2, 100, cm, flushFunc, nil)
 	assert.NoError(t, err)
 	assert.NotNil(t, parser)
 }

@@ -878,7 +878,10 @@ func Test_NumpyParserParse_perf(t *testing.T) {
 	}

 	idAllocator := newIDAllocator(ctx, t, nil)
-	parser, err := NewNumpyParser(ctx, perfSchema(dim), idAllocator, shardNum, 16*1024*1024, cm, callFlushFunc)
+	updateProgress := func(percent int64) {
+		assert.Greater(t, percent, int64(0))
+	}
+	parser, err := NewNumpyParser(ctx, perfSchema(dim), idAllocator, shardNum, 16*1024*1024, cm, callFlushFunc, updateProgress)
 	assert.NoError(t, err)
 	assert.NotNil(t, parser)
 	parser.collectionSchema = perfSchema(dim)