mirror of https://github.com/milvus-io/milvus.git
Fix datanode panic due to concurrent compaction and delete processing (#27167)
Co-authored-by: xiaofan-luan <xiaofan.luan@zilliz.com>
pull/27161/head
parent 0459a662e4
commit 4b2802033d
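In outline: compaction's SyncSegments call rewrote the channel's segment metadata while flowgraph nodes (delete, insert buffer) were still processing, which could panic the datanode. The commit makes mergeFlushedSegments run only while the flowgraph is blocked, and downgrades segment-not-found during flush to a skip. Below is a minimal, self-contained Go sketch of the blocking pattern; flowGraph, channelMeta and syncSegments are illustrative names for this sketch, not the actual Milvus types.

package main

import (
    "fmt"
    "sync"
)

// Sketch of "block the flowgraph before mutating shared segment state".
type flowGraph struct{ blockMu sync.RWMutex }

// Operate stands in for a flowgraph node running: many nodes may run
// concurrently under the read lock, but none while the graph is blocked.
func (fg *flowGraph) Operate(fn func()) {
    fg.blockMu.RLock()
    defer fg.blockMu.RUnlock()
    fn()
}

func (fg *flowGraph) Blockall() { fg.blockMu.Lock() }
func (fg *flowGraph) Unblock()  { fg.blockMu.Unlock() }

type channelMeta struct {
    mu       sync.Mutex
    segments map[int64][]int64 // target segment -> compacted-from segments
}

// mergeFlushedSegments mutates the shared segment map; callers are expected
// to have blocked the flowgraph first so no node sees a half-merged state.
func (c *channelMeta) mergeFlushedSegments(to int64, from []int64) {
    c.mu.Lock()
    defer c.mu.Unlock()
    c.segments[to] = from
}

// syncSegments mirrors the shape of the fixed handler: block, merge, unblock.
func syncSegments(fg *flowGraph, meta *channelMeta, to int64, from []int64) {
    fg.Blockall()
    defer fg.Unblock()
    meta.mergeFlushedSegments(to, from)
}

func main() {
    fg := &flowGraph{}
    meta := &channelMeta{segments: map[int64][]int64{}}
    var wg sync.WaitGroup
    wg.Add(2)
    go func() { // a node reading channel state
        defer wg.Done()
        fg.Operate(func() { _ = len(meta.segments) })
    }()
    go func() { // a compaction result landing
        defer wg.Done()
        syncSegments(fg, meta, 102, []int64{100, 101})
    }()
    wg.Wait()
    fmt.Println(meta.segments)
}

An RWMutex is enough for the sketch: node operations take the read lock, so they can overlap with each other, while Blockall takes the write lock and drains them before the merge touches shared state.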
@@ -76,7 +76,7 @@ type Channel interface {
     listNewSegmentsStartPositions() []*datapb.SegmentStartPosition
     transferNewSegments(segmentIDs []UniqueID)
     updateSegmentPKRange(segID UniqueID, ids storage.FieldData)
-    mergeFlushedSegments(ctx context.Context, seg *Segment, planID UniqueID, compactedFrom []UniqueID) error
+    mergeFlushedSegments(ctx context.Context, seg *Segment, planID UniqueID, compactedFrom []UniqueID)
     listCompactedSegmentIDs() map[UniqueID][]UniqueID
     listSegmentIDsToSync(ts Timestamp) []UniqueID
@@ -678,7 +678,7 @@ func (c *ChannelMeta) getCollectionSchema(collID UniqueID, ts Timestamp) (*schem
     return c.collSchema, nil
 }

-func (c *ChannelMeta) mergeFlushedSegments(ctx context.Context, seg *Segment, planID UniqueID, compactedFrom []UniqueID) error {
+func (c *ChannelMeta) mergeFlushedSegments(ctx context.Context, seg *Segment, planID UniqueID, compactedFrom []UniqueID) {
     log := log.Ctx(ctx).With(
         zap.Int64("segmentID", seg.segmentID),
         zap.Int64("collectionID", seg.collectionID),
@@ -687,13 +687,6 @@ func (c *ChannelMeta) mergeFlushedSegments(ctx context.Context, seg *Segment, pl
         zap.Int64("planID", planID),
         zap.String("channelName", c.channelName))

-    if seg.collectionID != c.collectionID {
-        log.Warn("failed to mergeFlushedSegments, collection mismatch",
-            zap.Int64("current collection ID", seg.collectionID),
-            zap.Int64("expected collection ID", c.collectionID))
-        return merr.WrapErrParameterInvalid(c.collectionID, seg.collectionID, "collection not match")
-    }
-
     var inValidSegments []UniqueID
     for _, ID := range compactedFrom {
         // no such segments in channel or the segments are unflushed.
@@ -711,12 +704,6 @@ func (c *ChannelMeta) mergeFlushedSegments(ctx context.Context, seg *Segment, pl
     log.Info("merge flushed segments")
     c.segMu.Lock()
     defer c.segMu.Unlock()
-    select {
-    case <-ctx.Done():
-        log.Warn("the context has been closed", zap.Error(ctx.Err()))
-        return errors.New("invalid context")
-    default:
-    }

     for _, ID := range compactedFrom {
         // the existent of the segments are already checked
@@ -733,8 +720,6 @@ func (c *ChannelMeta) mergeFlushedSegments(ctx context.Context, seg *Segment, pl
         seg.setType(datapb.SegmentType_Flushed)
         c.segments[seg.segmentID] = seg
     }
-
-    return nil
 }

 // for tests only
@@ -676,12 +676,7 @@ func TestChannelMeta_InterfaceMethod(t *testing.T) {
             require.False(t, channel.hasSegment(3, true))

             // tests start
-            err := channel.mergeFlushedSegments(context.Background(), test.inSeg, 100, test.inCompactedFrom)
-            if test.isValid {
-                assert.NoError(t, err)
-            } else {
-                assert.Error(t, err)
-            }
+            channel.mergeFlushedSegments(context.Background(), test.inSeg, 100, test.inCompactedFrom)

             if test.stored {
                 assert.True(t, channel.hasSegment(3, true))
@@ -60,15 +60,8 @@ import (
 )

 const (
-    // RPCConnectionTimeout is used to set the timeout for rpc request
-    RPCConnectionTimeout = 30 * time.Second
-
     // ConnectEtcdMaxRetryTime is used to limit the max retry time for connection etcd
     ConnectEtcdMaxRetryTime = 100
-
-    // ImportCallTimeout is the timeout used in Import() method calls
-    // This value is equal to RootCoord's task expire time
-    ImportCallTimeout = 15 * 60 * time.Second
 )

 var getFlowGraphServiceAttempts = uint(50)
@@ -100,6 +100,7 @@ func (dn *deleteNode) Operate(in []Msg) []Msg {
         log.Debug("Buffer delete request in DataNode", zap.String("traceID", traceID))
         tmpSegIDs, err := dn.bufferDeleteMsg(msg, fgMsg.timeRange, fgMsg.startPositions[0], fgMsg.endPositions[0])
         if err != nil {
+            // should not happen
             // error occurs only when deleteMsg is misaligned, should not happen
             log.Fatal("failed to buffer delete msg", zap.String("traceID", traceID), zap.Error(err))
         }
@@ -22,6 +22,7 @@ import (
     "math"
     "reflect"

+    "github.com/cockroachdb/errors"
     "github.com/golang/protobuf/proto"
     "go.opentelemetry.io/otel/trace"
     "go.uber.org/atomic"
@@ -485,6 +486,9 @@ func (ibNode *insertBufferNode) Sync(fgMsg *flowGraphMsg, seg2Upload []UniqueID,
                 task.dropped,
                 endPosition)
             if err != nil {
+                if errors.Is(err, merr.ErrSegmentNotFound) {
+                    return retry.Unrecoverable(err)
+                }
                 return err
             }
             return nil
@@ -497,6 +501,15 @@ func (ibNode *insertBufferNode) Sync(fgMsg *flowGraphMsg, seg2Upload []UniqueID,
                 metrics.DataNodeAutoFlushBufferCount.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.TotalLabel).Inc()
             }

+            if errors.Is(err, merr.ErrSegmentNotFound) {
+                if !segment.isValid() {
+                    log.Info("try to flush a compacted segment, ignore..",
+                        zap.Int64("segmentID", task.segmentID),
+                        zap.Error(err))
+                }
+                continue
+            }
+
             if merr.IsCanceledOrTimeout(err) {
                 log.Warn("skip syncing buffer data for context done",
                     zap.Int64("segmentID", task.segmentID),
@@ -21,8 +21,10 @@ import (
     "fmt"
     "math"
     "reflect"
+    "sync"
     "time"

+    "go.uber.org/atomic"
     "go.uber.org/zap"

     "github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
@@ -48,8 +50,17 @@ type ttNode struct {
     BaseNode
     vChannelName   string
     channel        Channel
-    lastUpdateTime time.Time
+    lastUpdateTime *atomic.Time
     dataCoord      types.DataCoord
+
+    updateCPLock  sync.Mutex
+    notifyChannel chan checkPoint
+    closeChannel  chan struct{}
+}
+
+type checkPoint struct {
+    curTs time.Time
+    pos   *msgpb.MsgPosition
 }

 // Name returns node name, implementing flowgraph.Node
@@ -72,43 +83,45 @@ func (ttn *ttNode) IsValidInMsg(in []Msg) bool {
 // Operate handles input messages, implementing flowgraph.Node
 func (ttn *ttNode) Operate(in []Msg) []Msg {
     fgMsg := in[0].(*flowGraphMsg)
+    curTs, _ := tsoutil.ParseTS(fgMsg.timeRange.timestampMax)
     if fgMsg.IsCloseMsg() {
         if len(fgMsg.endPositions) > 0 {
+            close(ttn.closeChannel)
             channelPos := ttn.channel.getChannelCheckpoint(fgMsg.endPositions[0])
             log.Info("flowgraph is closing, force update channel CP",
                 zap.Time("cpTs", tsoutil.PhysicalTime(channelPos.GetTimestamp())),
                 zap.String("channel", channelPos.GetChannelName()))
-            ttn.updateChannelCP(channelPos)
+            ttn.updateChannelCP(channelPos, curTs)
         }
         return in
     }

-    curTs, _ := tsoutil.ParseTS(fgMsg.timeRange.timestampMax)
+    // Do not block and async updateCheckPoint
     channelPos := ttn.channel.getChannelCheckpoint(fgMsg.endPositions[0])
-    log := log.With(zap.String("channel", ttn.vChannelName),
-        zap.Time("cpTs", tsoutil.PhysicalTime(channelPos.GetTimestamp())))
-    if curTs.Sub(ttn.lastUpdateTime) >= updateChanCPInterval {
-        if err := ttn.updateChannelCP(channelPos); err == nil {
-            ttn.lastUpdateTime = curTs
-            log.Info("update channel cp periodically")
-            return []Msg{}
+    nonBlockingNotify := func() {
+        select {
+        case ttn.notifyChannel <- checkPoint{curTs, channelPos}:
+        default:
         }
     }

+    if curTs.Sub(ttn.lastUpdateTime.Load()) >= updateChanCPInterval {
+        nonBlockingNotify()
+        return []Msg{}
+    }
+
     if channelPos.GetTimestamp() >= ttn.channel.getFlushTs() {
-        if err := ttn.updateChannelCP(channelPos); err == nil {
-            ttn.lastUpdateTime = curTs
-            log.Info("update channel cp at updateTs", zap.Time("updateTs", tsoutil.PhysicalTime(ttn.channel.getFlushTs())))
-            ttn.channel.setFlushTs(math.MaxUint64)
-        }
+        nonBlockingNotify()
     }
-
     return []Msg{}
 }

-func (ttn *ttNode) updateChannelCP(channelPos *msgpb.MsgPosition) error {
-    channelCPTs, _ := tsoutil.ParseTS(channelPos.GetTimestamp())
+func (ttn *ttNode) updateChannelCP(channelPos *msgpb.MsgPosition, curTs time.Time) error {
+    ttn.updateCPLock.Lock()
+    defer ttn.updateCPLock.Unlock()

+    channelCPTs, _ := tsoutil.ParseTS(channelPos.GetTimestamp())
+    // TODO, change to ETCD operation, avoid datacoord operation
     ctx, cancel := context.WithTimeout(context.Background(), updateChanCPTimeout)
     defer cancel()
     resp, err := ttn.dataCoord.UpdateChannelCheckpoint(ctx, &datapb.UpdateChannelCheckpointRequest{
@@ -124,6 +137,11 @@ func (ttn *ttNode) updateChannelCP(channelPos *msgpb.MsgPosition) error {
         return err
     }

+    ttn.lastUpdateTime.Store(curTs)
+    // channelPos ts > flushTs means we could stop flush.
+    if channelPos.GetTimestamp() >= ttn.channel.getFlushTs() {
+        ttn.channel.setFlushTs(math.MaxUint64)
+    }
     log.Info("UpdateChannelCheckpoint success",
         zap.String("channel", ttn.vChannelName),
         zap.Uint64("cpTs", channelPos.GetTimestamp()),
@@ -140,9 +158,23 @@ func newTTNode(config *nodeConfig, dc types.DataCoord) (*ttNode, error) {
         BaseNode:       baseNode,
         vChannelName:   config.vChannelName,
         channel:        config.channel,
-        lastUpdateTime: time.Time{}, // set to Zero to update channel checkpoint immediately after fg started
+        lastUpdateTime: atomic.NewTime(time.Time{}), // set to Zero to update channel checkpoint immediately after fg started
         dataCoord:      dc,
+        notifyChannel:  make(chan checkPoint, 1),
+        closeChannel:   make(chan struct{}),
     }

+    // check point updater
+    go func() {
+        for {
+            select {
+            case <-tt.closeChannel:
+                return
+            case cp := <-tt.notifyChannel:
+                tt.updateChannelCP(cp.pos, cp.curTs)
+            }
+        }
+    }()
+
     return tt, nil
 }
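The ttNode hunks above move the channel-checkpoint RPC off the flowgraph's hot path: Operate only performs a non-blocking send into a one-slot channel, and a goroutine started in newTTNode drains it and calls DataCoord under updateCPLock. Below is a stripped-down sketch of that producer/consumer shape, with a plain string standing in for *msgpb.MsgPosition and a print in place of the RPC; it is an illustration, not the Milvus implementation.

package main

import (
    "fmt"
    "sync"
    "time"
)

// checkPoint carries what the updater needs; pos is a simplified stand-in.
type checkPoint struct {
    curTs time.Time
    pos   string
}

type ttNode struct {
    updateCPLock  sync.Mutex
    notifyChannel chan checkPoint
    closeChannel  chan struct{}
}

// updateChannelCP would be the RPC to DataCoord; serialized by updateCPLock.
func (ttn *ttNode) updateChannelCP(cp checkPoint) {
    ttn.updateCPLock.Lock()
    defer ttn.updateCPLock.Unlock()
    fmt.Println("update channel checkpoint:", cp.pos, cp.curTs.Format(time.RFC3339))
}

// notify is the Operate-side half: send if the slot is free, otherwise skip,
// so a slow checkpoint update never stalls the timetick node.
func (ttn *ttNode) notify(cp checkPoint) {
    select {
    case ttn.notifyChannel <- cp:
    default:
    }
}

func newTTNode() *ttNode {
    ttn := &ttNode{
        notifyChannel: make(chan checkPoint, 1),
        closeChannel:  make(chan struct{}),
    }
    // single consumer goroutine, stopped by closing closeChannel
    go func() {
        for {
            select {
            case <-ttn.closeChannel:
                return
            case cp := <-ttn.notifyChannel:
                ttn.updateChannelCP(cp)
            }
        }
    }()
    return ttn
}

func main() {
    ttn := newTTNode()
    ttn.notify(checkPoint{curTs: time.Now(), pos: "dml-channel-0@100"})
    time.Sleep(100 * time.Millisecond) // give the updater a moment in this toy example
    close(ttn.closeChannel)
}

The one-slot buffer plus select/default means at most one checkpoint is ever queued; newer ticks are dropped while an update is in flight, which is acceptable because a later checkpoint supersedes an earlier one.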
@@ -578,7 +578,7 @@ func (m *rendezvousFlushManager) injectFlush(injection *taskInjection, segments
 // fetch meta info for segment
 func (m *rendezvousFlushManager) getSegmentMeta(segmentID UniqueID, pos *msgpb.MsgPosition) (UniqueID, UniqueID, *etcdpb.CollectionMeta, error) {
     if !m.hasSegment(segmentID, true) {
-        return -1, -1, nil, fmt.Errorf("no such segment %d in the channel", segmentID)
+        return -1, -1, nil, merr.WrapErrSegmentNotFound(segmentID, "segment not found during flush")
     }

     // fetch meta information of segment
@@ -362,6 +362,8 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments
         oneSegment int64
         channel    Channel
         err        error
+        ds         *dataSyncService
+        ok         bool
     )

     for _, fromSegment := range req.GetCompactedFrom() {
@@ -370,6 +372,11 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments
             log.Ctx(ctx).Warn("fail to get the channel", zap.Int64("segment", fromSegment), zap.Error(err))
             continue
         }
+        ds, ok = node.flowgraphManager.getFlowgraphService(channel.getChannelName(fromSegment))
+        if !ok {
+            log.Ctx(ctx).Warn("fail to find flow graph service", zap.Int64("segment", fromSegment))
+            continue
+        }
         oneSegment = fromSegment
         break
     }
@@ -392,9 +399,9 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments
         return merr.Status(err), nil
     }

-    if err := channel.mergeFlushedSegments(ctx, targetSeg, req.GetPlanID(), req.GetCompactedFrom()); err != nil {
-        return merr.Status(err), nil
-    }
+    ds.fg.Blockall()
+    defer ds.fg.Unblock()
+    channel.mergeFlushedSegments(ctx, targetSeg, req.GetPlanID(), req.GetCompactedFrom())
     node.compactionExecutor.injectDone(req.GetPlanID(), true)
     return merr.Status(nil), nil
 }
@@ -701,13 +701,7 @@ func (s *DataNodeServicesSuite) TestSyncSegments() {
         CompactedTo: 102,
         NumOfRows:   100,
     }
-    cancelCtx, cancel := context.WithCancel(context.Background())
-    cancel()
-    status, err := s.node.SyncSegments(cancelCtx, req)
-    s.Assert().NoError(err)
-    s.Assert().False(merr.Ok(status))
-
-    status, err = s.node.SyncSegments(s.ctx, req)
+    status, err := s.node.SyncSegments(s.ctx, req)
     s.Assert().NoError(err)
     s.Assert().True(merr.Ok(status))
@@ -31,6 +31,8 @@ const (
     // TODO: better to be configured
     nodeCtxTtInterval = 2 * time.Minute
     enableTtChecker   = true
+    // blockAll should wait no more than 10 seconds
+    blockAllWait = 10 * time.Second
 )

 // Node is the interface defines the behavior of flowgraph
@@ -74,7 +76,13 @@ func (nodeCtx *nodeCtx) Start() {
 func (nodeCtx *nodeCtx) Block() {
     // input node operate function will be blocking
     if !nodeCtx.node.IsInputNode() {
+        startTs := time.Now()
         nodeCtx.blockMutex.Lock()
+        if time.Since(startTs) >= blockAllWait {
+            log.Warn("flow graph wait for long time",
+                zap.String("name", nodeCtx.node.Name()),
+                zap.Duration("wait time", time.Since(startTs)))
+        }
     }
 }
@@ -134,3 +134,13 @@ func TestContextCancel(t *testing.T) {
     assert.True(t, merr.IsCanceledOrTimeout(err))
     t.Log(err)
 }
+
+func TestWrap(t *testing.T) {
+    err := merr.WrapErrSegmentNotFound(1, "failed to get Segment")
+    assert.True(t, errors.Is(err, merr.ErrSegmentNotFound))
+    assert.True(t, IsRecoverable(err))
+    err2 := Unrecoverable(err)
+    fmt.Println(err2)
+    assert.True(t, errors.Is(err2, merr.ErrSegmentNotFound))
+    assert.False(t, IsRecoverable(err2))
+}
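TestWrap above exercises the retry package's Unrecoverable/IsRecoverable pair, which is how the insert buffer node stops retrying a flush once the segment has been compacted away (retry.Unrecoverable on merr.ErrSegmentNotFound in the earlier hunk). Below is a self-contained sketch of the same idea using only the standard errors package; the wrapper type and retryDo loop are illustrative, not the Milvus retry implementation.

package main

import (
    "errors"
    "fmt"
)

// unrecoverableError marks an error as not worth retrying while keeping the
// original chain reachable through Unwrap.
type unrecoverableError struct{ err error }

func (u unrecoverableError) Error() string { return u.err.Error() }
func (u unrecoverableError) Unwrap() error { return u.err }

func Unrecoverable(err error) error { return unrecoverableError{err: err} }

func IsRecoverable(err error) bool {
    var u unrecoverableError
    return !errors.As(err, &u)
}

// retryDo keeps calling fn until it succeeds, attempts run out, or fn returns
// an error explicitly marked unrecoverable.
func retryDo(attempts int, fn func() error) error {
    var err error
    for i := 0; i < attempts; i++ {
        if err = fn(); err == nil {
            return nil
        }
        if !IsRecoverable(err) {
            return err // stop immediately, original error chain stays intact
        }
    }
    return err
}

var errSegmentNotFound = errors.New("segment not found")

func main() {
    err := retryDo(5, func() error {
        // flushing a segment that compaction already merged away will never
        // succeed, so the error is wrapped and the loop exits on the first try
        return Unrecoverable(fmt.Errorf("flush failed: %w", errSegmentNotFound))
    })
    fmt.Println(errors.Is(err, errSegmentNotFound), IsRecoverable(err)) // true false
}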