fix: backoff retries infinitely after reaching the max elapsed time (#40589)

issue: #40588

Signed-off-by: chyezh <chyezh@outlook.com>
pull/40643/head
Zhen Ye 2025-03-13 16:24:06 +08:00 committed by GitHub
parent e93d53ca28
commit f6fb4bc442
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 35 additions and 40 deletions

View File

@ -130,6 +130,7 @@ func (rc *resumableConsumerImpl) createNewConsumer(opts *handler.ConsumerOptions
backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = 100 * time.Millisecond
backoff.MaxInterval = 10 * time.Second
backoff.MaxElapsedTime = 0
for {
// Create a new consumer.
// The lifetime of an underlying stream consumer should be equal to that of the resumable consumer.

View File

@ -152,7 +152,8 @@ func (p *ResumableProducer) waitUntilUnavailable(producer handler.Producer) erro
func (p *ResumableProducer) createNewProducer() (producer.Producer, error) {
backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = 100 * time.Millisecond
backoff.MaxInterval = 2 * time.Second
backoff.MaxInterval = 10 * time.Second
backoff.MaxElapsedTime = 0
for {
// Create a new producer.
// The lifetime of an underlying stream producer should be equal to that of the resumable producer.

View File

@ -27,6 +27,7 @@ import (
"github.com/cockroachdb/errors"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/util/streamingutil"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/metrics"
"github.com/milvus-io/milvus/pkg/v2/mq/common"
@ -179,19 +180,23 @@ func newDmlChannels(initCtx context.Context, factory msgstream.Factory, chanName
}
for i, name := range names {
ms, err := factory.NewMsgStream(initCtx)
if err != nil {
log.Ctx(initCtx).Error("Failed to add msgstream",
zap.String("name", name),
zap.Error(err))
panic("Failed to add msgstream")
}
var ms msgstream.MsgStream
if !streamingutil.IsStreamingServiceEnabled() {
var err error
ms, err = factory.NewMsgStream(initCtx)
if err != nil {
log.Ctx(initCtx).Error("Failed to add msgstream",
zap.String("name", name),
zap.Error(err))
panic("Failed to add msgstream")
}
if params.PreCreatedTopicEnabled.GetAsBool() {
d.checkPreCreatedTopic(initCtx, factory, name)
}
if params.PreCreatedTopicEnabled.GetAsBool() {
d.checkPreCreatedTopic(initCtx, factory, name)
}
ms.AsProducer(initCtx, []string{name})
ms.AsProducer(initCtx, []string{name})
}
dms := &dmlMsgStream{
ms: ms,
refcnt: 0,

View File

@ -155,6 +155,9 @@ type handlerCreateFunc func(ctx context.Context, assign *types.PChannelInfoAssig
func (hc *handlerClientImpl) createHandlerAfterStreamingNodeReady(ctx context.Context, logger *log.MLogger, pchannel string, create handlerCreateFunc) (any, error) {
// TODO: backoff should be configurable.
backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = 100 * time.Millisecond
backoff.MaxInterval = 10 * time.Second
backoff.MaxElapsedTime = 0
for {
assign := hc.watcher.Get(ctx, pchannel)
if assign != nil {

View File

@ -117,6 +117,7 @@ func (m *pchannelCheckpointManager) background(previous message.MessageID) {
backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = 100 * time.Millisecond
backoff.MaxInterval = 10 * time.Second
backoff.MaxElapsedTime = 0
for {
current, err := m.blockUntilCheckpointUpdate(previous)
if err != nil {

View File

@ -2,6 +2,7 @@ package lazygrpc
import (
"context"
"time"
"github.com/cenkalti/backoff/v4"
"github.com/cockroachdb/errors"
@ -50,6 +51,11 @@ type connImpl struct {
func (c *connImpl) initialize() {
defer c.initializationNotifier.Finish(struct{}{})
newBackOff := backoff.NewExponentialBackOff()
newBackOff.InitialInterval = 100 * time.Millisecond
newBackOff.MaxInterval = 10 * time.Second
newBackOff.MaxElapsedTime = 0
backoff.Retry(func() error {
conn, err := c.dialer(c.initializationNotifier.Context())
if err != nil {
@ -62,7 +68,7 @@ func (c *connImpl) initialize() {
}
c.conn.Set(conn)
return nil
}, backoff.NewExponentialBackOff())
}, newBackOff)
}
func (c *connImpl) GetConn(ctx context.Context) (*grpc.ClientConn, error) {

View File

@ -55,13 +55,11 @@ type BackoffTimer struct {
func (t *BackoffTimer) EnableBackoff() {
if t.backoff == nil {
cfg := t.configFetcher.BackoffConfig()
defaultInterval := t.configFetcher.DefaultInterval()
backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = cfg.InitialInterval
backoff.Multiplier = cfg.Multiplier
backoff.MaxInterval = cfg.MaxInterval
backoff.MaxElapsedTime = defaultInterval
backoff.Stop = defaultInterval
backoff.MaxElapsedTime = 0
backoff.Reset()
t.backoff = backoff
}
@ -72,14 +70,6 @@ func (t *BackoffTimer) DisableBackoff() {
t.backoff = nil
}
// IsBackoffStopped reports whether the backoff is no longer in effect.
//
// It returns true when backoff is disabled (t.backoff is nil) or when the
// elapsed time of the current backoff has exceeded its MaxElapsedTime.
// A MaxElapsedTime of zero is the backoff library's way of saying "never
// stop", so an enabled backoff without a limit is never reported as stopped.
func (t *BackoffTimer) IsBackoffStopped() bool {
	if t.backoff == nil {
		return true
	}
	limit := t.backoff.MaxElapsedTime
	if limit == 0 {
		// No elapsed-time limit configured: comparing against zero would
		// wrongly report the backoff as stopped almost immediately.
		return false
	}
	return t.backoff.GetElapsedTime() > limit
}
// NextTimer returns the next timer and the duration of the timer
func (t *BackoffTimer) NextTimer() (<-chan time.Time, time.Duration) {
nextBackoff := t.NextInterval()
@ -98,13 +88,11 @@ func (t *BackoffTimer) NextInterval() time.Duration {
// NewBackoffWithInstant creates a new backoff with instant
func NewBackoffWithInstant(fetcher BackoffTimerConfigFetcher) *BackoffWithInstant {
cfg := fetcher.BackoffConfig()
defaultInterval := fetcher.DefaultInterval()
backoff := backoff.NewExponentialBackOff()
backoff.InitialInterval = cfg.InitialInterval
backoff.Multiplier = cfg.Multiplier
backoff.MaxInterval = cfg.MaxInterval
backoff.MaxElapsedTime = defaultInterval
backoff.Stop = defaultInterval
backoff.MaxElapsedTime = 0
backoff.Reset()
return &BackoffWithInstant{
backoff: backoff,

View File

@ -21,24 +21,14 @@ func TestBackoffTimer(t *testing.T) {
assert.Equal(t, time.Second, b.NextInterval())
assert.Equal(t, time.Second, b.NextInterval())
assert.Equal(t, time.Second, b.NextInterval())
assert.True(t, b.IsBackoffStopped())
b.EnableBackoff()
assert.False(t, b.IsBackoffStopped())
timer, backoff := b.NextTimer()
assert.Less(t, backoff, 200*time.Millisecond)
for {
<-timer
if b.IsBackoffStopped() {
break
}
timer, _ = b.NextTimer()
}
assert.True(t, b.IsBackoffStopped())
assert.Equal(t, time.Second, b.NextInterval())
<-timer
_, backoff = b.NextTimer()
assert.NotZero(t, backoff)
b.DisableBackoff()
assert.Equal(t, time.Second, b.NextInterval())
assert.True(t, b.IsBackoffStopped())
}
}