mirror of https://github.com/milvus-io/milvus.git
174 lines
6.4 KiB
Go
174 lines
6.4 KiB
Go
package walmanager
|
|
|
|
import (
|
|
"context"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus/internal/streamingnode/server/resource"
|
|
"github.com/milvus-io/milvus/internal/streamingnode/server/wal"
|
|
"github.com/milvus-io/milvus/internal/util/streamingutil/status"
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
"github.com/milvus-io/milvus/pkg/streaming/util/types"
|
|
)
|
|
|
|
// newWALLifetime create a WALLifetime with opener.
|
|
func newWALLifetime(opener wal.Opener, channel string) *walLifetime {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
l := &walLifetime{
|
|
ctx: ctx,
|
|
cancel: cancel,
|
|
channel: channel,
|
|
finish: make(chan struct{}),
|
|
opener: opener,
|
|
statePair: newWALStatePair(),
|
|
logger: log.With(zap.String("channel", channel)),
|
|
}
|
|
go l.backgroundTask()
|
|
return l
|
|
}
|
|
|
|
// walLifetime is the lifetime management of a wal.
|
|
// It promise a wal is keep state consistency in distributed environment.
|
|
// All operation on wal management will be sorted with following rules:
|
|
// (term, available) illuminate the state of wal.
|
|
// term is always increasing, available is always before unavailable in same term, such as:
|
|
// (-1, false) -> (0, true) -> (1, true) -> (2, true) -> (3, false) -> (7, true) -> ...
|
|
type walLifetime struct {
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
channel string
|
|
|
|
finish chan struct{}
|
|
opener wal.Opener
|
|
statePair *walStatePair
|
|
logger *log.MLogger
|
|
}
|
|
|
|
// GetWAL returns a available wal instance for the channel.
|
|
// Return nil if the wal is not available now.
|
|
func (w *walLifetime) GetWAL() wal.WAL {
|
|
return w.statePair.GetWAL()
|
|
}
|
|
|
|
// Open opens a wal instance for the channel on this Manager.
|
|
func (w *walLifetime) Open(ctx context.Context, channel types.PChannelInfo) error {
|
|
// Set expected WAL state to available at given term.
|
|
expected := newAvailableExpectedState(ctx, channel)
|
|
if !w.statePair.SetExpectedState(expected) {
|
|
return status.NewIgnoreOperation("channel %s with expired term %d, cannot change expected state for open", channel.Name, channel.Term)
|
|
}
|
|
|
|
// Wait until the WAL state is ready or term expired or error occurs.
|
|
return w.statePair.WaitCurrentStateReachExpected(ctx, expected)
|
|
}
|
|
|
|
// Remove removes the wal instance for the channel on this Manager.
|
|
func (w *walLifetime) Remove(ctx context.Context, term int64) error {
|
|
// Set expected WAL state to unavailable at given term.
|
|
expected := newUnavailableExpectedState(term)
|
|
if !w.statePair.SetExpectedState(expected) {
|
|
return status.NewIgnoreOperation("expired term %d, cannot change expected state for remove", term)
|
|
}
|
|
|
|
// Wait until the WAL state is ready or term expired or error occurs.
|
|
return w.statePair.WaitCurrentStateReachExpected(ctx, expected)
|
|
}
|
|
|
|
// Close closes the wal lifetime.
|
|
func (w *walLifetime) Close() {
|
|
// Close all background task.
|
|
w.cancel()
|
|
<-w.finish
|
|
|
|
// No background task is running now, close current wal if needed.
|
|
currentState := w.statePair.GetCurrentState()
|
|
logger := log.With(zap.String("current", toStateString(currentState)))
|
|
if oldWAL := currentState.GetWAL(); oldWAL != nil {
|
|
oldWAL.Close()
|
|
logger.Info("close current term wal done at wal life time close")
|
|
}
|
|
logger.Info("wal lifetime closed")
|
|
}
|
|
|
|
// backgroundTask is the background task for wal manager.
|
|
// wal open/close operation is executed in background task with single goroutine.
|
|
func (w *walLifetime) backgroundTask() {
|
|
defer func() {
|
|
w.logger.Info("wal lifetime background task exit")
|
|
close(w.finish)
|
|
}()
|
|
|
|
// wait for expectedState change.
|
|
expectedState := initialExpectedWALState
|
|
for {
|
|
// single wal open/close operation should be serialized.
|
|
if err := w.statePair.WaitExpectedStateChanged(w.ctx, expectedState); err != nil {
|
|
// context canceled. break the background task.
|
|
return
|
|
}
|
|
expectedState = w.statePair.GetExpectedState()
|
|
w.logger.Info("expected state changed, do a life cycle", zap.String("expected", toStateString(expectedState)))
|
|
w.doLifetimeChanged(expectedState)
|
|
}
|
|
}
|
|
|
|
// doLifetimeChanged executes the wal open/close operation once.
|
|
func (w *walLifetime) doLifetimeChanged(expectedState expectedWALState) {
|
|
currentState := w.statePair.GetCurrentState()
|
|
logger := w.logger.With(zap.String("expected", toStateString(expectedState)), zap.String("current", toStateString(currentState)))
|
|
|
|
// Filter the expired expectedState.
|
|
if !isStateBefore(currentState, expectedState) {
|
|
// Happen at: the unavailable expected state at current term, but current wal open operation is failed.
|
|
logger.Info("current state is not before expected state, do nothing")
|
|
return
|
|
}
|
|
|
|
// !!! Even if the expected state is canceled (context.Context.Err()), following operation must be executed.
|
|
// Otherwise a dead lock may be caused by unexpected rpc sequence.
|
|
// because new Current state after these operation must be same or greater than expected state.
|
|
|
|
// term must be increasing or available -> unavailable, close current term wal is always applied.
|
|
term := currentState.Term()
|
|
if oldWAL := currentState.GetWAL(); oldWAL != nil {
|
|
resource.Resource().Flusher().UnregisterPChannel(w.channel)
|
|
oldWAL.Close()
|
|
logger.Info("close current term wal done")
|
|
// Push term to current state unavailable and open a new wal.
|
|
// -> (currentTerm,false)
|
|
w.statePair.SetCurrentState(newUnavailableCurrentState(term, nil))
|
|
}
|
|
|
|
// If expected state is unavailable, change term to expected state and return.
|
|
if !expectedState.Available() {
|
|
// -> (expectedTerm,false)
|
|
w.statePair.SetCurrentState(newUnavailableCurrentState(expectedState.Term(), nil))
|
|
return
|
|
}
|
|
|
|
// If expected state is available, open a new wal.
|
|
// TODO: merge the expectedState and expected state context together.
|
|
l, err := w.opener.Open(expectedState.Context(), &wal.OpenOption{
|
|
Channel: expectedState.GetPChannelInfo(),
|
|
})
|
|
if err != nil {
|
|
logger.Warn("open new wal fail", zap.Error(err))
|
|
// Open new wal at expected term failed, push expected term to current state unavailable.
|
|
// -> (expectedTerm,false)
|
|
w.statePair.SetCurrentState(newUnavailableCurrentState(expectedState.Term(), err))
|
|
return
|
|
}
|
|
logger.Info("open new wal done")
|
|
err = resource.Resource().Flusher().RegisterPChannel(w.channel, l)
|
|
if err != nil {
|
|
logger.Warn("open flusher fail", zap.Error(err))
|
|
w.statePair.SetCurrentState(newUnavailableCurrentState(expectedState.Term(), err))
|
|
// wal is opened, if register flusher failure, we should close the wal.
|
|
l.Close()
|
|
return
|
|
}
|
|
// -> (expectedTerm,true)
|
|
w.statePair.SetCurrentState(newAvailableCurrentState(l))
|
|
}
|