mirror of https://github.com/milvus-io/milvus.git
112 lines
4.3 KiB
Go
112 lines
4.3 KiB
Go
package flusherimpl
|
|
|
|
import (
|
|
"context"
|
|
"math"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus/internal/streamingnode/server/resource"
|
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/rootcoordpb"
|
|
"github.com/milvus-io/milvus/pkg/v2/streaming/util/message"
|
|
"github.com/milvus-io/milvus/pkg/v2/streaming/util/message/adaptor"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/conc"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/retry"
|
|
)
|
|
|
|
// getVchannels gets the vchannels of current pchannel.
|
|
func (impl *WALFlusherImpl) getVchannels(ctx context.Context, pchannel string) ([]string, error) {
|
|
var vchannels []string
|
|
rc, err := resource.Resource().RootCoordClient().GetWithContext(ctx)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "when wait for rootcoord client ready")
|
|
}
|
|
retryCnt := -1
|
|
if err := retry.Do(ctx, func() error {
|
|
retryCnt++
|
|
resp, err := rc.GetPChannelInfo(ctx, &rootcoordpb.GetPChannelInfoRequest{
|
|
Pchannel: pchannel,
|
|
})
|
|
if err = merr.CheckRPCCall(resp, err); err != nil {
|
|
log.Warn("get pchannel info failed", zap.Error(err), zap.Int("retryCnt", retryCnt))
|
|
return err
|
|
}
|
|
for _, collection := range resp.GetCollections() {
|
|
vchannels = append(vchannels, collection.Vchannel)
|
|
}
|
|
return nil
|
|
}, retry.AttemptAlways()); err != nil {
|
|
return nil, errors.Wrapf(err, "when get existed vchannels of pchannel")
|
|
}
|
|
return vchannels, nil
|
|
}
|
|
|
|
// getRecoveryInfos gets the recovery info of the vchannels from datacoord
|
|
func (impl *WALFlusherImpl) getRecoveryInfos(ctx context.Context, vchannel []string) (map[string]*datapb.GetChannelRecoveryInfoResponse, map[string]message.MessageID, error) {
|
|
futures := make([]*conc.Future[interface{}], 0, len(vchannel))
|
|
for _, v := range vchannel {
|
|
v := v
|
|
future := GetExecPool().Submit(func() (interface{}, error) {
|
|
return impl.getRecoveryInfo(ctx, v)
|
|
})
|
|
futures = append(futures, future)
|
|
}
|
|
recoveryInfos := make(map[string]*datapb.GetChannelRecoveryInfoResponse, len(futures))
|
|
for i, future := range futures {
|
|
resp, err := future.Await()
|
|
if err == nil {
|
|
recoveryInfos[vchannel[i]] = resp.(*datapb.GetChannelRecoveryInfoResponse)
|
|
continue
|
|
}
|
|
if errors.Is(err, errChannelLifetimeUnrecoverable) {
|
|
impl.logger.Warn("channel has been dropped, skip to recover flusher for vchannel", zap.String("vchannel", vchannel[i]))
|
|
continue
|
|
}
|
|
return nil, nil, errors.Wrapf(err, "when get recovery info of vchannel %s", vchannel[i])
|
|
}
|
|
messageIDs := make(map[string]message.MessageID, len(recoveryInfos))
|
|
for v, info := range recoveryInfos {
|
|
messageIDs[v] = adaptor.MustGetMessageIDFromMQWrapperIDBytes(impl.wal.Get().WALName(), info.GetInfo().GetSeekPosition().GetMsgID())
|
|
}
|
|
return recoveryInfos, messageIDs, nil
|
|
}
|
|
|
|
// getRecoveryInfo gets the recovery info of the vchannel.
|
|
func (impl *WALFlusherImpl) getRecoveryInfo(ctx context.Context, vchannel string) (*datapb.GetChannelRecoveryInfoResponse, error) {
|
|
var resp *datapb.GetChannelRecoveryInfoResponse
|
|
retryCnt := -1
|
|
err := retry.Do(ctx, func() error {
|
|
retryCnt++
|
|
dc, err := resource.Resource().DataCoordClient().GetWithContext(ctx)
|
|
if err != nil {
|
|
// Should never failed at here.
|
|
return err
|
|
}
|
|
resp, err = dc.GetChannelRecoveryInfo(ctx, &datapb.GetChannelRecoveryInfoRequest{Vchannel: vchannel})
|
|
err = merr.CheckRPCCall(resp, err)
|
|
if errors.Is(err, merr.ErrChannelNotAvailable) {
|
|
impl.logger.Warn("channel not available because of collection dropped", zap.String("vchannel", vchannel), zap.Int("retryCnt", retryCnt))
|
|
return retry.Unrecoverable(errChannelLifetimeUnrecoverable)
|
|
}
|
|
if err != nil {
|
|
impl.logger.Warn("get channel recovery info failed", zap.Error(err), zap.String("vchannel", vchannel), zap.Int("retryCnt", retryCnt))
|
|
return err
|
|
}
|
|
// The channel has been dropped, skip to recover it.
|
|
if isDroppedChannel(resp) {
|
|
impl.logger.Info("channel has been dropped, the vchannel can not be recovered", zap.String("vchannel", vchannel))
|
|
return retry.Unrecoverable(errChannelLifetimeUnrecoverable)
|
|
}
|
|
return nil
|
|
}, retry.AttemptAlways())
|
|
return resp, err
|
|
}
|
|
|
|
func isDroppedChannel(resp *datapb.GetChannelRecoveryInfoResponse) bool {
|
|
return len(resp.GetInfo().GetSeekPosition().GetMsgID()) == 0 && resp.GetInfo().GetSeekPosition().GetTimestamp() == math.MaxUint64
|
|
}
|