influxdb/_vendor/raft/peer.go

321 lines
8.7 KiB
Go
Raw Normal View History

2013-04-28 04:51:17 +00:00
package raft
2013-05-05 19:36:23 +00:00
import (
"sync"
2013-05-05 20:26:04 +00:00
"time"
2013-05-05 19:36:23 +00:00
)
2013-04-28 04:51:17 +00:00
//------------------------------------------------------------------------------
//
// Typedefs
//
//------------------------------------------------------------------------------
// A peer is a reference to another server involved in the consensus protocol.
type Peer struct {
server *server
Name string `json:"name"`
ConnectionString string `json:"connectionString"`
prevLogIndex uint64
stopChan chan bool
heartbeatInterval time.Duration
2014-02-28 16:20:00 +00:00
lastActivity time.Time
sync.RWMutex
2013-04-28 04:51:17 +00:00
}
//------------------------------------------------------------------------------
//
// Constructor
//
//------------------------------------------------------------------------------
// Creates a new peer.
func newPeer(server *server, name string, connectionString string, heartbeatInterval time.Duration) *Peer {
2013-07-07 20:21:04 +00:00
return &Peer{
server: server,
Name: name,
ConnectionString: connectionString,
heartbeatInterval: heartbeatInterval,
2013-05-05 19:36:23 +00:00
}
2013-04-28 04:51:17 +00:00
}
//------------------------------------------------------------------------------
//
// Accessors
//
//------------------------------------------------------------------------------
2013-05-05 20:26:04 +00:00
// Sets the heartbeat timeout.
func (p *Peer) setHeartbeatInterval(duration time.Duration) {
p.heartbeatInterval = duration
2013-05-05 20:26:04 +00:00
}
2013-07-07 22:12:24 +00:00
//--------------------------------------
// Prev log index
//--------------------------------------
// Retrieves the previous log index.
func (p *Peer) getPrevLogIndex() uint64 {
p.RLock()
defer p.RUnlock()
2013-07-07 22:12:24 +00:00
return p.prevLogIndex
}
// Sets the previous log index.
func (p *Peer) setPrevLogIndex(value uint64) {
p.Lock()
defer p.Unlock()
2013-07-07 22:12:24 +00:00
p.prevLogIndex = value
}
// setLastActivity records now as the peer's last-activity timestamp while
// holding the peer's write lock.
func (p *Peer) setLastActivity(now time.Time) {
	p.Lock()
	p.lastActivity = now
	p.Unlock()
}
2013-05-05 19:36:23 +00:00
//------------------------------------------------------------------------------
//
// Methods
//
//------------------------------------------------------------------------------
//--------------------------------------
2013-07-07 20:21:04 +00:00
// Heartbeat
//--------------------------------------
2013-07-07 20:21:04 +00:00
// Starts the peer heartbeat.
func (p *Peer) startHeartbeat() {
p.stopChan = make(chan bool)
2013-07-07 20:21:04 +00:00
c := make(chan bool)
p.setLastActivity(time.Now())
p.server.routineGroup.Add(1)
go func() {
defer p.server.routineGroup.Done()
p.heartbeat(c)
}()
2013-07-07 20:55:55 +00:00
<-c
2013-07-07 20:21:04 +00:00
}
// stopHeartbeat resets the last-activity timestamp to the zero time and then
// sends flush on the stop channel. The heartbeat loop exits when it receives
// the value, performing one final flush first when flush is true. The stop
// channel made by startHeartbeat is unbuffered, so the send blocks until the
// heartbeat goroutine receives it.
func (p *Peer) stopHeartbeat(flush bool) {
	var never time.Time
	p.setLastActivity(never)
	p.stopChan <- flush
}
2014-02-28 16:20:00 +00:00
// LastActivity returns the last time any response was received from the peer.
func (p *Peer) LastActivity() time.Time {
p.RLock()
defer p.RUnlock()
2014-02-28 16:20:00 +00:00
return p.lastActivity
}
2013-06-26 18:25:22 +00:00
//--------------------------------------
// Copying
//--------------------------------------
// Clones the state of the peer. The clone is not attached to a server and
// the heartbeat timer will not exist.
func (p *Peer) clone() *Peer {
p.Lock()
defer p.Unlock()
2013-06-26 18:25:22 +00:00
return &Peer{
2013-08-15 23:35:01 +00:00
Name: p.Name,
ConnectionString: p.ConnectionString,
prevLogIndex: p.prevLogIndex,
2014-02-28 16:20:00 +00:00
lastActivity: p.lastActivity,
2013-06-26 18:25:22 +00:00
}
}
//--------------------------------------
2013-07-07 20:21:04 +00:00
// Heartbeat
//--------------------------------------
2013-07-07 20:21:04 +00:00
// Listens to the heartbeat timeout and flushes an AppendEntries RPC.
func (p *Peer) heartbeat(c chan bool) {
stopChan := p.stopChan
2013-07-07 20:21:04 +00:00
c <- true
2013-05-05 19:36:23 +00:00
ticker := time.Tick(p.heartbeatInterval)
2013-11-27 04:28:50 +00:00
debugln("peer.heartbeat: ", p.Name, p.heartbeatInterval)
2013-07-09 02:55:00 +00:00
2013-07-07 20:21:04 +00:00
for {
select {
case flush := <-stopChan:
if flush {
// before we can safely remove a node
// we must flush the remove command to the node first
2013-08-13 04:01:30 +00:00
p.flush()
debugln("peer.heartbeat.stop.with.flush: ", p.Name)
return
} else {
debugln("peer.heartbeat.stop: ", p.Name)
return
}
2013-11-27 04:28:50 +00:00
case <-ticker:
2014-01-21 16:24:28 +00:00
start := time.Now()
2013-08-13 04:01:30 +00:00
p.flush()
2014-01-21 16:24:28 +00:00
duration := time.Now().Sub(start)
p.server.DispatchEvent(newEvent(HeartbeatEventType, duration, nil))
2013-07-07 20:21:04 +00:00
}
2013-07-01 15:46:53 +00:00
}
}
2013-08-13 04:01:30 +00:00
// flush sends the peer one replication message. It asks the server's log for
// the entries after the peer's previous log index (at most
// maxLogEntriesPerRequest of them). A non-nil result is sent as an
// AppendEntries request; a nil result makes it send a snapshot request built
// from the server's current snapshot instead.
func (p *Peer) flush() {
	debugln("peer.heartbeat.flush: ", p.Name)

	srv := p.server
	prevLogIndex := p.getPrevLogIndex()
	term := srv.currentTerm
	entries, prevLogTerm := srv.log.getEntriesAfter(prevLogIndex, srv.maxLogEntriesPerRequest)

	if entries == nil {
		p.sendSnapshotRequest(newSnapshotRequest(srv.name, srv.snapshot))
		return
	}
	p.sendAppendEntriesRequest(newAppendEntriesRequest(term, prevLogIndex, prevLogTerm, srv.log.CommitIndex(), srv.name, entries))
}
2013-07-07 20:21:04 +00:00
//--------------------------------------
// Append Entries
//--------------------------------------
2013-06-05 05:56:59 +00:00
2013-07-07 20:21:04 +00:00
// Sends an AppendEntries request to the peer through the transport.
func (p *Peer) sendAppendEntriesRequest(req *AppendEntriesRequest) {
tracef("peer.append.send: %s->%s [prevLog:%v length: %v]\n",
p.server.Name(), p.Name, req.PrevLogIndex, len(req.Entries))
2013-07-07 22:12:24 +00:00
resp := p.server.Transporter().SendAppendEntriesRequest(p.server, p, req)
2013-05-05 19:36:23 +00:00
if resp == nil {
p.server.DispatchEvent(newEvent(HeartbeatIntervalEventType, p, nil))
debugln("peer.append.timeout: ", p.server.Name(), "->", p.Name)
2013-07-07 20:21:04 +00:00
return
2013-05-05 19:36:23 +00:00
}
traceln("peer.append.resp: ", p.server.Name(), "<-", p.Name)
2013-05-05 20:26:04 +00:00
p.setLastActivity(time.Now())
2013-07-07 20:21:04 +00:00
// If successful then update the previous log index.
p.Lock()
2014-01-12 07:40:55 +00:00
if resp.Success() {
2013-05-05 19:36:23 +00:00
if len(req.Entries) > 0 {
p.prevLogIndex = req.Entries[len(req.Entries)-1].GetIndex()
// if peer append a log entry from the current term
// we set append to true
if req.Entries[len(req.Entries)-1].GetTerm() == p.server.currentTerm {
resp.append = true
}
2013-05-05 19:36:23 +00:00
}
traceln("peer.append.resp.success: ", p.Name, "; idx =", p.prevLogIndex)
2013-07-07 20:55:55 +00:00
// If it was unsuccessful then decrement the previous log index and
// we'll try again next time.
2013-07-07 20:21:04 +00:00
} else {
if resp.Term() > p.server.Term() {
2014-01-23 16:50:09 +00:00
// this happens when there is a new leader comes up that this *leader* has not
// known yet.
2014-01-23 16:45:41 +00:00
// this server can know until the new leader send a ae with higher term
2014-01-23 16:50:09 +00:00
// or this server finish processing this response.
2014-01-23 16:45:41 +00:00
debugln("peer.append.resp.not.update: new.leader.found")
} else if resp.Term() == req.Term && resp.CommitIndex() >= p.prevLogIndex {
// we may miss a response from peer
// so maybe the peer has committed the logs we just sent
// but we did not receive the successful reply and did not increase
// the prevLogIndex
// peer failed to truncate the log and sent a fail reply at this time
// we just need to update peer's prevLog index to commitIndex
2014-01-12 07:40:55 +00:00
p.prevLogIndex = resp.CommitIndex()
debugln("peer.append.resp.update: ", p.Name, "; idx =", p.prevLogIndex)
} else if p.prevLogIndex > 0 {
// Decrement the previous log index down until we find a match. Don't
// let it go below where the peer's commit index is though. That's a
// problem.
p.prevLogIndex--
// if it not enough, we directly decrease to the index of the
2014-01-12 07:40:55 +00:00
if p.prevLogIndex > resp.Index() {
p.prevLogIndex = resp.Index()
}
debugln("peer.append.resp.decrement: ", p.Name, "; idx =", p.prevLogIndex)
2013-05-05 19:36:23 +00:00
}
2013-07-07 20:21:04 +00:00
}
p.Unlock()
2013-05-05 19:36:23 +00:00
// Attach the peer to resp, thus server can know where it comes from
2013-08-15 23:35:01 +00:00
resp.peer = p.Name
2013-07-07 20:21:04 +00:00
// Send response to server for processing.
p.server.sendAsync(resp)
2013-07-07 20:21:04 +00:00
}
// Sends an Snapshot request to the peer through the transport.
func (p *Peer) sendSnapshotRequest(req *SnapshotRequest) {
2013-08-15 23:35:01 +00:00
debugln("peer.snap.send: ", p.Name)
2013-07-07 20:21:04 +00:00
2013-07-07 22:12:24 +00:00
resp := p.server.Transporter().SendSnapshotRequest(p.server, p, req)
2013-07-07 20:21:04 +00:00
if resp == nil {
2013-08-15 23:35:01 +00:00
debugln("peer.snap.timeout: ", p.Name)
2013-07-07 20:21:04 +00:00
return
}
2013-07-07 20:21:04 +00:00
2013-08-15 23:35:01 +00:00
debugln("peer.snap.recv: ", p.Name)
2013-07-07 20:21:04 +00:00
2013-07-17 00:40:19 +00:00
// If successful, the peer should have been to snapshot state
// Send it the snapshot!
p.setLastActivity(time.Now())
2013-07-07 20:21:04 +00:00
if resp.Success {
p.sendSnapshotRecoveryRequest()
2013-07-07 20:21:04 +00:00
} else {
2013-08-15 23:35:01 +00:00
debugln("peer.snap.failed: ", p.Name)
2013-07-17 00:40:19 +00:00
return
2013-07-07 20:21:04 +00:00
}
2013-07-07 20:55:55 +00:00
}
// Sends an Snapshot Recovery request to the peer through the transport.
func (p *Peer) sendSnapshotRecoveryRequest() {
req := newSnapshotRecoveryRequest(p.server.name, p.server.snapshot)
2013-08-15 23:35:01 +00:00
debugln("peer.snap.recovery.send: ", p.Name)
resp := p.server.Transporter().SendSnapshotRecoveryRequest(p.server, p, req)
2013-09-17 23:18:50 +00:00
if resp == nil {
debugln("peer.snap.recovery.timeout: ", p.Name)
return
}
p.setLastActivity(time.Now())
if resp.Success {
p.prevLogIndex = req.LastIndex
} else {
2013-08-15 23:35:01 +00:00
debugln("peer.snap.recovery.failed: ", p.Name)
return
}
2014-01-12 07:40:55 +00:00
p.server.sendAsync(resp)
2013-05-05 19:36:23 +00:00
}
//--------------------------------------
2013-07-07 20:21:04 +00:00
// Vote Requests
//--------------------------------------
2013-07-07 20:21:04 +00:00
// send VoteRequest Request
func (p *Peer) sendVoteRequest(req *RequestVoteRequest, c chan *RequestVoteResponse) {
2013-08-15 23:35:01 +00:00
debugln("peer.vote: ", p.server.Name(), "->", p.Name)
2013-07-07 20:21:04 +00:00
req.peer = p
2013-07-07 22:12:24 +00:00
if resp := p.server.Transporter().SendVoteRequest(p.server, p, req); resp != nil {
debugln("peer.vote.recv: ", p.server.Name(), "<-", p.Name)
p.setLastActivity(time.Now())
2013-07-07 20:21:04 +00:00
resp.peer = p
c <- resp
} else {
debugln("peer.vote.failed: ", p.server.Name(), "<-", p.Name)
}
}