influxdb/server.go

906 lines
23 KiB
Go
Raw Normal View History

2013-04-14 21:37:33 +00:00
package raft
2013-04-17 02:28:08 +00:00
import (
"errors"
"fmt"
"sync"
2013-04-28 04:51:17 +00:00
"time"
2013-06-03 21:58:12 +00:00
"os"
"sort"
"bufio"
"path"
2013-04-17 02:28:08 +00:00
)
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Constants
//
//------------------------------------------------------------------------------
// Server states. A server is in exactly one of these at any time.
const (
	// Stopped is the state of a server that has not been started or has been
	// unloaded.
	Stopped = "stopped"
	// Follower is the state a server enters when it starts or observes a
	// higher term.
	Follower = "follower"
	// Candidate is the state of a server that is running an election.
	Candidate = "candidate"
	// Leader is the state of a server that won an election or initialized a
	// new cluster.
	Leader = "leader"
)
// Default timing values used by NewServer.
const (
	// DefaultHeartbeatTimeout is the heartbeat interval given to new servers.
	DefaultHeartbeatTimeout = 50 * time.Millisecond
	// DefaultElectionTimeout is the minimum election timeout given to new
	// servers; NewServer uses twice this value as the maximum.
	DefaultElectionTimeout = 150 * time.Millisecond
)
//------------------------------------------------------------------------------
//
// Errors
//
//------------------------------------------------------------------------------
var (
	// NotLeaderError is returned when a command is executed on a server that
	// is not currently the leader.
	NotLeaderError = errors.New("raft.Server: Not current leader")
	// DuplicatePeerError is returned when a peer is added under a name that
	// is already registered.
	DuplicatePeerError = errors.New("raft.Server: Duplicate peer")
)
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Typedefs
//
//------------------------------------------------------------------------------
// A server is involved in the consensus protocol and can act as a follower,
// candidate or a leader.
type Server struct {
2013-04-28 21:23:21 +00:00
name string
path string
state string
2013-06-03 02:43:40 +00:00
transporter Transporter
context interface{}
2013-04-28 21:23:21 +00:00
currentTerm uint64
votedFor string
log *Log
2013-06-03 21:58:12 +00:00
leader string
2013-04-28 21:23:21 +00:00
peers map[string]*Peer
mutex sync.Mutex
2013-05-05 20:01:06 +00:00
electionTimer *Timer
2013-05-05 20:26:04 +00:00
heartbeatTimeout time.Duration
2013-06-03 21:58:12 +00:00
currentSnapshot *Snapshot
lastSnapshot *Snapshot
2013-06-05 05:56:59 +00:00
machineState int //TODO CHANGE THIS TO INTERFACE: recovery and store
2013-04-14 21:37:33 +00:00
}
//------------------------------------------------------------------------------
//
// Constructor
//
//------------------------------------------------------------------------------
2013-04-17 02:28:08 +00:00
// Creates a new server with a log at the given path.
2013-06-03 02:43:40 +00:00
func NewServer(name string, path string, transporter Transporter, context interface{}) (*Server, error) {
2013-04-17 02:28:08 +00:00
if name == "" {
return nil, errors.New("raft.Server: Name cannot be blank")
}
2013-05-28 19:57:38 +00:00
if transporter == nil {
panic("raft.Server: Transporter required")
}
2013-04-17 02:28:08 +00:00
s := &Server{
2013-05-10 14:47:24 +00:00
name: name,
path: path,
2013-05-28 19:57:38 +00:00
transporter: transporter,
2013-06-03 02:43:40 +00:00
context: context,
2013-05-10 14:47:24 +00:00
state: Stopped,
peers: make(map[string]*Peer),
log: NewLog(),
electionTimer: NewTimer(DefaultElectionTimeout, DefaultElectionTimeout*2),
2013-05-05 20:26:04 +00:00
heartbeatTimeout: DefaultHeartbeatTimeout,
2013-04-14 21:37:33 +00:00
}
2013-04-30 04:13:50 +00:00
// Setup apply function.
s.log.ApplyFunc = func(c Command) error {
err := c.Apply(s)
return err
2013-04-30 04:13:50 +00:00
}
2013-04-17 02:28:08 +00:00
return s, nil
}
//------------------------------------------------------------------------------
//
// Accessors
//
//------------------------------------------------------------------------------
2013-05-01 05:21:56 +00:00
//--------------------------------------
// General
//--------------------------------------
2013-04-17 02:28:08 +00:00
// Name returns the name this server was created with.
func (s *Server) Name() string {
	return s.name
}
// Path returns the storage directory this server was created with.
func (s *Server) Path() string {
	return s.path
}
2013-05-05 19:36:23 +00:00
2013-06-03 13:51:52 +00:00
func (s *Server) Leader() string {
2013-06-03 21:58:12 +00:00
s.mutex.Lock()
defer s.mutex.Unlock()
return s.leader
}
2013-05-28 19:57:38 +00:00
// Transporter returns the object used to send requests to peers.
func (s *Server) Transporter() Transporter {
	return s.transporter
}
2013-06-03 02:43:40 +00:00
// Context returns the value that was passed to NewServer as context.
func (s *Server) Context() interface{} {
	return s.context
}
2013-04-17 02:28:08 +00:00
// LogPath returns the location of the log file: the storage path followed
// by "/log".
func (s *Server) LogPath() string {
	return s.path + "/log"
}
// Retrieves the current state of the server.
2013-04-17 02:32:49 +00:00
func (s *Server) State() string {
2013-05-08 03:56:32 +00:00
s.mutex.Lock()
2013-05-25 05:37:56 +00:00
defer s.mutex.Unlock()
2013-04-17 02:28:08 +00:00
return s.state
2013-04-14 21:37:33 +00:00
}
2013-05-03 04:16:39 +00:00
// Retrieves the name of the candidate this server voted for in this term.
func (s *Server) VotedFor() string {
s.mutex.Lock()
2013-05-25 05:37:56 +00:00
defer s.mutex.Unlock()
2013-05-03 04:16:39 +00:00
return s.votedFor
}
2013-05-08 03:56:32 +00:00
// Retrieves whether the server's log has no entries.
func (s *Server) IsLogEmpty() bool {
2013-06-03 19:13:38 +00:00
s.mutex.Lock()
defer s.mutex.Unlock()
2013-05-08 03:56:32 +00:00
return s.log.IsEmpty()
}
2013-05-08 20:22:08 +00:00
// A list of all the log entries. This should only be used for debugging purposes.
func (s *Server) LogEntries() []*LogEntry {
2013-06-03 19:13:38 +00:00
s.mutex.Lock()
defer s.mutex.Unlock()
2013-05-08 20:22:08 +00:00
if s.log != nil {
return s.log.entries
}
return nil
}
2013-06-03 19:13:38 +00:00
// LastCommandName returns the command name of the last log entry, or ""
// when the log has been released.
func (s *Server) LastCommandName() string {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	if s.log == nil {
		return ""
	}
	return s.log.LastCommandName()
}
2013-05-01 05:21:56 +00:00
//--------------------------------------
// Membership
//--------------------------------------
2013-04-28 04:51:17 +00:00
// Retrieves the number of member servers in the consensus.
2013-05-01 05:11:23 +00:00
func (s *Server) MemberCount() int {
count := 1
2013-04-28 21:23:21 +00:00
for _, _ = range s.peers {
2013-04-28 04:51:17 +00:00
count++
}
return count
}
// Retrieves the number of servers required to make a quorum.
2013-05-01 05:11:23 +00:00
func (s *Server) QuorumSize() int {
2013-04-28 04:51:17 +00:00
return (s.MemberCount() / 2) + 1
}
2013-05-01 05:21:56 +00:00
//--------------------------------------
// Election timeout
//--------------------------------------
// Retrieves the election timeout.
func (s *Server) ElectionTimeout() time.Duration {
2013-05-05 20:01:06 +00:00
return s.electionTimer.MinDuration()
2013-05-01 05:21:56 +00:00
}
// Sets the election timeout.
func (s *Server) SetElectionTimeout(duration time.Duration) {
2013-05-05 20:01:06 +00:00
s.electionTimer.SetMinDuration(duration)
s.electionTimer.SetMaxDuration(duration * 2)
2013-05-01 05:21:56 +00:00
}
//--------------------------------------
// Heartbeat timeout
//--------------------------------------
// HeartbeatTimeout returns the heartbeat timeout handed to peers. It does
// not take the server lock.
func (s *Server) HeartbeatTimeout() time.Duration {
	return s.heartbeatTimeout
}
// SetHeartbeatTimeout stores the new heartbeat timeout and pushes it to
// every existing peer.
func (s *Server) SetHeartbeatTimeout(duration time.Duration) {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	s.heartbeatTimeout = duration
	for name := range s.peers {
		s.peers[name].SetHeartbeatTimeout(duration)
	}
}
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Methods
//
//------------------------------------------------------------------------------
2013-04-17 02:28:08 +00:00
//--------------------------------------
// State
//--------------------------------------
// Starts the server with a log at the given path.
func (s *Server) Start() error {
s.mutex.Lock()
defer s.mutex.Unlock()
// Exit if the server is already running.
if s.Running() {
return errors.New("raft.Server: Server already running")
}
2013-04-28 21:23:21 +00:00
2013-06-05 00:02:45 +00:00
// create snapshot dir if not exist
os.Mkdir(s.path + "/snapshot", 0700)
2013-06-03 21:58:12 +00:00
2013-06-05 05:56:59 +00:00
// ## open recovery from the newest snapShot
//s.LoadSnapshot()
2013-06-03 21:58:12 +00:00
2013-04-17 02:28:08 +00:00
// Initialize the log and load it up.
if err := s.log.Open(s.LogPath()); err != nil {
s.unload()
2013-04-28 21:23:21 +00:00
return fmt.Errorf("raft.Server: %v", err)
2013-04-17 02:28:08 +00:00
}
2013-05-10 03:50:57 +00:00
// Update the term to the last term in the log.
s.currentTerm = s.log.CurrentTerm()
2013-04-17 02:32:49 +00:00
// Update the state.
s.state = Follower
for _, peer := range s.peers {
peer.pause()
}
// Start the election timeout.
2013-06-04 13:38:02 +00:00
c := make(chan bool)
go s.electionTimeoutFunc(c)
<- c
2013-04-17 02:28:08 +00:00
return nil
}
// Stop shuts the server down by unloading it while holding the server lock.
func (s *Server) Stop() {
	s.mutex.Lock()
	s.unload()
	s.mutex.Unlock()
}
// Unloads the server.
func (s *Server) unload() {
2013-05-27 00:02:31 +00:00
// Kill the election timer.
2013-06-03 23:16:50 +00:00
if s.electionTimer != nil {
s.electionTimer.Stop()
s.electionTimer = nil
}
2013-05-10 14:47:24 +00:00
2013-05-27 00:02:31 +00:00
// Remove peers.
for _, peer := range s.peers {
peer.stop()
}
s.peers = make(map[string]*Peer)
// Close the log.
2013-04-17 02:28:08 +00:00
if s.log != nil {
s.log.Close()
s.log = nil
}
2013-04-17 02:32:49 +00:00
s.state = Stopped
2013-04-17 02:28:08 +00:00
}
// Running reports whether the server is in any state other than Stopped.
// It does not take the server lock.
func (s *Server) Running() bool {
	stopped := s.state == Stopped
	return !stopped
}
//--------------------------------------
// Initialization
//--------------------------------------
// Initializes the server to become leader of a new cluster. This function
// will fail if there is an existing log or the server is already a member in
// an existing cluster.
func (s *Server) Initialize() error {
s.mutex.Lock()
defer s.mutex.Unlock()
// Exit if the server is not running.
if !s.Running() {
2013-06-03 02:43:40 +00:00
return errors.New("raft.Server: Cannot initialize while stopped")
} else if s.MemberCount() > 1 {
2013-06-03 02:43:40 +00:00
return errors.New("raft.Server: Cannot initialize; already in membership")
}
// Promote to leader.
s.currentTerm++
s.state = Leader
2013-06-03 21:58:12 +00:00
s.leader = s.name
s.electionTimer.Pause()
return nil
}
2013-04-17 02:28:08 +00:00
//--------------------------------------
// Commands
//--------------------------------------
2013-04-28 04:51:17 +00:00
// Attempts to execute a command and replicate it. The function will return
// when the command has been successfully committed or an error has occurred.
func (s *Server) Do(command Command) error {
s.mutex.Lock()
defer s.mutex.Unlock()
err := s.do(command)
return err
2013-04-28 04:51:17 +00:00
}
// This function is the low-level interface to execute commands. This function
// does not obtain a lock so one must be obtained before executing.
func (s *Server) do(command Command) error {
if s.state != Leader {
return NotLeaderError
}
2013-05-27 02:06:08 +00:00
2013-05-05 19:36:23 +00:00
// Capture the term that this command is executing within.
currentTerm := s.currentTerm
2013-06-06 03:25:17 +00:00
// // TEMP to solve the issue 18
// for _, peer := range s.peers {
// peer.pause()
// }
2013-06-05 00:02:45 +00:00
2013-05-05 19:36:23 +00:00
// Add a new entry to the log.
entry := s.log.CreateEntry(s.currentTerm, command)
if err := s.log.AppendEntry(entry); err != nil {
return err
}
2013-05-05 20:26:04 +00:00
2013-05-05 19:36:23 +00:00
// Flush the entries to the peers.
c := make(chan bool, len(s.peers))
for _, _peer := range s.peers {
peer := _peer
go func() {
2013-06-05 05:56:59 +00:00
2013-06-06 03:25:17 +00:00
term, success, err := peer.flush(true)
2013-06-05 05:56:59 +00:00
2013-05-05 19:36:23 +00:00
// Demote if we encounter a higher term.
if err != nil {
2013-06-05 05:56:59 +00:00
2013-05-05 19:36:23 +00:00
return
} else if term > currentTerm {
2013-06-03 23:16:50 +00:00
s.mutex.Lock()
2013-05-05 19:36:23 +00:00
s.setCurrentTerm(term)
2013-06-05 00:22:09 +00:00
2013-06-03 23:16:50 +00:00
if s.electionTimer != nil {
s.electionTimer.Reset()
}
s.mutex.Unlock()
2013-06-05 00:22:09 +00:00
2013-05-05 19:36:23 +00:00
return
}
2013-05-05 20:26:04 +00:00
2013-05-05 19:36:23 +00:00
// If we successfully replicated the log then send a success to the channel.
if success {
c <- true
}
}()
}
// Wait for a quorum to confirm and commit entry.
responseCount := 1
committed := false
loop:
for {
// If we received enough votes then stop waiting for more votes.
if responseCount >= s.QuorumSize() {
committed = true
2013-06-06 03:25:17 +00:00
// for _, peer := range s.peers {
// peer.resume()
// }
2013-05-05 19:36:23 +00:00
break
}
// Collect votes from peers.
select {
case <-c:
// Exit if our term has changed.
if s.currentTerm > currentTerm {
return fmt.Errorf("raft.Server: Higher term discovered, stepping down: (%v > %v)", s.currentTerm, currentTerm)
}
responseCount++
2013-05-27 00:02:31 +00:00
case <-afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2):
2013-06-06 03:25:17 +00:00
// for _, peer := range s.peers {
// peer.resume()
// }
2013-05-05 19:36:23 +00:00
break loop
}
}
2013-05-05 20:26:04 +00:00
2013-05-05 19:36:23 +00:00
// Commit to log and flush to peers again.
if committed {
return s.log.SetCommitIndex(entry.Index)
2013-05-05 19:36:23 +00:00
}
2013-04-28 04:51:17 +00:00
return nil
}
// Appends a log entry from the leader to this server.
2013-04-30 04:13:50 +00:00
func (s *Server) AppendEntries(req *AppendEntriesRequest) (*AppendEntriesResponse, error) {
2013-04-28 04:51:17 +00:00
s.mutex.Lock()
defer s.mutex.Unlock()
// If the server is stopped then reject it.
if !s.Running() {
2013-05-10 14:47:24 +00:00
return NewAppendEntriesResponse(s.currentTerm, false, 0), fmt.Errorf("raft.Server: Server stopped")
}
2013-04-30 04:13:50 +00:00
// If the request is coming from an old term then reject it.
if req.Term < s.currentTerm {
2013-05-10 14:47:24 +00:00
return NewAppendEntriesResponse(s.currentTerm, false, s.log.CommitIndex()), fmt.Errorf("raft.Server: Stale request term")
2013-04-30 04:13:50 +00:00
}
s.setCurrentTerm(req.Term)
2013-06-03 13:51:52 +00:00
// Update the current leader.
2013-06-03 21:58:12 +00:00
s.leader = req.LeaderName
2013-04-30 04:13:50 +00:00
// Reset election timeout.
2013-06-03 23:16:50 +00:00
if s.electionTimer != nil {
s.electionTimer.Reset()
}
2013-06-05 05:56:59 +00:00
2013-04-30 04:13:50 +00:00
// Reject if log doesn't contain a matching previous entry.
if err := s.log.Truncate(req.PrevLogIndex, req.PrevLogTerm); err != nil {
2013-05-10 14:47:24 +00:00
return NewAppendEntriesResponse(s.currentTerm, false, s.log.CommitIndex()), err
2013-04-30 04:13:50 +00:00
}
2013-06-05 05:56:59 +00:00
2013-04-30 04:13:50 +00:00
// Append entries to the log.
if err := s.log.AppendEntries(req.Entries); err != nil {
2013-05-10 14:47:24 +00:00
return NewAppendEntriesResponse(s.currentTerm, false, s.log.CommitIndex()), err
2013-04-30 04:13:50 +00:00
}
2013-06-05 05:56:59 +00:00
2013-04-30 04:13:50 +00:00
// Commit up to the commit index.
if err := s.log.SetCommitIndex(req.CommitIndex); err != nil {
2013-05-10 14:47:24 +00:00
return NewAppendEntriesResponse(s.currentTerm, false, s.log.CommitIndex()), err
2013-06-05 00:02:45 +00:00
}
2013-06-05 05:56:59 +00:00
2013-05-10 14:47:24 +00:00
return NewAppendEntriesResponse(s.currentTerm, true, s.log.CommitIndex()), nil
2013-04-28 04:51:17 +00:00
}
2013-05-05 19:36:23 +00:00
// Creates an AppendEntries request.
2013-05-28 19:57:38 +00:00
func (s *Server) createAppendEntriesRequest(prevLogIndex uint64) *AppendEntriesRequest {
2013-05-05 19:36:23 +00:00
s.mutex.Lock()
defer s.mutex.Unlock()
return s.createInternalAppendEntriesRequest(prevLogIndex)
}
// Creates an AppendEntries request without a lock.
2013-05-28 19:57:38 +00:00
func (s *Server) createInternalAppendEntriesRequest(prevLogIndex uint64) *AppendEntriesRequest {
2013-05-05 19:36:23 +00:00
if s.log == nil {
2013-05-28 19:57:38 +00:00
return nil
2013-05-05 19:36:23 +00:00
}
entries, prevLogTerm := s.log.GetEntriesAfter(prevLogIndex)
req := NewAppendEntriesRequest(s.currentTerm, s.name, prevLogIndex, prevLogTerm, entries, s.log.CommitIndex())
2013-05-28 19:57:38 +00:00
return req
2013-05-05 19:36:23 +00:00
}
2013-04-28 21:23:21 +00:00
//--------------------------------------
2013-05-01 05:11:23 +00:00
// Promotion
//--------------------------------------
// Promotes the server to a candidate and then requests votes from peers. If
// enough votes are received then the server becomes the leader. If this
// server is elected then true is returned. If another server is elected then
// false is returned.
2013-05-01 05:21:56 +00:00
func (s *Server) promote() (bool, error) {
2013-06-06 03:25:17 +00:00
2013-05-01 05:11:23 +00:00
for {
// Start a new election.
2013-06-03 23:16:50 +00:00
term, lastLogIndex, lastLogTerm, err := s.promoteToCandidate()
if err != nil {
return false, err
}
2013-05-01 05:11:23 +00:00
// Request votes from each of our peers.
c := make(chan *RequestVoteResponse, len(s.peers))
for _, _peer := range s.peers {
peer := _peer
go func() {
req := NewRequestVoteRequest(term, s.name, lastLogIndex, lastLogTerm)
req.peer = peer
2013-05-28 19:57:38 +00:00
if resp, _ := s.transporter.SendVoteRequest(s, peer, req); resp != nil {
2013-05-10 03:50:57 +00:00
resp.peer = peer
c <- resp
}
2013-05-01 05:11:23 +00:00
}()
}
// Collect votes until we have a quorum.
votes := map[string]bool{}
elected := false
2013-05-01 05:21:56 +00:00
loop:
2013-05-01 05:11:23 +00:00
for {
// Add up all our votes.
votesGranted := 1
for _, value := range votes {
if value {
votesGranted++
}
}
// If we received enough votes then stop waiting for more votes.
if votesGranted >= s.QuorumSize() {
elected = true
break
}
// Collect votes from peers.
select {
case resp := <-c:
2013-05-01 05:21:56 +00:00
if resp != nil {
// Step down if we discover a higher term.
if resp.Term > term {
2013-06-03 23:16:50 +00:00
s.mutex.Lock()
2013-05-01 05:21:56 +00:00
s.setCurrentTerm(term)
2013-06-03 23:16:50 +00:00
if s.electionTimer != nil {
s.electionTimer.Reset()
}
s.mutex.Unlock()
2013-05-01 05:21:56 +00:00
return false, fmt.Errorf("raft.Server: Higher term discovered, stepping down: (%v > %v)", resp.Term, term)
}
2013-05-01 05:11:23 +00:00
votes[resp.peer.Name()] = resp.VoteGranted
}
2013-05-27 00:02:31 +00:00
case <-afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2):
2013-05-01 05:21:56 +00:00
break loop
2013-05-01 05:11:23 +00:00
}
}
// If we received enough votes then promote to leader and stop this election.
if elected && s.promoteToLeader(term, lastLogIndex, lastLogTerm) {
break
}
// If we are no longer in the same term then another server must have been elected.
2013-05-08 03:56:32 +00:00
s.mutex.Lock()
2013-05-01 05:11:23 +00:00
if s.currentTerm != term {
2013-05-08 03:56:32 +00:00
s.mutex.Unlock()
2013-05-01 05:21:56 +00:00
return false, fmt.Errorf("raft.Server: Term changed during election, stepping down: (%v > %v)", s.currentTerm, term)
2013-05-01 05:11:23 +00:00
}
2013-05-08 03:56:32 +00:00
s.mutex.Unlock()
2013-05-01 05:11:23 +00:00
}
2013-05-01 05:21:56 +00:00
return true, nil
2013-05-01 05:11:23 +00:00
}
// Promotes the server to a candidate and increases the election term. The
// term and log state are returned for use in the RPCs.
2013-06-03 23:16:50 +00:00
func (s *Server) promoteToCandidate() (uint64, uint64, uint64, error) {
2013-05-01 05:11:23 +00:00
s.mutex.Lock()
defer s.mutex.Unlock()
2013-06-03 23:16:50 +00:00
// Ignore promotion if the server is not a follower.
if s.state != Follower && s.state != Candidate {
return 0, 0, 0, fmt.Errorf("raft: Invalid promotion state: %s", s.state)
}
2013-05-01 05:21:56 +00:00
// Move server to become a candidate, increase our term & vote for ourself.
2013-05-01 05:11:23 +00:00
s.state = Candidate
s.currentTerm++
2013-05-01 05:21:56 +00:00
s.votedFor = s.name
2013-06-03 21:58:12 +00:00
s.leader = ""
2013-05-01 05:11:23 +00:00
// Pause the election timer while we're a candidate.
s.electionTimer.Pause()
// Return server state so we can check for it during leader promotion.
2013-06-03 23:16:50 +00:00
lastLogIndex, lastLogTerm := s.log.CommitInfo()
return s.currentTerm, lastLogIndex, lastLogTerm, nil
2013-05-01 05:11:23 +00:00
}
// Promotes the server from a candidate to a leader. This can only occur if
// the server is in the state that it assumed when the candidate election
// began. This is because another server may have won the election and caused
// the state to change.
func (s *Server) promoteToLeader(term uint64, lastLogIndex uint64, lastLogTerm uint64) bool {
s.mutex.Lock()
defer s.mutex.Unlock()
// Ignore promotion if we are not a candidate.
if s.state != Candidate {
return false
}
// Disallow promotion if the term or log does not match what we currently have.
logIndex, logTerm := s.log.CommitInfo()
if s.currentTerm != term || logIndex != lastLogIndex || logTerm != lastLogTerm {
return false
}
// Move server to become a leader and begin peer heartbeats.
2013-05-01 05:11:23 +00:00
s.state = Leader
2013-06-03 21:58:12 +00:00
s.leader = s.name
for _, peer := range s.peers {
peer.resume()
}
2013-05-01 05:21:56 +00:00
2013-05-01 05:11:23 +00:00
return true
}
//--------------------------------------
// Request Vote
2013-04-28 21:23:21 +00:00
//--------------------------------------
// Requests a vote from a server. A vote can be obtained if the vote's term is
// at the server's current term and the server has not made a vote yet. A vote
// can also be obtained if the term is greater than the server's current term.
func (s *Server) RequestVote(req *RequestVoteRequest) (*RequestVoteResponse, error) {
2013-04-28 21:23:21 +00:00
s.mutex.Lock()
defer s.mutex.Unlock()
2013-05-08 03:56:32 +00:00
// Fail if the server is not running.
if !s.Running() {
return NewRequestVoteResponse(s.currentTerm, false), fmt.Errorf("raft.Server: Server is stopped")
}
2013-04-28 21:23:21 +00:00
// If the request is coming from an old term then reject it.
if req.Term < s.currentTerm {
return NewRequestVoteResponse(s.currentTerm, false), fmt.Errorf("raft.Server: Stale term: %v < %v", req.Term, s.currentTerm)
2013-04-28 21:23:21 +00:00
}
2013-04-30 04:13:50 +00:00
s.setCurrentTerm(req.Term)
2013-04-28 21:23:21 +00:00
// If we've already voted for a different candidate then don't vote for this candidate.
if s.votedFor != "" && s.votedFor != req.CandidateName {
return NewRequestVoteResponse(s.currentTerm, false), fmt.Errorf("raft.Server: Already voted for %v", s.votedFor)
2013-04-28 21:23:21 +00:00
}
// If the candidate's log is not at least as up-to-date as our committed log then don't vote.
2013-04-28 22:49:52 +00:00
lastCommitIndex, lastCommitTerm := s.log.CommitInfo()
if lastCommitIndex > req.LastLogIndex || lastCommitTerm > req.LastLogTerm {
return NewRequestVoteResponse(s.currentTerm, false), fmt.Errorf("raft.Server: Out-of-date log: [%v/%v] > [%v/%v]", lastCommitIndex, lastCommitTerm, req.LastLogIndex, req.LastLogTerm)
2013-04-28 22:49:52 +00:00
}
2013-04-28 21:23:21 +00:00
2013-04-28 22:49:52 +00:00
// If we made it this far then cast a vote and reset our election time out.
s.votedFor = req.CandidateName
2013-06-03 23:16:50 +00:00
if s.electionTimer != nil {
s.electionTimer.Reset()
}
return NewRequestVoteResponse(s.currentTerm, true), nil
2013-04-28 21:23:21 +00:00
}
2013-04-30 04:13:50 +00:00
// setCurrentTerm adopts term when it is greater than the server's current
// term. Adopting a term clears the vote, makes the server a follower and
// pauses every peer. Smaller or equal terms are ignored. It does not take
// the server lock.
func (s *Server) setCurrentTerm(term uint64) {
	if term <= s.currentTerm {
		return
	}

	s.currentTerm = term
	s.votedFor = ""
	s.state = Follower
	for _, p := range s.peers {
		p.pause()
	}
}
// Listens to the election timeout and kicks off a new election.
2013-06-04 13:38:02 +00:00
func (s *Server) electionTimeoutFunc(startChannel chan bool) {
startChannel <- true
for {
// Grab the current timer channel.
s.mutex.Lock()
var c chan time.Time
if s.electionTimer != nil {
c = s.electionTimer.C()
}
s.mutex.Unlock()
// If the channel or timer are gone then exit.
if c == nil {
break
}
// If an election times out then promote this server. If the channel
// closes then that means the server has stopped so kill the function.
2013-05-10 14:47:24 +00:00
if _, ok := <-c; ok {
s.promote()
} else {
break
}
2013-04-30 04:13:50 +00:00
}
}
2013-04-17 02:32:49 +00:00
//--------------------------------------
// Membership
//--------------------------------------
// Adds a peer to the server. This should be called by a system's join command
// within the context so that it is within the context of the server lock.
func (s *Server) AddPeer(name string) error {
// Do not allow peers to be added twice.
if s.peers[name] != nil {
return DuplicatePeerError
2013-04-17 02:32:49 +00:00
}
2013-04-28 21:23:21 +00:00
// Only add the peer if it doesn't have the same name.
if s.name != name {
peer := NewPeer(s, name, s.heartbeatTimeout)
2013-06-04 13:35:43 +00:00
if s.state == Leader {
peer.resume()
}
s.peers[peer.name] = peer
2013-04-28 21:23:21 +00:00
2013-06-05 00:02:45 +00:00
}
return nil
2013-04-17 02:32:49 +00:00
}
2013-06-03 21:58:12 +00:00
//--------------------------------------
// Log compaction
//--------------------------------------
// createSnapshotRequest builds a snapshot request from this server's name
// and its last saved snapshot while holding the server lock.
func (s *Server) createSnapshotRequest() *SnapshotRequest {
	s.mutex.Lock()
	req := NewSnapshotRequest(s.name, s.lastSnapshot)
	s.mutex.Unlock()
	return req
}
2013-06-05 05:56:59 +00:00
// The background snapshot function
2013-06-05 00:02:45 +00:00
func (s *Server) Snapshot() {
for {
s.takeSnapshot()
2013-06-03 21:58:12 +00:00
2013-06-05 05:56:59 +00:00
// TODO: change this... to something reasonable
time.Sleep(5000 * time.Millisecond)
2013-06-05 00:02:45 +00:00
}
}
2013-06-03 21:58:12 +00:00
2013-06-05 00:02:45 +00:00
func (s *Server) takeSnapshot() error {
2013-06-03 21:58:12 +00:00
//TODO put a snapshot mutex
if s.currentSnapshot != nil {
return errors.New("handling snapshot")
}
2013-06-05 05:56:59 +00:00
2013-06-03 21:58:12 +00:00
lastIndex, lastTerm := s.log.CommitInfo()
if lastIndex == 0 || lastTerm == 0 {
return errors.New("No logs")
}
path := s.SnapshotPath(lastIndex, lastTerm)
2013-06-05 05:56:59 +00:00
s.currentSnapshot = &Snapshot{lastIndex, lastTerm, s.machineState , path}
2013-06-03 21:58:12 +00:00
2013-06-05 00:02:45 +00:00
s.saveSnapshot()
2013-06-05 05:56:59 +00:00
s.log.Compact(lastIndex, lastTerm)
2013-06-05 05:56:59 +00:00
2013-06-03 21:58:12 +00:00
return nil
}
// Retrieves the log path for the server.
2013-06-05 00:02:45 +00:00
func (s *Server) saveSnapshot() error {
2013-06-05 05:56:59 +00:00
2013-06-03 21:58:12 +00:00
if s.currentSnapshot == nil {
return errors.New("no snapshot to save")
}
err := s.currentSnapshot.Save()
if err != nil {
return err
}
tmp := s.lastSnapshot
s.lastSnapshot = s.currentSnapshot
2013-06-05 05:56:59 +00:00
// delete the previous snapshot if there is any change
2013-06-05 00:02:45 +00:00
if tmp != nil && !(tmp.lastIndex == s.lastSnapshot.lastIndex && tmp.lastTerm == s.lastSnapshot.lastTerm) {
tmp.Remove()
}
2013-06-03 21:58:12 +00:00
s.currentSnapshot = nil
return nil
}
2013-06-05 00:02:45 +00:00
// Retrieves the log path for the server.
func (s *Server) SnapshotPath(lastIndex uint64, lastTerm uint64) string {
return path.Join(s.path, "snapshot", fmt.Sprintf("%v_%v.ss", lastTerm, lastIndex))
2013-06-05 00:02:45 +00:00
}
2013-06-06 03:25:17 +00:00
func (s *Server) SnapshotRecovery(req *SnapshotRequest) (*SnapshotResponse, error){
2013-06-03 21:58:12 +00:00
//
s.mutex.Lock()
defer s.mutex.Unlock()
2013-06-05 05:56:59 +00:00
2013-06-03 21:58:12 +00:00
//recovery machine state
2013-06-06 03:25:17 +00:00
s.machineState = req.MachineState
2013-06-03 21:58:12 +00:00
//update term and index
2013-06-06 03:25:17 +00:00
s.currentTerm = req.LastTerm
s.log.UpdateCommitIndex(req.LastIndex)
snapshotPath := s.SnapshotPath(req.LastIndex, req.LastTerm)
s.currentSnapshot = &Snapshot{req.LastIndex, req.LastTerm, req.MachineState, snapshotPath}
2013-06-05 00:02:45 +00:00
s.saveSnapshot()
2013-06-06 03:25:17 +00:00
s.log.Compact(req.LastIndex, req.LastTerm)
2013-06-05 00:02:45 +00:00
2013-06-06 03:25:17 +00:00
return NewSnapshotResponse(req.LastTerm, true, req.LastIndex), nil
2013-06-03 21:58:12 +00:00
}
2013-06-05 05:56:59 +00:00
// Load a snapshot at restart
2013-06-03 21:58:12 +00:00
func (s *Server) LoadSnapshot() error {
dir, err := os.OpenFile(path.Join(s.path, "snapshot"), os.O_RDONLY, 0)
2013-06-03 21:58:12 +00:00
if err != nil {
dir.Close()
panic(err)
}
filenames, err := dir.Readdirnames(-1)
if err != nil {
dir.Close()
panic(err)
}
dir.Close()
if len(filenames) == 0 {
return errors.New("no snapshot")
}
// not sure how many snapshot we should keep
sort.Strings(filenames)
snapshotPath := path.Join(s.path, "snapshot", filenames[len(filenames) - 1])
2013-06-03 21:58:12 +00:00
// should not file
file, err := os.OpenFile(snapshotPath, os.O_RDONLY, 0)
defer file.Close()
if err != nil {
panic(err)
}
// TODO check checksum first
// TODO recovery state machine
2013-06-05 05:56:59 +00:00
var machineState int
2013-06-03 21:58:12 +00:00
var checksum, lastIndex, lastTerm uint64
reader := bufio.NewReader(file)
2013-06-05 05:56:59 +00:00
n , err := fmt.Fscanf(reader, "%08x\n%v\n%v\n%v", &checksum, &machineState,
2013-06-03 21:58:12 +00:00
&lastIndex, &lastTerm)
if err != nil {
panic(err)
}
if n != 4 {
panic(n)
}
2013-06-05 00:02:45 +00:00
s.lastSnapshot = &Snapshot{lastIndex, lastTerm, 1, snapshotPath}
2013-06-05 05:56:59 +00:00
s.machineState = machineState
2013-06-03 21:58:12 +00:00
s.log.SetStartTerm(lastTerm)
s.log.SetStartIndex(lastIndex)
2013-06-05 00:02:45 +00:00
s.log.UpdateCommitIndex(lastIndex)
2013-06-03 21:58:12 +00:00
return err
}