influxdb/server.go

990 lines
24 KiB
Go
Raw Normal View History

2013-04-14 21:37:33 +00:00
package raft
2013-04-17 02:28:08 +00:00
import (
2013-06-24 16:52:51 +00:00
"encoding/json"
2013-04-17 02:28:08 +00:00
"errors"
"fmt"
2013-07-01 00:55:54 +00:00
"hash/crc32"
2013-06-08 02:19:18 +00:00
"io/ioutil"
2013-06-03 21:58:12 +00:00
"os"
"path"
2013-06-08 02:19:18 +00:00
"sort"
"sync"
"time"
2013-04-17 02:28:08 +00:00
)
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Constants
//
//------------------------------------------------------------------------------
// Values held by Server.state.
const (
	// Stopped is the initial state; the event loop exits when it sees it.
	Stopped = "stopped"
	// Follower is the state in which the follower event loop runs.
	Follower = "follower"
	// Candidate is the state in which the server runs an election.
	Candidate = "candidate"
	// Leader is the state in which the leader event loop runs.
	Leader = "leader"
)
// Timing defaults that NewServer installs on every new server.
const (
	// DefaultHeartbeatTimeout is the initial heartbeat timeout.
	DefaultHeartbeatTimeout = 50 * time.Millisecond
	// DefaultElectionTimeout is the initial lower bound of the election
	// timeout; NewServer uses twice this value as the upper bound.
	DefaultElectionTimeout = 150 * time.Millisecond
)
2013-07-07 20:21:04 +00:00
// stopValue is a sentinel: Stop sends its address (&stopValue) through the
// event channel, and the event loops compare each event target against that
// address to recognize a shutdown request.
var stopValue interface{}
//------------------------------------------------------------------------------
//
// Errors
//
//------------------------------------------------------------------------------
var (
	// NotLeaderError is the error the follower and candidate event loops
	// reply with when they receive a command.
	NotLeaderError = errors.New("raft.Server: Not current leader")

	// DuplicatePeerError is returned by AddPeer when the peer already exists.
	DuplicatePeerError = errors.New("raft.Server: Duplicate peer")

	// CommandTimeoutError is reported when a command's log entry is not
	// committed within the wait period used by processCommand.
	CommandTimeoutError = errors.New("raft: Command timeout")
)
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Typedefs
//
//------------------------------------------------------------------------------
// A server is involved in the consensus protocol and can act as a follower,
// candidate or a leader.
type Server struct {
2013-06-24 16:52:51 +00:00
name string
path string
state string
transporter Transporter
context interface{}
currentTerm uint64
2013-07-07 20:55:55 +00:00
votedFor string
log *Log
leader string
peers map[string]*Peer
mutex sync.Mutex
stateMutex sync.Mutex
2013-07-07 20:21:04 +00:00
commitCount int
2013-07-06 04:49:47 +00:00
electionTimer *timer
2013-06-08 02:19:18 +00:00
heartbeatTimeout time.Duration
2013-07-07 20:21:04 +00:00
c chan *event
2013-06-24 16:52:51 +00:00
stepDown chan uint64
2013-07-06 19:41:42 +00:00
stopChan chan bool
2013-06-24 16:52:51 +00:00
currentSnapshot *Snapshot
lastSnapshot *Snapshot
stateMachine StateMachine
2013-04-14 21:37:33 +00:00
}
2013-07-07 20:21:04 +00:00
// event is a unit of work handed to the server's event loop.
type event struct {
	// target is the payload: a Command, a request/response message, or
	// &stopValue for shutdown.
	target interface{}
	// returnValue is filled in by the event loop before it signals c.
	returnValue interface{}
	// c receives the processing result (nil on success) exactly once.
	c chan error
}
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Constructor
//
//------------------------------------------------------------------------------
2013-04-17 02:28:08 +00:00
// Creates a new server with a log at the given path.
2013-06-12 16:47:48 +00:00
func NewServer(name string, path string, transporter Transporter, stateMachine StateMachine, context interface{}) (*Server, error) {
2013-04-17 02:28:08 +00:00
if name == "" {
return nil, errors.New("raft.Server: Name cannot be blank")
}
2013-05-28 19:57:38 +00:00
if transporter == nil {
2013-06-08 02:41:36 +00:00
panic("raft: Transporter required")
2013-05-28 19:57:38 +00:00
}
2013-04-17 02:28:08 +00:00
s := &Server{
2013-05-10 14:47:24 +00:00
name: name,
path: path,
2013-05-28 19:57:38 +00:00
transporter: transporter,
2013-06-12 16:47:48 +00:00
stateMachine: stateMachine,
2013-06-03 02:43:40 +00:00
context: context,
2013-05-10 14:47:24 +00:00
state: Stopped,
peers: make(map[string]*Peer),
2013-07-06 04:49:47 +00:00
log: newLog(),
2013-07-07 20:21:04 +00:00
c: make(chan *event, 256),
stepDown: make(chan uint64, 1),
2013-07-06 19:41:42 +00:00
stopChan: make(chan bool),
2013-07-06 04:49:47 +00:00
electionTimer: newTimer(DefaultElectionTimeout, DefaultElectionTimeout*2),
2013-05-05 20:26:04 +00:00
heartbeatTimeout: DefaultHeartbeatTimeout,
2013-04-14 21:37:33 +00:00
}
2013-04-30 04:13:50 +00:00
// Setup apply function.
s.log.ApplyFunc = func(c Command) (interface{}, error) {
result, err := c.Apply(s)
return result, err
2013-04-30 04:13:50 +00:00
}
2013-04-17 02:28:08 +00:00
return s, nil
}
//------------------------------------------------------------------------------
//
// Accessors
//
//------------------------------------------------------------------------------
2013-05-01 05:21:56 +00:00
//--------------------------------------
// General
//--------------------------------------
2013-04-17 02:28:08 +00:00
// Name returns the server's name as given to NewServer.
func (s *Server) Name() string {
	return s.name
}
// Path returns the server's storage directory as given to NewServer.
func (s *Server) Path() string {
	return s.path
}
2013-05-05 19:36:23 +00:00
2013-07-06 19:41:42 +00:00
// The name of the current leader.
2013-06-03 13:51:52 +00:00
func (s *Server) Leader() string {
2013-06-03 01:18:25 +00:00
return s.leader
2013-06-08 02:19:18 +00:00
}
2013-06-26 18:25:22 +00:00
// Peers returns a new map holding a clone of every peer, keyed by peer name.
// The server mutex is held while copying.
func (s *Server) Peers() map[string]*Peer {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	copied := make(map[string]*Peer, len(s.peers))
	for peerName := range s.peers {
		copied[peerName] = s.peers[peerName].clone()
	}
	return copied
}
2013-05-28 19:57:38 +00:00
// Transporter returns the object used to transport requests.
func (s *Server) Transporter() Transporter {
	return s.transporter
}
2013-06-11 22:30:13 +00:00
// SetTransporter replaces the object used to transport requests.
// Unlike NewServer, it does not reject a nil transporter.
func (s *Server) SetTransporter(t Transporter) {
	s.transporter = t
}
2013-06-03 02:43:40 +00:00
// Context returns the context value that was passed to NewServer.
func (s *Server) Context() interface{} {
	return s.context
}
2013-04-17 02:28:08 +00:00
// LogPath returns the location of the log file: the storage path followed
// by "/log".
func (s *Server) LogPath() string {
	return s.path + "/log"
}
// Retrieves the current state of the server.
2013-04-17 02:32:49 +00:00
func (s *Server) State() string {
2013-04-17 02:28:08 +00:00
return s.state
2013-04-14 21:37:33 +00:00
}
// Retrieves the current term of the server.
func (s *Server) Term() uint64 {
return s.currentTerm
2013-04-14 21:37:33 +00:00
}
2013-07-06 19:41:42 +00:00
// Retrieves the current commit index of the server.
func (s *Server) CommitIndex() uint64 {
2013-07-06 04:49:47 +00:00
return s.log.commitIndex
}
2013-05-03 04:16:39 +00:00
// VotedFor returns the name of the candidate this server voted for in the
// current term, or "" if it has not voted.
func (s *Server) VotedFor() string {
	return s.votedFor
}
2013-05-08 03:56:32 +00:00
// Retrieves whether the server's log has no entries.
func (s *Server) IsLogEmpty() bool {
2013-07-06 04:49:47 +00:00
return s.log.isEmpty()
2013-05-08 03:56:32 +00:00
}
2013-05-08 20:22:08 +00:00
// A list of all the log entries. This should only be used for debugging purposes.
func (s *Server) LogEntries() []*LogEntry {
2013-07-06 19:41:42 +00:00
return s.log.entries
2013-05-08 20:22:08 +00:00
}
2013-06-03 19:13:38 +00:00
// A reference to the command name of the last entry.
2013-07-06 19:41:42 +00:00
func (s *Server) LastCommandName() string {
return s.log.lastCommandName()
2013-06-03 19:13:38 +00:00
}
// Get the state of the server for debugging
func (s *Server) GetState() string {
2013-07-06 19:41:42 +00:00
return fmt.Sprintf("Name: %s, State: %s, Term: %v, Index: %v ", s.name, s.state, s.currentTerm, s.CommitIndex())
}
2013-05-01 05:21:56 +00:00
//--------------------------------------
// Membership
//--------------------------------------
2013-04-28 04:51:17 +00:00
// Retrieves the number of member servers in the consensus.
2013-05-01 05:11:23 +00:00
func (s *Server) MemberCount() int {
s.mutex.Lock()
defer s.mutex.Unlock()
return len(s.peers) + 1
2013-04-28 04:51:17 +00:00
}
// Retrieves the number of servers required to make a quorum.
2013-05-01 05:11:23 +00:00
func (s *Server) QuorumSize() int {
2013-04-28 04:51:17 +00:00
return (s.MemberCount() / 2) + 1
}
2013-05-01 05:21:56 +00:00
//--------------------------------------
// Election timeout
//--------------------------------------
// Retrieves the election timeout.
func (s *Server) ElectionTimeout() time.Duration {
2013-07-06 04:49:47 +00:00
return s.electionTimer.minDuration
2013-05-01 05:21:56 +00:00
}
// Sets the election timeout.
func (s *Server) SetElectionTimeout(duration time.Duration) {
2013-07-06 04:49:47 +00:00
s.electionTimer.minDuration = duration
s.electionTimer.maxDuration = duration * 2
2013-05-01 05:21:56 +00:00
}
//--------------------------------------
// Heartbeat timeout
//--------------------------------------
// HeartbeatTimeout returns the server's heartbeat timeout.
func (s *Server) HeartbeatTimeout() time.Duration {
	return s.heartbeatTimeout
}
// Sets the heartbeat timeout.
2013-07-06 19:41:42 +00:00
func (s *Server) SetHeartbeatTimeout(duration time.Duration) {
s.mutex.Lock()
defer s.mutex.Unlock()
s.heartbeatTimeout = duration
for _, peer := range s.peers {
2013-07-06 04:49:47 +00:00
peer.setHeartbeatTimeout(duration)
}
}
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Methods
//
//------------------------------------------------------------------------------
2013-04-17 02:28:08 +00:00
//--------------------------------------
2013-06-24 16:52:51 +00:00
// Initialization
2013-04-17 02:28:08 +00:00
//--------------------------------------
// Starts the server with a log at the given path.
func (s *Server) Initialize() error {
2013-04-17 02:28:08 +00:00
s.mutex.Lock()
defer s.mutex.Unlock()
// Exit if the server is already running.
if s.Running() {
return errors.New("raft.Server: Server already running")
}
2013-04-28 21:23:21 +00:00
2013-06-24 16:52:51 +00:00
// Create snapshot directory if not exist
2013-06-08 02:19:18 +00:00
os.Mkdir(s.path+"/snapshot", 0700)
2013-06-03 21:58:12 +00:00
2013-04-17 02:28:08 +00:00
// Initialize the log and load it up.
2013-07-06 04:49:47 +00:00
if err := s.log.open(s.LogPath()); err != nil {
2013-07-07 20:21:04 +00:00
s.debugln("raft: Log error: %s", err)
2013-07-06 19:41:42 +00:00
return fmt.Errorf("raft: Initialization error: %s", err)
2013-04-17 02:28:08 +00:00
}
2013-05-10 03:50:57 +00:00
// Update the term to the last term in the log.
2013-07-06 04:49:47 +00:00
s.currentTerm = s.log.currentTerm()
2013-05-10 03:50:57 +00:00
2013-06-24 16:52:51 +00:00
return nil
}
2013-06-24 16:52:51 +00:00
// Start the sever as a follower
func (s *Server) StartFollower() {
2013-07-07 20:21:04 +00:00
s.state = Follower
go s.loop()
2013-06-27 00:12:44 +00:00
2013-06-24 16:52:51 +00:00
}
// Start the sever as a leader
2013-07-03 16:53:46 +00:00
func (s *Server) StartLeader() {
2013-07-07 20:21:04 +00:00
s.state = Leader
s.currentTerm++
2013-07-07 20:21:04 +00:00
go s.loop()
2013-07-03 16:53:46 +00:00
}
// Shuts down the server.
func (s *Server) Stop() {
2013-07-07 20:21:04 +00:00
s.send(&stopValue)
s.log.close()
2013-07-03 16:53:46 +00:00
}
2013-07-07 20:21:04 +00:00
// Running reports whether the server is in any state other than Stopped.
func (s *Server) Running() bool {
	return s.state != Stopped
}
2013-07-03 16:53:46 +00:00
2013-07-07 20:21:04 +00:00
//--------------------------------------
// Term
//--------------------------------------
2013-07-03 16:53:46 +00:00
2013-07-07 20:21:04 +00:00
// Sets the current term for the server. This is only used when an external
// current term is found.
func (s *Server) setCurrentTerm(term uint64, leaderName string) {
if term > s.currentTerm {
s.state = Follower
s.currentTerm = term
s.leader = leaderName
s.votedFor = ""
2013-07-03 16:53:46 +00:00
}
}
2013-07-06 19:41:42 +00:00
//--------------------------------------
// Event Loop
//--------------------------------------
// timeout
// ______
// | |
// | |
// v | recv majority votes
// -------- timeout ----------- -----------
// |Follower| ----------> | Candidate |--------------------> | Leader |
// -------- ----------- -----------
// ^ stepDown | stepDown |
// |_______________________|____________________________________ |
//
// The main event loop for the server
2013-07-07 20:21:04 +00:00
func (s *Server) loop() {
defer s.debugln("server.loop.end")
2013-07-06 19:41:42 +00:00
for {
2013-07-07 20:21:04 +00:00
s.debugln("server.loop.run ", s.state)
switch s.state {
2013-07-06 19:41:42 +00:00
case Follower:
2013-07-07 20:21:04 +00:00
s.followerLoop()
2013-07-06 19:41:42 +00:00
case Candidate:
2013-07-07 20:21:04 +00:00
s.candidateLoop()
2013-07-06 19:41:42 +00:00
case Leader:
2013-07-07 20:21:04 +00:00
s.leaderLoop()
case Stopped:
return
2013-07-06 19:41:42 +00:00
}
}
}
2013-07-07 20:21:04 +00:00
// send queues value on the event loop and blocks until the loop signals
// that it has been processed. It returns the value the loop attached to the
// event together with the reported error.
func (s *Server) send(value interface{}) (interface{}, error) {
	ev := s.sendAsync(value)

	// returnValue must be read only after the loop has signalled completion.
	result := <-ev.c
	return ev.returnValue, result
}
// sendAsync wraps value in an event, queues it on the server's event
// channel and returns the event without waiting for it to be processed.
// The event's reply channel has room for one result.
func (s *Server) sendAsync(value interface{}) *event {
	ev := new(event)
	ev.target = value
	ev.c = make(chan error, 1)

	s.c <- ev
	return ev
}
2013-07-06 19:41:42 +00:00
// The event loop that is run when the server is in a Follower state.
// Responds to RPCs from candidates and leaders.
// Converts to candidate if election timeout elapses without either:
// 1.Receiving valid AppendEntries RPC, or
// 2.Granting vote to candidate
2013-07-07 20:21:04 +00:00
func (s *Server) followerLoop() {
2013-07-03 16:53:46 +00:00
s.state = Follower
for {
2013-07-07 20:21:04 +00:00
var err error
select {
case e := <-s.c:
if e.target == &stopValue {
s.state = Stopped
} else if _, ok := e.target.(Command); ok {
err = NotLeaderError
} else if req, ok := e.target.(*AppendEntriesRequest); ok {
e.returnValue = s.processAppendEntriesRequest(req)
} else if req, ok := e.target.(*RequestVoteRequest); ok {
e.returnValue = s.processRequestVoteRequest(req)
}
2013-07-03 16:53:46 +00:00
2013-07-07 20:21:04 +00:00
// Callback to event.
e.c <- err
2013-07-07 20:55:55 +00:00
2013-07-07 20:21:04 +00:00
case <-afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2):
s.state = Candidate
}
2013-07-07 20:55:55 +00:00
2013-07-07 20:21:04 +00:00
// Exit loop on state change.
if s.state != Follower {
break
2013-07-03 16:53:46 +00:00
}
}
}
2013-07-06 19:41:42 +00:00
// The event loop that is run when the server is in a Candidate state.
2013-07-07 20:21:04 +00:00
func (s *Server) candidateLoop() {
2013-07-06 04:49:47 +00:00
lastLogIndex, lastLogTerm := s.log.lastInfo()
2013-07-07 20:21:04 +00:00
s.leader = ""
2013-07-03 16:53:46 +00:00
for {
2013-07-06 19:41:42 +00:00
// Increment current term, vote for self.
2013-07-03 16:53:46 +00:00
s.currentTerm++
2013-07-06 19:41:42 +00:00
s.votedFor = s.name
2013-07-03 16:53:46 +00:00
2013-07-06 19:41:42 +00:00
// Send RequestVote RPCs to all other servers.
2013-07-07 20:21:04 +00:00
respChan := make(chan *RequestVoteResponse, len(s.peers))
2013-07-06 04:49:47 +00:00
req := newRequestVoteRequest(s.currentTerm, s.name, lastLogIndex, lastLogTerm)
2013-07-03 16:53:46 +00:00
for _, peer := range s.peers {
2013-07-07 20:21:04 +00:00
go peer.sendVoteRequest(req, respChan)
2013-07-03 16:53:46 +00:00
}
2013-07-06 19:41:42 +00:00
// Wait for either:
// * Votes received from majority of servers: become leader
// * AppendEntries RPC received from new leader: step down.
// * Election timeout elapses without election resolution: increment term, start new election
// * Discover higher term: step down (§5.1)
2013-07-07 20:21:04 +00:00
votesGranted := 1
timeoutChan := afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2)
for {
// If we received enough votes then stop waiting for more votes.
s.debugln("server.candidate.votes: ", votesGranted, " quorum:", s.QuorumSize())
if votesGranted >= s.QuorumSize() {
s.state = Leader
break
}
// Collect votes from peers.
select {
case resp := <-respChan:
if resp.VoteGranted {
s.debugln("server.candidate.vote.granted: ", votesGranted)
votesGranted++
} else if resp.Term > s.currentTerm {
s.debugln("server.candidate.vote.failed")
s.setCurrentTerm(resp.Term, "")
break
}
2013-07-07 20:55:55 +00:00
case e := <-s.c:
2013-07-07 20:21:04 +00:00
var err error
if e.target == &stopValue {
s.state = Stopped
break
} else if _, ok := e.target.(Command); ok {
err = NotLeaderError
} else if _, ok := e.target.(*AppendEntriesRequest); ok {
err = NotLeaderError
} else if req, ok := e.target.(*RequestVoteRequest); ok {
e.returnValue = s.processRequestVoteRequest(req)
}
// Callback to event.
e.c <- err
case <-timeoutChan:
break
}
}
2013-07-07 20:55:55 +00:00
2013-07-07 20:21:04 +00:00
if s.state != Candidate {
break
2013-07-03 16:53:46 +00:00
}
}
}
2013-07-06 19:41:42 +00:00
// The event loop that is run when the server is in a Candidate state.
2013-07-07 20:21:04 +00:00
func (s *Server) leaderLoop() {
2013-07-06 19:41:42 +00:00
s.state = Leader
s.leader = s.name
2013-07-07 20:21:04 +00:00
s.commitCount = 0
2013-07-06 04:49:47 +00:00
logIndex, _ := s.log.lastInfo()
2013-07-03 16:53:46 +00:00
2013-07-06 19:41:42 +00:00
// Update the peers prevLogIndex to leader's lastLogIndex and start heartbeat.
2013-07-03 16:53:46 +00:00
for _, peer := range s.peers {
peer.prevLogIndex = logIndex
2013-07-06 04:49:47 +00:00
peer.startHeartbeat()
2013-07-03 16:53:46 +00:00
}
2013-07-01 15:46:53 +00:00
2013-07-03 16:53:46 +00:00
// Begin to collect response from followers
2013-07-06 19:41:42 +00:00
for {
2013-07-07 20:21:04 +00:00
var err error
2013-07-06 19:41:42 +00:00
select {
2013-07-07 20:21:04 +00:00
case e := <-s.c:
s.debugln("server.leader.select")
if e.target == &stopValue {
s.state = Stopped
} else if command, ok := e.target.(Command); ok {
s.processCommand(command, e)
continue
} else if req, ok := e.target.(*AppendEntriesRequest); ok {
e.returnValue = s.processAppendEntriesRequest(req)
} else if resp, ok := e.target.(*AppendEntriesResponse); ok {
s.processAppendEntriesResponse(resp)
} else if req, ok := e.target.(*RequestVoteRequest); ok {
e.returnValue = s.processRequestVoteRequest(req)
2013-07-06 19:41:42 +00:00
}
2013-07-03 16:53:46 +00:00
2013-07-07 20:21:04 +00:00
// Callback to event.
e.c <- err
}
2013-07-07 20:55:55 +00:00
2013-07-07 20:21:04 +00:00
// Exit loop on state change.
if s.state != Leader {
2013-07-06 19:41:42 +00:00
break
2013-07-03 16:53:46 +00:00
}
}
2013-07-06 19:41:42 +00:00
// Stop all peers.
for _, peer := range s.peers {
2013-07-07 20:21:04 +00:00
peer.stopHeartbeat()
}
}
2013-04-17 02:28:08 +00:00
//--------------------------------------
// Commands
//--------------------------------------
2013-04-28 04:51:17 +00:00
// Attempts to execute a command and replicate it. The function will return
// when the command has been successfully committed or an error has occurred.
func (s *Server) Do(command Command) (interface{}, error) {
2013-07-07 20:21:04 +00:00
return s.send(command)
}
2013-07-07 20:21:04 +00:00
// Processes a command.
func (s *Server) processCommand(command Command, e *event) {
s.debugln("server.command.process")
2013-07-07 20:21:04 +00:00
// Create an entry for the command in the log.
entry := s.log.createEntry(s.currentTerm, command)
2013-07-06 04:49:47 +00:00
if err := s.log.appendEntry(entry); err != nil {
2013-07-07 20:21:04 +00:00
s.debugln("server.command.log.error:", err)
e.c <- err
return
2013-05-05 19:36:23 +00:00
}
2013-05-05 20:26:04 +00:00
2013-07-07 20:21:04 +00:00
// Issue a callback for the entry once it's committed.
go func() {
// Wait for the entry to be committed.
select {
case <-entry.commit:
s.debugln("server.command.commit")
e.returnValue = entry.result
entry.result = nil
e.c <- nil
case <-time.After(time.Second):
s.debugln("server.command.timeout")
e.c <- CommandTimeoutError
}
}()
// Issue an append entries response for the server.
s.sendAsync(newAppendEntriesResponse(s.currentTerm, true, s.log.commitIndex))
2013-04-28 04:51:17 +00:00
}
2013-07-07 20:21:04 +00:00
//--------------------------------------
// Append Entries
//--------------------------------------
2013-07-06 19:41:42 +00:00
2013-07-07 20:21:04 +00:00
// AppendEntries hands an append-entries request to the event loop and
// returns its response. The result is nil when the loop produced no
// *AppendEntriesResponse (for example when it answered with an error).
func (s *Server) AppendEntries(req *AppendEntriesRequest) *AppendEntriesResponse {
	ret, _ := s.send(req)
	if resp, ok := ret.(*AppendEntriesResponse); ok {
		return resp
	}
	return nil
}
2013-07-07 20:21:04 +00:00
// Processes the "append entries" request.
func (s *Server) processAppendEntriesRequest(req *AppendEntriesRequest) *AppendEntriesResponse {
2013-04-30 04:13:50 +00:00
if req.Term < s.currentTerm {
2013-07-07 20:21:04 +00:00
s.debugln("server.ae.error: stale term")
return newAppendEntriesResponse(s.currentTerm, false, s.log.commitIndex)
2013-04-30 04:13:50 +00:00
}
2013-06-24 16:52:51 +00:00
2013-07-07 20:21:04 +00:00
// Update term and leader.
s.setCurrentTerm(req.Term, req.LeaderName)
2013-06-08 02:19:18 +00:00
2013-04-30 04:13:50 +00:00
// Reject if log doesn't contain a matching previous entry.
2013-07-06 04:49:47 +00:00
if err := s.log.truncate(req.PrevLogIndex, req.PrevLogTerm); err != nil {
2013-07-07 20:21:04 +00:00
s.debugln("server.ae.truncate.error: ", err)
return newAppendEntriesResponse(s.currentTerm, false, s.log.commitIndex)
2013-04-30 04:13:50 +00:00
}
// Append entries to the log.
2013-07-06 04:49:47 +00:00
if err := s.log.appendEntries(req.Entries); err != nil {
2013-07-07 20:21:04 +00:00
s.debugln("server.ae.append.error: ", err)
return newAppendEntriesResponse(s.currentTerm, false, s.log.commitIndex)
2013-04-30 04:13:50 +00:00
}
// Commit up to the commit index.
2013-07-06 04:49:47 +00:00
if err := s.log.setCommitIndex(req.CommitIndex); err != nil {
2013-07-07 20:21:04 +00:00
s.debugln("server.ae.commit.error: ", err)
return newAppendEntriesResponse(s.currentTerm, false, s.log.commitIndex)
2013-06-08 02:19:18 +00:00
}
2013-04-30 04:13:50 +00:00
2013-07-07 20:21:04 +00:00
return newAppendEntriesResponse(s.currentTerm, true, s.log.commitIndex)
2013-04-28 04:51:17 +00:00
}
2013-07-07 20:21:04 +00:00
// Processes the "append entries" response from the peer. This is only
// processed when the server is a leader. Responses received during other
// states are dropped.
func (s *Server) processAppendEntriesResponse(resp *AppendEntriesResponse) {
// If we find a higher term then change to a follower and exit.
if resp.Term > s.currentTerm {
s.setCurrentTerm(resp.Term, "")
return
}
2013-07-07 20:55:55 +00:00
2013-07-07 20:21:04 +00:00
// Ignore response if it's not successful.
if !resp.Success {
return
}
2013-07-07 20:55:55 +00:00
2013-07-07 20:21:04 +00:00
// Increment the commit count to make sure we have a quorum before committing.
s.commitCount++
if s.commitCount < s.QuorumSize() {
return
}
// Determine the committed index that a majority has.
var indices []uint64
indices = append(indices, s.log.currentIndex())
for _, peer := range s.peers {
indices = append(indices, peer.prevLogIndex)
}
sort.Sort(uint64Slice(indices))
// We can commit up to the index which the majority of the members have appended.
commitIndex := indices[s.QuorumSize()-1]
committedIndex := s.log.commitIndex
if commitIndex > committedIndex {
s.log.setCommitIndex(commitIndex)
for i := committedIndex; i < commitIndex; i++ {
if entry := s.log.getEntry(i + 1); entry != nil {
entry.commit <- true
}
}
2013-06-25 21:41:42 +00:00
}
2013-05-05 19:36:23 +00:00
}
2013-05-01 05:11:23 +00:00
//--------------------------------------
// Request Vote
2013-04-28 21:23:21 +00:00
//--------------------------------------
// Requests a vote from a server. A vote can be obtained if the vote's term is
// at the server's current term and the server has not made a vote yet. A vote
// can also be obtained if the term is greater than the server's current term.
2013-07-07 20:21:04 +00:00
func (s *Server) RequestVote(req *RequestVoteRequest) *RequestVoteResponse {
ret, _ := s.send(req)
resp, _ := ret.(*RequestVoteResponse)
return resp
}
2013-05-08 03:56:32 +00:00
2013-07-07 20:21:04 +00:00
// Processes a "request vote" request.
func (s *Server) processRequestVoteRequest(req *RequestVoteRequest) *RequestVoteResponse {
2013-04-28 21:23:21 +00:00
// If the request is coming from an old term then reject it.
if req.Term < s.currentTerm {
2013-07-07 20:21:04 +00:00
s.debugln("server.rv.error: stale term")
return newRequestVoteResponse(s.currentTerm, false)
2013-04-28 21:23:21 +00:00
}
2013-07-03 01:22:37 +00:00
2013-07-07 20:21:04 +00:00
s.setCurrentTerm(req.Term, "")
2013-04-28 21:23:21 +00:00
// If we've already voted for a different candidate then don't vote for this candidate.
if s.votedFor != "" && s.votedFor != req.CandidateName {
2013-07-07 20:21:04 +00:00
s.debugln("server.rv.error: duplicate vote: ", req.CandidateName)
return newRequestVoteResponse(s.currentTerm, false)
2013-04-28 21:23:21 +00:00
}
2013-07-07 20:21:04 +00:00
// If the candidate's log is not at least as up-to-date as our last log then don't vote.
2013-07-06 04:49:47 +00:00
lastIndex, lastTerm := s.log.lastInfo()
if lastIndex > req.LastLogIndex || lastTerm > req.LastLogTerm {
2013-07-07 20:21:04 +00:00
s.debugln("server.rv.error: out of date log: ", req.CandidateName)
return newRequestVoteResponse(s.currentTerm, false)
2013-04-28 22:49:52 +00:00
}
2013-04-28 21:23:21 +00:00
2013-04-28 22:49:52 +00:00
// If we made it this far then cast a vote and reset our election time out.
2013-07-07 20:21:04 +00:00
s.debugln("server.rv.vote: ", s.name, " votes for", req.CandidateName, "at term", req.Term)
2013-04-28 22:49:52 +00:00
s.votedFor = req.CandidateName
2013-06-24 16:52:51 +00:00
2013-07-07 20:21:04 +00:00
return newRequestVoteResponse(s.currentTerm, true)
}
2013-04-17 02:32:49 +00:00
//--------------------------------------
// Membership
//--------------------------------------
// Adds a peer to the server. This should be called by a system's join command
// within the context so that it is within the context of the server lock.
func (s *Server) AddPeer(name string) error {
// Do not allow peers to be added twice.
if s.peers[name] != nil {
return DuplicatePeerError
2013-04-17 02:32:49 +00:00
}
2013-04-28 21:23:21 +00:00
// Only add the peer if it doesn't have the same name.
if s.name != name {
2013-07-07 20:21:04 +00:00
//s.debugln("Add peer ", name)
2013-07-06 04:49:47 +00:00
peer := newPeer(s, name, s.heartbeatTimeout)
2013-06-04 13:35:43 +00:00
if s.state == Leader {
2013-07-06 04:49:47 +00:00
peer.startHeartbeat()
2013-06-04 13:35:43 +00:00
}
s.peers[peer.name] = peer
2013-04-28 21:23:21 +00:00
2013-06-05 00:02:45 +00:00
}
return nil
2013-04-17 02:32:49 +00:00
}
// Removes a peer from the server. This should be called by a system's join command
// within the context so that it is within the context of the server lock.
func (s *Server) RemovePeer(name string) error {
// Ignore removal of the server itself.
if s.name == name {
return nil
}
// Return error if peer doesn't exist.
peer := s.peers[name]
2013-06-07 05:58:41 +00:00
if peer == nil {
return fmt.Errorf("raft: Peer not found: %s", name)
}
2013-06-07 05:58:41 +00:00
// Flush entries to the peer first.
if s.state == Leader {
2013-07-07 20:21:04 +00:00
peer.flush()
2013-06-07 05:58:41 +00:00
}
// Stop peer and remove it.
2013-07-07 20:21:04 +00:00
peer.stopHeartbeat()
delete(s.peers, name)
return nil
}
2013-06-06 03:32:52 +00:00
2013-06-03 21:58:12 +00:00
//--------------------------------------
// Log compaction
//--------------------------------------
2013-06-05 05:56:59 +00:00
// The background snapshot function
2013-06-05 00:02:45 +00:00
func (s *Server) Snapshot() {
for {
2013-06-05 05:56:59 +00:00
// TODO: change this... to something reasonable
2013-06-12 16:47:48 +00:00
time.Sleep(60 * time.Second)
2013-06-24 16:52:51 +00:00
s.takeSnapshot()
2013-06-05 00:02:45 +00:00
}
}
2013-06-03 21:58:12 +00:00
2013-06-05 00:02:45 +00:00
func (s *Server) takeSnapshot() error {
2013-06-03 21:58:12 +00:00
//TODO put a snapshot mutex
2013-07-07 20:21:04 +00:00
s.debugln("take Snapshot")
2013-06-03 21:58:12 +00:00
if s.currentSnapshot != nil {
return errors.New("handling snapshot")
}
2013-06-05 05:56:59 +00:00
2013-07-06 04:49:47 +00:00
lastIndex, lastTerm := s.log.commitInfo()
2013-06-03 21:58:12 +00:00
if lastIndex == 0 || lastTerm == 0 {
return errors.New("No logs")
}
path := s.SnapshotPath(lastIndex, lastTerm)
2013-06-12 16:47:48 +00:00
var state []byte
var err error
2013-06-06 04:14:07 +00:00
2013-06-12 16:47:48 +00:00
if s.stateMachine != nil {
state, err = s.stateMachine.Save()
if err != nil {
return err
}
} else {
state = []byte{0}
2013-06-06 04:14:07 +00:00
}
2013-06-12 16:47:48 +00:00
var peerNames []string
for _, peer := range s.peers {
peerNames = append(peerNames, peer.Name())
2013-06-06 04:14:07 +00:00
}
2013-06-12 16:47:48 +00:00
peerNames = append(peerNames, s.Name())
2013-06-06 04:14:07 +00:00
2013-06-24 16:52:51 +00:00
s.currentSnapshot = &Snapshot{lastIndex, lastTerm, peerNames, state, path}
2013-06-03 21:58:12 +00:00
2013-06-05 00:02:45 +00:00
s.saveSnapshot()
2013-06-05 05:56:59 +00:00
2013-07-06 04:49:47 +00:00
s.log.compact(lastIndex, lastTerm)
2013-06-05 05:56:59 +00:00
2013-06-03 21:58:12 +00:00
return nil
}
// Retrieves the log path for the server.
2013-06-05 00:02:45 +00:00
func (s *Server) saveSnapshot() error {
2013-06-05 05:56:59 +00:00
2013-06-03 21:58:12 +00:00
if s.currentSnapshot == nil {
return errors.New("no snapshot to save")
}
2013-07-06 04:49:47 +00:00
err := s.currentSnapshot.save()
2013-06-03 21:58:12 +00:00
if err != nil {
return err
}
2013-06-08 02:19:18 +00:00
2013-06-03 21:58:12 +00:00
tmp := s.lastSnapshot
s.lastSnapshot = s.currentSnapshot
2013-06-05 05:56:59 +00:00
// delete the previous snapshot if there is any change
2013-06-12 16:47:48 +00:00
if tmp != nil && !(tmp.LastIndex == s.lastSnapshot.LastIndex && tmp.LastTerm == s.lastSnapshot.LastTerm) {
2013-07-06 04:49:47 +00:00
tmp.remove()
2013-06-05 00:02:45 +00:00
}
2013-06-03 21:58:12 +00:00
s.currentSnapshot = nil
return nil
}
2013-06-05 00:02:45 +00:00
// Retrieves the log path for the server.
func (s *Server) SnapshotPath(lastIndex uint64, lastTerm uint64) string {
return path.Join(s.path, "snapshot", fmt.Sprintf("%v_%v.ss", lastTerm, lastIndex))
2013-06-05 00:02:45 +00:00
}
2013-06-08 02:19:18 +00:00
func (s *Server) SnapshotRecovery(req *SnapshotRequest) (*SnapshotResponse, error) {
2013-06-03 21:58:12 +00:00
//
s.mutex.Lock()
defer s.mutex.Unlock()
2013-06-05 05:56:59 +00:00
2013-06-06 04:14:07 +00:00
s.stateMachine.Recovery(req.State)
2013-06-03 21:58:12 +00:00
2013-06-12 16:47:48 +00:00
//recovery the cluster configuration
for _, peerName := range req.Peers {
s.AddPeer(peerName)
}
2013-06-03 21:58:12 +00:00
//update term and index
2013-06-06 03:25:17 +00:00
s.currentTerm = req.LastTerm
2013-06-12 16:47:48 +00:00
2013-07-06 04:49:47 +00:00
s.log.updateCommitIndex(req.LastIndex)
2013-06-12 16:47:48 +00:00
2013-06-06 03:25:17 +00:00
snapshotPath := s.SnapshotPath(req.LastIndex, req.LastTerm)
2013-06-12 16:47:48 +00:00
s.currentSnapshot = &Snapshot{req.LastIndex, req.LastTerm, req.Peers, req.State, snapshotPath}
2013-06-24 16:52:51 +00:00
2013-06-08 02:19:18 +00:00
s.saveSnapshot()
2013-06-24 16:52:51 +00:00
2013-07-06 04:49:47 +00:00
s.log.compact(req.LastIndex, req.LastTerm)
2013-06-05 00:02:45 +00:00
2013-07-06 04:49:47 +00:00
return newSnapshotResponse(req.LastTerm, true, req.LastIndex), nil
2013-06-03 21:58:12 +00:00
}
2013-06-05 05:56:59 +00:00
// Load a snapshot at restart
2013-06-03 21:58:12 +00:00
func (s *Server) LoadSnapshot() error {
dir, err := os.OpenFile(path.Join(s.path, "snapshot"), os.O_RDONLY, 0)
2013-06-03 21:58:12 +00:00
if err != nil {
2013-06-24 16:52:51 +00:00
2013-06-12 16:47:48 +00:00
return err
2013-06-03 21:58:12 +00:00
}
filenames, err := dir.Readdirnames(-1)
if err != nil {
dir.Close()
panic(err)
}
dir.Close()
if len(filenames) == 0 {
return errors.New("no snapshot")
}
// not sure how many snapshot we should keep
sort.Strings(filenames)
2013-06-08 02:19:18 +00:00
snapshotPath := path.Join(s.path, "snapshot", filenames[len(filenames)-1])
2013-06-03 21:58:12 +00:00
2013-07-01 02:20:23 +00:00
// should not fail
2013-06-03 21:58:12 +00:00
file, err := os.OpenFile(snapshotPath, os.O_RDONLY, 0)
defer file.Close()
if err != nil {
panic(err)
}
2013-06-24 16:52:51 +00:00
// TODO check checksum first
2013-06-05 05:56:59 +00:00
2013-06-12 16:47:48 +00:00
var snapshotBytes []byte
2013-07-01 02:14:02 +00:00
var checksum uint32
2013-06-06 20:54:27 +00:00
2013-07-01 02:50:48 +00:00
n, err := fmt.Fscanf(file, "%08x\n", &checksum)
2013-06-03 21:58:12 +00:00
if err != nil {
2013-06-06 04:14:07 +00:00
return err
2013-06-03 21:58:12 +00:00
}
2013-06-12 16:47:48 +00:00
if n != 1 {
2013-06-06 04:14:07 +00:00
return errors.New("Bad snapshot file")
2013-06-03 21:58:12 +00:00
}
2013-06-12 16:47:48 +00:00
snapshotBytes, _ = ioutil.ReadAll(file)
2013-07-07 20:21:04 +00:00
s.debugln(string(snapshotBytes))
2013-06-12 16:47:48 +00:00
2013-07-01 00:55:54 +00:00
// Generate checksum.
byteChecksum := crc32.ChecksumIEEE(snapshotBytes)
if uint32(checksum) != byteChecksum {
2013-07-07 20:21:04 +00:00
s.debugln(checksum, " ", byteChecksum)
2013-07-01 00:55:54 +00:00
return errors.New("bad snapshot file")
}
2013-06-12 16:47:48 +00:00
err = json.Unmarshal(snapshotBytes, &s.lastSnapshot)
2013-06-06 20:54:27 +00:00
2013-06-06 04:14:07 +00:00
if err != nil {
2013-07-07 20:21:04 +00:00
s.debugln("unmarshal error: ", err)
2013-06-06 04:14:07 +00:00
return err
}
2013-06-12 16:47:48 +00:00
err = s.stateMachine.Recovery(s.lastSnapshot.State)
2013-07-01 00:55:54 +00:00
if err != nil {
2013-07-07 20:21:04 +00:00
s.debugln("recovery error: ", err)
2013-07-01 02:14:02 +00:00
return err
2013-07-01 00:55:54 +00:00
}
2013-06-12 16:47:48 +00:00
for _, peerName := range s.lastSnapshot.Peers {
s.AddPeer(peerName)
}
2013-06-06 04:14:07 +00:00
2013-07-06 04:49:47 +00:00
s.log.startTerm = s.lastSnapshot.LastTerm
s.log.startIndex = s.lastSnapshot.LastIndex
s.log.updateCommitIndex(s.lastSnapshot.LastIndex)
2013-06-05 00:02:45 +00:00
2013-06-03 21:58:12 +00:00
return err
}
2013-07-07 20:21:04 +00:00
//--------------------------------------
// Debugging
//--------------------------------------
// debugln emits a debug message prefixed with the server's name. The
// arguments are joined the way fmt.Sprintln joins them, so format verbs in
// them are not interpreted.
func (s *Server) debugln(v ...interface{}) {
	message := fmt.Sprintln(v...)
	debugf("[%s] %s", s.name, message)
}
// traceln emits a trace message prefixed with the server's name. The
// arguments are joined the way fmt.Sprintln joins them, so format verbs in
// them are not interpreted.
func (s *Server) traceln(v ...interface{}) {
	message := fmt.Sprintln(v...)
	tracef("[%s] %s", s.name, message)
}