influxdb/_vendor/raft/server.go

package raft

import (
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"path"
	"path/filepath"
	"sort"
	"sync"
	"time"
)

//------------------------------------------------------------------------------
//
// Constants
//
//------------------------------------------------------------------------------

// Possible server states, as returned by State().
//
//   - Stopped: the state a server is created in by NewServer and the state it
//     returns to after Stop().
//   - Initialized: set by Init() once the log has been opened; the server is
//     not running yet (see Running()).
//   - Follower, Candidate, Leader, Snapshotting: running states, each served
//     by its own loop (followerLoop, candidateLoop, leaderLoop, snapshotLoop).
const (
	Stopped      = "stopped"
	Initialized  = "initialized"
	Follower     = "follower"
	Candidate    = "candidate"
	Leader       = "leader"
	Snapshotting = "snapshotting"
)

const (
	// MaxLogEntriesPerRequest is the default that NewServer assigns to the
	// server's maxLogEntriesPerRequest field.
	MaxLogEntriesPerRequest         = 2000
	// NumberOfLogEntriesAfterSnapshot is not referenced in this part of the
	// file; presumably the number of log entries retained after a snapshot is
	// taken — TODO confirm against the snapshot code.
	NumberOfLogEntriesAfterSnapshot = 200
)

const (
	// DefaultHeartbeatInterval is the interval that the leader will send
	// AppendEntriesRequests to followers to maintain leadership.
	DefaultHeartbeatInterval = 50 * time.Millisecond

	// DefaultElectionTimeout is the election timeout a new server starts
	// with. The follower and candidate loops arm their timers with
	// afterBetween(timeout, 2*timeout).
	DefaultElectionTimeout = 150 * time.Millisecond
)

// ElectionTimeoutThresholdPercent specifies the threshold at which the server
// will dispatch warning events that the heartbeat RTT is too close to the
// election timeout. Despite the name it is a fraction, not a percentage: the
// follower loop multiplies the election timeout by this value (0.8 == 80%).
const ElectionTimeoutThresholdPercent = 0.8

//------------------------------------------------------------------------------
//
// Errors
//
//------------------------------------------------------------------------------

// Sentinel errors returned by the server.
var (
	// NotLeaderError is reported for requests that only a leader can serve,
	// e.g. commands received while following, campaigning or snapshotting.
	NotLeaderError = errors.New("raft.Server: Not current leader")

	// DuplicatePeerError is not returned anywhere in this part of the file;
	// presumably reserved for adding a peer twice — TODO confirm.
	DuplicatePeerError = errors.New("raft.Server: Duplicate peer")

	// CommandTimeoutError is not returned anywhere in this part of the file.
	CommandTimeoutError = errors.New("raft: Command timeout")

	// StopError is returned by send when the server is not running or is
	// stopped while the event is in flight.
	StopError = errors.New("raft: Has been stopped")
)

//------------------------------------------------------------------------------
//
// Typedefs
//
//------------------------------------------------------------------------------

// Server is the public API of a raft node. A server is involved in the
// consensus protocol and can act as a follower, candidate or a leader.
// NewServer returns the implementation defined in this file (type server).
type Server interface {
	// Read-only accessors: identity, role, storage paths, term/log state and
	// cluster size.
	Name() string
	Context() interface{}
	StateMachine() StateMachine
	Leader() string
	State() string
	Path() string
	LogPath() string
	SnapshotPath(lastIndex uint64, lastTerm uint64) string
	Term() uint64
	CommitIndex() uint64
	VotedFor() string
	MemberCount() int
	QuorumSize() int
	IsLogEmpty() bool
	LogEntries() []*LogEntry
	LastCommandName() string
	GetState() string
	// Timing configuration.
	ElectionTimeout() time.Duration
	SetElectionTimeout(duration time.Duration)
	HeartbeatInterval() time.Duration
	SetHeartbeatInterval(duration time.Duration)
	// Transport used to exchange requests with peers.
	Transporter() Transporter
	SetTransporter(t Transporter)
	// Handlers for incoming raft RPCs.
	AppendEntries(req *AppendEntriesRequest) *AppendEntriesResponse
	RequestVote(req *RequestVoteRequest) *RequestVoteResponse
	RequestSnapshot(req *SnapshotRequest) *SnapshotResponse
	SnapshotRecoveryRequest(req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse
	// Cluster membership.
	AddPeer(name string, connectiongString string) error
	RemovePeer(name string) error
	Peers() map[string]*Peer
	// Lifecycle. Start calls Init itself, so calling Init first is optional.
	Init() error
	Start() error
	Stop()
	Running() bool
	// Do submits a command to the server's event loop and waits for the result.
	Do(command Command) (interface{}, error)
	// Snapshotting / log compaction.
	TakeSnapshot() error
	LoadSnapshot() error
	// AddEventListener is not defined in this file; presumably provided by
	// the embedded eventDispatcher — TODO confirm.
	AddEventListener(string, EventListener)
	// FlushCommitIndex is implemented outside the part of the file shown here.
	FlushCommitIndex()
}

// server is the Server implementation returned by NewServer. Its role-specific
// behaviour runs in a single event-loop goroutine (see loop) that consumes
// events from c.
type server struct {
	*eventDispatcher

	// name, path, transporter and context are supplied to NewServer.
	name        string
	path        string
	// state holds one of the state constants (Stopped, Follower, ...).
	state       string
	transporter Transporter
	context     interface{}
	currentTerm uint64

	// votedFor is the candidate voted for in the current term ("" if none).
	votedFor   string
	log        *Log
	// leader is the name of the leader this server currently knows about.
	leader     string
	// peers holds the other cluster members by name; it never contains this
	// server itself (see AddPeer).
	peers      map[string]*Peer
	// mutex guards state, currentTerm, transporter and the timeouts in the
	// locked accessors; several internal paths access fields without it.
	mutex      sync.RWMutex
	// syncedPeer records, while leading, which members have appended an entry
	// from this leader. It is created when the state becomes Leader.
	syncedPeer map[string]bool

	// stopped is closed by Stop to signal every loop and helper goroutine;
	// Start allocates a fresh channel each time.
	stopped           chan bool
	// c is the buffered queue of events consumed by the event loop.
	c                 chan *ev
	electionTimeout   time.Duration
	heartbeatInterval time.Duration

	snapshot *Snapshot

	// PendingSnapshot is an unfinished snapshot.
	// After the pendingSnapshot is saved to disk,
	// it will be set to snapshot and also will be
	// set to nil.
	pendingSnapshot *Snapshot

	stateMachine            StateMachine
	maxLogEntriesPerRequest uint64

	connectionString string

	// routineGroup tracks goroutines spawned by the server so that Stop can
	// wait for them before closing the log.
	routineGroup sync.WaitGroup
}

// An internal event to be processed by the server's event loop.
type ev struct {
	// target is the payload: a Command, an RPC request or an RPC response.
	target      interface{}
	// returnValue is filled in by the handler and handed back by send.
	returnValue interface{}
	// c receives the handler's error (nil on success) once the event has
	// been processed; send and sendAsync create it with a buffer of one.
	c           chan error
}

//------------------------------------------------------------------------------
//
// Constructor
//
//------------------------------------------------------------------------------

// NewServer creates a server whose log lives under path.
//
// name must be non-empty, otherwise an error is returned. transporter must not
// be nil; a nil transporter panics. stateMachine may be nil when snapshotting
// and log compaction are not needed. ctx is opaque to the raft package and is
// only handed back by Server.Context(). connectionString can be anything.
//
// The returned server is in the Stopped state with default election timeout,
// heartbeat interval and per-request entry limit.
func NewServer(name string, path string, transporter Transporter, stateMachine StateMachine, ctx interface{}, connectionString string) (Server, error) {
	switch {
	case name == "":
		return nil, errors.New("raft.Server: Name cannot be blank")
	case transporter == nil:
		panic("raft: Transporter required")
	}

	srv := new(server)
	srv.name = name
	srv.path = path
	srv.transporter = transporter
	srv.stateMachine = stateMachine
	srv.context = ctx
	srv.state = Stopped
	srv.peers = make(map[string]*Peer)
	srv.log = newLog()
	srv.c = make(chan *ev, 256)
	srv.electionTimeout = DefaultElectionTimeout
	srv.heartbeatInterval = DefaultHeartbeatInterval
	srv.maxLogEntriesPerRequest = MaxLogEntriesPerRequest
	srv.connectionString = connectionString
	srv.eventDispatcher = newEventDispatcher(srv)

	// The log invokes this hook for every entry it applies: announce the
	// commit first, then hand the command to whichever Apply flavour it
	// implements.
	srv.log.ApplyFunc = func(entry *LogEntry, cmd Command) (interface{}, error) {
		srv.DispatchEvent(newEvent(CommitEventType, entry, nil))

		if applier, ok := cmd.(CommandApply); ok {
			return applier.Apply(&context{
				server:       srv,
				currentTerm:  srv.currentTerm,
				currentIndex: srv.log.internalCurrentIndex(),
				commitIndex:  srv.log.commitIndex,
			})
		}
		if legacy, ok := cmd.(deprecatedCommandApply); ok {
			return legacy.Apply(srv)
		}
		return nil, fmt.Errorf("Command does not implement Apply()")
	}

	return srv, nil
}

//------------------------------------------------------------------------------
//
// Accessors
//
//------------------------------------------------------------------------------

//--------------------------------------
// General
//--------------------------------------

// Name returns the name the server was created with.
func (s *server) Name() string {
	name := s.name
	return name
}

// Path returns the directory under which the server stores its data.
func (s *server) Path() string {
	dir := s.path
	return dir
}

// Leader returns the name of the leader this server currently knows about,
// or "" when none is known.
//
// NOTE(review): the field is read without holding s.mutex, although
// updateCurrentTerm and setState write it under that lock.
func (s *server) Leader() string {
	leaderName := s.leader
	return leaderName
}

// Peers returns a snapshot of the peer table: a new map holding a clone of
// every peer, keyed by peer name. Mutating the result does not affect the
// server.
//
// Only a read lock is taken because the peer table is not modified here; this
// lets Peers run concurrently with the other read-only accessors.
func (s *server) Peers() map[string]*Peer {
	s.mutex.RLock()
	defer s.mutex.RUnlock()

	peers := make(map[string]*Peer, len(s.peers))
	for name, peer := range s.peers {
		peers[name] = peer.clone()
	}
	return peers
}

// Transporter returns the object used to send requests to peers.
func (s *server) Transporter() Transporter {
	s.mutex.RLock()
	current := s.transporter
	s.mutex.RUnlock()
	return current
}

// SetTransporter replaces the object used to send requests to peers.
func (s *server) SetTransporter(t Transporter) {
	s.mutex.Lock()
	s.transporter = t
	s.mutex.Unlock()
}

// Context returns the opaque value that was handed to NewServer.
func (s *server) Context() interface{} {
	ctx := s.context
	return ctx
}

// StateMachine returns the state machine that was handed to NewServer; it may
// be nil.
func (s *server) StateMachine() StateMachine {
	sm := s.stateMachine
	return sm
}

// LogPath returns the location of the log file: "log" inside the server's
// storage path.
func (s *server) LogPath() string {
	logFile := path.Join(s.path, "log")
	return logFile
}

// State returns the server's current state (one of the state constants).
func (s *server) State() string {
	s.mutex.RLock()
	current := s.state
	s.mutex.RUnlock()
	return current
}

// setState moves the server to the given state and announces the transition.
//
// Becoming Leader additionally records this server as the leader and starts a
// fresh syncedPeer set. A state-change event is always dispatched; a
// leader-change event follows only when the leader actually changed. Both
// events are dispatched while s.mutex is still held for writing.
func (s *server) setState(state string) {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	// Remember what we are transitioning from for the events below.
	oldState, oldLeader := s.state, s.leader

	s.state = state
	if state == Leader {
		s.leader = s.name
		s.syncedPeer = make(map[string]bool)
	}

	s.DispatchEvent(newEvent(StateChangeEventType, s.state, oldState))
	if s.leader != oldLeader {
		s.DispatchEvent(newEvent(LeaderChangeEventType, s.leader, oldLeader))
	}
}

// Term returns the server's current term.
func (s *server) Term() uint64 {
	s.mutex.RLock()
	term := s.currentTerm
	s.mutex.RUnlock()
	return term
}

// CommitIndex returns the log's commit index, read under the log's own lock.
func (s *server) CommitIndex() uint64 {
	s.log.mutex.RLock()
	idx := s.log.commitIndex
	s.log.mutex.RUnlock()
	return idx
}

// VotedFor returns the name of the candidate this server voted for in the
// current term, or "" if it has not voted.
func (s *server) VotedFor() string {
	candidate := s.votedFor
	return candidate
}

// IsLogEmpty reports whether the server's log has no entries.
func (s *server) IsLogEmpty() bool {
	empty := s.log.isEmpty()
	return empty
}

// LogEntries returns a list of all the log entries. This should only be used
// for debugging purposes.
//
// The returned slice is a copy taken under the log's read lock, so it stays
// stable after the lock is released even if the log is appended to or
// truncated. The *LogEntry values themselves are shared with the log.
func (s *server) LogEntries() []*LogEntry {
	s.log.mutex.RLock()
	defer s.log.mutex.RUnlock()

	entries := make([]*LogEntry, len(s.log.entries))
	copy(entries, s.log.entries)
	return entries
}

// LastCommandName returns the command name of the last entry in the log.
func (s *server) LastCommandName() string {
	commandName := s.log.lastCommandName()
	return commandName
}

// GetState returns a one-line, human-readable summary of the server (name,
// state, term and commit index) intended for debugging.
func (s *server) GetState() string {
	s.mutex.RLock()
	defer s.mutex.RUnlock()

	const layout = "Name: %s, State: %s, Term: %v, CommitedIndex: %v "
	return fmt.Sprintf(layout, s.name, s.state, s.currentTerm, s.log.commitIndex)
}

// promotable reports whether the server may become a candidate, which is the
// case once its log's current index is above zero.
func (s *server) promotable() bool {
	idx := s.log.currentIndex()
	return idx > 0
}

//--------------------------------------
// Membership
//--------------------------------------

// MemberCount returns the size of the cluster: every known peer plus this
// server itself.
func (s *server) MemberCount() int {
	s.mutex.RLock()
	peerCount := len(s.peers)
	s.mutex.RUnlock()
	return peerCount + 1
}

// QuorumSize returns how many servers form a majority of the cluster.
func (s *server) QuorumSize() int {
	half := s.MemberCount() / 2
	return half + 1
}

//--------------------------------------
// Election timeout
//--------------------------------------

// ElectionTimeout returns the configured election timeout.
func (s *server) ElectionTimeout() time.Duration {
	s.mutex.RLock()
	timeout := s.electionTimeout
	s.mutex.RUnlock()
	return timeout
}

// SetElectionTimeout changes the election timeout. The follower and candidate
// loops read the value each time they arm a new timer.
func (s *server) SetElectionTimeout(duration time.Duration) {
	s.mutex.Lock()
	s.electionTimeout = duration
	s.mutex.Unlock()
}

//--------------------------------------
// Heartbeat interval
//--------------------------------------

// HeartbeatInterval returns the configured heartbeat interval.
func (s *server) HeartbeatInterval() time.Duration {
	s.mutex.RLock()
	interval := s.heartbeatInterval
	s.mutex.RUnlock()
	return interval
}

// SetHeartbeatInterval changes the heartbeat interval of the server and
// pushes the new value to every known peer.
func (s *server) SetHeartbeatInterval(duration time.Duration) {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	s.heartbeatInterval = duration
	for name := range s.peers {
		s.peers[name].setHeartbeatInterval(duration)
	}
}

//------------------------------------------------------------------------------
//
// Methods
//
//------------------------------------------------------------------------------

//--------------------------------------
// Initialization
//--------------------------------------

// init registers the commands this package ships with: the no-op command and
// the default join and leave commands.
func init() {
	RegisterCommand(&NOPCommand{})
	RegisterCommand(&DefaultJoinCommand{})
	RegisterCommand(&DefaultLeaveCommand{})
}

// Start initializes the server if necessary (see Init), moves it to the
// Follower state and launches the event loop in its own goroutine.
//
// How the server can make progress afterwards depends on its log:
//   - If log entries exist, it may promote itself to candidate when no
//     AppendEntries requests arrive.
//   - If no log entries exist, it waits for AppendEntries from another node,
//     or for a self-join command, on which it immediately becomes leader and
//     commits the entry.
//
// An error is returned if the server is already running or Init fails.
func (s *server) Start() error {
	if s.Running() {
		return fmt.Errorf("raft.Server: Server already running[%v]", s.state)
	}
	if initErr := s.Init(); initErr != nil {
		return initErr
	}

	// A fresh channel is needed on every start because Stop closes it.
	s.stopped = make(chan bool)
	s.setState(Follower)

	if s.promotable() {
		// Existing entries: promotion to candidate is allowed on timeout.
		s.debugln("start from previous saved state")
	} else {
		// Empty log: wait for a leader or a self-join before campaigning.
		s.debugln("start as a new raft server")
	}

	debugln(s.GetState())

	// Track the loop goroutine so Stop can wait for it to finish.
	s.routineGroup.Add(1)
	go func() {
		defer s.routineGroup.Done()
		s.loop()
	}()
	return nil
}

// Init prepares the server for Start without starting it.
//
// It creates the "snapshot" directory under the storage path if missing,
// reads the configuration, opens the log at LogPath() and adopts the term of
// the last log entry. On success the state becomes Initialized.
//
// Init is a no-op (apart from setting the state to Initialized) when the
// server is already initialized or its log was initialized by an earlier run.
// It fails when the server is running or any of the steps above fails.
func (s *server) Init() error {
	if s.Running() {
		return fmt.Errorf("raft.Server: Server already running[%v]", s.state)
	}

	// An initialized log means the server was stopped after having run, so
	// there is nothing left to load.
	if s.state == Initialized || s.log.initialized {
		s.state = Initialized
		return nil
	}

	// The snapshot directory may legitimately exist already.
	snapshotDir := path.Join(s.path, "snapshot")
	if mkErr := os.Mkdir(snapshotDir, 0700); mkErr != nil && !os.IsExist(mkErr) {
		s.debugln("raft: Snapshot dir error: ", mkErr)
		return fmt.Errorf("raft: Initialization error: %s", mkErr)
	}

	if confErr := s.readConf(); confErr != nil {
		s.debugln("raft: Conf file error: ", confErr)
		return fmt.Errorf("raft: Initialization error: %s", confErr)
	}

	// Open the log and load whatever entries it already contains.
	if logErr := s.log.open(s.LogPath()); logErr != nil {
		s.debugln("raft: Log error: ", logErr)
		return fmt.Errorf("raft: Initialization error: %s", logErr)
	}

	// Resume from the term of the last entry in the log.
	_, lastTerm := s.log.lastInfo()
	s.currentTerm = lastTerm

	s.state = Initialized
	return nil
}

// Stop shuts down the server: it signals every loop and helper goroutine to
// exit, waits for them, closes the log and sets the state to Stopped.
// Calling Stop on a server that is already Stopped does nothing.
//
// A server that is only Initialized (Init was called but Start was not, or
// Init was called again after a previous Stop) has no live goroutines, and
// its stopped channel is either nil or already closed. Closing it in that
// state would panic, so the signalling step is skipped and only the log is
// closed.
func (s *server) Stop() {
	if s.State() == Stopped {
		return
	}

	if s.Running() {
		close(s.stopped)

		// make sure all goroutines have stopped before we close the log
		s.routineGroup.Wait()
	}

	s.log.close()
	s.setState(Stopped)
}

// Running reports whether the server has been started, i.e. it is in any
// state other than Stopped or Initialized.
func (s *server) Running() bool {
	s.mutex.RLock()
	defer s.mutex.RUnlock()

	switch s.state {
	case Stopped, Initialized:
		return false
	default:
		return true
	}
}

//--------------------------------------
// Term
//--------------------------------------

// updateCurrentTerm adopts a larger term observed from another server and
// steps down to follower. It is only used when a larger external term is
// found; calling it with a term that is not larger trips the assertion.
//
// leaderName becomes the known leader (pass "" when unknown) and the vote for
// the previous term is cleared. Term-change and, if applicable,
// leader-change events are dispatched afterwards.
func (s *server) updateCurrentTerm(term uint64, leaderName string) {
	_assert(term > s.currentTerm,
		"upadteCurrentTerm: update is called when term is not larger than currentTerm")

	// Store previous values temporarily.
	prevTerm := s.currentTerm
	prevLeader := s.leader

	// set currentTerm = T, convert to follower (§5.1)
	// stop heartbeats before step-down
	if s.state == Leader {
		for _, peer := range s.peers {
			peer.stopHeartbeat(false)
		}
	}
	// Step down. setState takes s.mutex itself, so it must be called before
	// the lock below is acquired.
	if s.state != Follower {
		s.setState(Follower)
	}

	// Update the term and leader, and clear the vote.
	s.mutex.Lock()
	s.currentTerm = term
	s.leader = leaderName
	s.votedFor = ""
	s.mutex.Unlock()

	// Dispatch change events.
	s.DispatchEvent(newEvent(TermChangeEventType, s.currentTerm, prevTerm))

	if prevLeader != s.leader {
		s.DispatchEvent(newEvent(LeaderChangeEventType, s.leader, prevLeader))
	}
}

//--------------------------------------
// Event Loop
//--------------------------------------

//               ________
//            --|Snapshot|                 timeout
//            |  --------                  ______
// recover    |       ^                   |      |
// snapshot / |       |snapshot           |      |
// higher     |       |                   v      |     recv majority votes
// term       |    --------    timeout    -----------                        -----------
//            |-> |Follower| ----------> | Candidate |--------------------> |  Leader   |
//                 --------               -----------                        -----------
//                    ^          higher term/ |                         higher term |
//                    |            new leader |                                     |
//                    |_______________________|____________________________________ |
//
// loop is the server's main event loop. It repeatedly runs the loop that
// belongs to the current state; each of those returns when the state changes.
// loop itself returns once the state is Stopped.
func (s *server) loop() {
	defer s.debugln("server.loop.end")

	for state := s.State(); state != Stopped; state = s.State() {
		s.debugln("server.loop.run ", state)

		switch state {
		case Follower:
			s.followerLoop()
		case Candidate:
			s.candidateLoop()
		case Leader:
			s.leaderLoop()
		case Snapshotting:
			s.snapshotLoop()
		}
	}
}

// send hands value to the event loop and blocks until the loop has processed
// it, returning the handler's result and error.
//
// StopError is returned when the server is not running, or when it is stopped
// before the event could be queued or answered.
func (s *server) send(value interface{}) (interface{}, error) {
	if !s.Running() {
		return nil, StopError
	}

	request := &ev{target: value, c: make(chan error, 1)}

	// Step 1: queue the event, giving up if the server stops first.
	select {
	case <-s.stopped:
		return nil, StopError
	case s.c <- request:
	}

	// Step 2: wait for the handler's verdict, again unless the server stops.
	select {
	case err := <-request.c:
		return request.returnValue, err
	case <-s.stopped:
		return nil, StopError
	}
}

// sendAsync queues value for the event loop without waiting for the result.
// Nothing happens when the server is not running.
func (s *server) sendAsync(value interface{}) {
	if !s.Running() {
		return
	}

	request := &ev{target: value, c: make(chan error, 1)}

	// Fast path: the queue usually has room, so try a non-blocking send and
	// avoid spawning a goroutine.
	select {
	case s.c <- request:
		return
	default:
	}

	// Slow path: the queue is full. Deliver from a tracked goroutine that
	// gives up when the server stops.
	s.routineGroup.Add(1)
	go func() {
		defer s.routineGroup.Done()
		select {
		case <-s.stopped:
		case s.c <- request:
		}
	}()
}

// The event loop that is run when the server is in a Follower state.
// Responds to RPCs from candidates and leaders.
// Converts to candidate if election timeout elapses without either:
//   1.Receiving valid AppendEntries RPC, or
//   2.Granting vote to candidate
//
// The loop returns when the state is no longer Follower, or after setting the
// state to Stopped when the stopped channel is closed.
func (s *server) followerLoop() {
	// since marks the last time the election timer was reset; it is used to
	// measure how late heartbeats arrive.
	since := time.Now()
	electionTimeout := s.ElectionTimeout()
	timeoutChan := afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2)

	for s.State() == Follower {
		var err error
		// update is set when the election timer must be re-armed.
		update := false
		select {
		case <-s.stopped:
			s.setState(Stopped)
			return

		case e := <-s.c:
			switch req := e.target.(type) {
			case JoinCommand:
				// If no log entries exist and a self-join command is issued
				// then immediately become leader and commit entry.
				if s.log.currentIndex() == 0 && req.NodeName() == s.Name() {
					s.debugln("selfjoin and promote to leader")
					s.setState(Leader)
					s.processCommand(req, e)
				} else {
					err = NotLeaderError
				}
			case *AppendEntriesRequest:
				// If heartbeats get too close to the election timeout then send an event.
				elapsedTime := time.Now().Sub(since)
				if elapsedTime > time.Duration(float64(electionTimeout)*ElectionTimeoutThresholdPercent) {
					s.DispatchEvent(newEvent(ElectionTimeoutThresholdEventType, elapsedTime, nil))
				}
				e.returnValue, update = s.processAppendEntriesRequest(req)
			case *RequestVoteRequest:
				e.returnValue, update = s.processRequestVoteRequest(req)
			case *SnapshotRequest:
				e.returnValue = s.processSnapshotRequest(req)
			default:
				// Anything else (e.g. other commands) needs a leader.
				err = NotLeaderError
			}
			// Callback to event.
			e.c <- err

		case <-timeoutChan:
			// only allow synced follower to promote to candidate
			if s.promotable() {
				s.setState(Candidate)
			} else {
				// Not promotable yet: keep following and restart the timer.
				update = true
			}
		}

		// Re-arm the election timer after a valid AppendEntries, a granted
		// vote, or a timeout on a server that is not promotable.
		if update {
			since = time.Now()
			timeoutChan = afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2)
		}
	}
}

// The event loop that is run when the server is in a Candidate state.
//
// Each election round increments the term, votes for this server and asks
// every peer for its vote. The round ends when:
//   - votes from a majority arrive: the server becomes leader and returns;
//   - an AppendEntries or RequestVote handler steps the server down;
//   - a vote response carries a higher term: the server steps down (§5.1);
//   - the election timeout elapses: a new round is started.
//
// The loop also returns, after setting the state to Stopped, when the stopped
// channel is closed.
func (s *server) candidateLoop() {
	// Clear leader value.
	prevLeader := s.leader
	s.leader = ""
	if prevLeader != s.leader {
		s.DispatchEvent(newEvent(LeaderChangeEventType, s.leader, prevLeader))
	}

	lastLogIndex, lastLogTerm := s.log.lastInfo()
	doVote := true
	votesGranted := 0
	var timeoutChan <-chan time.Time
	var respChan chan *RequestVoteResponse

	for s.State() == Candidate {
		if doVote {
			// Increment current term, vote for self.
			s.currentTerm++
			s.votedFor = s.name

			// Send RequestVote RPCs to all other servers.
			//
			// The request and the response channel are fixed here, on the
			// event-loop goroutine, before the senders are spawned. Reading
			// s.currentTerm or respChan from inside the goroutines would race
			// with later rounds and step-downs, and could send a request for
			// a term this server never campaigned in or deliver a stale
			// response to a newer round's channel.
			votes := make(chan *RequestVoteResponse, len(s.peers))
			respChan = votes
			for _, peer := range s.peers {
				req := newRequestVoteRequest(s.currentTerm, s.name, lastLogIndex, lastLogTerm)
				s.routineGroup.Add(1)
				go func(peer *Peer) {
					defer s.routineGroup.Done()
					peer.sendVoteRequest(req, votes)
				}(peer)
			}

			// Wait for either:
			//   * Votes received from majority of servers: become leader
			//   * AppendEntries RPC received from new leader: step down.
			//   * Election timeout elapses without election resolution: increment term, start new election
			//   * Discover higher term: step down (§5.1)
			votesGranted = 1
			timeoutChan = afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2)
			doVote = false
		}

		// If we received enough votes then stop waiting for more votes.
		// And return from the candidate loop
		if votesGranted == s.QuorumSize() {
			s.debugln("server.candidate.recv.enough.votes")
			s.setState(Leader)
			return
		}

		// Collect votes from peers.
		select {
		case <-s.stopped:
			s.setState(Stopped)
			return

		case resp := <-respChan:
			if success := s.processVoteResponse(resp); success {
				s.debugln("server.candidate.vote.granted: ", votesGranted)
				votesGranted++
			}

		case e := <-s.c:
			var err error
			switch req := e.target.(type) {
			case Command:
				err = NotLeaderError
			case *AppendEntriesRequest:
				e.returnValue, _ = s.processAppendEntriesRequest(req)
			case *RequestVoteRequest:
				e.returnValue, _ = s.processRequestVoteRequest(req)
			}

			// Callback to event.
			e.c <- err

		case <-timeoutChan:
			doVote = true
		}
	}
}

// The event loop that is run when the server is in a Leader state.
//
// On entry it points every peer at the leader's last log index, starts the
// peers' heartbeats and submits a NOP command. It then serves commands, RPC
// requests and AppendEntries responses until the state is no longer Leader
// or the stopped channel is closed.
func (s *server) leaderLoop() {
	logIndex, _ := s.log.lastInfo()

	// Update the peers prevLogIndex to leader's lastLogIndex and start heartbeat.
	s.debugln("leaderLoop.set.PrevIndex to ", logIndex)
	for _, peer := range s.peers {
		peer.setPrevLogIndex(logIndex)
		peer.startHeartbeat()
	}

	// Commit a NOP after the server becomes leader. From the Raft paper:
	// "Upon election: send initial empty AppendEntries RPCs (heartbeat) to
	// each server; repeat during idle periods to prevent election timeouts
	// (§5.2)". The heartbeats started above do the "idle" period work.
	//
	// Do blocks until the command has been processed, and this goroutine is
	// the one that processes it, so it has to be issued from another one.
	s.routineGroup.Add(1)
	go func() {
		defer s.routineGroup.Done()
		s.Do(NOPCommand{})
	}()

	// Begin to collect response from followers
	for s.State() == Leader {
		var err error
		select {
		case <-s.stopped:
			// Stop all peers before stop
			for _, peer := range s.peers {
				peer.stopHeartbeat(false)
			}
			s.setState(Stopped)
			return

		case e := <-s.c:
			switch req := e.target.(type) {
			case Command:
				// processCommand reports errors on e.c itself, so the
				// callback below is skipped for commands.
				s.processCommand(req, e)
				continue
			case *AppendEntriesRequest:
				e.returnValue, _ = s.processAppendEntriesRequest(req)
			case *AppendEntriesResponse:
				s.processAppendEntriesResponse(req)
			case *RequestVoteRequest:
				e.returnValue, _ = s.processRequestVoteRequest(req)
			}

			// Callback to event.
			e.c <- err
		}
	}

	// No longer leader: drop the synced-peer bookkeeping. This is not reached
	// on the stopped path above, which returns early.
	s.syncedPeer = nil
}

// snapshotLoop is the event loop that is run when the server is in the
// Snapshotting state. Commands are rejected with NotLeaderError;
// AppendEntries, RequestVote and snapshot-recovery requests are processed.
// The loop returns when the state is no longer Snapshotting, or after setting
// the state to Stopped when the stopped channel is closed.
func (s *server) snapshotLoop() {
	for s.State() == Snapshotting {
		var err error
		select {
		case <-s.stopped:
			s.setState(Stopped)
			return

		case e := <-s.c:
			switch req := e.target.(type) {
			case Command:
				err = NotLeaderError
			case *AppendEntriesRequest:
				e.returnValue, _ = s.processAppendEntriesRequest(req)
			case *RequestVoteRequest:
				e.returnValue, _ = s.processRequestVoteRequest(req)
			case *SnapshotRecoveryRequest:
				e.returnValue = s.processSnapshotRecoveryRequest(req)
			}
			// Callback to event.
			e.c <- err
		}
	}
}

//--------------------------------------
// Commands
//--------------------------------------

// Do attempts to execute a command and replicate it. It hands the command to
// the event loop and blocks until the loop reports a result or an error, or
// the server is stopped.
func (s *server) Do(command Command) (interface{}, error) {
	result, err := s.send(command)
	return result, err
}

// processCommand turns a command into a log entry and appends it to the log.
//
// Failures to create or append the entry are reported on the event's channel.
// On success nothing is sent here; the event travels with the entry (it is
// passed to createEntry). With no peers the entry is committed immediately,
// since this server alone forms the quorum.
func (s *server) processCommand(command Command, e *ev) {
	s.debugln("server.command.process")

	entry, createErr := s.log.createEntry(s.currentTerm, command, e)
	if createErr != nil {
		s.debugln("server.command.log.entry.error:", createErr)
		e.c <- createErr
		return
	}

	if appendErr := s.log.appendEntry(entry); appendErr != nil {
		s.debugln("server.command.log.error:", appendErr)
		e.c <- appendErr
		return
	}

	// The leader itself now holds an entry from its own term.
	s.syncedPeer[s.name] = true

	if len(s.peers) != 0 {
		return
	}

	// Single-node cluster: nothing to replicate, commit right away.
	idx := s.log.currentIndex()
	s.log.setCommitIndex(idx)
	s.debugln("commit index ", idx)
}

//--------------------------------------
// Append Entries
//--------------------------------------

// AppendEntries appends zero or more log entries from the leader to this
// server by routing the request through the event loop. It returns nil when
// the loop produced no AppendEntriesResponse, e.g. because the server is
// stopped.
func (s *server) AppendEntries(req *AppendEntriesRequest) *AppendEntriesResponse {
	result, _ := s.send(req)
	if resp, ok := result.(*AppendEntriesResponse); ok {
		return resp
	}
	return nil
}

// processAppendEntriesRequest handles an "append entries" request.
//
// It returns the response to send back and whether the request counts as
// contact from a legitimate leader. The flag is false only for requests with
// a stale term; the follower loop uses it to reset its election timer.
func (s *server) processAppendEntriesRequest(req *AppendEntriesRequest) (*AppendEntriesResponse, bool) {
	s.traceln("server.ae.process")

	// reply builds a response from the term and log positions at the moment
	// it is called.
	reply := func(success bool) *AppendEntriesResponse {
		return newAppendEntriesResponse(s.currentTerm, success, s.log.currentIndex(), s.log.CommitIndex())
	}

	if req.Term < s.currentTerm {
		s.debugln("server.ae.error: stale term")
		return reply(false), false
	}

	if req.Term > s.currentTerm {
		// Newer term: adopt it together with its leader.
		s.updateCurrentTerm(req.Term, req.LeaderName)
	} else {
		// Same term: there cannot be a second leader in it.
		_assert(s.State() != Leader, "leader.elected.at.same.term.%d\n", s.currentTerm)

		// A candidate that hears from the leader of its term steps down.
		if s.state == Candidate {
			s.setState(Follower)
		}

		// Candidates discover the new leader here; followers record it.
		s.leader = req.LeaderName
	}

	// Reject if the log doesn't contain a matching previous entry.
	if truncErr := s.log.truncate(req.PrevLogIndex, req.PrevLogTerm); truncErr != nil {
		s.debugln("server.ae.truncate.error: ", truncErr)
		return reply(false), true
	}

	// Append the leader's entries to the log.
	if appendErr := s.log.appendEntries(req.Entries); appendErr != nil {
		s.debugln("server.ae.append.error: ", appendErr)
		return reply(false), true
	}

	// Commit up to the leader's commit index.
	if commitErr := s.log.setCommitIndex(req.CommitIndex); commitErr != nil {
		s.debugln("server.ae.commit.error: ", commitErr)
		return reply(false), true
	}

	// Everything from the leader has been appended and committed.
	return reply(true), true
}

// processAppendEntriesResponse handles a peer's answer to an "append entries"
// request. This is only processed when the server is a leader. Responses
// received during other states are dropped.
//
// A higher term in the response makes the leader step down. Unsuccessful
// responses are ignored. Otherwise the leader advances its commit index to
// the highest index stored on a quorum, once a quorum of members has appended
// an entry from this leader.
func (s *server) processAppendEntriesResponse(resp *AppendEntriesResponse) {
	// A higher term means we are out of date: become a follower and exit.
	if resp.Term() > s.Term() {
		s.updateCurrentTerm(resp.Term(), "")
		return
	}

	// Nothing to do for an unsuccessful response.
	if !resp.Success() {
		return
	}

	// A peer that appended an entry from this leader's term counts as synced.
	if resp.append {
		s.syncedPeer[resp.peer] = true
	}

	// Without a quorum of synced members nothing can be committed yet.
	if len(s.syncedPeer) < s.QuorumSize() {
		return
	}

	// Gather the latest index of every member, highest first.
	indices := []uint64{s.log.currentIndex()}
	for _, peer := range s.peers {
		indices = append(indices, peer.getPrevLogIndex())
	}
	sort.Sort(sort.Reverse(uint64Slice(indices)))

	// The index at position quorum-1 is present on a majority of members.
	quorumIndex := indices[s.QuorumSize()-1]
	if quorumIndex <= s.log.commitIndex {
		return
	}

	// The leader must fsync its log before committing entries.
	s.log.sync()
	s.log.setCommitIndex(quorumIndex)
	s.debugln("commit index ", quorumIndex)
}

// processVoteResponse processes the response to a vote request:
//  1. if the vote is granted for the candidate's current term, return true;
//  2. if the response carries a larger term, adopt that term, which also
//     makes the candidate step down, and return false;
//  3. otherwise the vote was denied or belongs to another term: ignore it and
//     return false.
func (s *server) processVoteResponse(resp *RequestVoteResponse) bool {
	switch {
	case resp.VoteGranted && resp.Term == s.currentTerm:
		return true
	case resp.Term > s.currentTerm:
		s.debugln("server.candidate.vote.failed")
		s.updateCurrentTerm(resp.Term, "")
	default:
		s.debugln("server.candidate.vote: denied")
	}
	return false
}

//--------------------------------------
// Request Vote
//--------------------------------------

// RequestVote asks this server for its vote by routing the request through
// the event loop. A vote can be obtained if the request's term equals the
// server's current term and the server has not voted for someone else, or if
// the term is greater than the server's current term. It returns nil when the
// loop produced no RequestVoteResponse, e.g. because the server is stopped.
func (s *server) RequestVote(req *RequestVoteRequest) *RequestVoteResponse {
	result, _ := s.send(req)
	if resp, ok := result.(*RequestVoteResponse); ok {
		return resp
	}
	return nil
}

// processRequestVoteRequest handles a "request vote" request.
//
// It returns the response to send back and whether the vote was granted; the
// follower loop resets its election timer on a granted vote.
//
// The vote is denied when:
//   - the request comes from an older term;
//   - this server already voted for a different candidate in the same term;
//   - the candidate's log is not at least as up-to-date as this server's log.
//
// A request with a newer term first moves this server to that term.
func (s *server) processRequestVoteRequest(req *RequestVoteRequest) (*RequestVoteResponse, bool) {

	// If the request is coming from an old term then reject it.
	if req.Term < s.Term() {
		s.debugln("server.rv.deny.vote: cause stale term")
		return newRequestVoteResponse(s.currentTerm, false), false
	}

	// If the term of the request peer is larger than this node, update the term
	// If the term is equal and we've already voted for a different candidate then
	// don't vote for this candidate.
	if req.Term > s.Term() {
		s.updateCurrentTerm(req.Term, "")
	} else if s.votedFor != "" && s.votedFor != req.CandidateName {
		s.debugln("server.deny.vote: cause duplicate vote: ", req.CandidateName,
			" already vote for ", s.votedFor)
		return newRequestVoteResponse(s.currentTerm, false), false
	}

	// If the candidate's log is not at least as up-to-date as our last log
	// then don't vote. Per Raft §5.4.1 the last terms are compared first and
	// the log length only breaks ties. Comparing index and term
	// independently also rejects candidates whose last entry has a newer
	// term but a lower index, which can leave a live majority unable to
	// elect any leader.
	lastIndex, lastTerm := s.log.lastInfo()
	if lastTerm > req.LastLogTerm || (lastTerm == req.LastLogTerm && lastIndex > req.LastLogIndex) {
		s.debugln("server.deny.vote: cause out of date log: ", req.CandidateName,
			"Index :[", lastIndex, "]", " [", req.LastLogIndex, "]",
			"Term :[", lastTerm, "]", " [", req.LastLogTerm, "]")
		return newRequestVoteResponse(s.currentTerm, false), false
	}

	// If we made it this far then cast a vote and reset our election time out.
	s.debugln("server.rv.vote: ", s.name, " votes for", req.CandidateName, "at term", req.Term)
	s.votedFor = req.CandidateName

	return newRequestVoteResponse(s.currentTerm, true), true
}

//--------------------------------------
// Membership
//--------------------------------------

// AddPeer adds a peer to the server and persists the configuration.
//
// Adding a name that is already known is a silent no-op. Adding the server's
// own name creates no peer but still writes the configuration. A peer added
// while this server is leader starts receiving heartbeats immediately. The
// returned error is always nil.
func (s *server) AddPeer(name string, connectiongString string) error {
	s.debugln("server.peer.add: ", name, len(s.peers))

	// Do not allow peers to be added twice.
	if existing := s.peers[name]; existing != nil {
		return nil
	}

	// The server never lists itself as a peer; only persist the configuration.
	if name == s.name {
		s.writeConf()
		return nil
	}

	peer := newPeer(s, name, connectiongString, s.heartbeatInterval)
	if s.State() == Leader {
		peer.startHeartbeat()
	}
	s.peers[peer.Name] = peer

	s.DispatchEvent(newEvent(AddPeerEventType, name, nil))

	// Write the configuration to file.
	s.writeConf()
	return nil
}

// RemovePeer removes a peer from the server and persists the configuration.
//
// Removing the server's own name touches no peer but still writes the
// configuration. An error is returned when the named peer is unknown.
func (s *server) RemovePeer(name string) error {
	s.debugln("server.peer.remove: ", name, len(s.peers))

	// The server is never in its own peer table; only persist the configuration.
	if name == s.name {
		s.writeConf()
		return nil
	}

	peer := s.peers[name]
	if peer == nil {
		return fmt.Errorf("raft: Peer not found: %s", name)
	}

	if s.State() == Leader {
		// The heartbeat is stopped from a separate goroutine to avoid a
		// potential deadlock: the log write lock is held when this line is
		// reached, and stopHeartbeat can block while the heartbeat goroutine
		// is inside log.getEntriesAfter waiting for the log read lock. We
		// would then be holding the log lock while waiting for the log lock.
		// TODO(xiangli) refactor log lock
		s.routineGroup.Add(1)
		go func() {
			defer s.routineGroup.Done()
			peer.stopHeartbeat(true)
		}()
	}

	delete(s.peers, name)
	s.DispatchEvent(newEvent(RemovePeerEventType, name, nil))

	// Write the configuration to file.
	s.writeConf()
	return nil
}

//--------------------------------------
// Log compaction
//--------------------------------------

// TakeSnapshot saves the state machine at the log's current commit point,
// writes the result to disk as the server's snapshot and, when more than
// NumberOfLogEntriesAfterSnapshot entries lie between the log's start index
// and the commit index, compacts the log.
//
// It returns an error if no state machine is configured, if a previous
// snapshot is still pending, if the state machine cannot be saved, or if the
// snapshot cannot be written to disk. It returns nil without doing anything
// when the commit index equals the log's start index.
func (s *server) TakeSnapshot() error {
	if s.stateMachine == nil {
		return errors.New("Snapshot: Cannot create snapshot. Missing state machine.")
	}

	// Shortcut without lock
	// Exit if the server is currently creating a snapshot.
	if s.pendingSnapshot != nil {
		return errors.New("Snapshot: Last snapshot is not finished.")
	}

	// TODO: acquire the lock and no more committed is allowed
	// This will be done after finishing refactoring heartbeat
	s.debugln("take.snapshot")

	lastIndex, lastTerm := s.log.commitInfo()

	// Nothing to do when the commit index still equals the log's start index.
	if lastIndex == s.log.startIndex {
		return nil
	}

	// Mark a snapshot as in progress; peers and state are filled in below.
	snapshotPath := s.SnapshotPath(lastIndex, lastTerm)
	s.pendingSnapshot = &Snapshot{lastIndex, lastTerm, nil, nil, snapshotPath}

	state, err := s.stateMachine.Save()
	if err != nil {
		s.pendingSnapshot = nil
		return err
	}

	// Clone the list of peers and append this server itself.
	peers := make([]*Peer, 0, len(s.peers)+1)
	for _, peer := range s.peers {
		peers = append(peers, peer.clone())
	}
	peers = append(peers, &Peer{Name: s.Name(), ConnectionString: s.connectionString})

	// Complete the pending snapshot and save it to disk. A failed save must
	// stop here: compacting the log without a snapshot on disk would discard
	// entries that exist nowhere else. saveSnapshot clears pendingSnapshot
	// itself when the write fails.
	s.pendingSnapshot.Peers = peers
	s.pendingSnapshot.State = state
	if err := s.saveSnapshot(); err != nil {
		return err
	}

	// We keep some log entries after the snapshot.
	// We do not want to send the whole snapshot to the slightly slow machines
	if lastIndex-s.log.startIndex > NumberOfLogEntriesAfterSnapshot {
		compactIndex := lastIndex - NumberOfLogEntriesAfterSnapshot
		compactTerm := s.log.getEntry(compactIndex).Term()
		s.log.compact(compactIndex, compactTerm)
	}

	return nil
}

// saveSnapshot writes the pending snapshot to disk and, on success, makes it
// the server's current snapshot. The previously held snapshot is removed
// unless it has the same last index and last term as the new one. In every
// outcome other than "no pending snapshot" the pending slot is cleared.
//
// It returns an error when there is no pending snapshot or when the write
// fails.
func (s *server) saveSnapshot() error {
	pending := s.pendingSnapshot
	if pending == nil {
		return errors.New("pendingSnapshot.is.nil")
	}

	// Persist first; a failed write abandons the pending snapshot.
	if err := pending.save(); err != nil {
		s.pendingSnapshot = nil
		return err
	}

	// Promote the pending snapshot to current.
	previous := s.snapshot
	s.snapshot = pending

	// Remove the old snapshot only when it differs from the new one in last
	// index or last term.
	if previous != nil && (previous.LastIndex != pending.LastIndex || previous.LastTerm != pending.LastTerm) {
		previous.remove()
	}
	s.pendingSnapshot = nil

	return nil
}

// SnapshotList returns the names of the regular ".ss" files in the
// "snapshot" directory under the server's path, ordered newest to oldest.
//
// Snapshot files are named "<term>_<index>.ss" (see SnapshotPath), so
// "newest" means the highest term, then the highest index, compared as
// numbers. A plain reverse string sort would put "1_99.ss" ahead of
// "1_100.ss". Names that do not follow the pattern are listed after all
// well-formed names, in descending lexical order.
//
// The error from reading the directory is returned unchanged.
func (s *server) SnapshotList() ([]string, error) {
	// Get FileInfo for everything in the snapshot dir
	ssdir := path.Join(s.path, "snapshot")
	finfos, err := ioutil.ReadDir(ssdir)
	if err != nil {
		return nil, err
	}

	// Build a list of snapshot file names
	var ssnames []string
	for _, finfo := range finfos {
		fname := finfo.Name()
		if finfo.Mode().IsRegular() && filepath.Ext(fname) == ".ss" {
			ssnames = append(ssnames, fname)
		}
	}

	// Sort snapshot names from newest to oldest
	sort.Sort(snapshotNamesNewestFirst(ssnames))

	return ssnames, nil
}

// snapshotNamesNewestFirst implements sort.Interface over snapshot file
// names, ordering them by descending (term, index).
type snapshotNamesNewestFirst []string

func (n snapshotNamesNewestFirst) Len() int      { return len(n) }
func (n snapshotNamesNewestFirst) Swap(i, j int) { n[i], n[j] = n[j], n[i] }

func (n snapshotNamesNewestFirst) Less(i, j int) bool {
	termI, indexI, okI := parseSnapshotFileName(n[i])
	termJ, indexJ, okJ := parseSnapshotFileName(n[j])

	switch {
	case okI && okJ:
		if termI != termJ {
			return termI > termJ
		}
		if indexI != indexJ {
			return indexI > indexJ
		}
		// Same numeric values spelled differently: fall back to the name so
		// the ordering stays total.
		return n[i] > n[j]
	case okI != okJ:
		// Well-formed names come before malformed ones.
		return okI
	default:
		return n[i] > n[j]
	}
}

// parseSnapshotFileName extracts the term and index from a name of the form
// "<term>_<index>.ss". ok is false when the name does not match.
func parseSnapshotFileName(name string) (term uint64, index uint64, ok bool) {
	if _, err := fmt.Sscanf(name, "%d_%d.ss", &term, &index); err != nil {
		return 0, 0, false
	}
	return term, index, true
}

// SnapshotPath returns the file path for a snapshot with the given last
// index and last term: a file named "<lastTerm>_<lastIndex>.ss" inside the
// "snapshot" directory under the server's path.
func (s *server) SnapshotPath(lastIndex uint64, lastTerm uint64) string {
	fileName := fmt.Sprintf("%v_%v.ss", lastTerm, lastIndex)
	return path.Join(s.path, "snapshot", fileName)
}

// RequestSnapshot passes the snapshot request to s.send and returns the
// result when it is a *SnapshotResponse. The error reported by send is
// discarded; nil is returned when the result is of any other type.
func (s *server) RequestSnapshot(req *SnapshotRequest) *SnapshotResponse {
	result, _ := s.send(req)
	if resp, ok := result.(*SnapshotResponse); ok {
		return resp
	}
	return nil
}

// processSnapshotRequest decides whether this server needs the snapshot
// described by req. It answers false when the local log already holds an
// entry at req.LastIndex whose term equals req.LastTerm; otherwise it
// switches the server into the Snapshotting state and answers true.
func (s *server) processSnapshotRequest(req *SnapshotRequest) *SnapshotResponse {
	// A matching entry at the snapshot's last index means the follower
	// already has everything the snapshot would provide.
	if local := s.log.getEntry(req.LastIndex); local != nil && local.Term() == req.LastTerm {
		return newSnapshotResponse(false)
	}

	// Update state.
	s.setState(Snapshotting)

	return newSnapshotResponse(true)
}

// SnapshotRecoveryRequest passes the snapshot recovery request to s.send and
// returns the result when it is a *SnapshotRecoveryResponse. The error
// reported by send is discarded; nil is returned when the result is of any
// other type.
func (s *server) SnapshotRecoveryRequest(req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse {
	result, _ := s.send(req)
	if resp, ok := result.(*SnapshotRecoveryResponse); ok {
		return resp
	}
	return nil
}

// processSnapshotRecoveryRequest replaces the server's state with the
// snapshot carried by req: the state machine is recovered from req.State, the
// peer map is rebuilt from req.Peers, the current term and commit index are
// set from the request, a local snapshot is saved, and the log is compacted
// at req.LastIndex / req.LastTerm.
//
// It panics if the state machine cannot recover from the supplied state.
func (s *server) processSnapshotRecoveryRequest(req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse {
	// Recover state sent from request.
	err := s.stateMachine.Recovery(req.State)
	if err != nil {
		panic("cannot recover from previous state")
	}

	// Rebuild the cluster configuration from scratch.
	s.peers = make(map[string]*Peer)
	for _, member := range req.Peers {
		s.AddPeer(member.Name, member.ConnectionString)
	}

	lastIndex, lastTerm := req.LastIndex, req.LastTerm

	// Update log state.
	s.currentTerm = lastTerm
	s.log.updateCommitIndex(lastIndex)

	// Create local snapshot.
	// NOTE(review): the error from saveSnapshot is not checked, so the log is
	// compacted below even if the snapshot could not be written - confirm
	// whether that is intended.
	snapshotPath := s.SnapshotPath(lastIndex, lastTerm)
	s.pendingSnapshot = &Snapshot{lastIndex, lastTerm, req.Peers, req.State, snapshotPath}
	s.saveSnapshot()

	// Clear the previous log entries.
	s.log.compact(lastIndex, lastTerm)

	return newSnapshotRecoveryResponse(lastTerm, true, lastIndex)
}

// LoadSnapshot restores the server from the snapshots found on disk. The
// names returned by SnapshotList are tried in order and the first one that
// loads is used. The state machine is recovered from it, its peers are
// added, and the log's start term, start index and commit index are set to
// the snapshot's last term and last index.
//
// A missing snapshot directory or an empty one is not an error. Any other
// listing error, the error from the last load attempt when none succeeds,
// or a state machine recovery error is returned.
func (s *server) LoadSnapshot() error {
	names, err := s.SnapshotList()
	if err != nil {
		// No snapshot directory simply means there is nothing to load.
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}
	if len(names) == 0 {
		return nil
	}

	// Try each listed snapshot in turn, falling through to the next one
	// whenever a load fails.
	var loaded *Snapshot
	for _, name := range names {
		fullPath := path.Join(s.path, "snapshot", name)
		loaded, err = loadSnapshot(fullPath)
		if err == nil {
			break
		}
		s.debugln(err)
	}

	// err still holds the last failure when no snapshot could be loaded.
	if err != nil {
		return err
	}

	s.snapshot = loaded

	// Recover snapshot into state machine.
	if err = s.stateMachine.Recovery(loaded.State); err != nil {
		s.debugln("recovery.snapshot.error: ", err)
		return err
	}

	// Recover cluster configuration.
	for _, member := range loaded.Peers {
		s.AddPeer(member.Name, member.ConnectionString)
	}

	// Update log state.
	s.log.startTerm = loaded.LastTerm
	s.log.startIndex = loaded.LastIndex
	s.log.updateCommitIndex(loaded.LastIndex)

	return nil
}

//--------------------------------------
// Config File
//--------------------------------------

// FlushCommitIndex writes the server configuration to disk through
// writeConf. The written configuration includes the log's commit index, and
// readConf feeds that value back into the log when the file is read.
func (s *server) FlushCommitIndex() {
	s.debugln("server.conf.update")
	// Write the configuration to file.
	s.writeConf()
}

// writeConf persists the log's commit index and a clone of every peer to the
// "conf" file under the server's path. The JSON payload is first written to
// "conf.tmp" and then renamed over "conf".
//
// Failure handling:
//   - if the configuration cannot be encoded, nothing is written and the
//     existing conf file is left as it is;
//   - if the temporary file cannot be written, the function panics;
//   - if the rename fails, the failure is logged and the existing conf file
//     stays in place.
func (s *server) writeConf() {
	// Clone the peers so the encoded configuration does not share the
	// entries held in the server's peer map.
	peers := make([]*Peer, 0, len(s.peers))
	for _, peer := range s.peers {
		peers = append(peers, peer.clone())
	}

	conf := &Config{
		CommitIndex: s.log.commitIndex,
		Peers:       peers,
	}

	b, err := json.Marshal(conf)
	if err != nil {
		// Writing an empty payload would replace a readable conf file with
		// one that readConf cannot parse, so keep the previous file.
		s.debugln("server.conf.marshal.error: ", err)
		return
	}

	confPath := path.Join(s.path, "conf")
	tmpConfPath := path.Join(s.path, "conf.tmp")

	if err := writeFileSynced(tmpConfPath, b, 0600); err != nil {
		panic(err)
	}

	// Swap the new file into place; report a failed swap instead of
	// dropping it silently.
	if err := os.Rename(tmpConfPath, confPath); err != nil {
		s.debugln("server.conf.rename.error: ", err)
	}
}

// readConf loads the "conf" file under the server's path and applies the
// commit index stored in it to the log. Only the commit index is used here;
// the peers recorded in the file are not applied by this function.
//
// Any failure to read the file is treated as "no configuration" and yields
// nil. A file that is read but cannot be decoded yields the decode error.
func (s *server) readConf() error {
	confPath := path.Join(s.path, "conf")
	s.debugln("readConf.open ", confPath)

	// NOTE(review): every read error is swallowed here, not only "file does
	// not exist" - confirm that is intended.
	raw, readErr := ioutil.ReadFile(confPath)
	if readErr != nil {
		return nil
	}

	var conf Config
	if decodeErr := json.Unmarshal(raw, &conf); decodeErr != nil {
		return decodeErr
	}

	s.log.updateCommitIndex(conf.CommitIndex)

	return nil
}

//--------------------------------------
// Debugging
//--------------------------------------

// debugln emits a debug message prefixed with the server's name and current
// term. The arguments are formatted with fmt.Sprintln. Nothing is formatted
// or emitted unless logLevel is greater than Debug.
func (s *server) debugln(v ...interface{}) {
	if logLevel <= Debug {
		return
	}
	debugf("[%s Term:%d] %s", s.name, s.Term(), fmt.Sprintln(v...))
}

// traceln emits a trace message prefixed with the server's name. The
// arguments are formatted with fmt.Sprintln. Nothing is formatted or emitted
// unless logLevel is greater than Trace.
func (s *server) traceln(v ...interface{}) {
	if logLevel <= Trace {
		return
	}
	tracef("[%s] %s", s.name, fmt.Sprintln(v...))
}
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								package raft
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+								import (
-												add comments and gofmt

											
										
										
											2013-06-24 16:52:51 +00:00
+									"encoding/json"
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+									"errors"
 									"fmt"
-												go fmt

											
										
										
											2013-06-08 02:19:18 +00:00
+									"io/ioutil"
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									"os"
-												fix issues (https://github.com/benbjohnson/go-raft/pull/19)

											
										
										
											2013-06-05 17:38:49 +00:00
+									"path"
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+									"path/filepath"
-												go fmt

											
										
										
											2013-06-08 02:19:18 +00:00
+									"sort"
 									"sync"
 									"time"
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+								)
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								//------------------------------------------------------------------------------
 								//
 								// Constants
 								//
 								//------------------------------------------------------------------------------
 								const (
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+									Stopped      = "stopped"
-												feat(server.go) introduce server.Init()
This commit enable dividing raft server starting process to two phases. So application can do extra work based on the status of the raft server before starting it. To implement this, we add a initialization phase, in which raft server reads in the log entires from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+									Initialized  = "initialized"
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+									Follower     = "follower"
 									Candidate    = "candidate"
 									Leader       = "leader"
 									Snapshotting = "snapshotting"
-												Add single node configuration.

											
										
										
											2013-04-17 02:32:49 +00:00
+								)
-												move MLPR frmo log to server

											
										
										
											2013-07-15 05:48:41 +00:00
+								const (
-												move buf to log struct

											
										
										
											2013-08-02 00:58:03 +00:00
+									MaxLogEntriesPerRequest         = 2000
-												change func names and fix typo

											
										
										
											2013-07-18 23:44:01 +00:00
+									NumberOfLogEntriesAfterSnapshot = 200
-												move MLPR frmo log to server

											
										
										
											2013-07-15 05:48:41 +00:00
+								)
-												Add single node configuration.

											
										
										
											2013-04-17 02:32:49 +00:00
+								const (
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+									// DefaultHeartbeatInterval is the interval that the leader will send
 									// AppendEntriesRequests to followers to maintain leadership.
 									DefaultHeartbeatInterval = 50 * time.Millisecond
 									DefaultElectionTimeout = 150 * time.Millisecond
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								)
-												Add election threshold and heartbeat timeout events.

											
										
										
											2013-12-29 19:54:00 +00:00
+								// ElectionTimeoutThresholdPercent specifies the threshold at which the server
 								// will dispatch warning events that the heartbeat RTT is too close to the
 								// election timeout.
-												Merge branch 'master' of https://github.com/goraft/raft

											
										
										
											2013-12-29 19:54:15 +00:00
+								const ElectionTimeoutThresholdPercent = 0.8
-												Add election threshold and heartbeat timeout events.

											
										
										
											2013-12-29 19:54:00 +00:00
-												Remove join command. Fix race condition.

											
										
										
											2013-05-28 18:46:27 +00:00
+								//------------------------------------------------------------------------------
 								//
 								// Errors
 								//
 								//------------------------------------------------------------------------------
 								var NotLeaderError = errors.New("raft.Server: Not current leader")
 								var DuplicatePeerError = errors.New("raft.Server: Duplicate peer")
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								var CommandTimeoutError = errors.New("raft: Command timeout")
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+								var StopError = errors.New("raft: Has been stopped")
-												Deny command execution for non-leaders.

											
										
										
											2013-05-27 02:04:41 +00:00
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								//------------------------------------------------------------------------------
 								//
 								// Typedefs
 								//
 								//------------------------------------------------------------------------------
 								// A server is involved in the consensus protocol and can act as a follower,
 								// candidate or a leader.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								type Server interface {
 									Name() string
 									Context() interface{}
 									StateMachine() StateMachine
 									Leader() string
 									State() string
 									Path() string
 									LogPath() string
-												Server interface fix.

											
										
										
											2013-10-14 19:33:01 +00:00
+									SnapshotPath(lastIndex uint64, lastTerm uint64) string
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+									Term() uint64
 									CommitIndex() uint64
 									VotedFor() string
 									MemberCount() int
 									QuorumSize() int
 									IsLogEmpty() bool
-												Server interface fix.

											
										
										
											2013-10-14 19:40:20 +00:00
+									LogEntries() []*LogEntry
 									LastCommandName() string
 									GetState() string
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+									ElectionTimeout() time.Duration
 									SetElectionTimeout(duration time.Duration)
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+									HeartbeatInterval() time.Duration
 									SetHeartbeatInterval(duration time.Duration)
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+									Transporter() Transporter
 									SetTransporter(t Transporter)
 									AppendEntries(req *AppendEntriesRequest) *AppendEntriesResponse
 									RequestVote(req *RequestVoteRequest) *RequestVoteResponse
-												Server interface fix.

											
										
										
											2013-10-14 19:33:01 +00:00
+									RequestSnapshot(req *SnapshotRequest) *SnapshotResponse
 									SnapshotRecoveryRequest(req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+									AddPeer(name string, connectiongString string) error
 									RemovePeer(name string) error
 									Peers() map[string]*Peer
-												feat(server.go) introduce server.Init()
This commit enable dividing raft server starting process to two phases. So application can do extra work based on the status of the raft server before starting it. To implement this, we add a initialization phase, in which raft server reads in the log entires from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+									Init() error
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+									Start() error
 									Stop()
 									Running() bool
 									Do(command Command) (interface{}, error)
-												Server interface fix.

											
										
										
											2013-10-14 19:33:01 +00:00
+									TakeSnapshot() error
 									LoadSnapshot() error
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									AddEventListener(string, EventListener)
-												chore: add interface for FlushCommitIndex

											
										
										
											2014-03-21 01:13:38 +00:00
+									FlushCommitIndex()
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								}
 								type server struct {
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									*eventDispatcher
-												add comments and gofmt

											
										
										
											2013-06-24 16:52:51 +00:00
+									name        string
 									path        string
 									state       string
 									transporter Transporter
 									context     interface{}
 									currentTerm uint64
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+									votedFor   string
 									log        *Log
 									leader     string
 									peers      map[string]*Peer
 									mutex      sync.RWMutex
 									syncedPeer map[string]bool
-												when the leader fails in during the collecting response phase, let it step down

											
										
										
											2013-06-13 18:03:32 +00:00
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+									stopped           chan bool
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+									c                 chan *ev
 									electionTimeout   time.Duration
 									heartbeatInterval time.Duration
-												when the leader fails in during the collecting response phase, let it step down

											
										
										
											2013-06-13 18:03:32 +00:00
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+									snapshot *Snapshot
 									// PendingSnapshot is an unfinished snapshot.
 									// After the pendingSnapshot is saved to disk,
 									// it will be set to snapshot and also will be
 									// set to nil.
 									pendingSnapshot *Snapshot
-												move MLPR frmo log to server

											
										
										
											2013-07-15 05:48:41 +00:00
+									stateMachine            StateMachine
 									maxLogEntriesPerRequest uint64
-												fix snapshot related issue

											
										
										
											2013-09-18 04:19:46 +00:00
 									connectionString string
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
 									routineGroup sync.WaitGroup
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								}
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+								// An internal event to be processed by the server's event loop.
 								type ev struct {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									target      interface{}
 									returnValue interface{}
 									c           chan error
 								}
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								//------------------------------------------------------------------------------
 								//
 								// Constructor
 								//
 								//------------------------------------------------------------------------------
-												Additional documentation

											
										
										
											2013-11-24 23:15:02 +00:00
+								// Creates a new server with a log at the given path. transporter must
 								// not be nil. stateMachine can be nil if snapshotting and log
 								// compaction is to be disabled. context can be anything (including nil)
 								// and is not used by the raft package except returned by
 								// Server.Context(). connectionString can be anything.
-												Command.Apply(Context)

											
										
										
											2013-12-23 14:16:37 +00:00
+								func NewServer(name string, path string, transporter Transporter, stateMachine StateMachine, ctx interface{}, connectionString string) (Server, error) {
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+									if name == "" {
 										return nil, errors.New("raft.Server: Name cannot be blank")
 									}
-												Switch to use Transporter interface.

											
										
										
											2013-05-28 19:57:38 +00:00
+									if transporter == nil {
-												Minor cleanup.

											
										
										
											2013-06-08 02:41:36 +00:00
+										panic("raft: Transporter required")
-												Switch to use Transporter interface.

											
										
										
											2013-05-28 19:57:38 +00:00
+									}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+									s := &server{
-												move MLPR frmo log to server

											
										
										
											2013-07-15 05:48:41 +00:00
+										name:                    name,
 										path:                    path,
 										transporter:             transporter,
 										stateMachine:            stateMachine,
-												Command.Apply(Context)

											
										
										
											2013-12-23 14:16:37 +00:00
+										context:                 ctx,
-												move MLPR frmo log to server

											
										
										
											2013-07-15 05:48:41 +00:00
+										state:                   Stopped,
 										peers:                   make(map[string]*Peer),
 										log:                     newLog(),
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+										c:                       make(chan *ev, 256),
-												move MLPR frmo log to server

											
										
										
											2013-07-15 05:48:41 +00:00
+										electionTimeout:         DefaultElectionTimeout,
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+										heartbeatInterval:       DefaultHeartbeatInterval,
-												move MLPR frmo log to server

											
										
										
											2013-07-15 05:48:41 +00:00
+										maxLogEntriesPerRequest: MaxLogEntriesPerRequest,
-												chore(server): spelling error

s/connectiongString/connectionString/
											
										
										
											2013-09-22 04:08:18 +00:00
+										connectionString:        connectionString,
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+									}
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									s.eventDispatcher = newEventDispatcher(s)
-												Add basic append entries test.

											
										
										
											2013-04-30 04:13:50 +00:00
 									// Setup apply function.
-												Dispatch commit event.

											
										
										
											2014-02-10 16:36:17 +00:00
+									s.log.ApplyFunc = func(e *LogEntry, c Command) (interface{}, error) {
 										// Dispatch commit event.
 										s.DispatchEvent(newEvent(CommitEventType, e, nil))
 										// Apply command to the state machine.
-												Command.Apply(Context)

											
										
										
											2013-12-23 14:16:37 +00:00
+										switch c := c.(type) {
 										case CommandApply:
-												Add Context.CommitIndex().

											
										
										
											2013-12-23 23:07:40 +00:00
+											return c.Apply(&context{
 												server:       s,
 												currentTerm:  s.currentTerm,
-												Fix internal log index deadlock.

											
										
										
											2013-12-27 22:23:59 +00:00
+												currentIndex: s.log.internalCurrentIndex(),
-												Add Context.CommitIndex().

											
										
										
											2013-12-23 23:07:40 +00:00
+												commitIndex:  s.log.commitIndex,
 											})
-												Command.Apply(Context)

											
										
										
											2013-12-23 14:16:37 +00:00
+										case deprecatedCommandApply:
 											return c.Apply(s)
 										default:
 											return nil, fmt.Errorf("Command does not implement Apply()")
 										}
-												Add basic append entries test.

											
										
										
											2013-04-30 04:13:50 +00:00
+									}
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+									return s, nil
 								}
 								//------------------------------------------------------------------------------
 								//
 								// Accessors
 								//
 								//------------------------------------------------------------------------------
-												Add repeated election test.

											
										
										
											2013-05-01 05:21:56 +00:00
+								//--------------------------------------
 								// General
 								//--------------------------------------
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+								// Retrieves the name of the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Name() string {
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+									return s.name
 								}
 								// Retrieves the storage path for the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Path() string {
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+									return s.path
 								}
-												Add multi-node join.

											
										
										
											2013-05-05 19:36:23 +00:00
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+								// The name of the current leader.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Leader() string {
-												fix leader field of Server

											
										
										
											2013-06-03 01:18:25 +00:00
+									return s.leader
-												go fmt

											
										
										
											2013-06-08 02:19:18 +00:00
+								}
-												Add s.Peers() and peer cloning.

											
										
										
											2013-06-26 18:25:22 +00:00
+								// Retrieves a copy of the peer data.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Peers() map[string]*Peer {
-												Add s.Peers() and peer cloning.

											
										
										
											2013-06-26 18:25:22 +00:00
+									s.mutex.Lock()
 									defer s.mutex.Unlock()
 									peers := make(map[string]*Peer)
 									for name, peer := range s.peers {
 										peers[name] = peer.clone()
 									}
 									return peers
 								}
-												Switch to use Transporter interface.

											
										
										
											2013-05-28 19:57:38 +00:00
+								// Retrieves the object that transports requests.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Transporter() Transporter {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									s.mutex.RLock()
 									defer s.mutex.RUnlock()
-												Switch to use Transporter interface.

											
										
										
											2013-05-28 19:57:38 +00:00
+									return s.transporter
 								}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) SetTransporter(t Transporter) {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									s.mutex.Lock()
 									defer s.mutex.Unlock()
-												election new leader and rejoin works

											
										
										
											2013-06-11 22:30:13 +00:00
+									s.transporter = t
 								}
-												Clean up API.

											
										
										
											2013-06-03 02:43:40 +00:00
+								// Retrieves the context passed into the constructor.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Context() interface{} {
-												Clean up API.

											
										
										
											2013-06-03 02:43:40 +00:00
+									return s.context
 								}
-												feat return the stateMachine

											
										
										
											2013-10-10 03:15:57 +00:00
+								// Retrieves the state machine passed into the constructor.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) StateMachine() StateMachine {
-												feat return the stateMachine

											
										
										
											2013-10-10 03:15:57 +00:00
+									return s.stateMachine
 								}
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+								// Retrieves the log path for the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) LogPath() string {
-												refactor fixlog

											
										
										
											2013-07-23 22:30:14 +00:00
+									return path.Join(s.path, "log")
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+								}
 								// Retrieves the current state of the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) State() string {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									s.mutex.RLock()
 									defer s.mutex.RUnlock()
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+									return s.state
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								}
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+								// Sets the state of the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) setState(state string) {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									s.mutex.Lock()
 									defer s.mutex.Unlock()
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
 									// Temporarily store previous values.
 									prevState := s.state
 									prevLeader := s.leader
 									// Update state and leader.
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									s.state = state
-												when change the state to leader, we should also update the s.leader to its own name

											
										
										
											2013-07-08 00:01:55 +00:00
+									if state == Leader {
 										s.leader = s.Name()
-												refactor(server.go) Leader do not need to send its own response of receiving a command to itself.
We send response to itself before, since it is a simple way to deal one machine cluster. After a little modification, we do not need to do this anymore.

											
										
										
											2014-01-12 08:01:38 +00:00
+										s.syncedPeer = make(map[string]bool)
-												when change the state to leader, we should also update the s.leader to its own name

											
										
										
											2013-07-08 00:01:55 +00:00
+									}
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
 									// Dispatch state and leader change events.
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove uncessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+									s.DispatchEvent(newEvent(StateChangeEventType, s.state, prevState))
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									if prevLeader != s.leader {
 										s.DispatchEvent(newEvent(LeaderChangeEventType, s.leader, prevLeader))
 									}
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+								}
-												add timer.fire function, which can fire at the timer channel

											
										
										
											2013-06-10 04:47:59 +00:00
+								// Retrieves the current term of the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Term() uint64 {
-												fix test for data race

											
										
										
											2013-10-22 22:39:32 +00:00
+									s.mutex.RLock()
 									defer s.mutex.RUnlock()
-												add timer.fire function, which can fire at the timer channel

											
										
										
											2013-06-10 04:47:59 +00:00
+									return s.currentTerm
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								}
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+								// Retrieves the current commit index of the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) CommitIndex() uint64 {
-												fix test for data race

											
										
										
											2013-10-22 22:39:32 +00:00
+									s.log.mutex.RLock()
 									defer s.log.mutex.RUnlock()
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									return s.log.commitIndex
-												return the index of the commited command to the application via do() and add Index func to get the current committed index of the server

											
										
										
											2013-06-28 23:14:41 +00:00
+								}
-												Fix race conditions.

											
										
										
											2013-05-03 04:16:39 +00:00
+								// Retrieves the name of the candidate this server voted for in this term.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) VotedFor() string {
-												Fix race conditions.

											
										
										
											2013-05-03 04:16:39 +00:00
+									return s.votedFor
 								}
-												Fixed minor bugs.

											
										
										
											2013-05-08 03:56:32 +00:00
+								// Retrieves whether the server's log has no entries.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) IsLogEmpty() bool {
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									return s.log.isEmpty()
-												Fixed minor bugs.

											
										
										
											2013-05-08 03:56:32 +00:00
+								}
-												Fix log entry serialization.

											
										
										
											2013-05-08 20:22:08 +00:00
+								// A list of all the log entries. This should only be used for debugging purposes.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) LogEntries() []*LogEntry {
-												fix(server.go) fix data race between func LogEntries and func appendEntry
The append function in appendEntry might change the value of the slice pointer when it wants to grow the capacity of the slice. We are not protecting the content of the slice, since the LogEntries func is just used for internal testing. The main reason of this pull request is to make the race detector happy during the testing.

											
										
										
											2014-04-10 17:21:34 +00:00
+									s.log.mutex.RLock()
 									defer s.log.mutex.RUnlock()
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+									return s.log.entries
-												Fix log entry serialization.

											
										
										
											2013-05-08 20:22:08 +00:00
+								}
-												Add Server.LastCommandName().

											
										
										
											2013-06-03 19:13:38 +00:00
+								// A reference to the command name of the last entry.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) LastCommandName() string {
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+									return s.log.lastCommandName()
-												Add Server.LastCommandName().

											
										
										
											2013-06-03 19:13:38 +00:00
+								}
-												change log ouput format to Lmicroseconds

											
										
										
											2013-07-02 18:42:14 +00:00
+								// Get the state of the server for debugging
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) GetState() string {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									s.mutex.RLock()
 									defer s.mutex.RUnlock()
-												init fix

											
										
										
											2013-08-03 02:00:11 +00:00
+									return fmt.Sprintf("Name: %s, State: %s, Term: %v, CommitedIndex: %v ", s.name, s.state, s.currentTerm, s.log.commitIndex)
-												change log ouput format to Lmicroseconds

											
										
										
											2013-07-02 18:42:14 +00:00
+								}
-												change promotable to a func

											
										
										
											2013-07-25 23:16:06 +00:00
+								// Check if the server is promotable
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) promotable() bool {
-												change promotable to a func

											
										
										
											2013-07-25 23:16:06 +00:00
+									return s.log.currentIndex() > 0
 								}
-												Add repeated election test.

											
										
										
											2013-05-01 05:21:56 +00:00
+								//--------------------------------------
 								// Membership
 								//--------------------------------------
-												Intermediate.

											
										
										
											2013-04-28 04:51:17 +00:00
+								// Retrieves the number of member servers in the consensus.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) MemberCount() int {
-												refactor(server.go) No lock is needed when reading server variables in a internal function
Our server is a single routine for loop. We have a lock to avoid "data-race" between the external function calls and internal operations(writes). So we can drop the extra lock when reading variables internally.

											
										
										
											2014-03-10 20:09:51 +00:00
+									s.mutex.RLock()
 									defer s.mutex.RUnlock()
-												server.MemberCount: count peers with len() is cheaper than loop
											
										
										
											2013-06-22 15:35:30 +00:00
+									return len(s.peers) + 1
-												Intermediate.

											
										
										
											2013-04-28 04:51:17 +00:00
+								}
 								// Retrieves the number of servers required to make a quorum.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) QuorumSize() int {
-												Intermediate.

											
										
										
											2013-04-28 04:51:17 +00:00
+									return (s.MemberCount() / 2) + 1
 								}
-												Add repeated election test.

											
										
										
											2013-05-01 05:21:56 +00:00
+								//--------------------------------------
 								// Election timeout
 								//--------------------------------------
 								// Retrieves the election timeout.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) ElectionTimeout() time.Duration {
-												add mutex

											
										
										
											2013-08-07 04:02:30 +00:00
+									s.mutex.RLock()
 									defer s.mutex.RUnlock()
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									return s.electionTimeout
-												Add repeated election test.

											
										
										
											2013-05-01 05:21:56 +00:00
+								}
 								// Sets the election timeout.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) SetElectionTimeout(duration time.Duration) {
-												add mutex

											
										
										
											2013-08-07 04:02:30 +00:00
+									s.mutex.Lock()
 									defer s.mutex.Unlock()
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									s.electionTimeout = duration
-												Add repeated election test.

											
										
										
											2013-05-01 05:21:56 +00:00
+								}
-												Add multi-node failure with re-election test.

											
										
										
											2013-05-05 21:41:55 +00:00
+								//--------------------------------------
 								// Heartbeat interval
 								//--------------------------------------
 								// Retrieves the heartbeat timeout.
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+								func (s *server) HeartbeatInterval() time.Duration {
-												add mutex

											
										
										
											2013-08-07 04:02:30 +00:00
+									s.mutex.RLock()
 									defer s.mutex.RUnlock()
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+									return s.heartbeatInterval
-												Add multi-node failure with re-election test.

											
										
										
											2013-05-05 21:41:55 +00:00
+								}
 								// Sets the heartbeat timeout.
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+								func (s *server) SetHeartbeatInterval(duration time.Duration) {
-												Add multi-node failure with re-election test.

											
										
										
											2013-05-05 21:41:55 +00:00
+									s.mutex.Lock()
 									defer s.mutex.Unlock()
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+									s.heartbeatInterval = duration
-												Add multi-node failure with re-election test.

											
										
										
											2013-05-05 21:41:55 +00:00
+									for _, peer := range s.peers {
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+										peer.setHeartbeatInterval(duration)
-												Add multi-node failure with re-election test.

											
										
										
											2013-05-05 21:41:55 +00:00
+									}
 								}
-												Basic log append.

											
										
										
											2013-04-14 21:37:33 +00:00
+								//------------------------------------------------------------------------------
 								//
 								// Methods
 								//
 								//------------------------------------------------------------------------------
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
 								//--------------------------------------
-												add comments and gofmt

											
										
										
											2013-06-24 16:52:51 +00:00
+								// Initialization
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+								//--------------------------------------
-												refactor

											
										
										
											2013-07-11 05:19:57 +00:00
+								// Reg the NOPCommand
 								func init() {
 									RegisterCommand(&NOPCommand{})
-												change join command to interface, so application can overwrite it

											
										
										
											2013-07-26 19:13:52 +00:00
+									RegisterCommand(&DefaultJoinCommand{})
-												add leaveCommand interface

											
										
										
											2013-07-26 20:33:58 +00:00
+									RegisterCommand(&DefaultLeaveCommand{})
-												refactor

											
										
										
											2013-07-11 05:19:57 +00:00
+								}
-												feat(server.go) introduce server.Init()
This commit enable dividing raft server starting process to two phases. So application can do extra work based on the status of the raft server before starting it. To implement this, we add a initialization phase, in which raft server reads in the log entires from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+								// Start the raft server
-												refactor

											
										
										
											2013-07-25 22:47:35 +00:00
+								// If log entries exist then allow promotion to candidate if no AEs received.
 								// If no log entries exist then wait for AEs from another node.
 								// If no log entries exist and a self-join command is issued then
 								// immediately become leader and commit entry.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Start() error {
-												with assertion problem at server.go L567

											
										
										
											2013-07-25 21:26:27 +00:00
+									// Exit if the server is already running.
-												feat(server.go) introduce server.Init()
This commit enable dividing raft server starting process to two phases. So application can do extra work based on the status of the raft server before starting it. To implement this, we add a initialization phase, in which raft server reads in the log entires from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+									if s.Running() {
 										return fmt.Errorf("raft.Server: Server already running[%v]", s.state)
-												with assertion problem at server.go L567

											
										
										
											2013-07-25 21:26:27 +00:00
+									}
-												feat(server.go) introduce server.Init()
This commit enable dividing raft server starting process to two phases. So application can do extra work based on the status of the raft server before starting it. To implement this, we add a initialization phase, in which raft server reads in the log entires from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+									if err := s.Init(); err != nil {
 										return err
-												with assertion problem at server.go L567

											
										
										
											2013-07-25 21:26:27 +00:00
+									}
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+									// stopped needs to be allocated each time server starts
 									// because it is closed at `Stop`.
 									s.stopped = make(chan bool)
-												with assertion problem at server.go L567

											
										
										
											2013-07-25 21:26:27 +00:00
+									s.setState(Follower)
-												improve init process

											
										
										
											2013-07-25 22:40:20 +00:00
+									// If no log entries exist then
 									// 1. wait for AEs from another node
 									// 2. wait for self-join command
 									// to set itself promotable
-												change promotable to a func

											
										
										
											2013-07-25 23:16:06 +00:00
+									if !s.promotable() {
-												with assertion problem at server.go L567

											
										
										
											2013-07-25 21:26:27 +00:00
+										s.debugln("start as a new raft server")
-												improve init process

											
										
										
											2013-07-25 22:40:20 +00:00
+										// If log entries exist then allow promotion to candidate
 										// if no AEs received.
-												with assertion problem at server.go L567

											
										
										
											2013-07-25 21:26:27 +00:00
+									} else {
 										s.debugln("start from previous saved state")
 									}
-												init fix

											
										
										
											2013-08-03 02:00:11 +00:00
+									debugln(s.GetState())
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+									s.routineGroup.Add(1)
 									go func() {
 										defer s.routineGroup.Done()
 										s.loop()
 									}()
-												with assertion problem at server.go L567

											
										
										
											2013-07-25 21:26:27 +00:00
 									return nil
 								}
-												chore(server.go) add comment for init

											
										
										
											2014-04-10 03:21:31 +00:00
+								// Init initializes the raft server.
 								// If there is no previous log file under the given path, Init() will create an empty log file.
 								// Otherwise, Init() will load in the log entries from the log file.
-												feat(server.go) introduce server.Init()
This commit enable dividing raft server starting process to two phases. So application can do extra work based on the status of the raft server before starting it. To implement this, we add a initialization phase, in which raft server reads in the log entires from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+								func (s *server) Init() error {
 									if s.Running() {
 										return fmt.Errorf("raft.Server: Server already running[%v]", s.state)
 									}
-												fix(server.go) fix that the server cannot be inited after loading snapshot.
We cannot use log.empty to check if the log has been loaded or not. After we load the snapshot, log.empty will be false but we has not loaded the previous log. We introduce log.initialized to indicate that we have loaded the previous log file.

											
										
										
											2014-04-04 01:08:36 +00:00
+									// Server has been initialized or server was stopped after initialized
 									// If log has been initialized, we know that the server was stopped after
 									// running.
 									if s.state == Initialized || s.log.initialized {
-												feat(server.go) introduce server.Init()
This commit enable dividing raft server starting process to two phases. So application can do extra work based on the status of the raft server before starting it. To implement this, we add a initialization phase, in which raft server reads in the log entires from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+										s.state = Initialized
 										return nil
 									}
 									// Create snapshot directory if it does not exist
-												fix(server.Init) Fix wrong error reporting
We should only report error if the snapshot path is not exist and we cannot create a new one.

											
										
										
											2014-03-22 17:30:14 +00:00
+									err := os.Mkdir(path.Join(s.path, "snapshot"), 0700)
-												fix(server.Init) fix snapshot mkdir error handling
We should report all errors other than existErr.

											
										
										
											2014-03-22 23:19:00 +00:00
+									if err != nil && !os.IsExist(err) {
-												feat(server.go) introduce server.Init()
This commit enable dividing raft server starting process to two phases. So application can do extra work based on the status of the raft server before starting it. To implement this, we add a initialization phase, in which raft server reads in the log entires from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+										s.debugln("raft: Snapshot dir error: ", err)
 										return fmt.Errorf("raft: Initialization error: %s", err)
 									}
 									if err := s.readConf(); err != nil {
 										s.debugln("raft: Conf file error: ", err)
 										return fmt.Errorf("raft: Initialization error: %s", err)
 									}
 									// Initialize the log and load it up.
 									if err := s.log.open(s.LogPath()); err != nil {
 										s.debugln("raft: Log error: ", err)
 										return fmt.Errorf("raft: Initialization error: %s", err)
 									}
 									// Update the term to the last term in the log.
 									_, s.currentTerm = s.log.lastInfo()
 									s.state = Initialized
 									return nil
 								}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+								// Shuts down the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Stop() {
-												test(snapshot) add tests for recovery from previous snapshot and log

											
										
										
											2014-03-25 21:58:38 +00:00
+									if s.State() == Stopped {
 										return
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+									}
-												test(snapshot) add tests for recovery from previous snapshot and log

											
										
										
											2014-03-25 21:58:38 +00:00
-												Merge branch 'master' of github.com:goraft/raft

Conflicts:
	server.go

											
										
										
											2014-04-04 21:43:02 +00:00
+									close(s.stopped)
-												fix(server.go/peer.go) server.stop should stop heartbeat before close the log

											
										
										
											2013-11-27 05:30:03 +00:00
-												chore(server): improve comment

											
										
										
											2014-04-04 23:36:55 +00:00
+									// make sure all goroutines have stopped before we close the log
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+									s.routineGroup.Wait()
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									s.log.close()
-												fix(server.go) fix data race in server.stop
Use setState() function to set state; avoid data race.

											
										
										
											2014-03-24 19:02:15 +00:00
+									s.setState(Stopped)
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+								}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								// Checks if the server is currently running.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Running() bool {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+									s.mutex.RLock()
 									defer s.mutex.RUnlock()
-												feat(server.go) introduce server.Init()
This commit enables dividing the raft server starting process into two phases, so the application can do extra work based on the status of the raft server before starting it. To implement this, we add an initialization phase, in which the raft server reads in the log entries from data-dir and commits to the last recorded index.

											
										
										
											2014-03-21 02:10:48 +00:00
+									return (s.state != Stopped && s.state != Initialized)
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								//--------------------------------------
 								// Term
 								//--------------------------------------
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove unnecessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+								// updates the current term for the server. This is only used when a larger
 								// external term is found.
 								func (s *server) updateCurrentTerm(term uint64, leaderName string) {
-												introduce _assert

											
										
										
											2014-02-18 00:51:01 +00:00
+									_assert(term > s.currentTerm,
 										"upadteCurrentTerm: update is called when term is not larger than currentTerm")
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									// Store previous values temporarily.
 									prevTerm := s.currentTerm
 									prevLeader := s.leader
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove unnecessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+									// set currentTerm = T, convert to follower (§5.1)
 									// stop heartbeats before step-down
 									if s.state == Leader {
 										for _, peer := range s.peers {
 											peer.stopHeartbeat(false)
-												leader must stop all heartbeat routines before stepdown and truncate its log

											
										
										
											2014-01-27 14:05:54 +00:00
+										}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+									}
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove unnecessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+									// update the term and clear vote for
 									if s.state != Follower {
 										s.setState(Follower)
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+									}
-												refactor(server.go) No lock is needed when reading server variables in a internal function
Our server is a single-routine for loop. We have a lock to avoid a "data-race" between the external function calls and internal operations (writes), so we can drop the extra lock when reading variables internally.

											
										
										
											2014-03-10 20:09:51 +00:00
 									s.mutex.Lock()
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove unnecessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+									s.currentTerm = term
 									s.leader = leaderName
 									s.votedFor = ""
-												refactor(server.go) No lock is needed when reading server variables in a internal function
Our server is a single-routine for loop. We have a lock to avoid a "data-race" between the external function calls and internal operations (writes), so we can drop the extra lock when reading variables internally.

											
										
										
											2014-03-10 20:09:51 +00:00
+									s.mutex.Unlock()
-												update leadername

											
										
										
											2013-07-26 00:49:01 +00:00
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									// Dispatch change events.
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove unnecessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+									s.DispatchEvent(newEvent(TermChangeEventType, s.currentTerm, prevTerm))
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									if prevLeader != s.leader {
 										s.DispatchEvent(newEvent(LeaderChangeEventType, s.leader, prevLeader))
 									}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+								}
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+								//--------------------------------------
 								// Event Loop
 								//--------------------------------------
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+								//               ________
 								//            --|Snapshot|                 timeout
 								//            |  --------                  ______
 								// recover    |       ^                   |      |
 								// snapshot / |       |snapshot           |      |
 								// higher     |       |                   v      |     recv majority votes
 								// term       |    --------    timeout    -----------                        -----------
 								//            |-> |Follower| ----------> | Candidate |--------------------> |  Leader   |
 								//                 --------               -----------                        -----------
 								//                    ^          higher term/ |                         higher term |
 								//                    |            new leader |                                     |
 								//                    |_______________________|____________________________________ |
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+								// The main event loop for the server
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) loop() {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									defer s.debugln("server.loop.end")
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
-												fix(server.go) use server.setState everywhere

											
										
										
											2014-03-24 19:32:39 +00:00
+									state := s.State()
-												when candidate stepdown it should break the loop; gofmt

											
										
										
											2013-07-07 23:37:11 +00:00
-												fix(server.go) use server.setState everywhere

											
										
										
											2014-03-24 19:32:39 +00:00
+									for state != Stopped {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+										s.debugln("server.loop.run ", state)
 										switch state {
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+										case Follower:
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+											s.followerLoop()
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+										case Candidate:
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+											s.candidateLoop()
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+										case Leader:
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+											s.leaderLoop()
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+										case Snapshotting:
 											s.snapshotLoop()
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+										}
-												fix(server.go) use server.setState everywhere

											
										
										
											2014-03-24 19:32:39 +00:00
+										state = s.State()
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+									}
 								}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								// Sends an event to the event loop to be processed. The function will wait
 								// until the event is actually processed before returning.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) send(value interface{}) (interface{}, error) {
-												fix(server.go) check server state before send event to channel
This commit fixes a data race between server.Start() and server.send(). Before this commit, server send might receive on an uninitialized stopped channel. To fix this, we initialize the channel before we change the server state to a running state. Then we check the state before receiving on the stopped channel.

											
										
										
											2014-04-06 17:19:13 +00:00
+									if !s.Running() {
 										return nil, StopError
 									}
-												merge with master

											
										
										
											2014-01-09 10:18:06 +00:00
+									event := &ev{target: value, c: make(chan error, 1)}
-												chore(server): avoid blocking on event send

											
										
										
											2014-04-04 21:56:33 +00:00
+									select {
 									case s.c <- event:
 									case <-s.stopped:
 										return nil, StopError
 									}
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+									select {
 									case <-s.stopped:
 										return nil, StopError
 									case err := <-event.c:
 										return event.returnValue, err
 									}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								}
-												refactor(server.go) make async really async.

											
										
										
											2013-11-27 04:36:42 +00:00
+								func (s *server) sendAsync(value interface{}) {
-												fix(server.go) check server state before send event to channel
This commit fixes a data race between server.Start() and server.send(). Before this commit, server send might receive on an uninitialized stopped channel. To fix this, we initialize the channel before we change the server state to a running state. Then we check the state before receiving on the stopped channel.

											
										
										
											2014-04-06 17:19:13 +00:00
+									if !s.Running() {
 										return
 									}
-												refactor(server.go) try non-blocking send first

											
										
										
											2014-01-09 10:54:58 +00:00
+									event := &ev{target: value, c: make(chan error, 1)}
 									// try a non-blocking send first
 									// in most cases, this should not be blocking
 									// avoid create unnecessary go routines
 									select {
 									case s.c <- event:
 										return
 									default:
 									}
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+									s.routineGroup.Add(1)
-												refactor(server.go) make async really async.

											
										
										
											2013-11-27 04:36:42 +00:00
+									go func() {
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+										defer s.routineGroup.Done()
-												chore(server): avoid blocking on event send

											
										
										
											2014-04-04 21:56:33 +00:00
+										select {
 										case s.c <- event:
 										case <-s.stopped:
 										}
-												refactor(server.go) make async really async.

											
										
										
											2013-11-27 04:36:42 +00:00
+									}()
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								}
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+								// The event loop that is run when the server is in a Follower state.
 								// Responds to RPCs from candidates and leaders.
 								// Converts to candidate if election timeout elapses without either:
 								//   1.Receiving valid AppendEntries RPC, or
 								//   2.Granting vote to candidate
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) followerLoop() {
-												Add election threshold and heartbeat timeout events.

											
										
										
											2013-12-29 19:54:00 +00:00
+									since := time.Now()
 									electionTimeout := s.ElectionTimeout()
-												fix election timeout problem

											
										
										
											2013-07-08 04:31:58 +00:00
+									timeoutChan := afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2)
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
-												add conditions for state for loops

											
										
										
											2014-01-28 15:58:23 +00:00
+									for s.State() == Follower {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										var err error
-												follower will not promote until get synced

											
										
										
											2013-07-12 01:03:24 +00:00
+										update := false
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										select {
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+										case <-s.stopped:
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											s.setState(Stopped)
 											return
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										case e := <-s.c:
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											switch req := e.target.(type) {
 											case JoinCommand:
 												//If no log entries exist and a self-join command is issued
 												//then immediately become leader and commit entry.
 												if s.log.currentIndex() == 0 && req.NodeName() == s.Name() {
 													s.debugln("selfjoin and promote to leader")
 													s.setState(Leader)
 													s.processCommand(req, e)
 												} else {
-												with assertion problem at server.go L567

											
										
										
											2013-07-25 21:26:27 +00:00
+													err = NotLeaderError
 												}
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											case *AppendEntriesRequest:
 												// If heartbeats get too close to the election timeout then send an event.
 												elapsedTime := time.Now().Sub(since)
 												if elapsedTime > time.Duration(float64(electionTimeout)*ElectionTimeoutThresholdPercent) {
 													s.DispatchEvent(newEvent(ElectionTimeoutThresholdEventType, elapsedTime, nil))
 												}
 												e.returnValue, update = s.processAppendEntriesRequest(req)
 											case *RequestVoteRequest:
 												e.returnValue, update = s.processRequestVoteRequest(req)
 											case *SnapshotRequest:
 												e.returnValue = s.processSnapshotRequest(req)
 											default:
 												err = NotLeaderError
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+											}
 											// Callback to event.
 											e.c <- err
-												gofmt

											
										
										
											2013-07-07 20:55:55 +00:00
-												fix election timeout problem

											
										
										
											2013-07-08 04:31:58 +00:00
+										case <-timeoutChan:
-												follower will not promote until get synced

											
										
										
											2013-07-12 01:03:24 +00:00
+											// only allow synced follower to promote to candidate
-												change promotable to a func

											
										
										
											2013-07-25 23:16:06 +00:00
+											if s.promotable() {
-												follower will not promote until get synced

											
										
										
											2013-07-12 01:03:24 +00:00
+												s.setState(Candidate)
 											} else {
 												update = true
 											}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										}
-												gofmt

											
										
										
											2013-07-07 20:55:55 +00:00
-												fix election timeout problem

											
										
										
											2013-07-08 04:31:58 +00:00
+										// Converts to candidate if election timeout elapses without either:
 										//   1.Receiving valid AppendEntries RPC, or
 										//   2.Granting vote to candidate
 										if update {
-												Add election threshold and heartbeat timeout events.

											
										
										
											2013-12-29 19:54:00 +00:00
+											since = time.Now()
-												HTTP Transporter fixes.

											
										
										
											2013-07-09 03:00:14 +00:00
+											timeoutChan = afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2)
-												fix election timeout problem

											
										
										
											2013-07-08 04:31:58 +00:00
+										}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+									}
 								}
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+								// The event loop that is run when the server is in a Candidate state.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) candidateLoop() {
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									// Clear leader value.
 									prevLeader := s.leader
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									s.leader = ""
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+									if prevLeader != s.leader {
 										s.DispatchEvent(newEvent(LeaderChangeEventType, s.leader, prevLeader))
 									}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+									lastLogIndex, lastLogTerm := s.log.lastInfo()
 									doVote := true
 									votesGranted := 0
 									var timeoutChan <-chan time.Time
 									var respChan chan *RequestVoteResponse
-												add conditions for state for loops

											
										
										
											2014-01-28 15:58:23 +00:00
+									for s.State() == Candidate {
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+										if doVote {
 											// Increment current term, vote for self.
 											s.currentTerm++
 											s.votedFor = s.name
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+											// Send RequestVote RPCs to all other servers.
 											respChan = make(chan *RequestVoteResponse, len(s.peers))
 											for _, peer := range s.peers {
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+												s.routineGroup.Add(1)
 												go func(peer *Peer) {
 													defer s.routineGroup.Done()
 													peer.sendVoteRequest(newRequestVoteRequest(s.currentTerm, s.name, lastLogIndex, lastLogTerm), respChan)
 												}(peer)
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+											}
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+											// Wait for either:
 											//   * Votes received from majority of servers: become leader
 											//   * AppendEntries RPC received from new leader: step down.
 											//   * Election timeout elapses without election resolution: increment term, start new election
 											//   * Discover higher term: step down (§5.1)
 											votesGranted = 1
 											timeoutChan = afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2)
 											doVote = false
 										}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+										// If we received enough votes then stop waiting for more votes.
 										// And return from the candidate loop
 										if votesGranted == s.QuorumSize() {
 											s.debugln("server.candidate.recv.enough.votes")
 											s.setState(Leader)
 											return
 										}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+										// Collect votes from peers.
 										select {
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+										case <-s.stopped:
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											s.setState(Stopped)
 											return
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+										case resp := <-respChan:
 											if success := s.processVoteResponse(resp); success {
 												s.debugln("server.candidate.vote.granted: ", votesGranted)
 												votesGranted++
-												fix break in select

											
										
										
											2013-07-07 23:52:18 +00:00
+											}
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+										case e := <-s.c:
 											var err error
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											switch req := e.target.(type) {
 											case Command:
 												err = NotLeaderError
 											case *AppendEntriesRequest:
 												e.returnValue, _ = s.processAppendEntriesRequest(req)
 											case *RequestVoteRequest:
 												e.returnValue, _ = s.processRequestVoteRequest(req)
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+											}
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+											// Callback to event.
 											e.c <- err
 										case <-timeoutChan:
 											doVote = true
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+									}
 								}
-												nip comment

											
										
										
											2013-08-07 04:33:37 +00:00
+								// The event loop that is run when the server is in a Leader state.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) leaderLoop() {
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									logIndex, _ := s.log.lastInfo()
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+									// Update the peers prevLogIndex to leader's lastLogIndex and start heartbeat.
-												write log entry to file on disk when appendEntry and truncate log file on disk when truncate log entries

											
										
										
											2013-07-21 02:07:16 +00:00
+									s.debugln("leaderLoop.set.PrevIndex to ", logIndex)
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+									for _, peer := range s.peers {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+										peer.setPrevLogIndex(logIndex)
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+										peer.startHeartbeat()
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+									}
-												clean up promote

											
										
										
											2013-07-01 15:46:53 +00:00
-												docs(server): add a comment explaining the NOPCommand

											
										
										
											2014-01-23 20:42:06 +00:00
+									// Commit a NOP after the server becomes leader. From the Raft paper:
 									// "Upon election: send initial empty AppendEntries RPCs (heartbeat) to
 									// each server; repeat during idle periods to prevent election timeouts
 									// (§5.2)". The heartbeats started above do the "idle" period work.
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+									s.routineGroup.Add(1)
 									go func() {
 										defer s.routineGroup.Done()
 										s.Do(NOPCommand{})
 									}()
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+									// Begin to collect response from followers
-												add conditions for state for loops

											
										
										
											2014-01-28 15:58:23 +00:00
+									for s.State() == Leader {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										var err error
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+										select {
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+										case <-s.stopped:
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											// Stop all peers before stop
 											for _, peer := range s.peers {
 												peer.stopHeartbeat(false)
 											}
 											s.setState(Stopped)
 											return
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										case e := <-s.c:
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											switch req := e.target.(type) {
 											case Command:
 												s.processCommand(req, e)
 												continue
 											case *AppendEntriesRequest:
 												e.returnValue, _ = s.processAppendEntriesRequest(req)
 											case *AppendEntriesResponse:
 												s.processAppendEntriesResponse(req)
 											case *RequestVoteRequest:
 												e.returnValue, _ = s.processRequestVoteRequest(req)
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
+											}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+											// Callback to event.
 											e.c <- err
 										}
-												rafactor server.go

											
										
										
											2013-07-03 16:53:46 +00:00
+									}
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+									s.syncedPeer = nil
-												change the do struct and add commit center

											
										
										
											2013-06-23 18:42:31 +00:00
+								}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) snapshotLoop() {
-												add conditions for state for loops

											
										
										
											2014-01-28 15:58:23 +00:00
+									for s.State() == Snapshotting {
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+										var err error
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+										select {
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+										case <-s.stopped:
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+											s.setState(Stopped)
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											return
 										case e := <-s.c:
-												use switch.(type)

											
										
										
											2013-08-07 04:37:50 +00:00
+											switch req := e.target.(type) {
 											case Command:
 												err = NotLeaderError
 											case *AppendEntriesRequest:
 												e.returnValue, _ = s.processAppendEntriesRequest(req)
 											case *RequestVoteRequest:
 												e.returnValue, _ = s.processRequestVoteRequest(req)
 											case *SnapshotRecoveryRequest:
 												e.returnValue = s.processSnapshotRecoveryRequest(req)
 											}
-												refactor(server.go) change server.stopped a chan of chan
It is a clean way to sync between a function call and a goroutine.

											
										
										
											2014-02-16 01:16:53 +00:00
+											// Callback to event.
 											e.c <- err
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+										}
 									}
 								}
-												Add basic server setup.

											
										
										
											2013-04-17 02:28:08 +00:00
+								//--------------------------------------
 								// Commands
 								//--------------------------------------
-												Intermediate.

											
										
										
											2013-04-28 04:51:17 +00:00
+								// Attempts to execute a command and replicate it. The function will return
 								// when the command has been successfully committed or an error has occurred.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) Do(command Command) (interface{}, error) {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									return s.send(command)
 								}
-												add server state mutex to avoid state race condition

											
										
										
											2013-07-05 17:44:03 +00:00
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								// Processes a command.
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
+								func (s *server) processCommand(command Command, e *ev) {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									s.debugln("server.command.process")
-												add server state mutex to avoid state race condition

											
										
										
											2013-07-05 17:44:03 +00:00
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									// Create an entry for the command in the log.
-												remove command timeout

											
										
										
											2013-12-30 08:58:38 +00:00
+									entry, err := s.log.createEntry(s.currentTerm, command, e)
-												write log entry to file on disk when appendEntry and truncate log file on disk when truncate log entries

											
										
										
											2013-07-21 02:07:16 +00:00
-												[Fix #74] Refactor to use binary log and binary RPCs.

											
										
										
											2013-07-17 13:45:53 +00:00
+									if err != nil {
 										s.debugln("server.command.log.entry.error:", err)
 										e.c <- err
 										return
 									}
-												write multiple entries with a buffered io

											
										
										
											2013-07-24 18:03:20 +00:00
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									if err := s.log.appendEntry(entry); err != nil {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										s.debugln("server.command.log.error:", err)
 										e.c <- err
 										return
-												Add multi-node join.

											
										
										
											2013-05-05 19:36:23 +00:00
+									}
-												Add heartbeat timer.

											
										
										
											2013-05-05 20:26:04 +00:00
-												refactor(server.go) Leader do not need to send its own response of receiving a command to itself.
We send response to itself before, since it is a simple way to deal one machine cluster. After a little modification, we do not need to do this anymore.

											
										
										
											2014-01-12 08:01:38 +00:00
+									s.syncedPeer[s.Name()] = true
 									if len(s.peers) == 0 {
 										commitIndex := s.log.currentIndex()
 										s.log.setCommitIndex(commitIndex)
 										s.debugln("commit index ", commitIndex)
 									}
-												Intermediate.

											
										
										
											2013-04-28 04:51:17 +00:00
+								}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								//--------------------------------------
 								// Append Entries
 								//--------------------------------------
-												Server clean up.

											
										
										
											2013-07-06 19:41:42 +00:00
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								// Appends zero or more log entry from the leader to this server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) AppendEntries(req *AppendEntriesRequest) *AppendEntriesResponse {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									ret, _ := s.send(req)
 									resp, _ := ret.(*AppendEntriesResponse)
 									return resp
 								}
-												Add multi-node failure with re-election test.

											
										
										
											2013-05-05 21:41:55 +00:00
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								// Processes the "append entries" request.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) processAppendEntriesRequest(req *AppendEntriesRequest) (*AppendEntriesResponse, bool) {
-												Add HTTPTransporter.

											
										
										
											2013-07-09 02:55:00 +00:00
+									s.traceln("server.ae.process")
-												Add basic append entries test.

											
										
										
											2013-04-30 04:13:50 +00:00
+									if req.Term < s.currentTerm {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										s.debugln("server.ae.error: stale term")
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+										return newAppendEntriesResponse(s.currentTerm, false, s.log.currentIndex(), s.log.CommitIndex()), false
-												Add basic append entries test.

											
										
										
											2013-04-30 04:13:50 +00:00
+									}
-												add comments and gofmt

											
										
										
											2013-06-24 16:52:51 +00:00
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove uncessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+									if req.Term == s.currentTerm {
-												fix(server.go) use server.setState everywhere

											
										
										
											2014-03-24 19:32:39 +00:00
+										_assert(s.State() != Leader, "leader.elected.at.same.term.%d\n", s.currentTerm)
-												fix(server.go) remove extra calls of setState
Fix a bug introduced the by race fix. The setState function should not be called if there is no state change.

											
										
										
											2014-04-05 22:51:33 +00:00
 										// step-down to follower when it is a candidate
 										if s.state == Candidate {
 											// change state to follower
 											s.setState(Follower)
 										}
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove uncessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+										// discover new leader when candidate
 										// save leader name when follower
 										s.leader = req.LeaderName
 									} else {
 										// Update term and leader.
 										s.updateCurrentTerm(req.Term, req.LeaderName)
 									}
-												go fmt

											
										
										
											2013-06-08 02:19:18 +00:00
-												Add basic append entries test.

											
										
										
											2013-04-30 04:13:50 +00:00
+									// Reject if log doesn't contain a matching previous entry.
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									if err := s.log.truncate(req.PrevLogIndex, req.PrevLogTerm); err != nil {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										s.debugln("server.ae.truncate.error: ", err)
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+										return newAppendEntriesResponse(s.currentTerm, false, s.log.currentIndex(), s.log.CommitIndex()), true
-												Add basic append entries test.

											
										
										
											2013-04-30 04:13:50 +00:00
+									}
 									// Append entries to the log.
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									if err := s.log.appendEntries(req.Entries); err != nil {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										s.debugln("server.ae.append.error: ", err)
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+										return newAppendEntriesResponse(s.currentTerm, false, s.log.currentIndex(), s.log.CommitIndex()), true
-												Add basic append entries test.

											
										
										
											2013-04-30 04:13:50 +00:00
+									}
 									// Commit up to the commit index.
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									if err := s.log.setCommitIndex(req.CommitIndex); err != nil {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										s.debugln("server.ae.commit.error: ", err)
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+										return newAppendEntriesResponse(s.currentTerm, false, s.log.currentIndex(), s.log.CommitIndex()), true
-												go fmt

											
										
										
											2013-06-08 02:19:18 +00:00
+									}
-												Add basic append entries test.

											
										
										
											2013-04-30 04:13:50 +00:00
-												Fix spelling errors in comments and strings.

											
										
										
											2014-01-09 03:59:13 +00:00
+									// once the server appended and committed all the log entries from the leader
-												follower will not promote until get synced

											
										
										
											2013-07-12 01:03:24 +00:00
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+									return newAppendEntriesResponse(s.currentTerm, true, s.log.currentIndex(), s.log.CommitIndex()), true
-												Intermediate.

											
										
										
											2013-04-28 04:51:17 +00:00
+								}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								// Processes the "append entries" response from the peer. This is only
 								// processed when the server is a leader. Responses received during other
 								// states are dropped.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) processAppendEntriesResponse(resp *AppendEntriesResponse) {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									// If we find a higher term then change to a follower and exit.
-												in progress

											
										
										
											2014-01-12 07:40:55 +00:00
+									if resp.Term() > s.Term() {
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove uncessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+										s.updateCurrentTerm(resp.Term(), "")
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										return
 									}
-												gofmt

											
										
										
											2013-07-07 20:55:55 +00:00
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+									// panic response if it's not successful.
-												in progress

											
										
										
											2014-01-12 07:40:55 +00:00
+									if !resp.Success() {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										return
 									}
-												gofmt

											
										
										
											2013-07-07 20:55:55 +00:00
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+									// if one peer successfully append a log from the leader term,
 									// we add it to the synced list
 									if resp.append == true {
 										s.syncedPeer[resp.peer] = true
 									}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									// Increment the commit count to make sure we have a quorum before committing.
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+									if len(s.syncedPeer) < s.QuorumSize() {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										return
 									}
 									// Determine the committed index that a majority has.
 									var indices []uint64
 									indices = append(indices, s.log.currentIndex())
 									for _, peer := range s.peers {
-												Pass race detector.

											
										
										
											2013-07-07 22:12:24 +00:00
+										indices = append(indices, peer.getPrevLogIndex())
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									}
-												use sort.Reverse

											
										
										
											2013-09-29 03:00:40 +00:00
+									sort.Sort(sort.Reverse(uint64Slice(indices)))
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
 									// We can commit up to the index which the majority of the members have appended.
 									commitIndex := indices[s.QuorumSize()-1]
 									committedIndex := s.log.commitIndex
 									if commitIndex > committedIndex {
-												fix(sync) leader should do a sync before committing log entires
As we do not fsync every time log writes to file when the state is leader, we need to do fsync before actually committing the log entries to ensure safety.

											
										
										
											2014-01-10 12:45:53 +00:00
+										// leader needs to do a fsync before committing log entries
 										s.log.sync()
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+										s.log.setCommitIndex(commitIndex)
-												create new channel before start heartbeat to avoid recv stop signal from old heartbeat func

											
										
										
											2013-07-08 05:26:36 +00:00
+										s.debugln("commit index ", commitIndex)
-												Clean up Peer.flush().

											
										
										
											2013-06-25 21:41:42 +00:00
+									}
-												Add multi-node join.

											
										
										
											2013-05-05 19:36:23 +00:00
+								}
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+								// processVoteReponse processes a vote request:
 								// 1. if the vote is granted for the current term of the candidate, return true
 								// 2. if the vote is denied due to smaller term, update the term of this server
 								//    which will also cause the candidate to step-down, and return false.
 								// 3. if the vote is for a smaller term, ignore it and return false.
 								func (s *server) processVoteResponse(resp *RequestVoteResponse) bool {
 									if resp.VoteGranted && resp.Term == s.currentTerm {
 										return true
 									}
 									if resp.Term > s.currentTerm {
 										s.debugln("server.candidate.vote.failed")
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove uncessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+										s.updateCurrentTerm(resp.Term, "")
-												refactor(server.go) refactor candidate loop

											
										
										
											2014-02-13 22:05:42 +00:00
+									} else {
 										s.debugln("server.candidate.vote: denied")
 									}
 									return false
 								}
-												Add simple cluster leader election.

											
										
										
											2013-05-01 05:11:23 +00:00
+								//--------------------------------------
 								// Request Vote
-												Add election timer.

											
										
										
											2013-04-28 21:23:21 +00:00
+								//--------------------------------------
 								// Requests a vote from a server. A vote can be obtained if the vote's term is
 								// at the server's current term and the server has not made a vote yet. A vote
 								// can also be obtained if the term is greater than the server's current term.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) RequestVote(req *RequestVoteRequest) *RequestVoteResponse {
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									ret, _ := s.send(req)
 									resp, _ := ret.(*RequestVoteResponse)
 									return resp
 								}
-												Fixed minor bugs.

											
										
										
											2013-05-08 03:56:32 +00:00
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								// Processes a "request vote" request.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) processRequestVoteRequest(req *RequestVoteRequest) (*RequestVoteResponse, bool) {
-												add nitro tool to go-raft lib

											
										
										
											2013-07-11 19:56:34 +00:00
-												Add election timer.

											
										
										
											2013-04-28 21:23:21 +00:00
+									// If the request is coming from an old term then reject it.
-												fix test for data race

											
										
										
											2013-10-22 22:39:32 +00:00
+									if req.Term < s.Term() {
-												refactor make deny vote more clear

											
										
										
											2013-11-27 05:50:03 +00:00
+										s.debugln("server.rv.deny.vote: cause stale term")
-												fix election timeout problem

											
										
										
											2013-07-08 04:31:58 +00:00
+										return newRequestVoteResponse(s.currentTerm, false), false
-												Add election timer.

											
										
										
											2013-04-28 21:23:21 +00:00
+									}
-												make stepdown blocking

											
										
										
											2013-07-03 01:22:37 +00:00
-												refactor(server.go) change setCurrentTerm to updateCurrentTerm
process functions should always compare the term
simplify the logic of updateCurrentTerm
remove uncessary setState

											
										
										
											2014-02-16 00:14:24 +00:00
+									// If the term of the request peer is larger than this node, update the term
 									// If the term is equal and we've already voted for a different candidate then
 									// don't vote for this candidate.
 									if req.Term > s.Term() {
 										s.updateCurrentTerm(req.Term, "")
 									} else if s.votedFor != "" && s.votedFor != req.CandidateName {
-												refactor make deny vote more clear

											
										
										
											2013-11-27 05:50:03 +00:00
+										s.debugln("server.deny.vote: cause duplicate vote: ", req.CandidateName,
-												fix election timeout problem

											
										
										
											2013-07-08 04:31:58 +00:00
+											" already vote for ", s.votedFor)
 										return newRequestVoteResponse(s.currentTerm, false), false
-												Add election timer.

											
										
										
											2013-04-28 21:23:21 +00:00
+									}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									// If the candidate's log is not at least as up-to-date as our last log then don't vote.
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									lastIndex, lastTerm := s.log.lastInfo()
-												change the do struct and add commit center

											
										
										
											2013-06-23 18:42:31 +00:00
+									if lastIndex > req.LastLogIndex || lastTerm > req.LastLogTerm {
-												refactor make deny vote more clear

											
										
										
											2013-11-27 05:50:03 +00:00
+										s.debugln("server.deny.vote: cause out of date log: ", req.CandidateName,
-												commit a NOP after the server becomes leader. Commit ohter commands, after the majority of the peers synced by the NOP command

											
										
										
											2013-07-10 23:07:14 +00:00
+											"Index :[", lastIndex, "]", " [", req.LastLogIndex, "]",
 											"Term :[", lastTerm, "]", " [", req.LastLogTerm, "]")
-												fix election timeout problem

											
										
										
											2013-07-08 04:31:58 +00:00
+										return newRequestVoteResponse(s.currentTerm, false), false
-												Add simple voting test.

											
										
										
											2013-04-28 22:49:52 +00:00
+									}
-												Add election timer.

											
										
										
											2013-04-28 21:23:21 +00:00
-												Add simple voting test.

											
										
										
											2013-04-28 22:49:52 +00:00
+									// If we made it this far then cast a vote and reset our election time out.
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+									s.debugln("server.rv.vote: ", s.name, " votes for", req.CandidateName, "at term", req.Term)
-												Add simple voting test.

											
										
										
											2013-04-28 22:49:52 +00:00
+									s.votedFor = req.CandidateName
-												add comments and gofmt

											
										
										
											2013-06-24 16:52:51 +00:00
-												fix election timeout problem

											
										
										
											2013-07-08 04:31:58 +00:00
+									return newRequestVoteResponse(s.currentTerm, true), true
-												Add multi-node failure with re-election test.

											
										
										
											2013-05-05 21:41:55 +00:00
+								}
-												Add single node configuration.

											
										
										
											2013-04-17 02:32:49 +00:00
+								//--------------------------------------
 								// Membership
 								//--------------------------------------
-												Add HTTPTransporter.

											
										
										
											2013-07-09 02:55:00 +00:00
+								// Adds a peer to the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) AddPeer(name string, connectiongString string) error {
-												Add HTTPTransporter.

											
										
										
											2013-07-09 02:55:00 +00:00
+									s.debugln("server.peer.add: ", name, len(s.peers))
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
-												Add HTTPTransporter.

											
										
										
											2013-07-09 02:55:00 +00:00
+									// Do not allow peers to be added twice.
-												Remove join command. Fix race condition.

											
										
										
											2013-05-28 18:46:27 +00:00
+									if s.peers[name] != nil {
-												let knowen peer to rejoin to the cluster

											
										
										
											2013-07-11 03:02:24 +00:00
+										return nil
-												Add single node configuration.

											
										
										
											2013-04-17 02:32:49 +00:00
+									}
-												Add election timer.

											
										
										
											2013-04-28 21:23:21 +00:00
-												fix(server): un-nest arg checking

simplify the nesting in AddPeer

											
										
										
											2013-08-09 16:55:25 +00:00
+									// Skip the Peer if it has the same name as the Server
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+									if s.name != name {
-												fix(*.go): rename HeartbeatTimeout to HeartbeatInterval

While re-reading the etcd tuning guide I realized that perhaps it is a
bit of a misnomer to call this a "Timeout" since unlike the "Election
Timeout" there is no immediate consequence of it being missed.

Perhaps naming it interval would be better?

											
										
										
											2014-01-23 20:50:29 +00:00
+										peer := newPeer(s, name, connectiongString, s.heartbeatInterval)
-												fix(server): un-nest arg checking

simplify the nesting in AddPeer

											
										
										
											2013-08-09 16:55:25 +00:00
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+										if s.State() == Leader {
 											peer.startHeartbeat()
 										}
-												conf

											
										
										
											2013-08-13 21:31:19 +00:00
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+										s.peers[peer.Name] = peer
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
 										s.DispatchEvent(newEvent(AddPeerEventType, name, nil))
-												fix(server): un-nest arg checking

simplify the nesting in AddPeer

											
										
										
											2013-08-09 16:55:25 +00:00
+									}
-												conf

											
										
										
											2013-08-13 21:31:19 +00:00
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+									// Write the configuration to file.
 									s.writeConf()
-												conf

											
										
										
											2013-08-13 21:31:19 +00:00
-												Remove join command. Fix race condition.

											
										
										
											2013-05-28 18:46:27 +00:00
+									return nil
-												Add single node configuration.

											
										
										
											2013-04-17 02:32:49 +00:00
+								}
-												Add error lookup for committed log entries.

											
										
										
											2013-06-05 17:57:31 +00:00
-												Add HTTPTransporter.

											
										
										
											2013-07-09 02:55:00 +00:00
+								// Removes a peer from the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) RemovePeer(name string) error {
-												Add HTTPTransporter.

											
										
										
											2013-07-09 02:55:00 +00:00
+									s.debugln("server.peer.remove: ", name, len(s.peers))
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+									// Skip the Peer if it has the same name as the Server
 									if name != s.Name() {
 										// Return error if peer doesn't exist.
 										peer := s.peers[name]
 										if peer == nil {
 											return fmt.Errorf("raft: Peer not found: %s", name)
 										}
-												wip/ move write/read to server.go and add test

											
										
										
											2013-08-15 19:25:00 +00:00
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+										// Stop peer and remove it.
 										if s.State() == Leader {
-												fix(server.go) fix potential deadlock when removing nodes
When server entries removePeer, it is holding log write lock. peer.stopHeartbeat might also need to acquire log read lock to finish. We need to make peer.stopHeartbeat non-blocking to fix the deadlock. Also actually there is no need to wait for the peer go-routine to stop.

											
										
										
											2014-03-10 19:03:44 +00:00
+											// We create a go routine here to avoid potential deadlock.
 											// We are holding log write lock when reach this line of code.
 											// Peer.stopHeartbeat can be blocked without go routine, if the
 											// target go routine (which we want to stop) is calling
 											// log.getEntriesAfter and waiting for log read lock.
 											// So we might be holding log lock and waiting for log lock,
 											// which lead to a deadlock.
 											// TODO(xiangli) refactor log lock
-												fix: wait for all goroutines to finish before Stop

Changes are as follows:

1. Use wait group to wait all goroutines to finish before Stop
2. Remove `stop` channel because its functionality could be
replaced by the wait group
3. Change `stopped` type to `chan bool`, considering it doesn't
need to transfer `stop`
4. Make `send` function also notified by `stopped`, and cancel the request
when stopped
5. Error handling in HTTP handler functions which is caused by
canceling requests

											
										
										
											2014-04-01 17:21:31 +00:00
+											s.routineGroup.Add(1)
 											go func() {
 												defer s.routineGroup.Done()
 												peer.stopHeartbeat(true)
 											}()
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+										}
-												Add error lookup for committed log entries.

											
										
										
											2013-06-05 17:57:31 +00:00
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+										delete(s.peers, name)
-												Add Event Dispatch.

											
										
										
											2013-12-19 23:39:37 +00:00
 										s.DispatchEvent(newEvent(RemovePeerEventType, name, nil))
-												conf

											
										
										
											2013-08-13 21:31:19 +00:00
+									}
-												refactor fixlog

											
										
										
											2013-07-23 22:30:14 +00:00
-												Minor refactor of AddPeer/RemovePeer.

											
										
										
											2013-08-19 15:54:44 +00:00
+									// Write the configuration to file.
 									s.writeConf()
-												Add error lookup for committed log entries.

											
										
										
											2013-06-05 17:57:31 +00:00
 									return nil
 								}
-												solve conflicts

											
										
										
											2013-06-06 03:32:52 +00:00
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+								//--------------------------------------
 								// Log compaction
 								//--------------------------------------
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) TakeSnapshot() error {
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+									if s.stateMachine == nil {
 										return errors.New("Snapshot: Cannot create snapshot. Missing state machine.")
 									}
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+									// Shortcut without lock
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Exit if the server is currently creating a snapshot.
-												refactor(server) rename currentSnapshot->pendingSnapshot, lastSnapshot->snapshot for clearity

											
										
										
											2014-02-21 16:38:12 +00:00
+									if s.pendingSnapshot != nil {
 										return errors.New("Snapshot: Last snapshot is not finished.")
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									}
-												clean the codes

											
										
										
											2013-06-05 05:56:59 +00:00
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+									// TODO: acquire the lock and no more committed is allowed
 									// This will be done after finishing refactoring heartbeat
 									s.debugln("take.snapshot")
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									lastIndex, lastTerm := s.log.commitInfo()
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
 									// check if there is log has been committed since the
 									// last snapshot.
 									if lastIndex == s.log.startIndex {
 										return nil
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									}
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+									path := s.SnapshotPath(lastIndex, lastTerm)
 									// Attach snapshot to pending snapshot and save it to disk.
 									s.pendingSnapshot = &Snapshot{lastIndex, lastTerm, nil, nil, path}
 									state, err := s.stateMachine.Save()
 									if err != nil {
-												Don't try to take 2 snapshots at a time

Fix #943. Close #1081

											
										
										
											2014-10-30 16:06:30 +00:00
+										s.pendingSnapshot = nil
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+										return err
-												add statemachine interface

											
										
										
											2013-06-06 04:14:07 +00:00
+									}
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Clone the list of peers.
 									peers := make([]*Peer, 0, len(s.peers)+1)
-												fix snapshot

											
										
										
											2013-06-12 16:47:48 +00:00
+									for _, peer := range s.peers {
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+										peers = append(peers, peer.clone())
-												add statemachine interface

											
										
										
											2013-06-06 04:14:07 +00:00
+									}
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									peers = append(peers, &Peer{Name: s.Name(), ConnectionString: s.connectionString})
-												add statemachine interface

											
										
										
											2013-06-06 04:14:07 +00:00
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+									// Attach snapshot to pending snapshot and save it to disk.
 									s.pendingSnapshot.Peers = peers
 									s.pendingSnapshot.State = state
-												add log-compaction

											
										
										
											2013-06-05 00:02:45 +00:00
+									s.saveSnapshot()
-												clean the codes

											
										
										
											2013-06-05 05:56:59 +00:00
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// We keep some log entries after the snapshot.
 									// We do not want to send the whole snapshot to the slightly slow machines
-												change func names and fix typo

											
										
										
											2013-07-18 23:44:01 +00:00
+									if lastIndex-s.log.startIndex > NumberOfLogEntriesAfterSnapshot {
 										compactIndex := lastIndex - NumberOfLogEntriesAfterSnapshot
-												refactor do not copy entry around when doing encoding

											
										
										
											2014-01-11 14:41:12 +00:00
+										compactTerm := s.log.getEntry(compactIndex).Term()
-												keep recent log entries after snapshot

											
										
										
											2013-07-16 20:16:33 +00:00
+										s.log.compact(compactIndex, compactTerm)
 									}
-												clean the codes

											
										
										
											2013-06-05 05:56:59 +00:00
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									return nil
 								}
 								// Writes the pending snapshot to disk and promotes it to the current snapshot.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) saveSnapshot() error {
-												refactor(server) rename currentSnapshot->pendingSnapshot, lastSnapshot->snapshot for clearity

											
										
										
											2014-02-21 16:38:12 +00:00
+									if s.pendingSnapshot == nil {
 										return errors.New("pendingSnapshot.is.nil")
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									}
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Write snapshot to disk.
-												refactor(server) rename currentSnapshot->pendingSnapshot, lastSnapshot->snapshot for clearity

											
										
										
											2014-02-21 16:38:12 +00:00
+									if err := s.pendingSnapshot.save(); err != nil {
-												Don't try to take 2 snapshots at a time

Fix #943. Close #1081

											
										
										
											2014-10-30 16:06:30 +00:00
+										s.pendingSnapshot = nil
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+										return err
 									}
-												go fmt

											
										
										
											2013-06-08 02:19:18 +00:00
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Swap the current and last snapshots.
-												refactor(server) rename currentSnapshot->pendingSnapshot, lastSnapshot->snapshot for clearity

											
										
										
											2014-02-21 16:38:12 +00:00
+									tmp := s.snapshot
 									s.snapshot = s.pendingSnapshot
-												clean the codes

											
										
										
											2013-06-05 05:56:59 +00:00
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Delete the previous snapshot if there is any change
-												refactor(server) rename currentSnapshot->pendingSnapshot, lastSnapshot->snapshot for clearity

											
										
										
											2014-02-21 16:38:12 +00:00
+									if tmp != nil && !(tmp.LastIndex == s.snapshot.LastIndex && tmp.LastTerm == s.snapshot.LastTerm) {
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+										tmp.remove()
-												add log-compaction

											
										
										
											2013-06-05 00:02:45 +00:00
+									}
-												refactor(server) rename currentSnapshot->pendingSnapshot, lastSnapshot->snapshot for clearity

											
										
										
											2014-02-21 16:38:12 +00:00
+									s.pendingSnapshot = nil
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									return nil
 								}
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+								// Returns a list of available snapshot names sorted newest to oldest
 								func (s *server) SnapshotList() ([]string, error) {
 									// Get FileInfo for everything in the snapshot dir. Any ReadDir
 									// error (including a missing directory) is returned unchanged so
 									// that callers can test it with os.IsNotExist.
 									ssdir := path.Join(s.path, "snapshot")
 									finfos, err := ioutil.ReadDir(ssdir)
 									if err != nil {
 										return nil, err
 									}

 									// Build a list of snapshot file names: regular files with the
 									// ".ss" extension only.
 									var ssnames []string
 									for _, finfo := range finfos {
 										fname := finfo.Name()
 										if finfo.Mode().IsRegular() && filepath.Ext(fname) == ".ss" {
 											ssnames = append(ssnames, fname)
 										}
 									}

 									// Sort snapshot names from newest to oldest. File names have the
 									// form "<term>_<index>.ss", so they must be compared numerically:
 									// a plain string sort would rank "2_9.ss" as newer than "2_10.ss".
 									sort.Sort(sort.Reverse(snapshotNamesOldestFirst(ssnames)))
 									return ssnames, nil
 								}

 								// snapshotNamesOldestFirst orders snapshot file names from oldest to
 								// newest by (term, index). Names that do not match "<term>_<index>.ss"
 								// are ordered before every well-formed name and lexicographically
 								// among themselves, so after reversal they come last.
 								type snapshotNamesOldestFirst []string

 								func (n snapshotNamesOldestFirst) Len() int      { return len(n) }
 								func (n snapshotNamesOldestFirst) Swap(i, j int) { n[i], n[j] = n[j], n[i] }
 								func (n snapshotNamesOldestFirst) Less(i, j int) bool {
 									termI, indexI, okI := parseSnapshotName(n[i])
 									termJ, indexJ, okJ := parseSnapshotName(n[j])
 									switch {
 									case okI != okJ:
 										// Exactly one name is malformed; it is treated as the older one.
 										return !okI
 									case !okI:
 										// Both malformed: fall back to string order.
 										return n[i] < n[j]
 									case termI != termJ:
 										return termI < termJ
 									case indexI != indexJ:
 										return indexI < indexJ
 									}
 									// Same term and index: break the tie by name to keep a total order.
 									return n[i] < n[j]
 								}

 								// parseSnapshotName extracts the term and index from a snapshot file
 								// name of the form "<term>_<index>.ss". ok is false when the name does
 								// not match that layout.
 								func parseSnapshotName(name string) (term, index uint64, ok bool) {
 									count, err := fmt.Sscanf(name, "%d_%d.ss", &term, &index)
 									return term, index, err == nil && count == 2
 								}
-												add log-compaction

											
										
										
											2013-06-05 00:02:45 +00:00
+								// Retrieves the snapshot path for the given last index and term.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) SnapshotPath(lastIndex uint64, lastTerm uint64) string {
-												fix issues (https://github.com/benbjohnson/go-raft/pull/19)

											
										
										
											2013-06-05 17:38:49 +00:00
+									return path.Join(s.path, "snapshot", fmt.Sprintf("%v_%v.ss", lastTerm, lastIndex))
-												add log-compaction

											
										
										
											2013-06-05 00:02:45 +00:00
+								}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) RequestSnapshot(req *SnapshotRequest) *SnapshotResponse {
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+									ret, _ := s.send(req)
 									resp, _ := ret.(*SnapshotResponse)
 									return resp
 								}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) processSnapshotRequest(req *SnapshotRequest) *SnapshotResponse {
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+									// If the follower’s log contains an entry at the snapshot’s last index with a term
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// that matches the snapshot’s last term, then the follower already has all the
 									// information found in the snapshot and can reply false.
-												change getLogEntry to getEntry

											
										
										
											2013-07-18 23:29:06 +00:00
+									entry := s.log.getEntry(req.LastIndex)
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
-												refactor do not copy entry around when doing encoding

											
										
										
											2014-01-11 14:41:12 +00:00
+									if entry != nil && entry.Term() == req.LastTerm {
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+										return newSnapshotResponse(false)
 									}
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Update state.
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+									s.setState(Snapshotting)
 									return newSnapshotResponse(true)
 								}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) SnapshotRecoveryRequest(req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse {
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+									ret, _ := s.send(req)
 									resp, _ := ret.(*SnapshotRecoveryResponse)
 									return resp
 								}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) processSnapshotRecoveryRequest(req *SnapshotRecoveryRequest) *SnapshotRecoveryResponse {
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Recover state sent from request.
 									if err := s.stateMachine.Recovery(req.State); err != nil {
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+										panic("cannot recover from previous state")
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									}
-												clean the codes

											
										
										
											2013-06-05 05:56:59 +00:00
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Recover the cluster configuration.
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+									s.peers = make(map[string]*Peer)
-												add connectionstring

											
										
										
											2013-08-15 23:35:01 +00:00
+									for _, peer := range req.Peers {
 										s.AddPeer(peer.Name, peer.ConnectionString)
-												fix snapshot

											
										
										
											2013-06-12 16:47:48 +00:00
+									}
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Update log state.
-												add snapshot unit test

											
										
										
											2013-06-06 03:25:17 +00:00
+									s.currentTerm = req.LastTerm
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									s.log.updateCommitIndex(req.LastIndex)
-												fix snapshot

											
										
										
											2013-06-12 16:47:48 +00:00
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Create local snapshot.
-												refactor(server) rename currentSnapshot->pendingSnapshot, lastSnapshot->snapshot for clearity

											
										
										
											2014-02-21 16:38:12 +00:00
+									s.pendingSnapshot = &Snapshot{req.LastIndex, req.LastTerm, req.Peers, req.State, s.SnapshotPath(req.LastIndex, req.LastTerm)}
-												go fmt

											
										
										
											2013-06-08 02:19:18 +00:00
+									s.saveSnapshot()
-												add comments and gofmt

											
										
										
											2013-06-24 16:52:51 +00:00
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Clear the previous log entries.
-												[Fix #47] Clean up external interface.

											
										
										
											2013-07-06 04:49:47 +00:00
+									s.log.compact(req.LastIndex, req.LastTerm)
-												add log-compaction

											
										
										
											2013-06-05 00:02:45 +00:00
-												add snapshotting state

											
										
										
											2013-07-17 00:40:19 +00:00
+									return newSnapshotRecoveryResponse(req.LastTerm, true, req.LastIndex)
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+								}
-												clean the codes

											
										
										
											2013-06-05 05:56:59 +00:00
+								// Load a snapshot at restart
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) LoadSnapshot() error {
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+									sslist, err := s.SnapshotList()
-												Don't panic on a non existing snapshot

											
										
										
											2014-10-20 23:04:13 +00:00
 									// if the snapshot directory doesn't exist or no snapshots exist in
 									// the directory, return immediately
 									if err != nil && os.IsNotExist(err) || err == nil && len(sslist) == 0 {
 										return nil
 									}
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									if err != nil {
-												fix snapshot

											
										
										
											2013-06-12 16:47:48 +00:00
+										return err
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									}
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+									// Load most recent snapshot (falling back to older snapshots if needed)
 									var ss *Snapshot
 									for _, ssname := range sslist {
 										ssFullPath := path.Join(s.path, "snapshot", ssname)
 										ss, err = loadSnapshot(ssFullPath)
 										if err == nil {
 											break
 										}
 										s.debugln(err)
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									}
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									if err != nil {
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+										return err // couldn't load any of the snapshots
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									}
-												fix snapshot

											
										
										
											2013-06-12 16:47:48 +00:00
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+									s.snapshot = ss
-												add statemachine interface

											
										
										
											2013-06-06 04:14:07 +00:00
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Recover snapshot into state machine.
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+									if err = s.stateMachine.Recovery(ss.State); err != nil {
-												refactor(server) cleanup snapshot

											
										
										
											2014-02-27 16:12:39 +00:00
+										s.debugln("recovery.snapshot.error: ", err)
-												clean up println

											
										
										
											2013-07-01 02:14:02 +00:00
+										return err
-												check checksum before load snapshot

											
										
										
											2013-07-01 00:55:54 +00:00
+									}
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Recover cluster configuration.
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+									for _, peer := range ss.Peers {
-												add connectionstring

											
										
										
											2013-08-15 23:35:01 +00:00
+										s.AddPeer(peer.Name, peer.ConnectionString)
-												fix snapshot

											
										
										
											2013-06-12 16:47:48 +00:00
+									}
-												add statemachine interface

											
										
										
											2013-06-06 04:14:07 +00:00
-												Snapshot test coverage.

											
										
										
											2014-01-21 17:00:59 +00:00
+									// Update log state.
-												Revert to older snapshots if the latest raft snapshot is corrupted

Fix #1040

											
										
										
											2014-10-20 17:12:59 +00:00
+									s.log.startTerm = ss.LastTerm
 									s.log.startIndex = ss.LastIndex
 									s.log.updateCommitIndex(ss.LastIndex)
-												add log-compaction

											
										
										
											2013-06-05 00:02:45 +00:00
-												scratch log-compaction

											
										
										
											2013-06-03 21:58:12 +00:00
+									return err
 								}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
-												wip/ move write/read to server.go and add test

											
										
										
											2013-08-15 19:25:00 +00:00
+								//--------------------------------------
 								// Config File
 								//--------------------------------------
-												feat(server): add FlushCommitIndex

											
										
										
											2014-03-20 23:29:36 +00:00
+								// Flushes commit index to the disk.
 								// So when the raft server restarts, it will commit up to the flushed commitIndex.
 								func (s *server) FlushCommitIndex() {
 									// Emit a debug trace so the flush is visible in the server log.
 									s.debugln("server.conf.update")
 									// Write the configuration to file. writeConf stores the log's
 									// current commit index together with the peer list, and panics
 									// if the temporary conf file cannot be written.
 									s.writeConf()
 								}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) writeConf() {
-												wip/ move write/read to server.go and add test

											
										
										
											2013-08-15 19:25:00 +00:00
-												add connectionstring

											
										
										
											2013-08-15 23:35:01 +00:00
+									peers := make([]*Peer, len(s.peers))
-												wip/ move write/read to server.go and add test

											
										
										
											2013-08-15 19:25:00 +00:00
 									i := 0
-												add connectionstring

											
										
										
											2013-08-15 23:35:01 +00:00
+									for _, peer := range s.peers {
 										peers[i] = peer.clone()
-												wip/ move write/read to server.go and add test

											
										
										
											2013-08-15 19:25:00 +00:00
+										i++
 									}
 									r := &Config{
 										CommitIndex: s.log.commitIndex,
 										Peers:       peers,
 									}
 									b, _ := json.Marshal(r)
 									confPath := path.Join(s.path, "conf")
 									tmpConfPath := path.Join(s.path, "conf.tmp")
-												Save config file atomically

											
										
										
											2014-01-10 12:49:33 +00:00
+									err := writeFileSynced(tmpConfPath, b, 0600)
-												wip/ move write/read to server.go and add test

											
										
										
											2013-08-15 19:25:00 +00:00
 									if err != nil {
 										panic(err)
 									}
 									os.Rename(tmpConfPath, confPath)
 								}
 								// Read the configuration for the server.
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) readConf() error {
-												wip/ move write/read to server.go and add test

											
										
										
											2013-08-15 19:25:00 +00:00
+									confPath := path.Join(s.path, "conf")
 									s.debugln("readConf.open ", confPath)
 									// open conf file
 									b, err := ioutil.ReadFile(confPath)
 									if err != nil {
 										return nil
 									}
 									conf := &Config{}
 									if err = json.Unmarshal(b, conf); err != nil {
 										return err
 									}
-												fix snapshot related issue

											
										
										
											2013-09-18 04:19:46 +00:00
+									s.log.updateCommitIndex(conf.CommitIndex)
-												wip/ move write/read to server.go and add test

											
										
										
											2013-08-15 19:25:00 +00:00
 									return nil
 								}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								//--------------------------------------
 								// Debugging
 								//--------------------------------------
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) debugln(v ...interface{}) {
-												refactor(server.go) do not construct the string if not necessary

											
										
										
											2013-11-27 04:43:02 +00:00
+									if logLevel > Debug {
 										debugf("[%s Term:%d] %s", s.name, s.Term(), fmt.Sprintln(v...))
 									}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								}
-												Server interface.

											
										
										
											2013-10-14 17:54:49 +00:00
+								func (s *server) traceln(v ...interface{}) {
-												refactor(server.go) do not construct the string if not necessary

											
										
										
											2013-11-27 04:43:02 +00:00
+									if logLevel > Trace {
 										tracef("[%s] %s", s.name, fmt.Sprintln(v...))
 									}
-												Refactor server event loop.

											
										
										
											2013-07-07 20:21:04 +00:00
+								}