add snapshot unit test

parent 718ef79f6f
commit f15cadeffd

peer.go (70 changes)
@@ -99,18 +99,42 @@ func (p *Peer) stop() {
 // Flush
 //--------------------------------------

-// Sends an AppendEntries RPC but does not obtain a lock on the server. This
-// method should only be called from the server.
-func (p *Peer) internalFlush() (uint64, bool, error) {
+// if internal is set true, sends an AppendEntries RPC but does not obtain a lock
+// on the server.
+func (p *Peer) flush(internal bool) (uint64, bool, error) {
 	// Retrieve the peer data within a lock that is separate from the
 	// server lock when creating the request. Otherwise a deadlock can
 	// occur.
 	p.mutex.Lock()
+	server, prevLogIndex := p.server, p.prevLogIndex
+	p.mutex.Unlock()
+
+	var req *AppendEntriesRequest
+	snapShotNeeded := false
+
+	// we need to hold the log lock to create AppendEntriesRequest
+	// avoid snapshot to delete the desired entries before AEQ()
+	server.log.mutex.Lock()
+	if prevLogIndex >= server.log.StartIndex() {
+		if internal {
+			req = server.createInternalAppendEntriesRequest(prevLogIndex)
+		} else {
+			req = server.createAppendEntriesRequest(prevLogIndex)
+		}
+	} else {
+		snapShotNeeded = true
+	}
+	server.log.mutex.Unlock()
+
+	p.mutex.Lock()
 	defer p.mutex.Unlock()
-	if p.prevLogIndex < p.server.log.StartIndex() {
-		req := p.server.createSnapshotRequest()
+
+	if snapShotNeeded {
+		req := server.createSnapshotRequest()
 		return p.sendSnapshotRequest(req)
+	} else {
+		return p.sendFlushRequest(req)
 	}
-	req := p.server.createInternalAppendEntriesRequest(p.prevLogIndex)
-	return p.sendFlushRequest(req)

 }

 // send Snapshot Request
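Note on the hunk above: the new flush() copies p.server and p.prevLogIndex under the peer mutex, releases it, and only then takes the log mutex, so the peer lock and the log lock are never held at the same time. Holding the log lock while the request is built is what keeps a concurrent snapshot from compacting away the entries the request still needs, which is what the "avoid snapshot to delete the desired entries" comment refers to. A minimal standalone sketch of that locking pattern (the state and journal types below are illustrative stand-ins, not go-raft types):

package main

import (
	"fmt"
	"sync"
)

// journal stands in for the server log; startIndex moves forward after compaction.
type journal struct {
	mutex      sync.Mutex
	startIndex uint64
}

// state stands in for the peer: it tracks how far the follower has been replicated.
type state struct {
	mutex        sync.Mutex
	prevLogIndex uint64
	log          *journal
}

// snapshotNeeded copies prevLogIndex under the state lock, releases it, and only
// then takes the journal lock, so the two mutexes are never held together.
func (s *state) snapshotNeeded() bool {
	s.mutex.Lock()
	prevLogIndex, log := s.prevLogIndex, s.log
	s.mutex.Unlock()

	log.mutex.Lock()
	defer log.mutex.Unlock()
	return prevLogIndex < log.startIndex
}

func main() {
	s := &state{prevLogIndex: 3, log: &journal{startIndex: 5}}
	fmt.Println(s.snapshotNeeded()) // true: entries before startIndex were compacted away
}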
@@ -207,35 +231,7 @@ func (p *Peer) heartbeatTimeoutFunc(startChannel chan bool) {
 	// Flush the peer when we get a heartbeat timeout. If the channel is
 	// closed then the peer is getting cleaned up and we should exit.
 	if _, ok := <-c; ok {
-		// Retrieve the peer data within a lock that is separate from the
-		// server lock when creating the request. Otherwise a deadlock can
-		// occur.
-		p.mutex.Lock()
-		server, prevLogIndex := p.server, p.prevLogIndex
-		p.mutex.Unlock()
-
-		var req *AppendEntriesRequest
-		snapShotNeeded := false
-
-		// we need to hold the log lock to create AppendEntriesRequest
-		// avoid snapshot to delete the desired entries before AEQ()
-		server.log.mutex.Lock()
-		if prevLogIndex >= server.log.StartIndex() {
-			req = server.createAppendEntriesRequest(prevLogIndex)
-		} else {
-			snapShotNeeded = true
-		}
-		server.log.mutex.Unlock()
-
-		p.mutex.Lock()
-		if snapShotNeeded {
-			req := server.createSnapshotRequest()
-			p.sendSnapshotRequest(req)
-		} else {
-			p.sendFlushRequest(req)
-		}
-		p.mutex.Unlock()
-
+		p.flush(false)

 	} else {
 		break
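The surviving heartbeat path above keeps the `if _, ok := <-c; ok` guard; it works because a receive from a closed channel returns immediately with ok set to false. A tiny standalone sketch of that idiom (not go-raft code):

package main

import "fmt"

func main() {
	c := make(chan bool, 1)
	c <- true
	if _, ok := <-c; ok {
		fmt.Println("got a heartbeat tick, flush the peer")
	}

	close(c)
	if _, ok := <-c; !ok {
		fmt.Println("channel closed, the peer is being cleaned up, exit the loop")
	}
}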
server.go (42 changes)
@@ -369,10 +369,10 @@ func (s *Server) do(command Command) error {
 	// Capture the term that this command is executing within.
 	currentTerm := s.currentTerm

-	// TEMP to solve the issue 18
-	for _, peer := range s.peers {
-		peer.pause()
-	}
+	// // TEMP to solve the issue 18
+	// for _, peer := range s.peers {
+	// 	peer.pause()
+	// }

 	// Add a new entry to the log.
 	entry := s.log.CreateEntry(s.currentTerm, command)
@@ -386,7 +386,7 @@ func (s *Server) do(command Command) error {
 		peer := _peer
 		go func() {

-			term, success, err := peer.internalFlush()
+			term, success, err := peer.flush(true)

 			// Demote if we encounter a higher term.
 			if err != nil {
@@ -419,9 +419,9 @@ loop:
 		// If we received enough votes then stop waiting for more votes.
 		if responseCount >= s.QuorumSize() {
 			committed = true
-			for _, peer := range s.peers {
-				peer.resume()
-			}
+			// for _, peer := range s.peers {
+			// 	peer.resume()
+			// }
 			break
 		}

@@ -434,9 +434,9 @@ loop:
 			}
 			responseCount++
 		case <-afterBetween(s.ElectionTimeout(), s.ElectionTimeout()*2):
-			for _, peer := range s.peers {
-				peer.resume()
-			}
+			// for _, peer := range s.peers {
+			// 	peer.resume()
+			// }
 			break loop
 		}
 	}
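The two hunks above are the leader's commit loop in do(): it counts AppendEntries responses until a quorum is reached, or gives up after a randomized election-timeout window. A rough standalone sketch of that count-until-quorum-or-timeout pattern (cluster size, delays, and names are made up for illustration; this is not go-raft's API):

package main

import (
	"fmt"
	"time"
)

func main() {
	followers := 4                // a 5-node cluster including the leader
	quorum := (followers+1)/2 + 1 // 3 of 5
	acks := make(chan bool, followers)

	// Pretend every follower acknowledges the entry after a short delay.
	for i := 0; i < followers; i++ {
		go func() {
			time.Sleep(10 * time.Millisecond)
			acks <- true
		}()
	}

	committed := false
	responseCount := 1 // the leader counts its own log entry
loop:
	for {
		select {
		case <-acks:
			responseCount++
			if responseCount >= quorum {
				committed = true
				break loop
			}
		case <-time.After(150 * time.Millisecond):
			// No quorum within the timeout window; stop waiting.
			break loop
		}
	}
	fmt.Println("committed:", committed)
}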
@@ -515,6 +515,7 @@ func (s *Server) createInternalAppendEntriesRequest(prevLogIndex uint64) *Append
 // server is elected then true is returned. If another server is elected then
 // false is returned.
 func (s *Server) promote() (bool, error) {

 	for {
 		// Start a new election.
 		term, lastLogIndex, lastLogTerm, err := s.promoteToCandidate()
@@ -610,7 +611,6 @@ func (s *Server) promoteToCandidate() (uint64, uint64, uint64, error) {
 	s.leader = ""
 	// Pause the election timer while we're a candidate.
 	s.electionTimer.Pause()

 	// Return server state so we can check for it during leader promotion.
 	lastLogIndex, lastLogTerm := s.log.CommitInfo()
 	return s.currentTerm, lastLogIndex, lastLogTerm, nil
@@ -655,7 +655,6 @@ func (s *Server) promoteToLeader(term uint64, lastLogIndex uint64, lastLogTerm u
 func (s *Server) RequestVote(req *RequestVoteRequest) (*RequestVoteResponse, error) {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()

 	// Fail if the server is not running.
 	if !s.Running() {
 		return NewRequestVoteResponse(s.currentTerm, false), fmt.Errorf("raft.Server: Server is stopped")
@@ -746,7 +745,6 @@ func (s *Server) AddPeer(name string) error {
 			peer.resume()
 		}
 		s.peers[peer.name] = peer
-		peer.resume()

 	}
 	return nil
@@ -827,24 +825,24 @@ func (s *Server) SnapshotPath(lastIndex uint64, lastTerm uint64) string {
 }


-func (s *Server) SnapshotRecovery(index uint64, term uint64, machineState int) (*SnapshotResponse, error){
+func (s *Server) SnapshotRecovery(req *SnapshotRequest) (*SnapshotResponse, error){
 	//
 	s.mutex.Lock()
 	defer s.mutex.Unlock()

 	//recovery machine state
-	s.machineState = machineState
+	s.machineState = req.MachineState

 	//update term and index
-	s.currentTerm = term
-	s.log.UpdateCommitIndex(index)
-	snapshotPath := s.SnapshotPath(index, term)
-	s.currentSnapshot = &Snapshot{index, term, machineState, snapshotPath}
+	s.currentTerm = req.LastTerm
+	s.log.UpdateCommitIndex(req.LastIndex)
+	snapshotPath := s.SnapshotPath(req.LastIndex, req.LastTerm)
+	s.currentSnapshot = &Snapshot{req.LastIndex, req.LastTerm, req.MachineState, snapshotPath}
 	s.saveSnapshot()
-	s.log.Compact(index, term)
+	s.log.Compact(req.LastIndex, req.LastTerm)


-	return NewSnapshotResponse(term, true, index), nil
+	return NewSnapshotResponse(req.LastTerm, true, req.LastIndex), nil

 }

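Given the commit message, a unit test for the reworked SnapshotRecovery would presumably build a SnapshotRequest and call the method directly. A hedged sketch of such a test inside the raft package; the newTestServer helper is an assumption modeled on the newTestCluster helper visible in test.go below, and the struct field names are taken from the req.LastIndex / req.LastTerm / req.MachineState accesses above:

func TestSnapshotRecoverySketch(t *testing.T) {
	// newTestServer is assumed to exist alongside newTestCluster in test.go.
	server := newTestServer("1", &testTransporter{})
	defer server.Stop()

	// Field names mirror the accesses in the new SnapshotRecovery above;
	// the concrete values are placeholders.
	req := &SnapshotRequest{LastIndex: 5, LastTerm: 1, MachineState: 42}
	if _, err := server.SnapshotRecovery(req); err != nil {
		t.Fatalf("unexpected snapshot recovery error: %v", err)
	}
}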
server_test.go

@@ -5,7 +5,6 @@ import (
 	"sync"
 	"testing"
 	"time"
-	"fmt"
 )

 //------------------------------------------------------------------------------
@@ -390,16 +389,12 @@ func TestServerMultiNode(t *testing.T) {
 	time.Sleep(100 * time.Millisecond)
 	leader.Stop()
 	time.Sleep(100 * time.Millisecond)

 	// Check that either server 2 or 3 is the leader now.
 	mutex.Lock()
 	if servers["2"].State() != Leader && servers["3"].State() != Leader {
-		t.Fatalf("Expected leader re-election: 2=%v, 3=%v", servers["2"].state, servers["3"].state)
+		t.Fatalf("Expected leader re-election: 2=%v, 3=%v\n", servers["2"].state, servers["3"].state)
 	}
 	mutex.Unlock()

 	// Stop the servers.
 	for _, server := range servers {
 		server.Stop()
 	}
 }

snapshot.go

@@ -38,7 +38,7 @@ func (ss *Snapshot) Save() error {

 	// open file
 	file, err := os.OpenFile(ss.path, os.O_CREATE|os.O_WRONLY, 0600)


 	if err != nil {
 		return err
 	}
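For reference, a standalone sketch of the os.OpenFile pattern used in Save() above: create the file if it does not exist, open it write-only, and keep permissions owner-only. The file name and payload below are placeholders, not go-raft's snapshot format:

package main

import (
	"log"
	"os"
)

func main() {
	// O_CREATE|O_WRONLY with mode 0600 matches the flags in the hunk above.
	file, err := os.OpenFile("snapshot.demo", os.O_CREATE|os.O_WRONLY, 0600)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	if _, err := file.WriteString("machine state goes here\n"); err != nil {
		log.Fatal(err)
	}
}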
test.go (6 changes)
@@ -4,7 +4,7 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
-	"time"
+	"time"
 )

 const (
@@ -98,6 +98,7 @@ func newTestCluster(names []string, transporter Transporter, lookup map[string]*
 type testTransporter struct {
 	sendVoteRequestFunc func(server *Server, peer *Peer, req *RequestVoteRequest) (*RequestVoteResponse, error)
 	sendAppendEntriesRequestFunc func(server *Server, peer *Peer, req *AppendEntriesRequest) (*AppendEntriesResponse, error)
+	sendSnapshotRequestFunc func(server *Server, peer *Peer, req *SnapshotRequest) (*SnapshotResponse, error)
 }

 func (t *testTransporter) SendVoteRequest(server *Server, peer *Peer, req *RequestVoteRequest) (*RequestVoteResponse, error) {

@@ -108,6 +109,9 @@ func (t *testTransporter) SendAppendEntriesRequest(server *Server, peer *Peer, r
 	return t.sendAppendEntriesRequestFunc(server, peer, req)
 }

+func (t *testTransporter) SendSnapshotRequest(server *Server, peer *Peer, req *SnapshotRequest) (*SnapshotResponse, error) {
+	return t.sendSnapshotRequestFunc(server, peer, req)
+}

 //--------------------------------------
 // Join Command
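With the new sendSnapshotRequestFunc hook, a test can stub snapshot delivery the same way the vote and append-entries hooks are stubbed. A hedged usage sketch; the NewSnapshotResponse(term, success, index) call shape and the req.LastTerm / req.LastIndex fields are taken from the SnapshotRecovery hunk above:

transporter := &testTransporter{}
transporter.sendSnapshotRequestFunc = func(server *Server, peer *Peer, req *SnapshotRequest) (*SnapshotResponse, error) {
	// Pretend the follower installed the snapshot successfully.
	return NewSnapshotResponse(req.LastTerm, true, req.LastIndex), nil
}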