2015-07-17 17:17:15 +00:00
package meta
import (
2015-07-23 21:53:39 +00:00
"bytes"
"encoding/json"
2015-07-17 17:17:15 +00:00
"errors"
"fmt"
2015-07-23 21:53:39 +00:00
"io/ioutil"
2015-07-17 17:17:15 +00:00
"math/rand"
"os"
"path/filepath"
2015-08-12 22:33:24 +00:00
"sync"
2015-07-17 17:17:15 +00:00
"time"
"github.com/hashicorp/raft"
"github.com/hashicorp/raft-boltdb"
)
// raftState abstracts how the meta.Store talks to the raft consensus layer,
// whether that layer runs on this node or on remote nodes. It is a form of the
// state design pattern: the store can swap implementations at runtime to
// change how it interacts with raft.
type raftState interface {
	// open starts the state; remove deletes any files it keeps on disk.
	open() error
	remove() error

	// initialize performs first-time setup after open.
	initialize() error

	// leader returns the address to treat as the current leader, or "" when
	// none is known. isLeader reports whether this node is that leader.
	leader() string
	isLeader() bool

	// sync is given an index to wait for and a timeout. Implementations
	// differ in how (and whether) they use the two arguments.
	sync(index uint64, timeout time.Duration) error

	// Peer management.
	setPeers(addrs []string) error
	addPeer(addr string) error
	removePeer(addr string) error
	peers() ([]string, error)

	// invalidate refreshes the store's metadata from the cluster when the
	// implementation needs to.
	invalidate() error

	// close releases whatever open acquired.
	close() error

	// lastIndex returns the most recent index known to this state.
	lastIndex() uint64

	// apply applies a serialized command to the raft log.
	apply(b []byte) error

	// snapshot requests a raft snapshot.
	snapshot() error

	// isLocal reports whether this state runs raft on the local node.
	isLocal() bool
}
2015-07-20 20:59:00 +00:00
// localRaft is a consensus strategy that uses a local raft implementation for
2015-07-17 17:17:15 +00:00
// consensus operations.
type localRaft struct {
2015-08-12 22:33:24 +00:00
wg sync . WaitGroup
closing chan struct { }
2015-07-17 19:27:09 +00:00
store * Store
raft * raft . Raft
transport * raft . NetworkTransport
peerStore raft . PeerStore
raftStore * raftboltdb . BoltStore
raftLayer * raftLayer
2015-07-17 17:17:15 +00:00
}
2015-07-23 21:53:39 +00:00
// remove deletes the raft state kept under the store's path: "raft.db",
// "peers.json" and "snapshots", in that order. It stops at the first failure
// and returns that error.
func (r *localRaft) remove() error {
	for _, name := range []string{"raft.db", "peers.json", "snapshots"} {
		if err := os.RemoveAll(filepath.Join(r.store.path, name)); err != nil {
			return err
		}
	}
	return nil
}
2015-07-22 22:13:22 +00:00
func ( r * localRaft ) updateMetaData ( ms * Data ) {
if ms == nil {
return
}
updated := false
r . store . mu . RLock ( )
if ms . Index > r . store . data . Index {
updated = true
}
r . store . mu . RUnlock ( )
if updated {
r . store . Logger . Printf ( "Updating metastore to term=%v index=%v" , ms . Term , ms . Index )
r . store . mu . Lock ( )
r . store . data = ms
2015-10-13 16:02:19 +00:00
// Signal any blocked goroutines that the meta store has been updated
r . store . notifyChanged ( )
2015-07-22 22:13:22 +00:00
r . store . mu . Unlock ( )
}
}
2015-07-17 17:17:15 +00:00
func ( r * localRaft ) invalidate ( ) error {
2015-07-22 22:13:22 +00:00
if r . store . IsLeader ( ) {
return nil
}
ms , err := r . store . rpc . fetchMetaData ( false )
if err != nil {
2015-10-06 16:20:13 +00:00
return fmt . Errorf ( "error fetching meta data: %s" , err )
2015-07-22 22:13:22 +00:00
}
r . updateMetaData ( ms )
2015-07-17 17:17:15 +00:00
return nil
}
2015-07-23 16:49:43 +00:00
func ( r * localRaft ) open ( ) error {
2015-08-12 22:33:24 +00:00
r . closing = make ( chan struct { } )
2015-07-17 17:17:15 +00:00
s := r . store
// Setup raft configuration.
config := raft . DefaultConfig ( )
2015-08-05 05:34:07 +00:00
config . LogOutput = ioutil . Discard
if s . clusterTracingEnabled {
config . Logger = s . Logger
}
2015-07-17 17:17:15 +00:00
config . HeartbeatTimeout = s . HeartbeatTimeout
config . ElectionTimeout = s . ElectionTimeout
config . LeaderLeaseTimeout = s . LeaderLeaseTimeout
config . CommitTimeout = s . CommitTimeout
2015-11-05 22:06:19 +00:00
// Since we actually never call `removePeer` this is safe.
// If in the future we decide to call remove peer we have to re-evaluate how to handle this
config . ShutdownOnRemove = false
2015-07-17 17:17:15 +00:00
2015-07-28 19:17:59 +00:00
// If no peers are set in the config or there is one and we are it, then start as a single server.
2015-08-05 05:34:07 +00:00
if len ( s . peers ) <= 1 {
config . EnableSingleNode = true
// Ensure we can always become the leader
config . DisableBootstrapAfterElect = false
}
2015-07-17 17:17:15 +00:00
// Build raft layer to multiplex listener.
2015-07-28 22:04:03 +00:00
r . raftLayer = newRaftLayer ( s . RaftListener , s . RemoteAddr )
2015-07-17 17:17:15 +00:00
// Create a transport layer
2015-08-05 05:34:07 +00:00
r . transport = raft . NewNetworkTransport ( r . raftLayer , 3 , 10 * time . Second , config . LogOutput )
2015-07-17 17:17:15 +00:00
// Create peer storage.
2015-07-17 19:27:09 +00:00
r . peerStore = raft . NewJSONPeers ( s . path , r . transport )
2015-07-17 17:17:15 +00:00
2015-08-05 05:34:07 +00:00
peers , err := r . peerStore . Peers ( )
if err != nil {
return err
}
2015-08-12 20:47:59 +00:00
// For single-node clusters, we can update the raft peers before we start the cluster if the hostname
// has changed.
if config . EnableSingleNode {
if err := r . peerStore . SetPeers ( [ ] string { s . RemoteAddr . String ( ) } ) ; err != nil {
return err
}
peers = [ ] string { s . RemoteAddr . String ( ) }
}
// If we have multiple nodes in the cluster, make sure our address is in the raft peers or
// we won't be able to boot into the cluster because the other peers will reject our new hostname. This
// is difficult to resolve automatically because we need to have all the raft peers agree on the current members
// of the cluster before we can change them.
2015-08-05 05:34:07 +00:00
if len ( peers ) > 0 && ! raft . PeerContained ( peers , s . RemoteAddr . String ( ) ) {
2015-11-05 22:06:19 +00:00
s . Logger . Printf ( "%s is not in the list of raft peers. Please update %v/peers.json on all raft nodes to have the same contents." , s . RemoteAddr . String ( ) , s . Path ( ) )
2015-08-05 05:34:07 +00:00
return fmt . Errorf ( "peers out of sync: %v not in %v" , s . RemoteAddr . String ( ) , peers )
}
2015-07-17 17:17:15 +00:00
// Create the log store and stable store.
store , err := raftboltdb . NewBoltStore ( filepath . Join ( s . path , "raft.db" ) )
if err != nil {
return fmt . Errorf ( "new bolt store: %s" , err )
}
2015-07-17 19:27:09 +00:00
r . raftStore = store
2015-07-17 17:17:15 +00:00
// Create the snapshot store.
snapshots , err := raft . NewFileSnapshotStore ( s . path , raftSnapshotsRetained , os . Stderr )
if err != nil {
return fmt . Errorf ( "file snapshot store: %s" , err )
}
// Create raft log.
2015-07-17 19:27:09 +00:00
ra , err := raft . NewRaft ( config , ( * storeFSM ) ( s ) , store , store , snapshots , r . peerStore , r . transport )
2015-07-17 17:17:15 +00:00
if err != nil {
return fmt . Errorf ( "new raft: %s" , err )
}
2015-07-17 19:27:09 +00:00
r . raft = ra
2015-07-17 17:17:15 +00:00
2015-08-12 22:33:24 +00:00
r . wg . Add ( 1 )
2015-08-12 21:27:47 +00:00
go r . logLeaderChanges ( )
2015-07-17 17:17:15 +00:00
return nil
}
2015-08-12 21:27:47 +00:00
func ( r * localRaft ) logLeaderChanges ( ) {
2015-08-12 22:33:24 +00:00
defer r . wg . Done ( )
2015-08-12 21:27:47 +00:00
// Logs our current state (Node at 1.2.3.4:8088 [Follower])
r . store . Logger . Printf ( r . raft . String ( ) )
for {
select {
2015-08-12 22:33:24 +00:00
case <- r . closing :
2015-08-12 21:27:47 +00:00
return
case <- r . raft . LeaderCh ( ) :
peers , err := r . peers ( )
if err != nil {
r . store . Logger . Printf ( "failed to lookup peers: %v" , err )
}
r . store . Logger . Printf ( "%v. peers=%v" , r . raft . String ( ) , peers )
}
}
}
2015-07-17 17:17:15 +00:00
func ( r * localRaft ) close ( ) error {
2015-12-07 12:23:10 +00:00
if r . closing != nil {
close ( r . closing )
}
2015-08-12 22:33:24 +00:00
r . wg . Wait ( )
2015-08-13 21:31:31 +00:00
if r . transport != nil {
r . transport . Close ( )
r . transport = nil
}
2015-07-17 17:28:50 +00:00
// Shutdown raft.
2015-07-17 19:27:09 +00:00
if r . raft != nil {
2015-08-05 21:41:39 +00:00
if err := r . raft . Shutdown ( ) . Error ( ) ; err != nil {
return err
}
2015-07-17 19:27:09 +00:00
r . raft = nil
2015-07-17 17:28:50 +00:00
}
2015-08-13 21:31:31 +00:00
2015-07-17 19:27:09 +00:00
if r . raftStore != nil {
r . raftStore . Close ( )
r . raftStore = nil
2015-07-17 17:28:50 +00:00
}
2015-07-17 17:17:15 +00:00
return nil
}
func ( r * localRaft ) initialize ( ) error {
s := r . store
// If we have committed entries then the store is already in the cluster.
2015-07-17 19:27:09 +00:00
if index , err := r . raftStore . LastIndex ( ) ; err != nil {
2015-07-17 17:17:15 +00:00
return fmt . Errorf ( "last index: %s" , err )
} else if index > 0 {
return nil
}
// Force set peers.
2015-07-17 19:27:09 +00:00
if err := r . setPeers ( s . peers ) ; err != nil {
2015-07-17 17:17:15 +00:00
return fmt . Errorf ( "set raft peers: %s" , err )
}
return nil
}
2015-07-17 18:39:13 +00:00
// apply applies a serialized command to the raft log.
func ( r * localRaft ) apply ( b [ ] byte ) error {
// Apply to raft log.
2015-07-17 19:27:09 +00:00
f := r . raft . Apply ( b , 0 )
2015-07-17 18:39:13 +00:00
if err := f . Error ( ) ; err != nil {
return err
}
// Return response if it's an error.
// No other non-nil objects should be returned.
resp := f . Response ( )
if err , ok := resp . ( error ) ; ok {
return lookupError ( err )
}
assert ( resp == nil , "unexpected response: %#v" , resp )
return nil
}
2015-07-17 18:34:39 +00:00
func ( r * localRaft ) lastIndex ( ) uint64 {
2015-07-17 19:27:09 +00:00
return r . raft . LastIndex ( )
2015-07-17 18:34:39 +00:00
}
2015-07-17 17:17:15 +00:00
// sync blocks until the store's metadata index is at least index, polling
// every 100ms. It returns a "timeout" error if timeout elapses first. The
// index is only examined on a tick, never before the first one.
func (r *localRaft) sync(index uint64, timeout time.Duration) error {
	poll := time.NewTicker(100 * time.Millisecond)
	defer poll.Stop()

	deadline := time.NewTimer(timeout)
	defer deadline.Stop()

	for {
		select {
		case <-deadline.C:
			return errors.New("timeout")

		case <-poll.C:
			// Compare the requested index against the current metadata.
			r.store.mu.Lock()
			reached := r.store.data.Index >= index
			r.store.mu.Unlock()

			if reached {
				return nil
			}
		}
	}
}
2015-07-17 18:43:42 +00:00
func ( r * localRaft ) snapshot ( ) error {
2015-07-17 19:27:09 +00:00
future := r . raft . Snapshot ( )
2015-07-17 18:43:42 +00:00
return future . Error ( )
}
2015-07-17 18:09:07 +00:00
// addPeer adds addr to the list of peers in the cluster.
func ( r * localRaft ) addPeer ( addr string ) error {
2015-07-17 19:27:09 +00:00
peers , err := r . peerStore . Peers ( )
2015-07-17 18:09:07 +00:00
if err != nil {
return err
}
if len ( peers ) >= 3 {
return nil
}
2015-07-17 19:27:09 +00:00
if fut := r . raft . AddPeer ( addr ) ; fut . Error ( ) != nil {
2015-07-17 18:09:07 +00:00
return fut . Error ( )
}
return nil
}
2015-10-06 16:10:37 +00:00
// removePeer removes addr from the list of peers in the cluster.
func ( r * localRaft ) removePeer ( addr string ) error {
// Only do this on the leader
2015-10-26 15:07:40 +00:00
if ! r . isLeader ( ) {
return errors . New ( "not the leader" )
}
if fut := r . raft . RemovePeer ( addr ) ; fut . Error ( ) != nil {
return fut . Error ( )
2015-10-06 16:10:37 +00:00
}
return nil
}
2015-07-17 18:03:04 +00:00
// setPeers sets a list of peers in the cluster.
func ( r * localRaft ) setPeers ( addrs [ ] string ) error {
2015-07-28 22:04:03 +00:00
return r . raft . SetPeers ( addrs ) . Error ( )
2015-07-17 18:03:04 +00:00
}
2015-07-23 21:53:39 +00:00
// peers returns the peer list held by the raft peer store.
func (r *localRaft) peers() ([]string, error) {
	return r.peerStore.Peers()
}
2015-07-17 17:17:15 +00:00
func ( r * localRaft ) leader ( ) string {
2015-07-17 19:27:09 +00:00
if r . raft == nil {
2015-07-17 17:17:15 +00:00
return ""
}
2015-10-14 18:18:01 +00:00
return r . raft . Leader ( )
2015-07-17 17:17:15 +00:00
}
2015-07-17 17:50:06 +00:00
func ( r * localRaft ) isLeader ( ) bool {
2015-07-17 19:27:09 +00:00
if r . raft == nil {
return false
}
return r . raft . State ( ) == raft . Leader
2015-07-17 17:50:06 +00:00
}
2015-11-10 15:20:19 +00:00
// isLocal always reports true: localRaft runs raft on this node.
func (r *localRaft) isLocal() bool {
	return true
}
2015-07-17 17:17:15 +00:00
// remoteRaft is the consensus strategy used when consensus operations are
// carried out by a remote raft cluster rather than on this node.
type remoteRaft struct {
	// store is the meta store this raft state serves.
	store *Store
}
2015-07-23 21:53:39 +00:00
// remove is a no-op for remoteRaft and always returns nil.
func (r *remoteRaft) remove() error {
	return nil
}
2015-07-17 17:17:15 +00:00
func ( r * remoteRaft ) updateMetaData ( ms * Data ) {
if ms == nil {
return
}
updated := false
r . store . mu . RLock ( )
if ms . Index > r . store . data . Index {
updated = true
}
r . store . mu . RUnlock ( )
if updated {
r . store . Logger . Printf ( "Updating metastore to term=%v index=%v" , ms . Term , ms . Index )
r . store . mu . Lock ( )
r . store . data = ms
2015-10-13 16:02:19 +00:00
// Signal any blocked goroutines that the meta store has been updated
r . store . notifyChanged ( )
2015-07-17 17:17:15 +00:00
r . store . mu . Unlock ( )
}
}
func ( r * remoteRaft ) invalidate ( ) error {
ms , err := r . store . rpc . fetchMetaData ( false )
if err != nil {
2015-10-06 16:20:13 +00:00
return fmt . Errorf ( "error fetching meta data: %s" , err )
2015-07-17 17:17:15 +00:00
}
r . updateMetaData ( ms )
return nil
}
2015-07-17 18:03:04 +00:00
func ( r * remoteRaft ) setPeers ( addrs [ ] string ) error {
2015-07-23 21:53:39 +00:00
// Convert to JSON
var buf bytes . Buffer
enc := json . NewEncoder ( & buf )
if err := enc . Encode ( addrs ) ; err != nil {
return err
}
// Write out as JSON
return ioutil . WriteFile ( filepath . Join ( r . store . path , "peers.json" ) , buf . Bytes ( ) , 0755 )
2015-07-17 18:03:04 +00:00
}
2015-07-17 18:09:07 +00:00
// addPeer is not supported by remoteRaft: peers cannot be added from a node
// that relies on a remote raft cluster, so it always returns an error.
func (r *remoteRaft) addPeer(addr string) error {
	return errors.New("cannot add peer using remote raft")
}
2015-10-06 16:10:37 +00:00
// removePeer does nothing for remoteRaft; addr is ignored and nil is
// always returned.
func (r *remoteRaft) removePeer(addr string) error {
	return nil
}
2015-07-23 21:53:39 +00:00
// peers returns the peer list read from "peers.json" under the store's path.
func (r *remoteRaft) peers() ([]string, error) {
	peersPath := filepath.Join(r.store.path, "peers.json")
	return readPeersJSON(peersPath)
}
2015-07-23 16:49:43 +00:00
func ( r * remoteRaft ) open ( ) error {
2015-07-23 21:53:39 +00:00
if err := r . setPeers ( r . store . peers ) ; err != nil {
return err
}
2015-07-17 17:17:15 +00:00
go func ( ) {
for {
select {
case <- r . store . closing :
return
default :
}
ms , err := r . store . rpc . fetchMetaData ( true )
if err != nil {
r . store . Logger . Printf ( "fetch metastore: %v" , err )
time . Sleep ( time . Second )
continue
}
r . updateMetaData ( ms )
}
} ( )
return nil
}
// close is a no-op for remoteRaft and always returns nil. The goroutine
// started by open is stopped through the store's closing channel instead.
func (r *remoteRaft) close() error {
	return nil
}
2015-07-17 18:39:13 +00:00
// apply is not supported by remoteRaft: commands cannot be applied to the
// raft log from the remote raft state, so it always returns an error.
func (r *remoteRaft) apply(b []byte) error {
	return errors.New("cannot apply log while in remote raft state")
}
2015-07-17 17:17:15 +00:00
// initialize is a no-op for remoteRaft and always returns nil.
func (r *remoteRaft) initialize() error {
	return nil
}
// leader returns one of the store's configured peers chosen at random, or ""
// when no peers are configured. It does not identify the actual raft leader.
func (r *remoteRaft) leader() string {
	candidates := r.store.peers
	if n := len(candidates); n > 0 {
		return candidates[rand.Intn(n)]
	}
	return ""
}
2015-07-17 17:50:06 +00:00
// isLeader always reports false for remoteRaft.
func (r *remoteRaft) isLeader() bool {
	return false
}
2015-11-10 15:20:19 +00:00
// isLocal always reports false: remoteRaft does not run raft on this node.
func (r *remoteRaft) isLocal() bool {
	return false
}
2015-07-17 18:34:39 +00:00
// lastIndex returns the index of the store's cached metadata.
func (r *remoteRaft) lastIndex() uint64 {
	cached := r.store.cachedData()
	return cached.Index
}
2015-07-17 17:17:15 +00:00
// sync invalidates the store so that its metadata is refreshed. Both index
// and timeout are currently ignored.
func (r *remoteRaft) sync(index uint64, timeout time.Duration) error {
	//FIXME: jwilder: check index and timeout
	err := r.store.invalidate()
	return err
}
2015-07-17 18:43:42 +00:00
// snapshot is not supported by remoteRaft: snapshots cannot be taken from the
// remote raft state, so it always returns an error.
func (r *remoteRaft) snapshot() error {
	return errors.New("cannot snapshot while in remote raft state")
}
2015-07-23 21:53:39 +00:00
// readPeersJSON loads a JSON array of peer addresses from the file at path.
//
// A missing or empty file is not an error: it yields a nil slice and a nil
// error. Any other read failure, or content that does not decode into a list
// of strings, is returned as an error.
func readPeersJSON(path string) ([]string, error) {
	// Read the file; a file that does not exist is treated as "no peers".
	data, err := ioutil.ReadFile(path)
	switch {
	case err == nil, os.IsNotExist(err):
		// Carry on with whatever was read (nothing, if the file is missing).
	default:
		return nil, err
	}

	// Check for no peers.
	if len(data) == 0 {
		return nil, nil
	}

	// Decode the peers.
	var peers []string
	if err := json.NewDecoder(bytes.NewReader(data)).Decode(&peers); err != nil {
		return nil, err
	}
	return peers, nil
}