2013-04-14 21:37:33 +00:00
package raft
import (
2013-04-16 02:47:59 +00:00
"bufio"
2013-04-14 21:37:33 +00:00
"errors"
"fmt"
"io"
"os"
2013-04-16 04:19:29 +00:00
"sync"
2013-04-14 21:37:33 +00:00
)
//------------------------------------------------------------------------------
//
// Typedefs
//
//------------------------------------------------------------------------------
// A log is a collection of log entries that are persisted to durable storage.
type Log struct {
2013-05-28 18:46:27 +00:00
ApplyFunc func ( Command ) error
2013-05-10 14:47:24 +00:00
file * os . File
2013-06-03 21:58:12 +00:00
path string
2013-05-10 14:47:24 +00:00
entries [ ] * LogEntry
commitIndex uint64
mutex sync . Mutex
2013-06-03 21:58:12 +00:00
startIndex uint64 // the index before the first entry in the Log entries
startTerm uint64
2013-04-14 21:37:33 +00:00
}
//------------------------------------------------------------------------------
//
// Constructor
//
//------------------------------------------------------------------------------
// Creates a new log.
func NewLog ( ) * Log {
2013-05-10 03:50:57 +00:00
return & Log { }
2013-04-14 21:37:33 +00:00
}
2013-04-28 04:51:17 +00:00
//------------------------------------------------------------------------------
//
// Accessors
//
//------------------------------------------------------------------------------
2013-06-03 21:58:12 +00:00
// SetStartIndex sets the index that immediately precedes the first entry held
// in memory. It does not acquire the log mutex.
func (l *Log) SetStartIndex(i uint64) {
	l.startIndex = i
}
// StartIndex returns the index that immediately precedes the first entry held
// in memory. It does not acquire the log mutex.
func (l *Log) StartIndex() uint64 {
	return l.startIndex
}
func ( l * Log ) SetStartTerm ( t uint64 ) {
2013-06-05 00:02:45 +00:00
l . startTerm = t
2013-06-03 21:58:12 +00:00
}
2013-04-28 04:51:17 +00:00
//--------------------------------------
// Log Indices
//--------------------------------------
// The current index in the log.
func ( l * Log ) CurrentIndex ( ) uint64 {
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-04-28 21:23:21 +00:00
2013-04-28 04:51:17 +00:00
if len ( l . entries ) == 0 {
2013-06-03 21:58:12 +00:00
return l . startIndex
2013-04-28 04:51:17 +00:00
}
2013-05-08 20:22:08 +00:00
return l . entries [ len ( l . entries ) - 1 ] . Index
2013-04-28 04:51:17 +00:00
}
2013-06-05 00:02:45 +00:00
// internalCurrentIndex returns the index of the last entry in the log, or the
// start index when the log holds no entries. It does not acquire the log
// mutex, so it can be used by methods that already hold it.
func (l *Log) internalCurrentIndex() uint64 {
	if n := len(l.entries); n > 0 {
		return l.entries[n-1].Index
	}
	return l.startIndex
}
2013-04-28 04:51:17 +00:00
// NextIndex returns the index that the next appended entry would receive,
// which is one past CurrentIndex.
func (l *Log) NextIndex() uint64 {
	current := l.CurrentIndex()
	return current + 1
}
// CommitIndex returns the index of the last committed entry. It does not
// acquire the log mutex.
func (l *Log) CommitIndex() uint64 {
	return l.commitIndex
}
2013-05-08 03:56:32 +00:00
// IsEmpty reports whether the log currently holds zero in-memory entries. The
// log mutex is held for the duration of the call.
func (l *Log) IsEmpty() bool {
	l.mutex.Lock()
	defer l.mutex.Unlock()

	return len(l.entries) == 0
}
2013-06-03 19:13:38 +00:00
// LastCommandName returns the command name of the last entry in the log. It
// returns the empty string when the log has no entries or when the last entry
// carries no command. The log mutex is held for the duration of the call.
func (l *Log) LastCommandName() string {
	l.mutex.Lock()
	defer l.mutex.Unlock()

	n := len(l.entries)
	if n == 0 {
		return ""
	}
	last := l.entries[n-1].Command
	if last == nil {
		return ""
	}
	return last.CommandName()
}
2013-04-28 04:51:17 +00:00
//--------------------------------------
// Log Terms
//--------------------------------------
// The current term in the log.
func ( l * Log ) CurrentTerm ( ) uint64 {
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-04-28 21:23:21 +00:00
2013-04-28 04:51:17 +00:00
if len ( l . entries ) == 0 {
2013-06-03 21:58:12 +00:00
return l . startTerm
2013-04-28 04:51:17 +00:00
}
2013-05-08 20:22:08 +00:00
return l . entries [ len ( l . entries ) - 1 ] . Term
2013-04-28 04:51:17 +00:00
}
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Methods
//
//------------------------------------------------------------------------------
//--------------------------------------
// State
//--------------------------------------
// Opens the log file and reads existing entries. The log can remain open and
// continue to append entries to the end of the log.
func ( l * Log ) Open ( path string ) error {
2013-04-16 04:19:29 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-04-28 21:23:21 +00:00
2013-04-14 21:37:33 +00:00
// Read all the entries from the log if one exists.
2013-04-16 04:02:08 +00:00
var lastIndex int = 0
2013-04-14 21:37:33 +00:00
if _ , err := os . Stat ( path ) ; ! os . IsNotExist ( err ) {
// Open the log file.
file , err := os . Open ( path )
if err != nil {
return err
}
defer file . Close ( )
2013-04-16 02:47:59 +00:00
reader := bufio . NewReader ( file )
2013-04-28 21:23:21 +00:00
2013-04-14 21:37:33 +00:00
// Read the file and decode entries.
2013-04-16 02:47:59 +00:00
for {
if _ , err := reader . Peek ( 1 ) ; err == io . EOF {
break
}
2013-04-14 21:37:33 +00:00
// Instantiate log entry and decode into it.
entry := NewLogEntry ( l , 0 , 0 , nil )
2013-04-16 04:02:08 +00:00
n , err := entry . Decode ( reader )
2013-04-16 02:47:59 +00:00
if err != nil {
2013-04-16 04:02:08 +00:00
file . Close ( )
if err = os . Truncate ( path , int64 ( lastIndex ) ) ; err != nil {
return fmt . Errorf ( "raft.Log: Unable to recover: %v" , err )
}
break
2013-04-14 21:37:33 +00:00
}
2013-05-28 18:46:27 +00:00
// Apply the command.
if err = l . ApplyFunc ( entry . Command ) ; err != nil {
file . Close ( )
return err
}
2013-04-14 21:37:33 +00:00
// Append entry.
l . entries = append ( l . entries , entry )
2013-05-10 03:50:57 +00:00
2013-05-28 18:46:27 +00:00
l . commitIndex = entry . Index
lastIndex += n
2013-04-14 21:37:33 +00:00
}
2013-04-16 02:47:59 +00:00
file . Close ( )
2013-04-14 21:37:33 +00:00
}
// Open the file for appending.
var err error
2013-04-28 21:23:21 +00:00
l . file , err = os . OpenFile ( path , os . O_APPEND | os . O_CREATE | os . O_WRONLY , 0600 )
2013-04-14 21:37:33 +00:00
if err != nil {
return err
}
2013-06-03 21:58:12 +00:00
l . path = path
2013-04-14 21:37:33 +00:00
return nil
}
// Closes the log file.
func ( l * Log ) Close ( ) {
2013-04-16 04:19:29 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-04-14 21:37:33 +00:00
if l . file != nil {
l . file . Close ( )
l . file = nil
}
l . entries = make ( [ ] * LogEntry , 0 )
}
//--------------------------------------
2013-04-28 04:51:17 +00:00
// Entries
2013-04-14 21:37:33 +00:00
//--------------------------------------
2013-04-28 04:51:17 +00:00
// Creates a log entry associated with this log.
2013-04-28 21:23:21 +00:00
func ( l * Log ) CreateEntry ( term uint64 , command Command ) * LogEntry {
2013-04-28 04:51:17 +00:00
return NewLogEntry ( l , l . NextIndex ( ) , term , command )
}
2013-04-30 04:13:50 +00:00
// Checks if the log contains a given index/term combination.
func ( l * Log ) ContainsEntry ( index uint64 , term uint64 ) bool {
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-06-03 21:58:12 +00:00
if index <= l . startIndex || index > ( l . startIndex + uint64 ( len ( l . entries ) ) ) {
2013-04-30 04:13:50 +00:00
return false
}
2013-05-08 20:22:08 +00:00
return ( l . entries [ index - 1 ] . Term == term )
2013-04-30 04:13:50 +00:00
}
2013-05-05 19:36:23 +00:00
// Retrieves a list of entries after a given index. This function also returns
// the term of the index provided.
func ( l * Log ) GetEntriesAfter ( index uint64 ) ( [ ] * LogEntry , uint64 ) {
2013-06-05 00:02:45 +00:00
// l.mutex.Lock()
// defer l.mutex.Unlock()
2013-05-05 19:36:23 +00:00
// Return an error if the index doesn't exist.
2013-06-03 21:58:12 +00:00
if index > ( uint64 ( len ( l . entries ) ) + l . startIndex ) {
2013-05-05 19:36:23 +00:00
panic ( fmt . Sprintf ( "raft.Log: Index is beyond end of log: %v" , index ) )
}
2013-06-05 00:02:45 +00:00
fmt . Println ( "getEA" , index , l . startIndex )
2013-05-05 19:36:23 +00:00
// If we're going from the beginning of the log then return the whole log.
2013-06-05 00:02:45 +00:00
if index == l . startIndex {
fmt . Println ( len ( l . entries ) )
if len ( l . entries ) > 0 {
fmt . Println ( l . entries [ 0 ] . Index )
}
return l . entries , l . startTerm
2013-05-05 19:36:23 +00:00
}
// Determine the term at the given entry and return a subslice.
2013-06-05 00:02:45 +00:00
term := l . entries [ index - 1 - l . startIndex ] . Term
2013-06-03 21:58:12 +00:00
return l . entries [ index - l . startIndex : ] , term
2013-05-05 19:36:23 +00:00
}
2013-04-28 22:36:46 +00:00
//--------------------------------------
// Commit
//--------------------------------------
// Retrieves the last index and term that has been committed to the log.
func ( l * Log ) CommitInfo ( ) ( index uint64 , term uint64 ) {
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
// If we don't have any entries then just return zeros.
if l . commitIndex == 0 {
2013-06-05 00:02:45 +00:00
fmt . Println ( "commitinfo: zero" )
2013-04-28 22:36:46 +00:00
return 0 , 0
}
2013-06-03 21:58:12 +00:00
// just after snapshot
2013-06-05 00:02:45 +00:00
if l . commitIndex == l . startIndex {
fmt . Println ( "commitinfo: " , l . startIndex , " " , l . startTerm )
2013-06-03 21:58:12 +00:00
return l . startIndex , l . startTerm
}
2013-06-05 00:02:45 +00:00
fmt . Println ( l . entries )
fmt . Println ( l . commitIndex , " " , l . startIndex )
2013-04-28 22:36:46 +00:00
// Return the last index & term from the last committed entry.
2013-06-03 21:58:12 +00:00
lastCommitEntry := l . entries [ l . commitIndex - 1 - l . startIndex ]
2013-05-08 20:22:08 +00:00
return lastCommitEntry . Index , lastCommitEntry . Term
2013-04-28 22:36:46 +00:00
}
2013-06-05 00:02:45 +00:00
// UpdateCommitIndex overwrites the commit index with the given value while
// holding the log mutex. Unlike SetCommitIndex it neither applies commands nor
// writes entries to the log file.
func (l *Log) UpdateCommitIndex(index uint64) {
	l.mutex.Lock()
	defer l.mutex.Unlock()

	l.commitIndex = index
}
2013-04-28 22:36:46 +00:00
// Updates the commit index and writes entries after that index to the stable storage.
2013-04-16 04:19:29 +00:00
func ( l * Log ) SetCommitIndex ( index uint64 ) error {
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-04-28 22:36:46 +00:00
// Panic if we don't have any way to apply commands.
if l . ApplyFunc == nil {
panic ( "raft.Log: Apply function not set" )
}
2013-04-16 04:19:29 +00:00
// Do not allow previous indices to be committed again.
if index < l . commitIndex {
return fmt . Errorf ( "raft.Log: Commit index (%d) ahead of requested commit index (%d)" , l . commitIndex , index )
}
2013-06-05 00:02:45 +00:00
if index > l . startIndex + uint64 ( len ( l . entries ) ) {
2013-04-28 22:36:46 +00:00
return fmt . Errorf ( "raft.Log: Commit index (%d) out of range (%d)" , index , len ( l . entries ) )
}
2013-04-16 04:19:29 +00:00
// Find all entries whose index is between the previous index and the current index.
2013-04-28 22:36:46 +00:00
for i := l . commitIndex + 1 ; i <= index ; i ++ {
2013-06-03 21:58:12 +00:00
entry := l . entries [ i - 1 - l . startIndex ]
2013-04-28 21:23:21 +00:00
2013-05-28 18:46:27 +00:00
// Apply the changes to the state machine.
if err := l . ApplyFunc ( entry . Command ) ; err != nil {
return err
}
2013-04-28 22:36:46 +00:00
// Write to storage.
if err := entry . Encode ( l . file ) ; err != nil {
return err
2013-04-16 04:19:29 +00:00
}
2013-04-28 22:36:46 +00:00
// Update commit index.
2013-05-08 20:22:08 +00:00
l . commitIndex = entry . Index
2013-04-16 04:19:29 +00:00
}
2013-04-28 21:23:21 +00:00
2013-04-16 04:19:29 +00:00
return nil
}
2013-05-01 04:44:16 +00:00
//--------------------------------------
// Truncation
//--------------------------------------
// Truncates the log to the given index and term. This only works if the log
// at the index has not been committed.
func ( l * Log ) Truncate ( index uint64 , term uint64 ) error {
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-05-01 05:11:23 +00:00
2013-05-01 04:44:16 +00:00
// Do not allow committed entries to be truncated.
if index < l . CommitIndex ( ) {
2013-06-05 00:02:45 +00:00
fmt . Printf ( "raft.Log: Index is already committed (%v): (IDX=%v, TERM=%v)\n" , l . CommitIndex ( ) , index , term )
2013-05-01 04:44:16 +00:00
return fmt . Errorf ( "raft.Log: Index is already committed (%v): (IDX=%v, TERM=%v)" , l . CommitIndex ( ) , index , term )
}
// Do not truncate past end of entries.
2013-06-05 00:02:45 +00:00
if index > l . startIndex + uint64 ( len ( l . entries ) ) {
fmt . Printf ( "raft.Log: Entry index does not exist (MAX=%v): (IDX=%v, TERM=%v)\n" , len ( l . entries ) , index , term )
2013-05-01 04:44:16 +00:00
return fmt . Errorf ( "raft.Log: Entry index does not exist (MAX=%v): (IDX=%v, TERM=%v)" , len ( l . entries ) , index , term )
}
// If we're truncating everything then just clear the entries.
2013-06-05 00:02:45 +00:00
if index == l . startIndex {
2013-05-01 04:44:16 +00:00
l . entries = [ ] * LogEntry { }
} else {
// Do not truncate if the entry at index does not have the matching term.
2013-06-03 21:58:12 +00:00
entry := l . entries [ index - l . startIndex - 1 ]
2013-05-08 20:22:08 +00:00
if len ( l . entries ) > 0 && entry . Term != term {
2013-06-05 00:02:45 +00:00
fmt . Printf ( "raft.Log: Entry at index does not have matching term (%v): (IDX=%v, TERM=%v)\n" , entry . Term , index , term )
2013-05-08 20:22:08 +00:00
return fmt . Errorf ( "raft.Log: Entry at index does not have matching term (%v): (IDX=%v, TERM=%v)" , entry . Term , index , term )
2013-05-01 04:44:16 +00:00
}
// Otherwise truncate up to the desired entry.
2013-06-05 00:02:45 +00:00
if index < l . startIndex + uint64 ( len ( l . entries ) ) {
2013-06-03 21:58:12 +00:00
l . entries = l . entries [ 0 : index - l . startIndex ]
2013-05-01 04:44:16 +00:00
}
}
2013-05-01 05:11:23 +00:00
2013-05-01 04:44:16 +00:00
return nil
}
2013-04-16 04:19:29 +00:00
//--------------------------------------
// Append
//--------------------------------------
2013-04-30 04:13:50 +00:00
// Appends a series of entries to the log. These entries are not written to
// disk until SetCommitIndex() is called.
func ( l * Log ) AppendEntries ( entries [ ] * LogEntry ) error {
2013-04-16 04:19:29 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-04-30 04:13:50 +00:00
// Append each entry but exit if we hit an error.
for _ , entry := range entries {
if err := l . appendEntry ( entry ) ; err != nil {
return err
}
}
return nil
}
// AppendEntry appends a single entry to the log while holding the log mutex.
// It returns whatever error the internal append reports.
func (l *Log) AppendEntry(entry *LogEntry) error {
	l.mutex.Lock()
	defer l.mutex.Unlock()

	err := l.appendEntry(entry)
	return err
}
// Writes a single log entry to the end of the log. This function does not
// obtain a lock and should only be used internally. Use AppendEntries() and
// AppendEntry() to use it externally.
func ( l * Log ) appendEntry ( entry * LogEntry ) error {
2013-04-14 21:37:33 +00:00
if l . file == nil {
return errors . New ( "raft.Log: Log is not open" )
}
// Make sure the term and index are greater than the previous.
if len ( l . entries ) > 0 {
lastEntry := l . entries [ len ( l . entries ) - 1 ]
2013-05-08 20:22:08 +00:00
if entry . Term < lastEntry . Term {
return fmt . Errorf ( "raft.Log: Cannot append entry with earlier term (%x:%x <= %x:%x)" , entry . Term , entry . Index , lastEntry . Term , lastEntry . Index )
2013-05-10 14:47:24 +00:00
} else if entry . Term == lastEntry . Term && entry . Index <= lastEntry . Index {
2013-06-05 00:02:45 +00:00
fmt . Printf ( "raft.Log: Cannot append entry with earlier index in the same term (%x:%x <= %x:%x)" , entry . Term , entry . Index , lastEntry . Term , lastEntry . Index )
2013-05-08 20:22:08 +00:00
return fmt . Errorf ( "raft.Log: Cannot append entry with earlier index in the same term (%x:%x <= %x:%x)" , entry . Term , entry . Index , lastEntry . Term , lastEntry . Index )
2013-04-14 21:37:33 +00:00
}
}
2013-04-28 21:23:21 +00:00
2013-04-14 21:37:33 +00:00
// Append to entries list if stored on disk.
l . entries = append ( l . entries , entry )
return nil
}
2013-06-03 21:58:12 +00:00
//--------------------------------------
// Log compaction
//--------------------------------------
2013-06-05 00:02:45 +00:00
func ( l * Log ) Compaction ( index uint64 , term uint64 ) error {
2013-06-03 21:58:12 +00:00
var entries [ ] * LogEntry
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-06-05 00:02:45 +00:00
fmt . Println ( "Compaction: " , index , " " , l . internalCurrentIndex ( ) , " " , l . startIndex )
2013-06-03 21:58:12 +00:00
// recovery from a newer snapshot
2013-06-05 00:02:45 +00:00
if index >= l . internalCurrentIndex ( ) {
2013-06-03 21:58:12 +00:00
entries = make ( [ ] * LogEntry , 0 )
} else {
// get all log entries after index
entries = l . entries [ index - l . startIndex : ]
}
// create a new log file and add all the entries
file , err := os . OpenFile ( l . path + ".new" , os . O_APPEND | os . O_CREATE | os . O_WRONLY , 0600 )
if err != nil {
return err
}
2013-06-05 00:02:45 +00:00
for _ , entry := range entries {
2013-06-03 21:58:12 +00:00
err = entry . Encode ( file )
if err != nil {
return err
}
}
// close the current log file
l . file . Close ( )
// remove the current log file to .bak
2013-06-05 00:02:45 +00:00
os . Rename ( l . path , l . path + "." + string ( l . commitIndex ) )
2013-06-03 21:58:12 +00:00
// rename the new log file
os . Rename ( l . path + ".new" , l . path )
l . file = file
// compaction the in memory log
l . entries = entries
l . startIndex = index
2013-06-05 00:02:45 +00:00
l . startTerm = term
fmt . Println ( "Compaction: " , len ( l . entries ) )
2013-06-03 21:58:12 +00:00
return nil
}