2013-04-14 21:37:33 +00:00
package raft
import (
2013-04-16 02:47:59 +00:00
"bufio"
2013-08-02 00:58:03 +00:00
"code.google.com/p/goprotobuf/proto"
2013-04-14 21:37:33 +00:00
"errors"
"fmt"
2013-08-10 04:47:51 +00:00
"github.com/goraft/raft/protobuf"
2013-04-14 21:37:33 +00:00
"io"
"os"
2013-04-16 04:19:29 +00:00
"sync"
2013-04-14 21:37:33 +00:00
)
//------------------------------------------------------------------------------
//
// Typedefs
//
//------------------------------------------------------------------------------
// A log is a collection of log entries that are persisted to durable storage.
type Log struct {
2013-07-15 05:48:41 +00:00
ApplyFunc func ( Command ) ( interface { } , error )
file * os . File
path string
entries [ ] * LogEntry
results [ ] * logResult
commitIndex uint64
mutex sync . RWMutex
startIndex uint64 // the index before the first entry in the Log entries
startTerm uint64
2013-08-02 00:58:03 +00:00
pBuffer * proto . Buffer
pLogEntry * protobuf . ProtoLogEntry
2013-04-14 21:37:33 +00:00
}
2013-07-07 22:12:24 +00:00
// logResult records the outcome of applying one log entry: the value the
// apply function returned and the error it reported, if any.
type logResult struct {
	returnValue interface{}
	err         error
}
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Constructor
//
//------------------------------------------------------------------------------
// Creates a new log.
2013-07-06 04:49:47 +00:00
func newLog ( ) * Log {
2013-07-03 16:51:20 +00:00
return & Log {
2013-08-02 00:58:03 +00:00
entries : make ( [ ] * LogEntry , 0 ) ,
pBuffer : proto . NewBuffer ( nil ) ,
pLogEntry : & protobuf . ProtoLogEntry { } ,
2013-07-03 16:51:20 +00:00
}
2013-04-14 21:37:33 +00:00
}
2013-04-28 04:51:17 +00:00
//------------------------------------------------------------------------------
//
// Accessors
//
//------------------------------------------------------------------------------
//--------------------------------------
// Log Indices
//--------------------------------------
2013-07-07 22:12:24 +00:00
// CommitIndex returns the index of the last committed entry. It takes the
// read lock for the duration of the read.
func (l *Log) CommitIndex() uint64 {
	l.mutex.RLock()
	index := l.commitIndex
	l.mutex.RUnlock()
	return index
}
2013-04-28 04:51:17 +00:00
// The current index in the log.
2013-07-06 04:49:47 +00:00
func ( l * Log ) currentIndex ( ) uint64 {
2013-07-07 22:12:24 +00:00
l . mutex . RLock ( )
defer l . mutex . RUnlock ( )
2013-04-28 21:23:21 +00:00
2013-04-28 04:51:17 +00:00
if len ( l . entries ) == 0 {
2013-06-03 21:58:12 +00:00
return l . startIndex
2013-04-28 04:51:17 +00:00
}
2013-05-08 20:22:08 +00:00
return l . entries [ len ( l . entries ) - 1 ] . Index
2013-04-28 04:51:17 +00:00
}
2013-06-05 05:56:59 +00:00
// The current index in the log without locking
2013-06-05 00:02:45 +00:00
func ( l * Log ) internalCurrentIndex ( ) uint64 {
if len ( l . entries ) == 0 {
return l . startIndex
2013-04-28 04:51:17 +00:00
}
2013-05-08 20:22:08 +00:00
return l . entries [ len ( l . entries ) - 1 ] . Index
2013-04-28 04:51:17 +00:00
}
// The next index in the log.
2013-07-06 04:49:47 +00:00
func ( l * Log ) nextIndex ( ) uint64 {
return l . currentIndex ( ) + 1
2013-04-28 04:51:17 +00:00
}
2013-05-08 03:56:32 +00:00
// Determines if the log contains zero entries.
2013-07-06 04:49:47 +00:00
func ( l * Log ) isEmpty ( ) bool {
2013-07-07 22:12:24 +00:00
l . mutex . RLock ( )
defer l . mutex . RUnlock ( )
2013-06-12 16:47:48 +00:00
return ( len ( l . entries ) == 0 ) && ( l . startIndex == 0 )
2013-05-08 03:56:32 +00:00
}
2013-06-03 19:13:38 +00:00
// The name of the last command in the log.
2013-07-06 04:49:47 +00:00
func ( l * Log ) lastCommandName ( ) string {
2013-07-07 22:12:24 +00:00
l . mutex . RLock ( )
defer l . mutex . RUnlock ( )
2013-06-03 19:13:38 +00:00
if len ( l . entries ) > 0 {
2013-07-17 13:45:53 +00:00
if entry := l . entries [ len ( l . entries ) - 1 ] ; entry != nil {
return entry . CommandName
2013-06-03 19:13:38 +00:00
}
}
return ""
}
2013-04-28 04:51:17 +00:00
//--------------------------------------
// Log Terms
//--------------------------------------
// The current term in the log.
2013-07-06 04:49:47 +00:00
func ( l * Log ) currentTerm ( ) uint64 {
2013-07-07 22:12:24 +00:00
l . mutex . RLock ( )
defer l . mutex . RUnlock ( )
2013-04-28 21:23:21 +00:00
2013-04-28 04:51:17 +00:00
if len ( l . entries ) == 0 {
2013-06-03 21:58:12 +00:00
return l . startTerm
2013-04-28 04:51:17 +00:00
}
2013-05-08 20:22:08 +00:00
return l . entries [ len ( l . entries ) - 1 ] . Term
2013-04-28 04:51:17 +00:00
}
2013-04-14 21:37:33 +00:00
//------------------------------------------------------------------------------
//
// Methods
//
//------------------------------------------------------------------------------
//--------------------------------------
// State
//--------------------------------------
// Opens the log file and reads existing entries. The log can remain open and
// continue to append entries to the end of the log.
2013-07-06 04:49:47 +00:00
func ( l * Log ) open ( path string ) error {
2013-04-14 21:37:33 +00:00
// Read all the entries from the log if one exists.
2013-07-22 21:55:05 +00:00
var readBytes int64
2013-07-21 02:07:16 +00:00
var err error
debugln ( "log.open.open " , path )
// open log file
l . file , err = os . OpenFile ( path , os . O_RDWR , 0600 )
l . path = path
2013-04-28 21:23:21 +00:00
2013-07-21 02:07:16 +00:00
if err != nil {
// if the log file does not exist before
// we create the log file and set commitIndex to 0
if os . IsNotExist ( err ) {
l . file , err = os . OpenFile ( path , os . O_WRONLY | os . O_CREATE , 0600 )
debugln ( "log.open.create " , path )
2013-04-16 02:47:59 +00:00
2013-07-22 21:29:54 +00:00
return err
2013-07-21 02:07:16 +00:00
}
return err
}
debugln ( "log.open.exist " , path )
2013-07-22 21:29:54 +00:00
2013-07-21 02:07:16 +00:00
// Read the file and decode entries.
for {
// Instantiate log entry and decode into it.
entry , _ := newLogEntry ( l , 0 , 0 , nil )
2013-07-23 22:30:14 +00:00
entry . Position , _ = l . file . Seek ( 0 , os . SEEK_CUR )
2013-07-25 00:14:21 +00:00
n , err := entry . decode ( l . file )
2013-07-21 02:07:16 +00:00
if err != nil {
2013-07-25 00:14:21 +00:00
if err == io . EOF {
debugln ( "open.log.append: finish " )
2013-07-25 00:21:30 +00:00
} else {
if err = os . Truncate ( path , readBytes ) ; err != nil {
return fmt . Errorf ( "raft.Log: Unable to recover: %v" , err )
}
2013-07-21 02:07:16 +00:00
}
2013-07-25 00:21:30 +00:00
break
2013-07-21 02:07:16 +00:00
}
2013-09-18 04:19:46 +00:00
if entry . Index > l . startIndex {
// Append entry.
l . entries = append ( l . entries , entry )
if entry . Index <= l . commitIndex {
command , err := newCommand ( entry . CommandName , entry . Command )
if err != nil {
continue
}
l . ApplyFunc ( command )
2013-08-13 21:31:19 +00:00
}
2013-09-18 04:19:46 +00:00
debugln ( "open.log.append log index " , entry . Index )
2013-08-13 21:31:19 +00:00
}
2013-07-21 02:07:16 +00:00
readBytes += int64 ( n )
2013-04-14 21:37:33 +00:00
}
2013-07-21 02:07:16 +00:00
l . results = make ( [ ] * logResult , len ( l . entries ) )
2013-08-06 02:33:07 +00:00
2013-07-21 02:07:16 +00:00
debugln ( "open.log.recovery number of log " , len ( l . entries ) )
2013-04-14 21:37:33 +00:00
return nil
}
// Closes the log file.
2013-07-06 04:49:47 +00:00
func ( l * Log ) close ( ) {
2013-04-16 04:19:29 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-04-14 21:37:33 +00:00
if l . file != nil {
l . file . Close ( )
l . file = nil
}
l . entries = make ( [ ] * LogEntry , 0 )
2013-07-07 22:12:24 +00:00
l . results = make ( [ ] * logResult , 0 )
2013-04-14 21:37:33 +00:00
}
//--------------------------------------
2013-04-28 04:51:17 +00:00
// Entries
2013-04-14 21:37:33 +00:00
//--------------------------------------
2013-04-28 04:51:17 +00:00
// Creates a log entry associated with this log.
2013-07-17 13:45:53 +00:00
func ( l * Log ) createEntry ( term uint64 , command Command ) ( * LogEntry , error ) {
2013-07-06 04:49:47 +00:00
return newLogEntry ( l , l . nextIndex ( ) , term , command )
2013-04-28 04:51:17 +00:00
}
2013-07-06 19:41:42 +00:00
// Retrieves an entry from the log. If the entry has been eliminated because
// of a snapshot then nil is returned.
2013-07-18 23:29:06 +00:00
func ( l * Log ) getEntry ( index uint64 ) * LogEntry {
2013-07-17 00:43:12 +00:00
l . mutex . RLock ( )
defer l . mutex . RUnlock ( )
2013-07-07 20:21:04 +00:00
2013-06-08 02:19:18 +00:00
if index <= l . startIndex || index > ( l . startIndex + uint64 ( len ( l . entries ) ) ) {
2013-07-06 19:41:42 +00:00
return nil
2013-04-30 04:13:50 +00:00
}
2013-07-17 00:43:12 +00:00
return l . entries [ index - l . startIndex - 1 ]
2013-07-06 19:41:42 +00:00
}
// Checks if the log contains a given index/term combination.
func ( l * Log ) containsEntry ( index uint64 , term uint64 ) bool {
2013-07-18 23:29:06 +00:00
entry := l . getEntry ( index )
2013-07-06 19:41:42 +00:00
return ( entry != nil && entry . Term == term )
2013-04-30 04:13:50 +00:00
}
2013-06-25 21:41:42 +00:00
// Retrieves a list of entries after a given index as well as the term of the
// index provided. A nil list of entries is returned if the index no longer
// exists because a snapshot was made.
2013-07-15 05:48:41 +00:00
func ( l * Log ) getEntriesAfter ( index uint64 , maxLogEntriesPerRequest uint64 ) ( [ ] * LogEntry , uint64 ) {
2013-06-25 21:41:42 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
// Return nil if index is before the start of the log.
if index < l . startIndex {
2013-07-06 19:41:42 +00:00
traceln ( "log.entriesAfter.before: " , index , " " , l . startIndex )
2013-06-25 21:41:42 +00:00
return nil , 0
}
2013-05-05 19:36:23 +00:00
// Return an error if the index doesn't exist.
2013-06-03 21:58:12 +00:00
if index > ( uint64 ( len ( l . entries ) ) + l . startIndex ) {
2013-06-27 17:22:49 +00:00
panic ( fmt . Sprintf ( "raft: Index is beyond end of log: %v %v" , len ( l . entries ) , index ) )
2013-05-05 19:36:23 +00:00
}
// If we're going from the beginning of the log then return the whole log.
2013-06-05 00:02:45 +00:00
if index == l . startIndex {
2013-07-06 19:41:42 +00:00
traceln ( "log.entriesAfter.beginning: " , index , " " , l . startIndex )
2013-06-05 00:02:45 +00:00
return l . entries , l . startTerm
2013-05-05 19:36:23 +00:00
}
2013-06-23 18:42:31 +00:00
2013-07-06 19:41:42 +00:00
traceln ( "log.entriesAfter.partial: " , index , " " , l . entries [ len ( l . entries ) - 1 ] . Index )
2013-06-23 18:42:31 +00:00
2013-07-15 04:38:42 +00:00
entries := l . entries [ index - l . startIndex : ]
length := len ( entries )
2013-09-18 04:19:46 +00:00
traceln ( "log.entriesAfter: startIndex:" , l . startIndex , " lenght" , len ( l . entries ) )
2013-07-15 05:48:41 +00:00
if uint64 ( length ) < maxLogEntriesPerRequest {
2013-07-15 04:38:42 +00:00
// Determine the term at the given entry and return a subslice.
return entries , l . entries [ index - 1 - l . startIndex ] . Term
} else {
2013-07-15 05:48:41 +00:00
return entries [ : maxLogEntriesPerRequest ] , l . entries [ index - 1 - l . startIndex ] . Term
2013-07-15 04:38:42 +00:00
}
2013-05-05 19:36:23 +00:00
}
2013-07-07 22:12:24 +00:00
// Retrieves the return value and error for an entry. The result can only exist
// after the entry has been committed.
func ( l * Log ) getEntryResult ( entry * LogEntry , clear bool ) ( interface { } , error ) {
l . mutex . RLock ( )
defer l . mutex . RUnlock ( )
2013-06-08 02:19:18 +00:00
2013-06-05 17:57:31 +00:00
if entry == nil {
panic ( "raft: Log entry required for error retrieval" )
}
2013-08-03 02:00:11 +00:00
debugln ( "getEntryResult.result index: " , entry . Index - l . startIndex - 1 )
2013-07-07 22:12:24 +00:00
// If a result exists for the entry then return it with its error.
2013-08-03 02:00:11 +00:00
if entry . Index > l . startIndex && entry . Index <= l . startIndex + uint64 ( len ( l . results ) ) {
2013-07-17 00:40:19 +00:00
if result := l . results [ entry . Index - l . startIndex - 1 ] ; result != nil {
2013-07-08 17:56:26 +00:00
// keep the records before remove it
returnValue , err := result . returnValue , result . err
2013-07-07 22:12:24 +00:00
// Remove reference to result if it's being cleared after retrieval.
if clear {
result . returnValue = nil
}
2013-07-07 23:37:11 +00:00
2013-07-08 17:56:26 +00:00
return returnValue , err
2013-07-07 22:12:24 +00:00
}
2013-06-05 17:57:31 +00:00
}
2013-07-07 22:12:24 +00:00
return nil , nil
2013-06-05 17:57:31 +00:00
}
2013-04-28 22:36:46 +00:00
//--------------------------------------
// Commit
//--------------------------------------
// Retrieves the last index and term that has been committed to the log.
2013-07-06 04:49:47 +00:00
func ( l * Log ) commitInfo ( ) ( index uint64 , term uint64 ) {
2013-07-07 22:12:24 +00:00
l . mutex . RLock ( )
defer l . mutex . RUnlock ( )
2013-08-03 02:00:11 +00:00
// If we don't have any committed entries then just return zeros.
2013-04-28 22:36:46 +00:00
if l . commitIndex == 0 {
return 0 , 0
}
2013-07-07 22:12:24 +00:00
// No new commit log after snapshot
2013-06-05 00:02:45 +00:00
if l . commitIndex == l . startIndex {
2013-06-03 21:58:12 +00:00
return l . startIndex , l . startTerm
}
2013-06-05 05:56:59 +00:00
2013-04-28 22:36:46 +00:00
// Return the last index & term from the last committed entry.
2013-08-03 02:00:11 +00:00
debugln ( "commitInfo.get.[" , l . commitIndex , "/" , l . startIndex , "]" )
2013-07-07 22:12:24 +00:00
entry := l . entries [ l . commitIndex - 1 - l . startIndex ]
return entry . Index , entry . Term
2013-04-28 22:36:46 +00:00
}
2013-06-23 18:42:31 +00:00
// Retrieves the last index and term that has been committed to the log.
2013-07-06 04:49:47 +00:00
func ( l * Log ) lastInfo ( ) ( index uint64 , term uint64 ) {
2013-07-07 22:12:24 +00:00
l . mutex . RLock ( )
defer l . mutex . RUnlock ( )
2013-06-23 18:42:31 +00:00
// If we don't have any entries then just return zeros.
if len ( l . entries ) == 0 {
return l . startIndex , l . startTerm
}
2013-06-24 16:52:51 +00:00
// Return the last index & term
2013-07-07 22:12:24 +00:00
entry := l . entries [ len ( l . entries ) - 1 ]
return entry . Index , entry . Term
2013-06-23 18:42:31 +00:00
}
2013-06-24 16:52:51 +00:00
// Updates the commit index
2013-07-06 04:49:47 +00:00
func ( l * Log ) updateCommitIndex ( index uint64 ) {
2013-06-05 00:02:45 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-09-18 04:19:46 +00:00
if index > l . commitIndex {
l . commitIndex = index
}
debugln ( "update.commit.index " , index )
2013-06-05 00:02:45 +00:00
}
2013-04-28 22:36:46 +00:00
// Updates the commit index and writes entries after that index to the stable storage.
2013-07-06 04:49:47 +00:00
func ( l * Log ) setCommitIndex ( index uint64 ) error {
2013-04-16 04:19:29 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-07-15 04:38:42 +00:00
// this is not error any more after limited the number of sending entries
// commit up to what we already have
2013-06-08 02:19:18 +00:00
if index > l . startIndex + uint64 ( len ( l . entries ) ) {
2013-07-15 04:38:42 +00:00
debugln ( "raft.Log: Commit index" , index , "set back to " , len ( l . entries ) )
index = l . startIndex + uint64 ( len ( l . entries ) )
2013-04-28 22:36:46 +00:00
}
2013-04-16 04:19:29 +00:00
2013-06-27 17:22:49 +00:00
// Do not allow previous indices to be committed again.
2013-06-28 23:58:31 +00:00
// This could happens, since the guarantee is that the new leader has up-to-dated
2013-06-27 17:22:49 +00:00
// log entires rather than has most up-to-dated committed index
// For example, Leader 1 send log 80 to follower 2 and follower 3
// follower 2 and follow 3 all got the new entries and reply
// leader 1 committed entry 80 and send reply to follower 2 and follower3
// follower 2 receive the new committed index and update committed index to 80
// leader 1 fail to send the committed index to follower 3
// follower 3 promote to leader (server 1 and server 2 will vote, since leader 3
2013-06-28 23:58:31 +00:00
// has up-to-dated the entries)
2013-06-27 17:22:49 +00:00
// when new leader 3 send heartbeat with committed index = 0 to follower 2,
// follower 2 should reply success and let leader 3 update the committed index to 80
if index < l . commitIndex {
return nil
}
2013-04-16 04:19:29 +00:00
// Find all entries whose index is between the previous index and the current index.
2013-04-28 22:36:46 +00:00
for i := l . commitIndex + 1 ; i <= index ; i ++ {
2013-06-06 03:32:52 +00:00
entryIndex := i - 1 - l . startIndex
2013-06-05 17:57:31 +00:00
entry := l . entries [ entryIndex ]
2013-05-28 18:46:27 +00:00
2013-04-28 22:36:46 +00:00
// Update commit index.
2013-05-08 20:22:08 +00:00
l . commitIndex = entry . Index
2013-06-05 17:57:31 +00:00
2013-07-17 13:45:53 +00:00
// Decode the command.
command , err := newCommand ( entry . CommandName , entry . Command )
if err != nil {
return err
}
2013-06-05 17:57:31 +00:00
// Apply the changes to the state machine and store the error code.
2013-07-17 13:45:53 +00:00
returnValue , err := l . ApplyFunc ( command )
2013-11-27 05:42:01 +00:00
debugf ( "setCommitIndex.set.result index: %v entires index: " , i , entryIndex )
2013-07-07 23:37:11 +00:00
l . results [ entryIndex ] = & logResult { returnValue : returnValue , err : err }
2013-04-16 04:19:29 +00:00
}
return nil
}
2013-07-22 22:01:32 +00:00
// Set the commitIndex at the head of the log file to the current
// commit Index. This should be called after obtained a log lock
2013-07-21 02:07:16 +00:00
func ( l * Log ) flushCommitIndex ( ) {
l . file . Seek ( 0 , os . SEEK_SET )
fmt . Fprintf ( l . file , "%8x\n" , l . commitIndex )
l . file . Seek ( 0 , os . SEEK_END )
}
2013-05-01 04:44:16 +00:00
//--------------------------------------
// Truncation
//--------------------------------------
// Truncates the log to the given index and term. This only works if the log
// at the index has not been committed.
2013-07-06 04:49:47 +00:00
func ( l * Log ) truncate ( index uint64 , term uint64 ) error {
2013-05-01 04:44:16 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-07-06 19:41:42 +00:00
debugln ( "log.truncate: " , index )
2013-05-01 04:44:16 +00:00
// Do not allow committed entries to be truncated.
2013-07-06 04:49:47 +00:00
if index < l . commitIndex {
2013-07-06 19:41:42 +00:00
debugln ( "log.truncate.before" )
2013-07-06 04:49:47 +00:00
return fmt . Errorf ( "raft.Log: Index is already committed (%v): (IDX=%v, TERM=%v)" , l . commitIndex , index , term )
2013-05-01 04:44:16 +00:00
}
// Do not truncate past end of entries.
2013-06-08 02:19:18 +00:00
if index > l . startIndex + uint64 ( len ( l . entries ) ) {
2013-07-06 19:41:42 +00:00
debugln ( "log.truncate.after" )
2013-05-01 04:44:16 +00:00
return fmt . Errorf ( "raft.Log: Entry index does not exist (MAX=%v): (IDX=%v, TERM=%v)" , len ( l . entries ) , index , term )
}
// If we're truncating everything then just clear the entries.
2013-06-05 00:02:45 +00:00
if index == l . startIndex {
2013-07-21 02:07:16 +00:00
debugln ( "log.truncate.clear" )
2013-07-23 22:30:14 +00:00
l . file . Truncate ( 0 )
l . file . Seek ( 0 , os . SEEK_SET )
2013-05-01 04:44:16 +00:00
l . entries = [ ] * LogEntry { }
} else {
// Do not truncate if the entry at index does not have the matching term.
2013-06-08 02:19:18 +00:00
entry := l . entries [ index - l . startIndex - 1 ]
2013-05-08 20:22:08 +00:00
if len ( l . entries ) > 0 && entry . Term != term {
2013-07-06 19:41:42 +00:00
debugln ( "log.truncate.termMismatch" )
2013-05-08 20:22:08 +00:00
return fmt . Errorf ( "raft.Log: Entry at index does not have matching term (%v): (IDX=%v, TERM=%v)" , entry . Term , index , term )
2013-05-01 04:44:16 +00:00
}
// Otherwise truncate up to the desired entry.
2013-06-08 02:19:18 +00:00
if index < l . startIndex + uint64 ( len ( l . entries ) ) {
2013-07-06 19:41:42 +00:00
debugln ( "log.truncate.finish" )
2013-07-21 02:07:16 +00:00
position := l . entries [ index - l . startIndex ] . Position
l . file . Truncate ( position )
l . file . Seek ( position , os . SEEK_SET )
2013-06-08 02:19:18 +00:00
l . entries = l . entries [ 0 : index - l . startIndex ]
2013-05-01 04:44:16 +00:00
}
}
2013-05-01 05:11:23 +00:00
2013-05-01 04:44:16 +00:00
return nil
}
2013-04-16 04:19:29 +00:00
//--------------------------------------
// Append
//--------------------------------------
2013-10-22 21:55:51 +00:00
// Appends a series of entries to the log.
2013-07-06 04:49:47 +00:00
func ( l * Log ) appendEntries ( entries [ ] * LogEntry ) error {
2013-04-16 04:19:29 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-07-24 18:03:20 +00:00
startPosition , _ := l . file . Seek ( 0 , os . SEEK_CUR )
w := bufio . NewWriter ( l . file )
var size int64
var err error
2013-04-30 04:13:50 +00:00
// Append each entry but exit if we hit an error.
for _ , entry := range entries {
2013-08-02 00:58:03 +00:00
entry . log = l
2013-07-24 18:43:33 +00:00
if size , err = l . writeEntry ( entry , w ) ; err != nil {
2013-04-30 04:13:50 +00:00
return err
}
2013-07-24 18:43:33 +00:00
entry . Position = startPosition
2013-07-24 18:03:20 +00:00
startPosition += size
2013-04-30 04:13:50 +00:00
}
2013-07-24 18:03:20 +00:00
w . Flush ( )
2013-10-22 21:55:51 +00:00
err = l . file . Sync ( )
if err != nil {
panic ( err )
}
2013-04-30 04:13:50 +00:00
return nil
}
2013-10-22 21:55:51 +00:00
// Writes a single log entry to the end of the log.
2013-04-30 04:13:50 +00:00
func ( l * Log ) appendEntry ( entry * LogEntry ) error {
2013-10-22 21:55:51 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-04-14 21:37:33 +00:00
if l . file == nil {
return errors . New ( "raft.Log: Log is not open" )
}
// Make sure the term and index are greater than the previous.
if len ( l . entries ) > 0 {
lastEntry := l . entries [ len ( l . entries ) - 1 ]
2013-05-08 20:22:08 +00:00
if entry . Term < lastEntry . Term {
return fmt . Errorf ( "raft.Log: Cannot append entry with earlier term (%x:%x <= %x:%x)" , entry . Term , entry . Index , lastEntry . Term , lastEntry . Index )
2013-05-10 14:47:24 +00:00
} else if entry . Term == lastEntry . Term && entry . Index <= lastEntry . Index {
2013-05-08 20:22:08 +00:00
return fmt . Errorf ( "raft.Log: Cannot append entry with earlier index in the same term (%x:%x <= %x:%x)" , entry . Term , entry . Index , lastEntry . Term , lastEntry . Index )
2013-04-14 21:37:33 +00:00
}
}
2013-04-28 21:23:21 +00:00
2013-07-21 02:07:16 +00:00
position , _ := l . file . Seek ( 0 , os . SEEK_CUR )
entry . Position = position
// Write to storage.
if _ , err := entry . encode ( l . file ) ; err != nil {
return err
}
2013-04-14 21:37:33 +00:00
// Append to entries list if stored on disk.
l . entries = append ( l . entries , entry )
2013-07-07 22:12:24 +00:00
l . results = append ( l . results , nil )
2013-04-14 21:37:33 +00:00
return nil
}
2013-06-03 21:58:12 +00:00
2013-07-24 18:03:20 +00:00
// appendEntry with Buffered io
2013-07-24 18:43:33 +00:00
func ( l * Log ) writeEntry ( entry * LogEntry , w io . Writer ) ( int64 , error ) {
2013-07-24 18:03:20 +00:00
if l . file == nil {
return - 1 , errors . New ( "raft.Log: Log is not open" )
}
// Make sure the term and index are greater than the previous.
if len ( l . entries ) > 0 {
lastEntry := l . entries [ len ( l . entries ) - 1 ]
if entry . Term < lastEntry . Term {
return - 1 , fmt . Errorf ( "raft.Log: Cannot append entry with earlier term (%x:%x <= %x:%x)" , entry . Term , entry . Index , lastEntry . Term , lastEntry . Index )
} else if entry . Term == lastEntry . Term && entry . Index <= lastEntry . Index {
return - 1 , fmt . Errorf ( "raft.Log: Cannot append entry with earlier index in the same term (%x:%x <= %x:%x)" , entry . Term , entry . Index , lastEntry . Term , lastEntry . Index )
}
}
// Write to storage.
2013-07-24 18:43:33 +00:00
size , err := entry . encode ( w )
if err != nil {
2013-07-24 18:03:20 +00:00
return - 1 , err
}
// Append to entries list if stored on disk.
l . entries = append ( l . entries , entry )
l . results = append ( l . results , nil )
return int64 ( size ) , nil
}
2013-06-03 21:58:12 +00:00
//--------------------------------------
// Log compaction
//--------------------------------------
2013-08-06 02:33:07 +00:00
// compact the log before index (including index)
2013-07-06 04:49:47 +00:00
func ( l * Log ) compact ( index uint64 , term uint64 ) error {
2013-06-03 21:58:12 +00:00
var entries [ ] * LogEntry
2013-08-03 02:00:11 +00:00
var results [ ] * logResult
2013-06-03 21:58:12 +00:00
l . mutex . Lock ( )
defer l . mutex . Unlock ( )
2013-06-05 05:56:59 +00:00
2013-08-06 02:33:07 +00:00
if index == 0 {
return nil
}
2013-06-05 05:56:59 +00:00
// nothing to compaction
2013-06-24 16:52:51 +00:00
// the index may be greater than the current index if
2013-06-05 05:56:59 +00:00
// we just recovery from on snapshot
2013-06-05 00:02:45 +00:00
if index >= l . internalCurrentIndex ( ) {
2013-06-03 21:58:12 +00:00
entries = make ( [ ] * LogEntry , 0 )
2013-08-03 02:00:11 +00:00
results = make ( [ ] * logResult , 0 )
2013-06-03 21:58:12 +00:00
} else {
// get all log entries after index
2013-06-08 02:19:18 +00:00
entries = l . entries [ index - l . startIndex : ]
2013-08-03 02:00:11 +00:00
results = l . results [ index - l . startIndex : ]
2013-06-03 21:58:12 +00:00
}
2013-06-05 05:56:59 +00:00
2013-06-03 21:58:12 +00:00
// create a new log file and add all the entries
2013-06-08 02:19:18 +00:00
file , err := os . OpenFile ( l . path + ".new" , os . O_APPEND | os . O_CREATE | os . O_WRONLY , 0600 )
2013-06-03 21:58:12 +00:00
if err != nil {
return err
}
2013-06-05 00:02:45 +00:00
for _ , entry := range entries {
2013-07-21 02:07:16 +00:00
position , _ := l . file . Seek ( 0 , os . SEEK_CUR )
entry . Position = position
2013-07-17 13:45:53 +00:00
if _ , err = entry . encode ( file ) ; err != nil {
2013-06-08 02:19:18 +00:00
return err
}
}
2013-06-03 21:58:12 +00:00
// close the current log file
l . file . Close ( )
// remove the current log file to .bak
2013-06-05 17:38:49 +00:00
err = os . Remove ( l . path )
if err != nil {
return err
}
2013-06-03 21:58:12 +00:00
// rename the new log file
2013-06-08 02:19:18 +00:00
err = os . Rename ( l . path + ".new" , l . path )
2013-06-05 17:38:49 +00:00
if err != nil {
return err
}
2013-06-03 21:58:12 +00:00
l . file = file
// compaction the in memory log
l . entries = entries
2013-08-03 02:00:11 +00:00
l . results = results
2013-06-03 21:58:12 +00:00
l . startIndex = index
2013-06-05 00:02:45 +00:00
l . startTerm = term
2013-06-03 21:58:12 +00:00
return nil
}