596 lines
17 KiB
596 lines
17 KiB
package wal
import (
logger "code.google.com/p/log4go"
// WAL is the write-ahead log. It holds the open log files together with
// their indexes, the bookmarked global state, and the channel through which
// every operation is handed to the processEntries goroutine.
type WAL struct {
// state is the global state loaded from the "bookmark" file in the wal directory.
state *GlobalState
config *configuration.Configuration
// logFiles and logIndex are parallel slices: they are sorted, rotated and
// truncated together, and logIndex[i] is used as the index of logFiles[i].
logFiles []*log
logIndex []*index
// serverId is set by SetServerId and is added into generated sequence numbers.
serverId uint32
// nextLogFileSuffix is raised to the largest log suffix seen while NewWAL opens the directory.
nextLogFileSuffix uint32
// entries carries commit, append, bookmark and close entries to processEntries.
entries chan interface{}
// counters to force index creation, bookmark and flushing
requestsSinceLastFlush int
requestsSinceLastBookmark int
requestsSinceLastIndex int
requestsSinceRotation int
// HOST_ID_OFFSET is the multiplier applied to a shard's sequence number
// before the server id is added to it in assignSequenceNumbers; recover
// subtracts the server id and divides by it to get the sequence number back.
const HOST_ID_OFFSET = uint64(10000)
// NewWAL opens the write-ahead log stored in config.WalDir and starts the
// goroutine (processEntries) that consumes the entries channel.
//
// It returns an error when WalDir is empty, when the directory cannot be
// created, opened or listed, when the global state cannot be loaded from the
// "bookmark" file, or when openLog fails.
//
// NOTE(review): this view of the function is missing lines (all closing
// braces, and at least the bodies of the HasPrefix check and of the suffix
// comparison in the second loop), so the exact skip/rotate rules cannot be
// confirmed from here.
func NewWAL(config *configuration.Configuration) (*WAL, error) {
if config.WalDir == "" {
return nil, fmt.Errorf("wal directory cannot be empty")
logger.Info("Opening wal in %s", config.WalDir)
// create the wal directory (mode 0755) when it does not exist yet
_, err := os.Stat(config.WalDir)
if os.IsNotExist(err) {
err = os.MkdirAll(config.WalDir, 0755)
if err != nil {
return nil, err
// NOTE(review): no Close call on dir is visible in this view — confirm the
// directory handle is released.
dir, err := os.Open(config.WalDir)
if err != nil {
return nil, err
// a negative count asks Readdirnames for all names in a single slice
names, err := dir.Readdirnames(-1)
if err != nil {
return nil, err
state, err := newGlobalState(path.Join(config.WalDir, "bookmark"))
if err != nil {
logger.Error("Cannot open global state. Error: %s", err)
return nil, err
// sort the logfiles by the first request number in the log
wal := &WAL{
config: config,
logFiles: []*log{},
logIndex: []*index{},
state: state,
// buffered so that up to 10 entries can be queued before senders block
entries: make(chan interface{}, 10),
// openLog appends the opened log and its index to wal.logFiles / wal.logIndex
for _, name := range names {
if !strings.HasPrefix(name, "log.") {
log, _, err := wal.openLog(path.Join(config.WalDir, name))
if err != nil {
return nil, err
// remember the largest suffix seen
if suffix := log.suffix(); suffix > wal.nextLogFileSuffix {
wal.nextLogFileSuffix = suffix
// sort the log files by suffix first
sort.Sort(sortableLogSlice{wal.logFiles, wal.logIndex})
for idx, logFile := range wal.logFiles {
logger.Debug("suffix: %d, first suffix: %d", logFile.suffix(), wal.state.FirstSuffix)
if logFile.suffix() < wal.state.FirstSuffix {
// rotate both slices so that the element at idx becomes the first one
wal.logFiles = append(wal.logFiles[idx:], wal.logFiles[:idx]...)
wal.logIndex = append(wal.logIndex[idx:], wal.logIndex[:idx]...)
wal.state.FirstSuffix = logFile.suffix()
go wal.processEntries()
// NOTE(review): err is presumably nil here, since every error path visible
// above returns early — confirm.
return wal, err
// SetServerId records id as this server's id and then runs recover.
//
// NOTE(review): how a recover error is handled is not visible in this view
// (the body of the error branch is missing); the method itself returns nothing.
func (self *WAL) SetServerId(id uint32) {
logger.Info("Setting server id to %d and recovering", id)
self.serverId = id
if err := self.recover(); err != nil {
// Commit marks the given request number as committed for the given server.
//
// It sends a commitEntry on the entries channel, blocks until a confirmation
// arrives on the entry's unbuffered confirmation channel, and returns the
// error carried by that confirmation.
func (self *WAL) Commit(requestNumber uint32, serverId uint32) error {
confirmationChan := make(chan *confirmation)
self.entries <- &commitEntry{confirmationChan, serverId, requestNumber}
confirmation := <-confirmationChan
return confirmation.err
// RecoverServerFromLastCommit replays requests for serverId through
// RecoverServerFromRequestNumber, starting at the request after the last one
// recorded for that server in state.ServerLastRequestNumber.
//
// shardIds and yield are passed through unchanged, and the result of
// RecoverServerFromRequestNumber is returned.
func (self *WAL) RecoverServerFromLastCommit(serverId uint32, shardIds []uint32, yield func(request *protocol.Request, shardId uint32) error) error {
requestNumber, ok := self.state.ServerLastRequestNumber[serverId]
// start with the request that follows the last recorded one
requestNumber += 1
// presumably only when the server has no recorded request: start from the
// first suffix instead (closing brace not visible in this view)
if !ok {
requestNumber = uint32(self.state.FirstSuffix)
logger.Info("Recovering server %d from request %d", serverId, requestNumber)
return self.RecoverServerFromRequestNumber(requestNumber, shardIds, yield)
// isInRange reports whether requestNumber lies between state.FirstSuffix and
// state.LargestRequestNumber, both inclusive.
//
// When FirstSuffix is greater than LargestRequestNumber the range is treated
// as wrapping: any number <= LargestRequestNumber or >= FirstSuffix is in
// range (presumably this covers request numbers that wrapped around — confirm).
func (self *WAL) isInRange(requestNumber uint32) bool {
rn := requestNumber
largestRequestNumber := self.state.LargestRequestNumber
// wrapped range: [FirstSuffix, max] plus [0, largestRequestNumber]
if self.state.FirstSuffix > largestRequestNumber {
return rn <= largestRequestNumber || rn >= self.state.FirstSuffix
// ordinary range: [FirstSuffix, largestRequestNumber]
return rn >= self.state.FirstSuffix && rn <= largestRequestNumber
// RecoverServerFromRequestNumber replays logged requests, starting at
// requestNumber, by passing each replayed request and its shard id to yield.
//
// In the case where this server is running and another one in the
// cluster stops responding, at some point this server will have to
// just write requests to disk. When the downed server comes back up,
// it's this server's responsibility to send out any writes that were
// queued up. If the yield function returns nil then the request is
// committed.
//
// It returns nil when there are no log files or requestNumber is not in
// range, the replay error if a log file reports one, and the error returned
// by yield if yield fails.
//
// NOTE(review): several lines are missing from this view (closing braces, the
// "outer" label targeted by "continue outer", and the exit from the search
// loop), so the exact loop structure cannot be confirmed from here.
func (self *WAL) RecoverServerFromRequestNumber(requestNumber uint32, shardIds []uint32, yield func(request *protocol.Request, shardId uint32) error) error {
// don't replay if we don't have any log files yet
if len(self.logFiles) == 0 {
return nil
firstIndex := 0
firstOffset := int64(-1)
// find the log file from which replay will start if the request
// number is in range, otherwise replay from all log files
// NOTE(review): the visible code returns nil for an out-of-range request,
// which does not match "replay from all log files" in the comment above —
// confirm which is intended.
if !self.isInRange(requestNumber) {
return nil
// requestOffset is compared against -1, which this code treats as "not in this index"
for idx, logIndex := range self.logIndex {
logger.Debug("Trying to find request %d in %s", requestNumber, self.logFiles[idx].file.Name())
if firstOffset = logIndex.requestOffset(requestNumber); firstOffset != -1 {
logger.Debug("Found reqeust %d in %s at offset %d", requestNumber, self.logFiles[idx].file.Name(), firstOffset)
firstIndex = idx
// the request must be at the end of the current log file
if firstOffset == -1 {
firstIndex = len(self.logIndex) - 1
firstOffset = self.logIndex[firstIndex].requestOrLastOffset(requestNumber)
// issue #522. Copy the log files, otherwise a commit may cause
// self.logFiles to be shifted to the left and `idx` in the loop
// will be off by one, then by two, etc.
logFiles := make([]*log, len(self.logFiles))
copy(logFiles, self.logFiles)
for idx := firstIndex; idx < len(logFiles); idx++ {
logFile := logFiles[idx]
// only the first replayed file uses the located offset; later files are
// replayed with offset -1
if idx > firstIndex {
firstOffset = -1
logger.Info("Replaying from %s:%d", logFile.file.Name(), firstOffset)
count := 0
ch, stopChan := logFile.dupAndReplayFromOffset(shardIds, firstOffset, requestNumber)
// NOTE(review): this defer appears to sit inside the per-file loop; deferred
// calls run when the function returns, not at the end of each iteration, so
// every stopChan would stay open until the whole replay finishes — confirm.
defer close(stopChan)
for {
x := <-ch
// a nil value marks the end of this log file's replay
if x == nil {
logger.Info("%s yielded %d requests", logFile.file.Name(), count)
continue outer
if x.err != nil {
return x.err
logger.Debug("Yielding request %d", x.request.GetRequestNumber())
if err := yield(x.request, x.shardId); err != nil {
logger.Debug("Stopping replay due to error: %s", err)
// signal the replay side to stop before returning the yield error
stopChan <- struct{}{}
return err
return nil
// Close closes the WAL through closeCommon with bookmarking requested, and
// returns closeCommon's error.
func (self *WAL) Close() error {
return self.closeCommon(true)
// closeWithoutBookmarking closes the WAL through closeCommon with
// bookmarking turned off, and returns closeCommon's error.
func (self *WAL) closeWithoutBookmarking() error {
return self.closeCommon(false)
// closeCommon sends a closeEntry carrying shouldBookmark on the entries
// channel, blocks until the confirmation arrives on the entry's unbuffered
// confirmation channel, and returns the error carried by that confirmation.
func (self *WAL) closeCommon(shouldBookmark bool) error {
confirmationChan := make(chan *confirmation)
self.entries <- &closeEntry{confirmationChan, shouldBookmark}
confirmation := <-confirmationChan
return confirmation.err
// processClose handles a close request: it walks over self.logFiles, takes
// shouldBookmark into account, logs start and completion, and returns nil on
// the visible path. It is called from processEntries for a closeEntry.
//
// NOTE(review): the statements inside the loop and inside the shouldBookmark
// branch are not visible in this view, so what is closed or written, and
// which errors may be returned, cannot be confirmed from here.
func (self *WAL) processClose(shouldBookmark bool) error {
logger.Info("Closing WAL")
for idx, logFile := range self.logFiles {
if shouldBookmark {
logger.Info("Closed WAL")
return nil
// PRIVATE functions
// processEntries is the loop started as a goroutine by NewWAL. It receives
// entries from self.entries one at a time and dispatches on their concrete
// type: commitEntry, appendEntry, bookmarkEntry or closeEntry.
//
// NOTE(review): the bodies of the commitEntry and appendEntry cases, the
// exits from the bookmarkEntry and closeEntry cases, and the case label in
// front of the panic (presumably "default") are not visible in this view.
func (self *WAL) processEntries() {
for {
e := <-self.entries
switch x := e.(type) {
case *commitEntry:
case *appendEntry:
case *bookmarkEntry:
// write the bookmark first; report a failure to the waiting caller
err := self.bookmark()
if err != nil {
x.confirmation <- &confirmation{0, err}
// then report the result of index() to the waiting caller
x.confirmation <- &confirmation{0, self.index()}
case *closeEntry:
x.confirmation <- &confirmation{0, self.processClose(x.shouldBookmark)}
logger.Info("Closing wal")
panic(fmt.Errorf("unknown entry type %T", e))
// assignSequenceNumbers sets sequence numbers on the points of request, using
// the shard's current sequence number from the global state. An assigned
// value has the form sequenceNumber*HOST_ID_OFFSET + serverId. The shard's
// current sequence number is stored back into the state afterwards.
//
// NOTE(review): the lines that decide which points are skipped (the body of
// the "p.SequenceNumber != nil" check), the early exit for an empty
// MultiSeries, and the statement that advances sequenceNumber are not visible
// in this view — confirm against the full file.
func (self *WAL) assignSequenceNumbers(shardId uint32, request *protocol.Request) {
if len(request.MultiSeries) == 0 {
sequenceNumber := self.state.getCurrentSequenceNumber(shardId)
for _, s := range request.MultiSeries {
for _, p := range s.Points {
if p.SequenceNumber != nil {
p.SequenceNumber = proto.Uint64(sequenceNumber*HOST_ID_OFFSET + uint64(self.serverId))
self.state.setCurrentSequenceNumber(shardId, sequenceNumber)
// processAppendEntry handles an appendEntry: it assigns the next request
// number and the sequence numbers to e.request, and (unless e.assignSeqOnly
// is set) appends the request to the last log file, creating the first log
// file when none exists. The outcome is sent on e.confirmation: the request
// number on success, or 0 and the error when creating the log or appending
// fails.
//
// NOTE(review): the returns after each confirmation send, the closing braces
// and any counter updates are not visible in this view, so the exact control
// flow after each send cannot be confirmed from here.
func (self *WAL) processAppendEntry(e *appendEntry) {
nextRequestNumber := self.state.getNextRequestNumber()
e.request.RequestNumber = proto.Uint32(nextRequestNumber)
self.assignSequenceNumbers(e.shardId, e.request)
// callers that only want sequence numbers are confirmed without logging
if e.assignSeqOnly {
e.confirmation <- &confirmation{e.request.GetRequestNumber(), nil}
// first append ever: create a log file named after this request number
if len(self.logFiles) == 0 {
if _, err := self.createNewLog(nextRequestNumber); err != nil {
e.confirmation <- &confirmation{0, err}
self.state.FirstSuffix = nextRequestNumber
lastLogFile := self.logFiles[len(self.logFiles)-1]
logger.Debug("appending request %d", e.request.GetRequestNumber())
err := lastLogFile.appendRequest(e.request, e.shardId)
if err != nil {
e.confirmation <- &confirmation{0, err}
// remember how far the current log file has been written
self.state.CurrentFileOffset = self.logFiles[len(self.logFiles)-1].offset()
logger.Debug("requestsSinceRotation: %d", self.requestsSinceRotation)
// when rotation fails or happens, confirm with the rotation result
if rotated, err := self.rotateTheLogFile(nextRequestNumber); err != nil || rotated {
e.confirmation <- &confirmation{e.request.GetRequestNumber(), err}
e.confirmation <- &confirmation{e.request.GetRequestNumber(), nil}
// processCommitEntry handles a commitEntry: it records e.requestNumber as
// committed for e.serverId in the global state, then drops the log files
// (and their indexes) that come before the index returned by firstLogFile,
// and updates state.FirstSuffix to the suffix of the first remaining log.
// A confirmation with a nil error is sent on e.confirmation.
//
// NOTE(review): the return after the "idx == 0" confirmation and the
// statements that actually close/delete each unused log file and index are
// not visible in this view.
func (self *WAL) processCommitEntry(e *commitEntry) {
logger.Debug("commiting %d for server %d", e.requestNumber, e.serverId)
self.state.commitRequestNumber(e.serverId, e.requestNumber)
idx := self.firstLogFile()
// nothing precedes the first needed log file, so there is nothing to remove
if idx == 0 {
e.confirmation <- &confirmation{0, nil}
var unusedLogFiles []*log
var unusedLogIndex []*index
logger.Debug("Removing some unneeded log files: %d", idx)
// split both parallel slices at idx: [:idx] is unused, [idx:] is kept
unusedLogFiles, self.logFiles = self.logFiles[:idx], self.logFiles[idx:]
unusedLogIndex, self.logIndex = self.logIndex[:idx], self.logIndex[idx:]
for logIdx, logFile := range unusedLogFiles {
logger.Info("Deleting %s", logFile.file.Name())
logIndex := unusedLogIndex[logIdx]
self.state.FirstSuffix = self.logFiles[0].suffix()
e.confirmation <- &confirmation{0, nil}
// createNewLog opens a new log file named "log.<firstRequestNumber>" in the
// wal directory through openLog (which also appends it to self.logFiles),
// then records the log's suffix as state.CurrentFileSuffix and resets
// state.CurrentFileOffset to 0. It returns the new log, or openLog's error.
//
// NOTE(review): an earlier comment here said the file is created "using the
// next suffix" and initialized "with the state of the last log file"; neither
// is visible in this view of the body — confirm against the full file.
func (self *WAL) createNewLog(firstRequestNumber uint32) (*log, error) {
logFileName := path.Join(self.config.WalDir, fmt.Sprintf("log.%d", firstRequestNumber))
log, _, err := self.openLog(logFileName)
if err != nil {
return nil, err
self.state.CurrentFileSuffix = log.suffix()
self.state.CurrentFileOffset = 0
return log, nil
// openLog opens (creating it if needed) the log file logFileName for
// read/write in append mode with permissions 0644, wraps it with newLog, and
// opens the matching index file "index.<suffix>" in the wal directory, where
// <suffix> is the base name of logFileName with the "log." prefix removed.
// The log and the index are appended to self.logFiles and self.logIndex and
// both are returned.
//
// NOTE(review): in the visible code the log is appended to self.logFiles
// before the index is opened, so a newIndex failure would leave the two
// slices with different lengths; also no close of logFile is visible on the
// newLog error path. Lines may be missing from this view — confirm.
func (self *WAL) openLog(logFileName string) (*log, *index, error) {
logger.Info("Opening log file %s", logFileName)
logFile, err := os.OpenFile(logFileName, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0644)
if err != nil {
return nil, nil, err
log, err := newLog(logFile, self.config)
if err != nil {
return nil, nil, err
self.logFiles = append(self.logFiles, log)
// derive the index file name from the log file's suffix
suffix := strings.TrimPrefix(path.Base(logFileName), "log.")
indexFileName := path.Join(self.config.WalDir, "index."+suffix)
logger.Info("Opening index file %s", indexFileName)
index, err := newIndex(indexFileName)
if err != nil {
logger.Error("Cannot open index file %s", err)
return nil, nil, err
self.logIndex = append(self.logIndex, index)
return log, index, nil
// AssignSequenceNumbersAndLog will assign sequence numbers if null and log
// the request for the given shard. It returns a unique id (the request
// number from the confirmation) that should be marked as committed for each
// server as it gets confirmed.
//
// The request is sent as an appendEntry (with assignSeqOnly false) on the
// entries channel, and the call blocks until the confirmation arrives.
//
// NOTE(review): the body of the error branch is not visible in this view; the
// comment inside the function suggests it panics — confirm.
func (self *WAL) AssignSequenceNumbersAndLog(request *protocol.Request, shard Shard) (uint32, error) {
confirmationChan := make(chan *confirmation)
self.entries <- &appendEntry{confirmationChan, request, shard.Id(), false}
confirmation := <-confirmationChan
// we should panic if the wal cannot append the request
if confirmation.err != nil {
return confirmation.requestNumber, confirmation.err
// AssignSequenceNumbers assigns sequence numbers if null, without logging
// the request.
//
// The request is sent as an appendEntry with shard id 0 and assignSeqOnly
// true on the entries channel, and the call blocks until the confirmation
// arrives. The visible return path yields nil.
//
// NOTE(review): the body of the error branch is not visible in this view; the
// comment inside the function suggests it panics — confirm.
func (self *WAL) AssignSequenceNumbers(request *protocol.Request) error {
confirmationChan := make(chan *confirmation)
self.entries <- &appendEntry{confirmationChan, request, 0, true}
confirmation := <-confirmationChan
// we should panic if the wal cannot append the request
if confirmation.err != nil {
return nil
// firstLogFile returns the position of the first log file whose index
// contains the last request number recorded for any server in
// state.ServerLastRequestNumber. When no index contains such a request
// number it returns the position of the last log file, or 0 when there are
// no log files at all. processCommitEntry treats every log file before the
// returned position as unused.
func (self *WAL) firstLogFile() int {
for idx, logIndex := range self.logIndex {
for _, requestNumber := range self.state.ServerLastRequestNumber {
// if no server needs to keep this log file around we delete it
if logIndex.requestOffset(requestNumber) != -1 {
return idx
// no server's last request was found in any index: keep only the last log file
if len(self.logIndex) > 0 {
return len(self.logIndex) - 1
return 0
// shouldRotateTheLogFile reports whether the number of requests since the
// last rotation has reached config.WalRequestsPerLogFile.
func (self *WAL) shouldRotateTheLogFile() bool {
return self.requestsSinceRotation >= self.config.WalRequestsPerLogFile
// recover walks over every open log file and replays it from the last offset
// recorded in its index. For each replayed request it updates
// state.LargestRequestNumber and feeds the sequence number of every point
// (with the server id removed and divided by HOST_ID_OFFSET) to
// state.recover for the request's shard. It returns the first Stat or replay
// error encountered, or nil. It is called from SetServerId.
//
// NOTE(review): many lines are missing from this view (closing braces, the
// exit taken when the replay channel yields nil, the counter updates and the
// body of the final WalIndexAfterRequests check). As visible here, the nil
// check is immediately followed by a dereference of replayRequest, so an exit
// statement must sit between them — confirm against the full file.
func (self *WAL) recover() error {
for idx, logFile := range self.logFiles {
self.requestsSinceLastIndex = 0
// seed the rotation counter with the number of entries in this log's index
self.requestsSinceRotation = self.logIndex[idx].getLength()
lastOffset := self.logIndex[idx].getLastOffset()
logger.Debug("Getting file size for %s[%d]", logFile.file.Name(), logFile.file.Fd())
stat, err := logFile.file.Stat()
if err != nil {
return err
logger.Info("Checking %s, last: %d, size: %d", logFile.file.Name(), lastOffset, stat.Size())
// the stop channel returned by dupAndReplayFromOffset is discarded here
replay, _ := logFile.dupAndReplayFromOffset(nil, lastOffset, 0)
firstOffset := int64(-1)
for {
replayRequest := <-replay
if replayRequest == nil {
self.state.LargestRequestNumber = replayRequest.requestNumber
if err := replayRequest.err; err != nil {
return err
for _, s := range replayRequest.request.MultiSeries {
for _, point := range s.Points {
// inverse of the formula used in assignSequenceNumbers
sequenceNumber := (point.GetSequenceNumber() - uint64(self.serverId)) / HOST_ID_OFFSET
self.state.recover(replayRequest.shardId, sequenceNumber)
// remember where the first replayed request of this file starts
if firstOffset == -1 {
firstOffset = replayRequest.startOffset
logger.Debug("recovery requestsSinceLastIndex: %d, requestNumber: %d", self.requestsSinceLastIndex, replayRequest.request.GetRequestNumber())
logger.Debug("largestrequestnumber: %d\n", self.state.LargestRequestNumber)
if self.requestsSinceLastIndex < self.config.WalIndexAfterRequests {
logger.Debug("Finished wal recovery")
return nil
// rotateTheLogFile starts a new log file when shouldRotateTheLogFile reports
// true or when nextRequestNumber equals math.MaxUint32. Before rotating it
// resets requestsSinceRotation and syncs the current log file and its index.
// The new log is created with createNewLog(nextRequestNumber + 1).
//
// It returns (false, nil) when no rotation is needed, (false, err) when a
// sync or the creation of the new log fails, and (true, nil) after a
// successful rotation.
//
// NOTE(review): the body of the "requestsSinceLastIndex > 0" branch is not
// visible in this view. Also, when nextRequestNumber is math.MaxUint32 the
// uint32 expression nextRequestNumber + 1 wraps to 0 — presumably intended
// as the wrap-around of request numbers; confirm.
func (self *WAL) rotateTheLogFile(nextRequestNumber uint32) (bool, error) {
if !self.shouldRotateTheLogFile() && nextRequestNumber != math.MaxUint32 {
return false, nil
self.requestsSinceRotation = 0
if self.requestsSinceLastIndex > 0 {
lastEntryIndex := len(self.logFiles) - 1
lastLogFile := self.logFiles[lastEntryIndex]
lastIndex := self.logIndex[lastEntryIndex]
// make the outgoing log and its index durable before switching files
if err := lastLogFile.syncFile(); err != nil {
return false, err
if err := lastIndex.syncFile(); err != nil {
return false, err
lastLogFile, err := self.createNewLog(nextRequestNumber + 1)
if err != nil {
return false, err
logger.Info("Rotating log. New log file %s", lastLogFile.file.Name())
return true, nil
// conditionalBookmarkAndIndex compares the requestsSinceLastIndex,
// requestsSinceLastBookmark and requestsSinceLastFlush counters against the
// configured WalIndexAfterRequests, WalBookmarkAfterRequests and
// WalFlushAfterRequests thresholds; the flush check also passes when the
// local shouldFlush flag is set.
//
// NOTE(review): the bodies of all three branches are not visible in this
// view, so what each threshold triggers (presumably index, bookmark and
// flush) and where shouldFlush is set cannot be confirmed from here.
func (self *WAL) conditionalBookmarkAndIndex() {
shouldFlush := false
logger.Debug("requestsSinceLastIndex: %d", self.requestsSinceLastIndex)
if self.requestsSinceLastIndex >= self.config.WalIndexAfterRequests {
if self.requestsSinceLastBookmark >= self.config.WalBookmarkAfterRequests {
if self.requestsSinceLastFlush >= self.config.WalFlushAfterRequests || shouldFlush {
// flush resets requestsSinceLastFlush and calls syncFile on the last log
// file and then on its index. It returns the first syncFile error, or nil.
//
// NOTE(review): no guard for an empty self.logFiles is visible; with no log
// files the index expression len(self.logFiles)-1 would be -1 — confirm that
// callers only flush after a log file exists.
func (self *WAL) flush() error {
logger.Debug("Fsyncing the log file to disk")
self.requestsSinceLastFlush = 0
lastEntryIndex := len(self.logFiles) - 1
if err := self.logFiles[lastEntryIndex].syncFile(); err != nil {
return err
if err := self.logIndex[lastEntryIndex].syncFile(); err != nil {
return err
return nil
// CreateCheckpoint sends a bookmarkEntry on the entries channel, blocks until
// the confirmation arrives on the entry's unbuffered confirmation channel,
// and returns the error carried by that confirmation. processEntries answers
// a bookmarkEntry by calling bookmark and index.
func (self *WAL) CreateCheckpoint() error {
confirmationChan := make(chan *confirmation)
self.entries <- &bookmarkEntry{confirmationChan}
confirmation := <-confirmationChan
return confirmation.err
// bookmark writes the global state to its file through state.writeToFile.
// On failure it logs and returns the error; on success it resets
// requestsSinceLastBookmark to 0 and returns nil.
func (self *WAL) bookmark() error {
if err := self.state.writeToFile(); err != nil {
logger.Error("Cannot write bookmark %s", err)
return err
self.requestsSinceLastBookmark = 0
return nil
// index does nothing and returns nil when there are no log files or when
// requestsSinceLastBookmark is 0. Otherwise it reads the last offset recorded
// in the last index, resets requestsSinceLastIndex to 0 and returns nil on
// the visible path.
//
// NOTE(review): the statements that use firstOffset to extend the index (and
// any error they may return) are not visible in this view. The guard checks
// requestsSinceLastBookmark, not requestsSinceLastIndex — confirm that this
// is intended.
func (self *WAL) index() error {
if len(self.logFiles) == 0 || self.requestsSinceLastBookmark == 0 {
return nil
lastIndex := self.logIndex[len(self.logIndex)-1]
firstOffset := lastIndex.getLastOffset()
self.requestsSinceLastIndex = 0
return nil