232 lines
5.9 KiB
Go
232 lines
5.9 KiB
Go
package cluster
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"time"
|
|
|
|
log "code.google.com/p/log4go"
|
|
c "github.com/influxdb/influxdb/configuration"
|
|
"github.com/influxdb/influxdb/protocol"
|
|
)
|
|
|
|
const (
	// HEARTBEAT_TIMEOUT is a fixed 100ms duration. The heartbeat code in this
	// file waits on ClusterServer.HeartbeatInterval rather than on this
	// constant; presumably it is referenced elsewhere in the package.
	HEARTBEAT_TIMEOUT = 100 * time.Millisecond
)
|
|
|
|
type ClusterServer struct {
|
|
Id uint32
|
|
RaftName string
|
|
State ServerState
|
|
RaftConnectionString string
|
|
ProtobufConnectionString string
|
|
connection ServerConnection
|
|
HeartbeatInterval time.Duration
|
|
Backoff time.Duration
|
|
MinBackoff time.Duration
|
|
MaxBackoff time.Duration
|
|
isUp bool
|
|
writeBuffer *WriteBuffer
|
|
heartbeatStarted bool
|
|
}
|
|
|
|
type ServerConnection interface {
|
|
Connect()
|
|
Close()
|
|
ClearRequests()
|
|
MakeRequest(*protocol.Request, ResponseChannel) error
|
|
CancelRequest(*protocol.Request)
|
|
}
|
|
|
|
// ServerState enumerates the states a ClusterServer can be in.
type ServerState int

// The known server states, numbered from zero in declaration order.
// ClusterServer.GetStateName returns the matching identifier as a string.
const (
	LoadingRingData ServerState = iota
	SendingRingData
	DeletingOldData
	Running
	Potential
)
|
|
|
|
func (self *ClusterServer) GetStateName() (stateName string) {
|
|
switch {
|
|
case self.State == LoadingRingData:
|
|
return "LoadingRingData"
|
|
case self.State == SendingRingData:
|
|
return "SendingRingData"
|
|
case self.State == DeletingOldData:
|
|
return "DeletingOldData"
|
|
case self.State == Running:
|
|
return "Running"
|
|
case self.State == Potential:
|
|
return "Potential"
|
|
}
|
|
return "UNKNOWN"
|
|
}
|
|
|
|
func NewClusterServer(raftName, raftConnectionString, protobufConnectionString string, connection ServerConnection, config *c.Configuration) *ClusterServer {
|
|
|
|
s := &ClusterServer{
|
|
RaftName: raftName,
|
|
RaftConnectionString: raftConnectionString,
|
|
ProtobufConnectionString: protobufConnectionString,
|
|
connection: connection,
|
|
HeartbeatInterval: config.ProtobufHeartbeatInterval.Duration,
|
|
Backoff: config.ProtobufMinBackoff.Duration,
|
|
MinBackoff: config.ProtobufMinBackoff.Duration,
|
|
MaxBackoff: config.ProtobufMaxBackoff.Duration,
|
|
heartbeatStarted: false,
|
|
}
|
|
|
|
return s
|
|
}
|
|
|
|
func (self *ClusterServer) StartHeartbeat() {
|
|
if self.heartbeatStarted {
|
|
return
|
|
}
|
|
|
|
self.heartbeatStarted = true
|
|
self.isUp = true
|
|
go self.heartbeat()
|
|
}
|
|
|
|
func (self *ClusterServer) SetWriteBuffer(writeBuffer *WriteBuffer) {
|
|
self.writeBuffer = writeBuffer
|
|
}
|
|
|
|
func (self *ClusterServer) GetId() uint32 {
|
|
return self.Id
|
|
}
|
|
|
|
func (self *ClusterServer) Connect() {
|
|
if !shouldConnect(self.ProtobufConnectionString) {
|
|
return
|
|
}
|
|
|
|
log.Info("ClusterServer: %d connecting to: %s", self.Id, self.ProtobufConnectionString)
|
|
self.connection.Connect()
|
|
}
|
|
|
|
func (self *ClusterServer) MakeRequest(request *protocol.Request, responseStream chan<- *protocol.Response) {
|
|
rc := NewResponseChannelWrapper(responseStream)
|
|
err := self.connection.MakeRequest(request, rc)
|
|
if err != nil {
|
|
log.Error("Canceling request: %s", err)
|
|
self.connection.CancelRequest(request)
|
|
self.markServerAsDown()
|
|
}
|
|
}
|
|
|
|
func (self *ClusterServer) Write(request *protocol.Request) error {
|
|
responseChan := make(chan *protocol.Response, 1)
|
|
rc := NewResponseChannelWrapper(responseChan)
|
|
err := self.connection.MakeRequest(request, rc)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
log.Debug("Waiting for response to %d", request.GetRequestNumber())
|
|
response := <-responseChan
|
|
if response.ErrorMessage != nil {
|
|
return errors.New(*response.ErrorMessage)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (self *ClusterServer) BufferWrite(request *protocol.Request) {
|
|
self.writeBuffer.Write(request)
|
|
}
|
|
|
|
func (self *ClusterServer) IsUp() bool {
|
|
return self.isUp
|
|
}
|
|
|
|
// private methods
|
|
|
|
// HEARTBEAT_TYPE is the request type of heartbeat requests. It is a variable
// rather than a constant because heartbeat stores its address in
// protocol.Request.Type.
var HEARTBEAT_TYPE = protocol.Request_HEARTBEAT
|
|
|
|
func (self *ClusterServer) heartbeat() {
|
|
defer func() {
|
|
self.heartbeatStarted = false
|
|
}()
|
|
|
|
for {
|
|
// this chan is buffered and in the loop on purpose. This is so
|
|
// that if reading a heartbeat times out, and the heartbeat then comes through
|
|
// later, it will be dumped into this chan and not block the protobuf client reader.
|
|
responseChan := make(chan *protocol.Response, 1)
|
|
heartbeatRequest := &protocol.Request{
|
|
Type: &HEARTBEAT_TYPE,
|
|
Database: protocol.String(""),
|
|
}
|
|
self.MakeRequest(heartbeatRequest, responseChan)
|
|
err := self.getHeartbeatResponse(responseChan)
|
|
if err != nil {
|
|
self.handleHeartbeatError(err)
|
|
continue
|
|
}
|
|
|
|
if !self.isUp {
|
|
log.Warn("Server marked as up. Heartbeat succeeded")
|
|
}
|
|
// otherwise, reset the backoff and mark the server as up
|
|
self.isUp = true
|
|
self.Backoff = self.MinBackoff
|
|
time.Sleep(self.HeartbeatInterval)
|
|
}
|
|
}
|
|
|
|
func (self *ClusterServer) getHeartbeatResponse(responseChan <-chan *protocol.Response) error {
|
|
select {
|
|
case response := <-responseChan:
|
|
if response.ErrorMessage != nil {
|
|
return fmt.Errorf("Server %d returned error to heartbeat: %s", self.Id, *response.ErrorMessage)
|
|
}
|
|
|
|
if *response.Type != protocol.Response_HEARTBEAT {
|
|
return fmt.Errorf("Server returned a non heartbeat response")
|
|
}
|
|
|
|
case <-time.After(self.HeartbeatInterval):
|
|
return fmt.Errorf("Server failed to return heartbeat in %s: %d", self.HeartbeatInterval, self.Id)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (self *ClusterServer) markServerAsDown() {
|
|
self.isUp = false
|
|
self.connection.ClearRequests()
|
|
}
|
|
|
|
func (self *ClusterServer) handleHeartbeatError(err error) {
|
|
if self.isUp {
|
|
log.Warn("Server marked as down. Heartbeat error for server: %d - %s: %s", self.Id, self.ProtobufConnectionString, err)
|
|
}
|
|
self.markServerAsDown()
|
|
self.Backoff *= 2
|
|
if self.Backoff > self.MaxBackoff {
|
|
self.Backoff = self.MaxBackoff
|
|
}
|
|
time.Sleep(self.Backoff)
|
|
}
|
|
|
|
// in the coordinator test we don't want to create protobuf servers,
|
|
// so we just ignore creating a protobuf client when the connection
|
|
// string has a 0 port
|
|
func shouldConnect(addr string) bool {
|
|
log.Debug("SHOULD CONNECT: ", addr)
|
|
_, port, err := net.SplitHostPort(addr)
|
|
if err != nil {
|
|
log.Error("Error parsing address '%s': %s", addr, err)
|
|
return false
|
|
}
|
|
|
|
if port == "0" {
|
|
log.Warn("Cannot connect to port 0. Not creating a protobuf client")
|
|
return false
|
|
}
|
|
return true
|
|
}
|