package meta

import (
	"errors"
	"fmt"
	"io/ioutil"
	"log"
	"net"
	"os"
	"sync"
	"time"

	"github.com/influxdb/influxdb/services/meta/internal"

	"github.com/gogo/protobuf/proto"
	"github.com/hashicorp/raft"
)

// Retention policy settings.
const (
	autoCreateRetentionPolicyName   = "default"
	autoCreateRetentionPolicyPeriod = 0

	// maxAutoCreatedRetentionPolicyReplicaN is the maximum replication factor that will
	// be set for auto-created retention policies.
	maxAutoCreatedRetentionPolicyReplicaN = 3
)

// Raft configuration.
const (
	raftListenerStartupTimeout = time.Second
)

type store struct {
	mu      sync.RWMutex
	closing chan struct{}

	config      *Config
	data        *Data
	raftState   *raftState
	dataChanged chan struct{}
	path        string
	opened      bool
	logger      *log.Logger

	// Authentication cache.
	authCache map[string]authUser

	raftAddr string
	httpAddr string
}

type authUser struct {
	salt []byte
	hash []byte
}

// newStore creates a new metastore with the passed-in config.
func newStore(c *Config, httpAddr, raftAddr string) *store {
	s := store{
		data: &Data{
			Index: 1,
		},
		closing:     make(chan struct{}),
		dataChanged: make(chan struct{}),
		path:        c.Dir,
		config:      c,
		httpAddr:    httpAddr,
		raftAddr:    raftAddr,
	}
	if c.LoggingEnabled {
		s.logger = log.New(os.Stderr, "[metastore] ", log.LstdFlags)
	} else {
		s.logger = log.New(ioutil.Discard, "", 0)
	}

	return &s
}

// open opens and initializes the raft store.
func (s *store) open(raftln net.Listener) error {
	s.logger.Printf("Using data dir: %v", s.path)

	// See if this server needs to join the raft consensus group.
	var initializePeers []string
	if len(s.config.JoinPeers) > 0 {
		c := NewClient(s.config.JoinPeers, s.config.HTTPSEnabled)
		data := c.retryUntilSnapshot(0)
		for _, n := range data.MetaNodes {
			initializePeers = append(initializePeers, n.TCPHost)
		}
		initializePeers = append(initializePeers, s.raftAddr)
	}

	if err := func() error {
		s.mu.Lock()
		defer s.mu.Unlock()

		// Check if store has already been opened.
		if s.opened {
			return ErrStoreOpen
		}
		s.opened = true

		// Create the root directory if it doesn't already exist.
		if err := os.MkdirAll(s.path, 0777); err != nil {
			return fmt.Errorf("mkdir all: %s", err)
		}

		// Open the raft store.
		if err := s.openRaft(initializePeers, raftln); err != nil {
			return fmt.Errorf("raft: %s", err)
		}

		return nil
	}(); err != nil {
		return err
	}

	if len(s.config.JoinPeers) > 0 {
		c := NewClient(s.config.JoinPeers, s.config.HTTPSEnabled)
		if err := c.Open(); err != nil {
			return err
		}
		defer c.Close()

		if err := c.JoinMetaServer(s.httpAddr, s.raftAddr); err != nil {
			return err
		}
	}

	// Wait for a leader to be elected so we know the raft log is loaded
	// and up to date.
	if err := s.waitForLeader(0); err != nil {
		return err
	}

	// Make sure this server is in the list of metanodes.
	peers, err := s.raftState.peers()
	if err != nil {
		return err
	}
	if len(peers) <= 1 {
		// We have to loop here because if the hostname has changed,
		// raft takes a little while to normalize before this host
		// is marked as the leader.
		for {
			err := s.setMetaNode(s.httpAddr, s.raftAddr)
			if err == nil {
				break
			}
			time.Sleep(100 * time.Millisecond)
		}
	}

	return nil
}

// openRaft opens the raft state for this store using the given listener
// and initial peer set.
func (s *store) openRaft(initializePeers []string, raftln net.Listener) error {
	rs := newRaftState(s.config, s.raftAddr)
	rs.logger = s.logger
	rs.path = s.path

	if err := rs.open(s, raftln, initializePeers); err != nil {
		return err
	}
	s.raftState = rs

	return nil
}

// close closes the store and the underlying raft state. Calling it again
// after the store has been closed is a no-op.
func (s *store) close() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	select {
	case <-s.closing:
		// already closed
		return nil
	default:
		close(s.closing)
		return s.raftState.close()
	}
}

// snapshot returns a copy of the current in-memory meta data.
func (s *store) snapshot() (*Data, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.data.Clone(), nil
}

// afterIndex returns a channel that will be closed to signal
// the caller when an updated snapshot is available.
func (s *store) afterIndex(index uint64) <-chan struct{} {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if index < s.data.Index {
		// Client needs update so return a closed channel.
		ch := make(chan struct{})
		close(ch)
		return ch
	}

	return s.dataChanged
}

// waitForLeader sleeps until a leader is found or a timeout occurs.
// timeout == 0 means to wait forever.
func (s *store) waitForLeader(timeout time.Duration) error {
	// Begin timeout timer.
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	// Continually check for leader until timeout.
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()
	for {
		select {
		case <-s.closing:
			return errors.New("closing")
		case <-timer.C:
			if timeout != 0 {
				return errors.New("timeout")
			}
		case <-ticker.C:
			if s.leader() != "" {
				return nil
			}
		}
	}
}

// isLeader returns true if the store is currently the leader.
func (s *store) isLeader() bool {
	s.mu.RLock()
	defer s.mu.RUnlock()
	if s.raftState == nil {
		return false
	}
	return s.raftState.raft.State() == raft.Leader
}

// leader returns what the store thinks is the current leader. An empty
// string indicates no leader exists.
func (s *store) leader() string {
	s.mu.RLock()
	defer s.mu.RUnlock()
	if s.raftState == nil {
		return ""
	}
	return s.raftState.raft.Leader()
}

// leaderHTTP returns the HTTP API connection info for the metanode
// that is the raft leader
func (s *store) leaderHTTP() string {
	s.mu.RLock()
	defer s.mu.RUnlock()
	if s.raftState == nil {
		return ""
	}
	l := s.raftState.raft.Leader()

	for _, n := range s.data.MetaNodes {
		if n.TCPHost == l {
			return n.Host
		}
	}

	return ""
}

// index returns the current store index.
func (s *store) index() uint64 {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.data.Index
}

// apply applies a command to raft.
func (s *store) apply(b []byte) error {
	return s.raftState.apply(b)
}

// join adds a new server to the metaservice and raft
func (s *store) join(n *NodeInfo) error {
	if err := s.raftState.addPeer(n.TCPHost); err != nil {
		return err
	}

	return s.createMetaNode(n.Host, n.TCPHost)
}

// leave removes a server from the metaservice and raft
func (s *store) leave(n *NodeInfo) error {
	return s.raftState.removePeer(n.TCPHost)
}

// createMetaNode is used by the join command to create the metanode in
// the metastore
func (s *store) createMetaNode(addr, raftAddr string) error {
	val := &internal.CreateMetaNodeCommand{
		HTTPAddr: proto.String(addr),
		TCPAddr:  proto.String(raftAddr),
	}
	t := internal.Command_CreateMetaNodeCommand
	cmd := &internal.Command{Type: &t}
	if err := proto.SetExtension(cmd, internal.E_CreateMetaNodeCommand_Command, val); err != nil {
		panic(err)
	}

	b, err := proto.Marshal(cmd)
	if err != nil {
		return err
	}

	return s.apply(b)
}

// setMetaNode is used when the raft group has only a single peer. It will
// either create a metanode or update the information for the one metanode
// that is there. It's used because hostnames can change
func (s *store) setMetaNode(addr, raftAddr string) error {
	val := &internal.SetMetaNodeCommand{
		HTTPAddr: proto.String(addr),
		TCPAddr:  proto.String(raftAddr),
	}
	t := internal.Command_SetMetaNodeCommand
	cmd := &internal.Command{Type: &t}
	if err := proto.SetExtension(cmd, internal.E_SetMetaNodeCommand_Command, val); err != nil {
		panic(err)
	}

	b, err := proto.Marshal(cmd)
	if err != nil {
		return err
	}

	return s.apply(b)
}
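
// exampleOpenStore is a minimal usage sketch added for illustration; it is
// not part of the original file. It assumes a *Config with Dir set and shows
// how newStore, open, and close fit together; the addresses and the function
// name are hypothetical.
func exampleOpenStore(c *Config, httpAddr, raftAddr string) error {
	// Bind the raft listener on the TCP address the store will advertise.
	raftln, err := net.Listen("tcp", raftAddr)
	if err != nil {
		return err
	}

	// Create the store, open it against the raft listener, and close it when done.
	s := newStore(c, httpAddr, raftAddr)
	if err := s.open(raftln); err != nil {
		return err
	}
	return s.close()
}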