2015-07-14 17:07:08 +00:00
|
|
|
package meta
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/binary"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"log"
|
|
|
|
"net"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/gogo/protobuf/proto"
|
|
|
|
"github.com/influxdb/influxdb/meta/internal"
|
|
|
|
)
|
|
|
|
|
|
|
|
// RPC handles request/response style messaging between cluster nodes
|
|
|
|
type RPC struct {
|
|
|
|
Logger *log.Logger
|
|
|
|
|
|
|
|
store interface {
|
|
|
|
cachedData() *Data
|
2015-07-15 19:55:12 +00:00
|
|
|
IsLeader() bool
|
2015-07-14 17:07:08 +00:00
|
|
|
Leader() string
|
2015-07-15 05:06:39 +00:00
|
|
|
Peers() []string
|
|
|
|
AddPeer(host string) error
|
2015-07-15 21:02:59 +00:00
|
|
|
CreateNode(host string) (*NodeInfo, error)
|
|
|
|
NodeByHost(host string) (*NodeInfo, error)
|
2015-07-14 17:07:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-15 05:06:39 +00:00
|
|
|
// JoinResult is the outcome of a successful join call, built from the
// remote node's join response.
type JoinResult struct {
	// RaftEnabled is the EnableRaft flag reported by the remote node.
	RaftEnabled bool
	// Peers is the peer list reported by the remote node.
	Peers []string
	// NodeID is the node ID reported by the remote node.
	NodeID uint64
}
|
|
|
|
|
|
|
|
type Reply interface {
|
|
|
|
GetHeader() *internal.ResponseHeader
|
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
// proxyLeader proxies the connection to the current raft leader
|
|
|
|
func (r *RPC) proxyLeader(conn *net.TCPConn) {
|
|
|
|
if r.store.Leader() == "" {
|
|
|
|
// FIXME: return error to client
|
|
|
|
r.Logger.Printf("no leader")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
leaderConn, err := net.DialTimeout("tcp", r.store.Leader(), 10*time.Second)
|
|
|
|
if err != nil {
|
|
|
|
r.Logger.Printf("dial leader: %v", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer leaderConn.Close()
|
|
|
|
leaderConn.Write([]byte{MuxRPCHeader})
|
|
|
|
if err := proxy(leaderConn.(*net.TCPConn), conn); err != nil {
|
|
|
|
r.Logger.Printf("leader proxy error: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-14 17:07:08 +00:00
|
|
|
// handleRPCConn reads a command from the connection and executes it.
|
|
|
|
func (r *RPC) handleRPCConn(conn net.Conn) {
|
2015-07-15 19:55:12 +00:00
|
|
|
|
|
|
|
// RPC connections should execute on the leader. If we are not the leader,
|
|
|
|
// proxy the connection to the leader so that clients an connect to any node
|
|
|
|
// in the cluster.
|
|
|
|
r.Logger.Printf("rpc connection from: %v", conn.RemoteAddr())
|
|
|
|
if !r.store.IsLeader() {
|
|
|
|
r.proxyLeader(conn.(*net.TCPConn))
|
|
|
|
conn.Close()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-07-14 17:07:08 +00:00
|
|
|
// Read and execute request.
|
2015-07-15 05:06:39 +00:00
|
|
|
resp, err := func() (proto.Message, error) {
|
2015-07-14 17:07:08 +00:00
|
|
|
// Read request size.
|
|
|
|
var sz uint64
|
|
|
|
if err := binary.Read(conn, binary.BigEndian, &sz); err != nil {
|
|
|
|
return nil, fmt.Errorf("read size: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read request.
|
|
|
|
buf := make([]byte, sz)
|
|
|
|
if _, err := io.ReadFull(conn, buf); err != nil {
|
|
|
|
return nil, fmt.Errorf("read request: %s", err)
|
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
// Determine the RPC type
|
2015-07-15 05:06:39 +00:00
|
|
|
rpcType := internal.RPCType(btou64(buf[0:8]))
|
|
|
|
buf = buf[8:]
|
|
|
|
|
|
|
|
r.Logger.Printf("recv %v request from: %v", rpcType, conn.RemoteAddr())
|
|
|
|
switch rpcType {
|
|
|
|
case internal.RPCType_FetchData:
|
|
|
|
return r.handleFetchData()
|
|
|
|
case internal.RPCType_Join:
|
|
|
|
var req internal.JoinRequest
|
|
|
|
if err := proto.Unmarshal(buf, &req); err != nil {
|
|
|
|
r.Logger.Printf("join request unmarshal: %v", err)
|
|
|
|
}
|
|
|
|
return r.handleJoinRequest(&req)
|
2015-07-14 17:07:08 +00:00
|
|
|
default:
|
2015-07-15 05:06:39 +00:00
|
|
|
return nil, fmt.Errorf("unknown rpc type:%v", rpcType)
|
2015-07-14 17:07:08 +00:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
// Set the status header and error message
|
2015-07-15 05:06:39 +00:00
|
|
|
if reply, ok := resp.(Reply); ok {
|
|
|
|
reply.GetHeader().OK = proto.Bool(err == nil)
|
|
|
|
if err != nil {
|
|
|
|
reply.GetHeader().Error = proto.String(err.Error())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
// Marshal the response back to a protobuf
|
2015-07-15 05:06:39 +00:00
|
|
|
buf, err := proto.Marshal(resp)
|
2015-07-14 17:07:08 +00:00
|
|
|
if err != nil {
|
2015-07-15 05:06:39 +00:00
|
|
|
r.Logger.Printf("unable to marshal response: %v", err)
|
2015-07-14 17:07:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Encode response back to connection.
|
2015-07-15 05:06:39 +00:00
|
|
|
if err = binary.Write(conn, binary.BigEndian, uint64(len(buf))); err != nil {
|
2015-07-14 17:07:08 +00:00
|
|
|
r.Logger.Printf("unable to write rpc response size: %s", err)
|
2015-07-15 05:06:39 +00:00
|
|
|
} else if _, err = conn.Write(buf); err != nil {
|
2015-07-14 17:07:08 +00:00
|
|
|
r.Logger.Printf("unable to write rpc response: %s", err)
|
|
|
|
}
|
|
|
|
conn.Close()
|
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
// handleFetchData handles a request for the current nodes meta data
|
2015-07-15 05:06:39 +00:00
|
|
|
func (r *RPC) handleFetchData() (*internal.FetchDataResponse, error) {
|
|
|
|
data := r.store.cachedData()
|
|
|
|
b, err := data.MarshalBinary()
|
|
|
|
|
|
|
|
return &internal.FetchDataResponse{
|
|
|
|
Header: &internal.ResponseHeader{
|
|
|
|
OK: proto.Bool(true),
|
|
|
|
},
|
|
|
|
Data: b}, err
|
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
// handleJoinRequest handles a request to join the cluster
|
2015-07-15 05:06:39 +00:00
|
|
|
func (r *RPC) handleJoinRequest(req *internal.JoinRequest) (*internal.JoinResponse, error) {
|
2015-07-15 21:02:59 +00:00
|
|
|
|
|
|
|
node, err := func() (*NodeInfo, error) {
|
|
|
|
// attempt to create the node
|
|
|
|
node, err := r.store.CreateNode(*req.Addr)
|
|
|
|
// if it exists, return the exting node
|
|
|
|
if err == ErrNodeExists {
|
|
|
|
return r.store.NodeByHost(*req.Addr)
|
|
|
|
} else if err != nil {
|
|
|
|
return nil, fmt.Errorf("create node: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have less than 3 nodes, add them as raft peers
|
|
|
|
if len(r.store.Peers()) < MaxRaftNodes {
|
|
|
|
if err = r.store.AddPeer(*req.Addr); err != nil {
|
|
|
|
return node, fmt.Errorf("add peer: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return node, err
|
|
|
|
}()
|
|
|
|
|
|
|
|
nodeID := uint64(0)
|
|
|
|
if node != nil {
|
|
|
|
nodeID = node.ID
|
|
|
|
}
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
r.Logger.Printf("join request failed: create node: %v", err)
|
2015-07-15 05:06:39 +00:00
|
|
|
}
|
2015-07-15 21:02:59 +00:00
|
|
|
|
2015-07-15 05:06:39 +00:00
|
|
|
r.Logger.Printf("recv join request from: %v", *req.Addr)
|
|
|
|
return &internal.JoinResponse{
|
|
|
|
Header: &internal.ResponseHeader{
|
|
|
|
OK: proto.Bool(true),
|
|
|
|
},
|
2015-07-15 21:02:59 +00:00
|
|
|
EnableRaft: proto.Bool(contains(r.store.Peers(), *req.Addr)),
|
2015-07-15 05:06:39 +00:00
|
|
|
Peers: r.store.Peers(),
|
2015-07-15 21:02:59 +00:00
|
|
|
NodeID: proto.Uint64(nodeID),
|
|
|
|
}, err
|
2015-07-15 05:06:39 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
// pack returns a TLV style byte slice encoding the size of the payload, the RPC type
|
|
|
|
// and the RPC data
|
|
|
|
func (r *RPC) pack(typ internal.RPCType, b []byte) []byte {
|
|
|
|
buf := u64tob(uint64(len(b)) + 8)
|
|
|
|
buf = append(buf, u64tob(uint64(typ))...)
|
|
|
|
buf = append(buf, b...)
|
|
|
|
return buf
|
|
|
|
}
|
|
|
|
|
2015-07-14 17:07:08 +00:00
|
|
|
// fetchMetaData returns the latest copy of the meta store data from the current
|
|
|
|
// leader.
|
|
|
|
func (r *RPC) fetchMetaData() (*Data, error) {
|
2015-07-15 05:06:39 +00:00
|
|
|
// Retrieve the current known leader.
|
|
|
|
leader := r.store.Leader()
|
|
|
|
if leader == "" {
|
|
|
|
return nil, errors.New("no leader")
|
2015-07-14 17:07:08 +00:00
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
data, err := r.call(leader, r.pack(internal.RPCType_FetchData, nil))
|
2015-07-14 17:07:08 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-07-15 05:06:39 +00:00
|
|
|
// Unmarshal response.
|
|
|
|
var resp internal.FetchDataResponse
|
|
|
|
if err := proto.Unmarshal(data, &resp); err != nil {
|
|
|
|
return nil, fmt.Errorf("unmarshal response: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if !resp.GetHeader().GetOK() {
|
|
|
|
return nil, fmt.Errorf("rpc failed: %s", resp.GetHeader().GetError())
|
|
|
|
}
|
|
|
|
|
2015-07-14 17:07:08 +00:00
|
|
|
ms := &Data{}
|
2015-07-15 05:06:39 +00:00
|
|
|
if err := ms.UnmarshalBinary(resp.GetData()); err != nil {
|
2015-07-14 17:07:08 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return ms, nil
|
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
// join attempts to join a cluster at remoteAddr using localAddr as the current
|
|
|
|
// node's cluster address
|
2015-07-15 05:06:39 +00:00
|
|
|
func (r *RPC) join(localAddr, remoteAddr string) (*JoinResult, error) {
|
|
|
|
req := &internal.JoinRequest{
|
|
|
|
Addr: proto.String(localAddr),
|
|
|
|
}
|
|
|
|
b, err := proto.Marshal(req)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-07-15 19:55:12 +00:00
|
|
|
data, err := r.call(remoteAddr, r.pack(internal.RPCType_Join, b))
|
2015-07-15 05:06:39 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
resp := &internal.JoinResponse{}
|
|
|
|
if err := proto.Unmarshal(data, resp); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if !resp.GetHeader().GetOK() {
|
|
|
|
return nil, fmt.Errorf("rpc failed: %s", resp.GetHeader().GetError())
|
2015-07-14 17:07:08 +00:00
|
|
|
}
|
2015-07-15 05:06:39 +00:00
|
|
|
return &JoinResult{
|
|
|
|
RaftEnabled: resp.GetEnableRaft(),
|
|
|
|
Peers: resp.GetPeers(),
|
2015-07-15 21:02:59 +00:00
|
|
|
NodeID: resp.GetNodeID(),
|
2015-07-15 05:06:39 +00:00
|
|
|
}, nil
|
2015-07-14 17:07:08 +00:00
|
|
|
|
2015-07-15 05:06:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// call sends an encoded request to the remote leader and returns
|
|
|
|
// an encoded response value.
|
|
|
|
func (r *RPC) call(dest string, b []byte) ([]byte, error) {
|
2015-07-14 17:07:08 +00:00
|
|
|
// Create a connection to the leader.
|
2015-07-15 05:06:39 +00:00
|
|
|
conn, err := net.DialTimeout("tcp", dest, 10*time.Second)
|
2015-07-14 17:07:08 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer conn.Close()
|
|
|
|
|
|
|
|
// Write a marker byte for rpc messages.
|
|
|
|
_, err = conn.Write([]byte{MuxRPCHeader})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write request size & bytes.
|
2015-07-15 19:55:12 +00:00
|
|
|
if _, err := conn.Write(b); err != nil {
|
2015-07-14 17:07:08 +00:00
|
|
|
return nil, fmt.Errorf("write rpc: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read response bytes.
|
|
|
|
var sz uint64
|
|
|
|
if err := binary.Read(conn, binary.BigEndian, &sz); err != nil {
|
|
|
|
return nil, fmt.Errorf("read response size: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
buf := make([]byte, sz)
|
|
|
|
if _, err := io.ReadFull(conn, buf); err != nil {
|
|
|
|
return nil, fmt.Errorf("read response: %s", err)
|
|
|
|
}
|
|
|
|
|
2015-07-15 05:06:39 +00:00
|
|
|
return buf, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// u64tob encodes v as an 8-byte big-endian byte slice.
func u64tob(v uint64) []byte {
	var out [8]byte
	binary.BigEndian.PutUint64(out[:], v)
	return out[:]
}
|
2015-07-14 17:07:08 +00:00
|
|
|
|
2015-07-15 05:06:39 +00:00
|
|
|
// btou64 decodes the first 8 bytes of b as a big-endian uint64.
// It panics if b holds fewer than 8 bytes.
func btou64(b []byte) uint64 {
	v := binary.BigEndian.Uint64(b)
	return v
}
|
2015-07-15 21:02:59 +00:00
|
|
|
|
|
|
|
// contains reports whether e is an element of s.
func contains(s []string, e string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}
	return false
}
|