2015-05-29 20:12:00 +00:00
|
|
|
package graphite
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
import (
|
|
|
|
"bufio"
|
2015-09-01 03:32:48 +00:00
|
|
|
"expvar"
|
2015-06-08 22:35:38 +00:00
|
|
|
"fmt"
|
|
|
|
"log"
|
|
|
|
"net"
|
|
|
|
"os"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
2015-05-29 20:12:00 +00:00
|
|
|
|
2015-09-04 05:12:33 +00:00
|
|
|
"github.com/influxdb/influxdb"
|
2015-06-08 22:35:38 +00:00
|
|
|
"github.com/influxdb/influxdb/cluster"
|
2015-06-11 00:57:25 +00:00
|
|
|
"github.com/influxdb/influxdb/meta"
|
2015-09-01 03:32:48 +00:00
|
|
|
"github.com/influxdb/influxdb/monitor"
|
2015-06-08 22:35:38 +00:00
|
|
|
"github.com/influxdb/influxdb/tsdb"
|
|
|
|
)
|
2015-05-29 20:12:00 +00:00
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
// Package-level sizes and timeouts.
const (
	// udpBufferSize is the length, in bytes, of the buffer that every UDP
	// read is made into.
	udpBufferSize = 64 * 1024

	// leaderWaitTimeout is the duration passed to MetaStore.WaitForLeader
	// when the service is opened.
	leaderWaitTimeout = 30 * time.Second
)
|
2015-05-29 20:12:00 +00:00
|
|
|
|
2015-09-01 03:32:48 +00:00
|
|
|
// Keys of the statistics published by the graphite package.
const (
	statPointsReceived    = "pointsRx"              // lines read from a listener
	statBytesReceived     = "bytesRx"               // bytes read from a listener
	statPointsParseFail   = "pointsParseFail"       // lines the parser rejected
	statPointsUnsupported = "pointsUnsupportedFail" // no use visible in this file
	// NOTE: the identifier below is misspelled ("Trasmitted"); it is kept
	// as-is because other code refers to it by this name.
	statBatchesTrasmitted   = "batchesTx"     // batches written successfully
	statPointsTransmitted   = "pointsTx"      // points contained in written batches
	statBatchesTransmitFail = "batchesTxFail" // batches whose write returned an error
	statConnectionsActive   = "connsActive"   // TCP connections currently being served
	statConnectionsHandled  = "connsHandled"  // TCP connections accepted so far
)
|
|
|
|
|
2015-09-24 20:45:38 +00:00
|
|
|
// tcpConnection pairs a client TCP connection with the time at which the
// service started tracking it.
type tcpConnection struct {
	conn        net.Conn  // the client connection
	connectTime time.Time // when the connection was registered
}
|
|
|
|
|
|
|
|
func (c *tcpConnection) Close() {
|
|
|
|
c.conn.Close()
|
|
|
|
}
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
type Service struct {
|
2015-10-19 18:10:29 +00:00
|
|
|
mu sync.Mutex
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
bindAddress string
|
|
|
|
database string
|
|
|
|
protocol string
|
|
|
|
batchSize int
|
2015-09-08 22:18:14 +00:00
|
|
|
batchPending int
|
2015-06-08 22:35:38 +00:00
|
|
|
batchTimeout time.Duration
|
|
|
|
consistencyLevel cluster.ConsistencyLevel
|
|
|
|
|
2015-07-06 22:09:32 +00:00
|
|
|
batcher *tsdb.PointBatcher
|
|
|
|
parser *Parser
|
2015-06-08 22:35:38 +00:00
|
|
|
|
2015-09-24 20:45:38 +00:00
|
|
|
logger *log.Logger
|
|
|
|
statMap *expvar.Map
|
|
|
|
tcpConnectionsMu sync.Mutex
|
|
|
|
tcpConnections map[string]*tcpConnection
|
2015-06-08 22:35:38 +00:00
|
|
|
|
2015-09-05 07:30:59 +00:00
|
|
|
ln net.Listener
|
|
|
|
addr net.Addr
|
|
|
|
udpConn *net.UDPConn
|
2015-06-08 22:35:38 +00:00
|
|
|
|
|
|
|
wg sync.WaitGroup
|
|
|
|
done chan struct{}
|
|
|
|
|
2015-09-02 22:07:30 +00:00
|
|
|
Monitor interface {
|
2015-09-24 20:45:38 +00:00
|
|
|
RegisterDiagnosticsClient(name string, client monitor.DiagsClient)
|
|
|
|
DeregisterDiagnosticsClient(name string)
|
2015-09-01 03:32:48 +00:00
|
|
|
}
|
2015-06-08 22:35:38 +00:00
|
|
|
PointsWriter interface {
|
|
|
|
WritePoints(p *cluster.WritePointsRequest) error
|
|
|
|
}
|
2015-06-11 00:57:25 +00:00
|
|
|
MetaStore interface {
|
2015-06-11 18:00:31 +00:00
|
|
|
WaitForLeader(d time.Duration) error
|
2015-06-11 00:57:25 +00:00
|
|
|
CreateDatabaseIfNotExists(name string) (*meta.DatabaseInfo, error)
|
|
|
|
}
|
2015-06-08 22:35:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewService returns an instance of the Graphite service.
|
2015-06-09 03:52:00 +00:00
|
|
|
func NewService(c Config) (*Service, error) {
|
2015-06-10 23:40:08 +00:00
|
|
|
// Use defaults where necessary.
|
|
|
|
d := c.WithDefaults()
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
s := Service{
|
2015-09-24 20:45:38 +00:00
|
|
|
bindAddress: d.BindAddress,
|
|
|
|
database: d.Database,
|
|
|
|
protocol: d.Protocol,
|
|
|
|
batchSize: d.BatchSize,
|
|
|
|
batchPending: d.BatchPending,
|
|
|
|
batchTimeout: time.Duration(d.BatchTimeout),
|
|
|
|
logger: log.New(os.Stderr, "[graphite] ", log.LstdFlags),
|
|
|
|
tcpConnections: make(map[string]*tcpConnection),
|
|
|
|
done: make(chan struct{}),
|
2015-06-08 22:35:38 +00:00
|
|
|
}
|
|
|
|
|
2015-06-10 23:40:08 +00:00
|
|
|
consistencyLevel, err := cluster.ParseConsistencyLevel(d.ConsistencyLevel)
|
2015-06-09 03:52:00 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
s.consistencyLevel = consistencyLevel
|
|
|
|
|
2015-06-24 18:08:36 +00:00
|
|
|
parser, err := NewParserWithOptions(Options{
|
|
|
|
Templates: d.Templates,
|
|
|
|
DefaultTags: d.DefaultTags(),
|
|
|
|
Separator: d.Separator})
|
|
|
|
|
2015-06-22 22:47:03 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-06-08 22:35:38 +00:00
|
|
|
s.parser = parser
|
|
|
|
|
2015-06-09 03:52:00 +00:00
|
|
|
return &s, nil
|
2015-06-08 22:35:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Open starts the Graphite input processing data.
|
|
|
|
func (s *Service) Open() error {
|
2015-10-19 18:10:29 +00:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
2015-08-20 18:23:09 +00:00
|
|
|
s.logger.Printf("Starting graphite service, batch size %d, batch timeout %s", s.batchSize, s.batchTimeout)
|
2015-08-12 22:07:12 +00:00
|
|
|
|
2015-09-04 01:10:06 +00:00
|
|
|
// Configure expvar monitoring. It's OK to do this even if the service fails to open and
|
|
|
|
// should be done before any data could arrive for the service.
|
2015-09-04 05:12:33 +00:00
|
|
|
key := strings.Join([]string{"graphite", s.protocol, s.bindAddress}, ":")
|
|
|
|
tags := map[string]string{"proto": s.protocol, "bind": s.bindAddress}
|
|
|
|
s.statMap = influxdb.NewStatistics(key, "graphite", tags)
|
2015-09-04 01:10:06 +00:00
|
|
|
|
2015-09-24 20:45:38 +00:00
|
|
|
// Register diagnostics if a Monitor service is available.
|
|
|
|
if s.Monitor != nil {
|
|
|
|
s.Monitor.RegisterDiagnosticsClient(key, s)
|
|
|
|
}
|
2015-09-01 03:32:48 +00:00
|
|
|
|
2015-06-11 18:00:31 +00:00
|
|
|
if err := s.MetaStore.WaitForLeader(leaderWaitTimeout); err != nil {
|
2015-08-12 22:07:12 +00:00
|
|
|
s.logger.Printf("Failed to detect a cluster leader: %s", err.Error())
|
2015-06-11 18:00:31 +00:00
|
|
|
return err
|
|
|
|
}
|
2015-06-08 22:35:38 +00:00
|
|
|
|
2015-06-11 00:57:25 +00:00
|
|
|
if _, err := s.MetaStore.CreateDatabaseIfNotExists(s.database); err != nil {
|
2015-08-12 22:07:12 +00:00
|
|
|
s.logger.Printf("Failed to ensure target database %s exists: %s", s.database, err.Error())
|
2015-06-11 00:57:25 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-09-08 22:18:14 +00:00
|
|
|
s.batcher = tsdb.NewPointBatcher(s.batchSize, s.batchPending, s.batchTimeout)
|
2015-07-06 22:09:32 +00:00
|
|
|
s.batcher.Start()
|
|
|
|
|
|
|
|
// Start processing batches.
|
|
|
|
s.wg.Add(1)
|
|
|
|
go s.processBatches(s.batcher)
|
|
|
|
|
2015-06-11 18:00:31 +00:00
|
|
|
var err error
|
2015-06-08 22:35:38 +00:00
|
|
|
if strings.ToLower(s.protocol) == "tcp" {
|
|
|
|
s.addr, err = s.openTCPServer()
|
|
|
|
} else if strings.ToLower(s.protocol) == "udp" {
|
|
|
|
s.addr, err = s.openUDPServer()
|
|
|
|
} else {
|
|
|
|
return fmt.Errorf("unrecognized Graphite input protocol %s", s.protocol)
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-08-12 22:07:12 +00:00
|
|
|
s.logger.Printf("Listening on %s: %s", strings.ToUpper(s.protocol), s.addr.String())
|
2015-06-08 22:35:38 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-09-24 20:45:38 +00:00
|
|
|
func (s *Service) closeAllConnections() {
|
|
|
|
s.tcpConnectionsMu.Lock()
|
|
|
|
defer s.tcpConnectionsMu.Unlock()
|
|
|
|
for _, c := range s.tcpConnections {
|
|
|
|
c.Close()
|
|
|
|
}
|
|
|
|
}
|
2015-06-08 22:35:38 +00:00
|
|
|
|
|
|
|
// Close stops all data processing on the Graphite input.
|
|
|
|
func (s *Service) Close() error {
|
2015-10-19 18:10:29 +00:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
2015-09-24 20:45:38 +00:00
|
|
|
s.closeAllConnections()
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
if s.ln != nil {
|
|
|
|
s.ln.Close()
|
|
|
|
}
|
2015-09-05 07:30:59 +00:00
|
|
|
if s.udpConn != nil {
|
|
|
|
s.udpConn.Close()
|
|
|
|
}
|
2015-08-12 19:46:09 +00:00
|
|
|
|
2015-10-19 18:10:29 +00:00
|
|
|
if s.batcher != nil {
|
|
|
|
s.batcher.Stop()
|
|
|
|
}
|
2015-08-12 19:46:09 +00:00
|
|
|
close(s.done)
|
|
|
|
s.wg.Wait()
|
|
|
|
s.done = nil
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-06-10 15:27:57 +00:00
|
|
|
// SetLogger sets the internal logger to the logger passed in.
|
|
|
|
func (s *Service) SetLogger(l *log.Logger) {
|
|
|
|
s.logger = l
|
|
|
|
}
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
func (s *Service) Addr() net.Addr {
|
|
|
|
return s.addr
|
|
|
|
}
|
|
|
|
|
|
|
|
// openTCPServer opens the Graphite input in TCP mode and starts processing data.
|
|
|
|
func (s *Service) openTCPServer() (net.Addr, error) {
|
|
|
|
ln, err := net.Listen("tcp", s.bindAddress)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
s.ln = ln
|
|
|
|
|
|
|
|
s.wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer s.wg.Done()
|
|
|
|
for {
|
|
|
|
conn, err := s.ln.Accept()
|
|
|
|
if opErr, ok := err.(*net.OpError); ok && !opErr.Temporary() {
|
|
|
|
s.logger.Println("graphite TCP listener closed")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
s.logger.Println("error accepting TCP connection", err.Error())
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
s.wg.Add(1)
|
|
|
|
go s.handleTCPConnection(conn)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
return ln.Addr(), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleTCPConnection services an individual TCP connection for the Graphite input.
|
|
|
|
func (s *Service) handleTCPConnection(conn net.Conn) {
|
2015-09-04 01:10:06 +00:00
|
|
|
defer s.wg.Done()
|
2015-06-08 22:35:38 +00:00
|
|
|
defer conn.Close()
|
2015-09-01 03:32:48 +00:00
|
|
|
defer s.statMap.Add(statConnectionsActive, -1)
|
2015-09-24 20:45:38 +00:00
|
|
|
defer s.untrackConnection(conn)
|
2015-09-01 03:32:48 +00:00
|
|
|
s.statMap.Add(statConnectionsActive, 1)
|
|
|
|
s.statMap.Add(statConnectionsHandled, 1)
|
2015-09-24 20:45:38 +00:00
|
|
|
s.trackConnection(conn)
|
2015-06-08 22:35:38 +00:00
|
|
|
|
|
|
|
reader := bufio.NewReader(conn)
|
|
|
|
|
|
|
|
for {
|
|
|
|
// Read up to the next newline.
|
|
|
|
buf, err := reader.ReadBytes('\n')
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Trim the buffer, even though there should be no padding
|
|
|
|
line := strings.TrimSpace(string(buf))
|
|
|
|
|
2015-09-01 03:32:48 +00:00
|
|
|
s.statMap.Add(statPointsReceived, 1)
|
|
|
|
s.statMap.Add(statBytesReceived, int64(len(buf)))
|
2015-07-06 22:09:32 +00:00
|
|
|
s.handleLine(line)
|
2015-06-08 22:35:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-24 20:45:38 +00:00
|
|
|
func (s *Service) trackConnection(c net.Conn) {
|
|
|
|
s.tcpConnectionsMu.Lock()
|
|
|
|
defer s.tcpConnectionsMu.Unlock()
|
|
|
|
s.tcpConnections[c.RemoteAddr().String()] = &tcpConnection{
|
|
|
|
conn: c,
|
|
|
|
connectTime: time.Now().UTC(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
func (s *Service) untrackConnection(c net.Conn) {
|
|
|
|
s.tcpConnectionsMu.Lock()
|
|
|
|
defer s.tcpConnectionsMu.Unlock()
|
|
|
|
delete(s.tcpConnections, c.RemoteAddr().String())
|
|
|
|
}
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
// openUDPServer opens the Graphite input in UDP mode and starts processing incoming data.
|
|
|
|
func (s *Service) openUDPServer() (net.Addr, error) {
|
|
|
|
addr, err := net.ResolveUDPAddr("udp", s.bindAddress)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-09-05 07:30:59 +00:00
|
|
|
s.udpConn, err = net.ListenUDP("udp", addr)
|
2015-06-08 22:35:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
buf := make([]byte, udpBufferSize)
|
|
|
|
s.wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer s.wg.Done()
|
|
|
|
for {
|
2015-09-05 07:30:59 +00:00
|
|
|
n, _, err := s.udpConn.ReadFromUDP(buf)
|
2015-06-08 22:35:38 +00:00
|
|
|
if err != nil {
|
2015-09-05 07:30:59 +00:00
|
|
|
s.udpConn.Close()
|
2015-06-08 22:35:38 +00:00
|
|
|
return
|
|
|
|
}
|
2015-09-01 03:32:48 +00:00
|
|
|
|
|
|
|
lines := strings.Split(string(buf[:n]), "\n")
|
|
|
|
for _, line := range lines {
|
2015-07-06 22:09:32 +00:00
|
|
|
s.handleLine(line)
|
2015-06-08 22:35:38 +00:00
|
|
|
}
|
2015-09-01 03:32:48 +00:00
|
|
|
s.statMap.Add(statPointsReceived, int64(len(lines)))
|
|
|
|
s.statMap.Add(statBytesReceived, int64(n))
|
2015-06-08 22:35:38 +00:00
|
|
|
}
|
|
|
|
}()
|
2015-09-05 07:30:59 +00:00
|
|
|
return s.udpConn.LocalAddr(), nil
|
2015-06-08 22:35:38 +00:00
|
|
|
}
|
|
|
|
|
2015-07-06 22:09:32 +00:00
|
|
|
func (s *Service) handleLine(line string) {
|
|
|
|
if line == "" {
|
|
|
|
return
|
|
|
|
}
|
2015-09-01 03:32:48 +00:00
|
|
|
|
2015-07-06 22:09:32 +00:00
|
|
|
// Parse it.
|
|
|
|
point, err := s.parser.Parse(line)
|
|
|
|
if err != nil {
|
2015-10-27 16:21:54 +00:00
|
|
|
s.logger.Printf("unable to parse line: %s: %s", line, err)
|
2015-09-01 03:32:48 +00:00
|
|
|
s.statMap.Add(statPointsParseFail, 1)
|
2015-07-06 22:09:32 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
s.batcher.In() <- point
|
|
|
|
}
|
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
// processBatches continually drains the given batcher and writes the batches to the database.
|
|
|
|
func (s *Service) processBatches(batcher *tsdb.PointBatcher) {
|
|
|
|
defer s.wg.Done()
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case batch := <-batcher.Out():
|
|
|
|
if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{
|
|
|
|
Database: s.database,
|
|
|
|
RetentionPolicy: "",
|
|
|
|
ConsistencyLevel: s.consistencyLevel,
|
|
|
|
Points: batch,
|
2015-09-01 03:32:48 +00:00
|
|
|
}); err == nil {
|
|
|
|
s.statMap.Add(statBatchesTrasmitted, 1)
|
|
|
|
s.statMap.Add(statPointsTransmitted, int64(len(batch)))
|
|
|
|
} else {
|
2015-06-08 22:35:38 +00:00
|
|
|
s.logger.Printf("failed to write point batch to database %q: %s", s.database, err)
|
2015-09-01 03:32:48 +00:00
|
|
|
s.statMap.Add(statBatchesTransmitFail, 1)
|
2015-06-08 22:35:38 +00:00
|
|
|
}
|
2015-09-01 03:32:48 +00:00
|
|
|
|
2015-06-08 22:35:38 +00:00
|
|
|
case <-s.done:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-09-24 20:45:38 +00:00
|
|
|
|
|
|
|
func (s *Service) Diagnostics() (*monitor.Diagnostic, error) {
|
|
|
|
s.tcpConnectionsMu.Lock()
|
|
|
|
defer s.tcpConnectionsMu.Unlock()
|
|
|
|
|
|
|
|
d := &monitor.Diagnostic{
|
|
|
|
Columns: []string{"local", "remote", "connect time"},
|
|
|
|
Rows: make([][]interface{}, 0, len(s.tcpConnections)),
|
|
|
|
}
|
|
|
|
for _, v := range s.tcpConnections {
|
|
|
|
_ = v
|
|
|
|
d.Rows = append(d.Rows, []interface{}{v.conn.LocalAddr().String(), v.conn.RemoteAddr().String(), v.connectTime})
|
|
|
|
}
|
|
|
|
return d, nil
|
|
|
|
}
|