394 lines
9.9 KiB
Go
394 lines
9.9 KiB
Go
package opentsdb
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"crypto/tls"
|
|
"expvar"
|
|
"io"
|
|
"log"
|
|
"net"
|
|
"net/http"
|
|
"net/textproto"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/influxdb/influxdb"
|
|
"github.com/influxdb/influxdb/cluster"
|
|
"github.com/influxdb/influxdb/models"
|
|
"github.com/influxdb/influxdb/services/meta"
|
|
"github.com/influxdb/influxdb/tsdb"
|
|
)
|
|
|
|
const leaderWaitTimeout = 30 * time.Second
|
|
|
|
// statistics gathered by the openTSDB package.
|
|
const (
|
|
statHTTPConnectionsHandled = "httpConnsHandled"
|
|
statTelnetConnectionsActive = "tlConnsActive"
|
|
statTelnetConnectionsHandled = "tlConnsHandled"
|
|
statTelnetPointsReceived = "tlPointsRx"
|
|
statTelnetBytesReceived = "tlBytesRx"
|
|
statTelnetReadError = "tlReadErr"
|
|
statTelnetBadLine = "tlBadLine"
|
|
statTelnetBadTime = "tlBadTime"
|
|
statTelnetBadTag = "tlBadTag"
|
|
statTelnetBadFloat = "tlBadFloat"
|
|
statBatchesTrasmitted = "batchesTx"
|
|
statPointsTransmitted = "pointsTx"
|
|
statBatchesTransmitFail = "batchesTxFail"
|
|
statConnectionsActive = "connsActive"
|
|
statConnectionsHandled = "connsHandled"
|
|
statDroppedPointsInvalid = "droppedPointsInvalid"
|
|
)
|
|
|
|
// Service manages the listener and handler for an HTTP endpoint.
|
|
type Service struct {
|
|
ln net.Listener // main listener
|
|
httpln *chanListener // http channel-based listener
|
|
|
|
mu sync.Mutex
|
|
wg sync.WaitGroup
|
|
done chan struct{}
|
|
err chan error
|
|
tls bool
|
|
cert string
|
|
|
|
BindAddress string
|
|
Database string
|
|
RetentionPolicy string
|
|
ConsistencyLevel cluster.ConsistencyLevel
|
|
|
|
PointsWriter interface {
|
|
WritePoints(p *cluster.WritePointsRequest) error
|
|
}
|
|
MetaClient interface {
|
|
CreateDatabase(name string) (*meta.DatabaseInfo, error)
|
|
}
|
|
|
|
// Points received over the telnet protocol are batched.
|
|
batchSize int
|
|
batchPending int
|
|
batchTimeout time.Duration
|
|
batcher *tsdb.PointBatcher
|
|
|
|
LogPointErrors bool
|
|
Logger *log.Logger
|
|
statMap *expvar.Map
|
|
}
|
|
|
|
// NewService returns a new instance of Service.
|
|
func NewService(c Config) (*Service, error) {
|
|
consistencyLevel, err := cluster.ParseConsistencyLevel(c.ConsistencyLevel)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
s := &Service{
|
|
done: make(chan struct{}),
|
|
tls: c.TLSEnabled,
|
|
cert: c.Certificate,
|
|
err: make(chan error),
|
|
BindAddress: c.BindAddress,
|
|
Database: c.Database,
|
|
RetentionPolicy: c.RetentionPolicy,
|
|
ConsistencyLevel: consistencyLevel,
|
|
batchSize: c.BatchSize,
|
|
batchPending: c.BatchPending,
|
|
batchTimeout: time.Duration(c.BatchTimeout),
|
|
Logger: log.New(os.Stderr, "[opentsdb] ", log.LstdFlags),
|
|
LogPointErrors: c.LogPointErrors,
|
|
}
|
|
return s, nil
|
|
}
|
|
|
|
// Open starts the service
|
|
func (s *Service) Open() error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
s.Logger.Println("Starting OpenTSDB service")
|
|
|
|
// Configure expvar monitoring. It's OK to do this even if the service fails to open and
|
|
// should be done before any data could arrive for the service.
|
|
key := strings.Join([]string{"opentsdb", s.BindAddress}, ":")
|
|
tags := map[string]string{"bind": s.BindAddress}
|
|
s.statMap = influxdb.NewStatistics(key, "opentsdb", tags)
|
|
|
|
if _, err := s.MetaClient.CreateDatabase(s.Database); err != nil {
|
|
s.Logger.Printf("Failed to ensure target database %s exists: %s", s.Database, err.Error())
|
|
return err
|
|
}
|
|
|
|
s.batcher = tsdb.NewPointBatcher(s.batchSize, s.batchPending, s.batchTimeout)
|
|
s.batcher.Start()
|
|
|
|
// Start processing batches.
|
|
s.wg.Add(1)
|
|
go s.processBatches(s.batcher)
|
|
|
|
// Open listener.
|
|
if s.tls {
|
|
cert, err := tls.LoadX509KeyPair(s.cert, s.cert)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
listener, err := tls.Listen("tcp", s.BindAddress, &tls.Config{
|
|
Certificates: []tls.Certificate{cert},
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
s.Logger.Println("Listening on TLS:", listener.Addr().String())
|
|
s.ln = listener
|
|
} else {
|
|
listener, err := net.Listen("tcp", s.BindAddress)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
s.Logger.Println("Listening on:", listener.Addr().String())
|
|
s.ln = listener
|
|
}
|
|
s.httpln = newChanListener(s.ln.Addr())
|
|
|
|
// Begin listening for connections.
|
|
s.wg.Add(2)
|
|
go s.serveHTTP()
|
|
go s.serve()
|
|
|
|
return nil
|
|
}
|
|
|
|
// Close closes the openTSDB service
|
|
func (s *Service) Close() error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if s.ln != nil {
|
|
return s.ln.Close()
|
|
}
|
|
|
|
if s.batcher != nil {
|
|
s.batcher.Stop()
|
|
}
|
|
close(s.done)
|
|
s.wg.Wait()
|
|
return nil
|
|
}
|
|
|
|
// SetLogger sets the internal logger to the logger passed in.
|
|
func (s *Service) SetLogger(l *log.Logger) { s.Logger = l }
|
|
|
|
// Err returns a channel for fatal errors that occur on the listener.
|
|
func (s *Service) Err() <-chan error { return s.err }
|
|
|
|
// Addr returns the listener's address. Returns nil if listener is closed.
|
|
func (s *Service) Addr() net.Addr {
|
|
if s.ln == nil {
|
|
return nil
|
|
}
|
|
return s.ln.Addr()
|
|
}
|
|
|
|
// serve serves the handler from the listener.
|
|
func (s *Service) serve() {
|
|
defer s.wg.Done()
|
|
|
|
for {
|
|
// Wait for next connection.
|
|
conn, err := s.ln.Accept()
|
|
if opErr, ok := err.(*net.OpError); ok && !opErr.Temporary() {
|
|
s.Logger.Println("openTSDB TCP listener closed")
|
|
return
|
|
} else if err != nil {
|
|
s.Logger.Println("error accepting openTSDB: ", err.Error())
|
|
continue
|
|
}
|
|
|
|
// Handle connection in separate goroutine.
|
|
go s.handleConn(conn)
|
|
}
|
|
}
|
|
|
|
// handleConn processes conn. This is run in a separate goroutine.
|
|
func (s *Service) handleConn(conn net.Conn) {
|
|
defer s.statMap.Add(statConnectionsActive, -1)
|
|
s.statMap.Add(statConnectionsActive, 1)
|
|
s.statMap.Add(statConnectionsHandled, 1)
|
|
|
|
// Read header into buffer to check if it's HTTP.
|
|
var buf bytes.Buffer
|
|
r := bufio.NewReader(io.TeeReader(conn, &buf))
|
|
|
|
// Attempt to parse connection as HTTP.
|
|
_, err := http.ReadRequest(r)
|
|
|
|
// Rebuild connection from buffer and remaining connection data.
|
|
bufr := bufio.NewReader(io.MultiReader(&buf, conn))
|
|
conn = &readerConn{Conn: conn, r: bufr}
|
|
|
|
// If no HTTP parsing error occurred then process as HTTP.
|
|
if err == nil {
|
|
s.statMap.Add(statHTTPConnectionsHandled, 1)
|
|
s.httpln.ch <- conn
|
|
return
|
|
}
|
|
|
|
// Otherwise handle in telnet format.
|
|
s.wg.Add(1)
|
|
s.handleTelnetConn(conn)
|
|
}
|
|
|
|
// handleTelnetConn accepts OpenTSDB's telnet protocol.
|
|
// Each telnet command consists of a line of the form:
|
|
// put sys.cpu.user 1356998400 42.5 host=webserver01 cpu=0
|
|
func (s *Service) handleTelnetConn(conn net.Conn) {
|
|
defer conn.Close()
|
|
defer s.wg.Done()
|
|
defer s.statMap.Add(statTelnetConnectionsActive, -1)
|
|
s.statMap.Add(statTelnetConnectionsActive, 1)
|
|
s.statMap.Add(statTelnetConnectionsHandled, 1)
|
|
|
|
// Get connection details.
|
|
remoteAddr := conn.RemoteAddr().String()
|
|
|
|
// Wrap connection in a text protocol reader.
|
|
r := textproto.NewReader(bufio.NewReader(conn))
|
|
for {
|
|
line, err := r.ReadLine()
|
|
if err != nil {
|
|
if err != io.EOF {
|
|
s.statMap.Add(statTelnetReadError, 1)
|
|
s.Logger.Println("error reading from openTSDB connection", err.Error())
|
|
}
|
|
return
|
|
}
|
|
s.statMap.Add(statTelnetPointsReceived, 1)
|
|
s.statMap.Add(statTelnetBytesReceived, int64(len(line)))
|
|
|
|
inputStrs := strings.Fields(line)
|
|
|
|
if len(inputStrs) == 1 && inputStrs[0] == "version" {
|
|
conn.Write([]byte("InfluxDB TSDB proxy"))
|
|
continue
|
|
}
|
|
|
|
if len(inputStrs) < 4 || inputStrs[0] != "put" {
|
|
s.statMap.Add(statTelnetBadLine, 1)
|
|
if s.LogPointErrors {
|
|
s.Logger.Printf("malformed line '%s' from %s", line, remoteAddr)
|
|
}
|
|
continue
|
|
}
|
|
|
|
measurement := inputStrs[1]
|
|
tsStr := inputStrs[2]
|
|
valueStr := inputStrs[3]
|
|
tagStrs := inputStrs[4:]
|
|
|
|
var t time.Time
|
|
ts, err := strconv.ParseInt(tsStr, 10, 64)
|
|
if err != nil {
|
|
s.statMap.Add(statTelnetBadTime, 1)
|
|
if s.LogPointErrors {
|
|
s.Logger.Printf("malformed time '%s' from %s", tsStr, remoteAddr)
|
|
}
|
|
}
|
|
|
|
switch len(tsStr) {
|
|
case 10:
|
|
t = time.Unix(ts, 0)
|
|
break
|
|
case 13:
|
|
t = time.Unix(ts/1000, (ts%1000)*1000)
|
|
break
|
|
default:
|
|
s.statMap.Add(statTelnetBadTime, 1)
|
|
if s.LogPointErrors {
|
|
s.Logger.Printf("bad time '%s' must be 10 or 13 chars, from %s ", tsStr, remoteAddr)
|
|
}
|
|
continue
|
|
}
|
|
|
|
tags := make(map[string]string)
|
|
for t := range tagStrs {
|
|
parts := strings.SplitN(tagStrs[t], "=", 2)
|
|
if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
|
|
s.statMap.Add(statTelnetBadTag, 1)
|
|
if s.LogPointErrors {
|
|
s.Logger.Printf("malformed tag data '%v' from %s", tagStrs[t], remoteAddr)
|
|
}
|
|
continue
|
|
}
|
|
k := parts[0]
|
|
|
|
tags[k] = parts[1]
|
|
}
|
|
|
|
fields := make(map[string]interface{})
|
|
fv, err := strconv.ParseFloat(valueStr, 64)
|
|
if err != nil {
|
|
s.statMap.Add(statTelnetBadFloat, 1)
|
|
if s.LogPointErrors {
|
|
s.Logger.Printf("bad float '%s' from %s", valueStr, remoteAddr)
|
|
}
|
|
continue
|
|
}
|
|
fields["value"] = fv
|
|
|
|
pt, err := models.NewPoint(measurement, tags, fields, t)
|
|
if err != nil {
|
|
s.statMap.Add(statTelnetBadFloat, 1)
|
|
if s.LogPointErrors {
|
|
s.Logger.Printf("bad float '%s' from %s", valueStr, remoteAddr)
|
|
}
|
|
continue
|
|
}
|
|
s.batcher.In() <- pt
|
|
}
|
|
}
|
|
|
|
// serveHTTP handles connections in HTTP format.
|
|
func (s *Service) serveHTTP() {
|
|
srv := &http.Server{Handler: &Handler{
|
|
Database: s.Database,
|
|
RetentionPolicy: s.RetentionPolicy,
|
|
ConsistencyLevel: s.ConsistencyLevel,
|
|
PointsWriter: s.PointsWriter,
|
|
Logger: s.Logger,
|
|
statMap: s.statMap,
|
|
}}
|
|
srv.Serve(s.httpln)
|
|
}
|
|
|
|
// processBatches continually drains the given batcher and writes the batches to the database.
|
|
func (s *Service) processBatches(batcher *tsdb.PointBatcher) {
|
|
defer s.wg.Done()
|
|
for {
|
|
select {
|
|
case batch := <-batcher.Out():
|
|
if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{
|
|
Database: s.Database,
|
|
RetentionPolicy: s.RetentionPolicy,
|
|
ConsistencyLevel: s.ConsistencyLevel,
|
|
Points: batch,
|
|
}); err == nil {
|
|
s.statMap.Add(statBatchesTrasmitted, 1)
|
|
s.statMap.Add(statPointsTransmitted, int64(len(batch)))
|
|
} else {
|
|
s.Logger.Printf("failed to write point batch to database %q: %s", s.Database, err)
|
|
s.statMap.Add(statBatchesTransmitFail, 1)
|
|
}
|
|
|
|
case <-s.done:
|
|
return
|
|
}
|
|
}
|
|
}
|