influxdb/query/task_manager.go

339 lines
8.2 KiB
Go

package query
import (
"bytes"
"context"
"encoding/json"
"fmt"
"sync"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxql"
"go.uber.org/zap"
)
const (
// DefaultQueryTimeout is the default timeout for executing a query.
// A value of zero will have no query timeout.
DefaultQueryTimeout = time.Duration(0)
)
type TaskStatus int
const (
// RunningTask is set when the task is running.
RunningTask TaskStatus = iota + 1
// KilledTask is set when the task is killed, but resources are still
// being used.
KilledTask
)
var (
queryFieldNames []string = []string{"qid", "query", "database", "duration", "status"}
)
func (t TaskStatus) String() string {
switch t {
case RunningTask:
return "running"
case KilledTask:
return "killed"
default:
return "unknown"
}
}
func (t TaskStatus) MarshalJSON() ([]byte, error) {
s := t.String()
return json.Marshal(s)
}
func (t *TaskStatus) UnmarshalJSON(data []byte) error {
if bytes.Equal(data, []byte("running")) {
*t = RunningTask
} else if bytes.Equal(data, []byte("killed")) {
*t = KilledTask
} else if bytes.Equal(data, []byte("unknown")) {
*t = TaskStatus(0)
} else {
return fmt.Errorf("unknown task status: %s", string(data))
}
return nil
}
// TaskManager takes care of all aspects related to managing running queries.
type TaskManager struct {
// Query execution timeout.
QueryTimeout time.Duration
// Log queries if they are slower than this time.
// If zero, slow queries will never be logged.
LogQueriesAfter time.Duration
// Maximum number of concurrent queries.
MaxConcurrentQueries int
// Logger to use for all logging.
// Defaults to discarding all log output.
Logger *zap.Logger
// Used for managing and tracking running queries.
queries map[uint64]*Task
nextID uint64
mu sync.RWMutex
shutdown bool
}
// NewTaskManager creates a new TaskManager.
func NewTaskManager() *TaskManager {
return &TaskManager{
QueryTimeout: DefaultQueryTimeout,
Logger: zap.NewNop(),
queries: make(map[uint64]*Task),
nextID: 1,
}
}
// ExecuteStatement executes a statement containing one of the task management queries.
func (t *TaskManager) ExecuteStatement(ctx *ExecutionContext, stmt influxql.Statement) error {
switch stmt := stmt.(type) {
case *influxql.ShowQueriesStatement:
rows, err := t.executeShowQueriesStatement(stmt)
if err != nil {
return err
}
ctx.Send(&Result{
Series: rows,
})
case *influxql.KillQueryStatement:
var messages []*Message
if ctx.ReadOnly {
messages = append(messages, ReadOnlyWarning(stmt.String()))
}
if err := t.executeKillQueryStatement(stmt); err != nil {
return err
}
ctx.Send(&Result{
Messages: messages,
})
default:
return ErrInvalidQuery
}
return nil
}
func (t *TaskManager) executeKillQueryStatement(stmt *influxql.KillQueryStatement) error {
return t.KillQuery(stmt.QueryID)
}
func (t *TaskManager) executeShowQueriesStatement(q *influxql.ShowQueriesStatement) (models.Rows, error) {
t.mu.RLock()
defer t.mu.RUnlock()
now := time.Now()
values := make([][]interface{}, 0, len(t.queries))
for id, qi := range t.queries {
d := now.Sub(qi.startTime)
d = prettyTime(d)
values = append(values, []interface{}{id, qi.query, qi.database, d.String(), qi.status.String()})
}
return []*models.Row{{
Columns: queryFieldNames,
Values: values,
}}, nil
}
func prettyTime(d time.Duration) time.Duration {
switch {
case d >= time.Second:
d = d - (d % time.Second)
case d >= time.Millisecond:
d = d - (d % time.Millisecond)
case d >= time.Microsecond:
d = d - (d % time.Microsecond)
}
return d
}
func (t *TaskManager) LogCurrentQueries(logFunc func(string, ...zap.Field)) {
for _, queryInfo := range t.Queries() {
logFunc("Current Queries", zap.Uint64(queryFieldNames[0], queryInfo.ID),
zap.String(queryFieldNames[1], queryInfo.Query),
zap.String(queryFieldNames[2], queryInfo.Database),
zap.String(queryFieldNames[3], prettyTime(queryInfo.Duration).String()),
zap.String(queryFieldNames[4], queryInfo.Status.String()))
}
}
func (t *TaskManager) queryError(qid uint64, err error) {
t.mu.RLock()
query := t.queries[qid]
t.mu.RUnlock()
if query != nil {
query.setError(err)
}
}
// AttachQuery attaches a running query to be managed by the TaskManager.
// Returns the query id of the newly attached query or an error if it was
// unable to assign a query id or attach the query to the TaskManager.
// This function also returns a channel that will be closed when this
// query finishes running.
//
// After a query finishes running, the system is free to reuse a query id.
func (t *TaskManager) AttachQuery(q *influxql.Query, opt ExecutionOptions, interrupt <-chan struct{}) (*ExecutionContext, func(), error) {
t.mu.Lock()
defer t.mu.Unlock()
if t.shutdown {
return nil, nil, ErrQueryEngineShutdown
}
if t.MaxConcurrentQueries > 0 && len(t.queries) >= t.MaxConcurrentQueries {
return nil, nil, ErrMaxConcurrentQueriesLimitExceeded(len(t.queries), t.MaxConcurrentQueries)
}
qid := t.nextID
query := &Task{
query: q.String(),
database: opt.Database,
status: RunningTask,
startTime: time.Now(),
closing: make(chan struct{}),
monitorCh: make(chan error),
}
t.queries[qid] = query
go t.waitForQuery(qid, query.closing, interrupt, query.monitorCh)
if t.LogQueriesAfter != 0 {
go query.monitor(func(closing <-chan struct{}) error {
timer := time.NewTimer(t.LogQueriesAfter)
defer timer.Stop()
select {
case <-timer.C:
t.Logger.Warn(fmt.Sprintf("Detected slow query: %s (qid: %d, database: %s, threshold: %s)",
query.query, qid, query.database, t.LogQueriesAfter))
case <-closing:
}
return nil
})
}
t.nextID++
ctx := &ExecutionContext{
Context: context.Background(),
QueryID: qid,
task: query,
ExecutionOptions: opt,
}
ctx.watch()
return ctx, func() { t.DetachQuery(qid) }, nil
}
// KillQuery enters a query into the killed state and closes the channel
// from the TaskManager. This method can be used to forcefully terminate a
// running query.
func (t *TaskManager) KillQuery(qid uint64) error {
t.mu.Lock()
query := t.queries[qid]
t.mu.Unlock()
if query == nil {
return fmt.Errorf("no such query id: %d", qid)
}
return query.kill()
}
// DetachQuery removes a query from the query table. If the query is not in the
// killed state, this will also close the related channel.
func (t *TaskManager) DetachQuery(qid uint64) error {
t.mu.Lock()
defer t.mu.Unlock()
query := t.queries[qid]
if query == nil {
return fmt.Errorf("no such query id: %d", qid)
}
query.close()
delete(t.queries, qid)
return nil
}
// QueryInfo represents the information for a query.
type QueryInfo struct {
ID uint64 `json:"id"`
Query string `json:"query"`
Database string `json:"database"`
Duration time.Duration `json:"duration"`
Status TaskStatus `json:"status"`
}
// Queries returns a list of all running queries with information about them.
func (t *TaskManager) Queries() []QueryInfo {
t.mu.RLock()
defer t.mu.RUnlock()
now := time.Now()
queries := make([]QueryInfo, 0, len(t.queries))
for id, qi := range t.queries {
queries = append(queries, QueryInfo{
ID: id,
Query: qi.query,
Database: qi.database,
Duration: now.Sub(qi.startTime),
Status: qi.status,
})
}
return queries
}
func (t *TaskManager) waitForQuery(qid uint64, interrupt <-chan struct{}, closing <-chan struct{}, monitorCh <-chan error) {
var timerCh <-chan time.Time
if t.QueryTimeout != 0 {
timer := time.NewTimer(t.QueryTimeout)
timerCh = timer.C
defer timer.Stop()
}
select {
case <-closing:
t.queryError(qid, ErrQueryInterrupted)
case err := <-monitorCh:
if err == nil {
break
}
t.queryError(qid, err)
case <-timerCh:
t.queryError(qid, ErrQueryTimeoutLimitExceeded)
case <-interrupt:
// Query was manually closed so exit the select.
return
}
t.KillQuery(qid)
}
// Close kills all running queries and prevents new queries from being attached.
func (t *TaskManager) Close() error {
t.mu.Lock()
defer t.mu.Unlock()
t.shutdown = true
for _, query := range t.queries {
query.setError(ErrQueryEngineShutdown)
query.close()
}
t.queries = nil
return nil
}