feat: add the ability to log queries killed by `query-timeout` (#23978)
* feat: add the ability to log queries killed by `query-timeout`
* chore: update example config
* chore: improve logging details
pull/24036/head
parent
19d83dcad9
commit
88d2760bc8
|
@ -234,6 +234,7 @@ func NewServer(c *Config, buildInfo *BuildInfo) (*Server, error) {
|
||||||
s.QueryExecutor.TaskManager.QueryTimeout = time.Duration(c.Coordinator.QueryTimeout)
|
s.QueryExecutor.TaskManager.QueryTimeout = time.Duration(c.Coordinator.QueryTimeout)
|
||||||
s.QueryExecutor.TaskManager.LogQueriesAfter = time.Duration(c.Coordinator.LogQueriesAfter)
|
s.QueryExecutor.TaskManager.LogQueriesAfter = time.Duration(c.Coordinator.LogQueriesAfter)
|
||||||
s.QueryExecutor.TaskManager.MaxConcurrentQueries = c.Coordinator.MaxConcurrentQueries
|
s.QueryExecutor.TaskManager.MaxConcurrentQueries = c.Coordinator.MaxConcurrentQueries
|
||||||
|
s.QueryExecutor.TaskManager.LogTimedoutQueries = c.Coordinator.LogTimedOutQueries
|
||||||
|
|
||||||
// Initialize the monitor
|
// Initialize the monitor
|
||||||
s.Monitor.Version = s.buildInfo.Version
|
s.Monitor.Version = s.buildInfo.Version
|
||||||
|
@ -465,8 +466,10 @@ func (s *Server) Open() error {
|
||||||
s.TSDBStore.WithLogger(s.Logger)
|
s.TSDBStore.WithLogger(s.Logger)
|
||||||
if s.config.Data.QueryLogEnabled {
|
if s.config.Data.QueryLogEnabled {
|
||||||
s.QueryExecutor.WithLogger(s.Logger)
|
s.QueryExecutor.WithLogger(s.Logger)
|
||||||
} else if s.config.Coordinator.LogQueriesAfter > 0 {
|
} else if s.config.Coordinator.LogQueriesAfter > 0 || s.config.Coordinator.LogTimedOutQueries {
|
||||||
// Log long-running queries even if not logging all queries
|
// If we need to do any logging, add a logger.
|
||||||
|
// The TaskManager properly handles both of the above configs
|
||||||
|
// so it only logs as is appropriate.
|
||||||
s.QueryExecutor.TaskManager.Logger = s.Logger
|
s.QueryExecutor.TaskManager.Logger = s.Logger
|
||||||
}
|
}
|
||||||
s.PointsWriter.WithLogger(s.Logger)
|
s.PointsWriter.WithLogger(s.Logger)
|
||||||
|
|
|
@ -33,6 +33,7 @@ type Config struct {
|
||||||
MaxConcurrentQueries int `toml:"max-concurrent-queries"`
|
MaxConcurrentQueries int `toml:"max-concurrent-queries"`
|
||||||
QueryTimeout toml.Duration `toml:"query-timeout"`
|
QueryTimeout toml.Duration `toml:"query-timeout"`
|
||||||
LogQueriesAfter toml.Duration `toml:"log-queries-after"`
|
LogQueriesAfter toml.Duration `toml:"log-queries-after"`
|
||||||
|
LogTimedOutQueries bool `toml:"log-timedout-queries"`
|
||||||
MaxSelectPointN int `toml:"max-select-point"`
|
MaxSelectPointN int `toml:"max-select-point"`
|
||||||
MaxSelectSeriesN int `toml:"max-select-series"`
|
MaxSelectSeriesN int `toml:"max-select-series"`
|
||||||
MaxSelectBucketsN int `toml:"max-select-buckets"`
|
MaxSelectBucketsN int `toml:"max-select-buckets"`
|
||||||
|
@ -48,6 +49,7 @@ func NewConfig() Config {
|
||||||
MaxSelectPointN: DefaultMaxSelectPointN,
|
MaxSelectPointN: DefaultMaxSelectPointN,
|
||||||
MaxSelectSeriesN: DefaultMaxSelectSeriesN,
|
MaxSelectSeriesN: DefaultMaxSelectSeriesN,
|
||||||
TerminationQueryLog: false,
|
TerminationQueryLog: false,
|
||||||
|
LogTimedOutQueries: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,6 +60,7 @@ func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
|
||||||
"max-concurrent-queries": c.MaxConcurrentQueries,
|
"max-concurrent-queries": c.MaxConcurrentQueries,
|
||||||
"query-timeout": c.QueryTimeout,
|
"query-timeout": c.QueryTimeout,
|
||||||
"log-queries-after": c.LogQueriesAfter,
|
"log-queries-after": c.LogQueriesAfter,
|
||||||
|
"log-timedout-queries": c.LogTimedOutQueries,
|
||||||
"max-select-point": c.MaxSelectPointN,
|
"max-select-point": c.MaxSelectPointN,
|
||||||
"max-select-series": c.MaxSelectSeriesN,
|
"max-select-series": c.MaxSelectSeriesN,
|
||||||
"max-select-buckets": c.MaxSelectBucketsN,
|
"max-select-buckets": c.MaxSelectBucketsN,
|
||||||
|
|
|
@ -185,6 +185,9 @@
|
||||||
# discover slow or resource intensive queries. Setting the value to 0 disables the slow query logging.
|
# discover slow or resource intensive queries. Setting the value to 0 disables the slow query logging.
|
||||||
# log-queries-after = "0s"
|
# log-queries-after = "0s"
|
||||||
|
|
||||||
|
# Enables logging of queries that are killed because they exceeded `query-timeout`. Disabled by default.
|
||||||
|
# log-timedout-queries = false
|
||||||
|
|
||||||
# The maximum number of points a SELECT can process. A value of 0 will make
|
# The maximum number of points a SELECT can process. A value of 0 will make
|
||||||
# the maximum point count unlimited. This will only be checked every second so queries will not
|
# the maximum point count unlimited. This will only be checked every second so queries will not
|
||||||
# be aborted immediately when hitting the limit.
|
# be aborted immediately when hitting the limit.
|
||||||
|
|
|
@ -72,6 +72,9 @@ type TaskManager struct {
|
||||||
// If zero, slow queries will never be logged.
|
// If zero, slow queries will never be logged.
|
||||||
LogQueriesAfter time.Duration
|
LogQueriesAfter time.Duration
|
||||||
|
|
||||||
|
// If true, queries that are killed due to `query-timeout` will be logged.
|
||||||
|
LogTimedoutQueries bool
|
||||||
|
|
||||||
// Maximum number of concurrent queries.
|
// Maximum number of concurrent queries.
|
||||||
MaxConcurrentQueries int
|
MaxConcurrentQueries int
|
||||||
|
|
||||||
|
@ -315,6 +318,14 @@ func (t *TaskManager) waitForQuery(qid uint64, interrupt <-chan struct{}, closin
|
||||||
|
|
||||||
t.queryError(qid, err)
|
t.queryError(qid, err)
|
||||||
case <-timerCh:
|
case <-timerCh:
|
||||||
|
if t.LogTimedoutQueries {
|
||||||
|
t.Logger.Warn(
|
||||||
|
"query killed for exceeding timeout limit",
|
||||||
|
zap.String("query", t.queries[qid].query),
|
||||||
|
zap.String("database", t.queries[qid].database),
|
||||||
|
zap.String("timeout", prettyTime(t.QueryTimeout).String()),
|
||||||
|
)
|
||||||
|
}
|
||||||
t.queryError(qid, ErrQueryTimeoutLimitExceeded)
|
t.queryError(qid, ErrQueryTimeoutLimitExceeded)
|
||||||
case <-interrupt:
|
case <-interrupt:
|
||||||
// Query was manually closed so exit the select.
|
// Query was manually closed so exit the select.
|
||||||
|
|
Loading…
Reference in New Issue