feat: add the ability to log queries killed by `query-timeout` (#23978)

* feat: add the ability to log queries killed by `query-timeout`

* chore: update example config

* chore: improve logging details
pull/24036/head
Jeffrey Smith II 2023-01-10 13:37:11 -05:00 committed by GitHub
parent 19d83dcad9
commit 88d2760bc8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 2 deletions

View File

@ -234,6 +234,7 @@ func NewServer(c *Config, buildInfo *BuildInfo) (*Server, error) {
s.QueryExecutor.TaskManager.QueryTimeout = time.Duration(c.Coordinator.QueryTimeout)
s.QueryExecutor.TaskManager.LogQueriesAfter = time.Duration(c.Coordinator.LogQueriesAfter)
s.QueryExecutor.TaskManager.MaxConcurrentQueries = c.Coordinator.MaxConcurrentQueries
s.QueryExecutor.TaskManager.LogTimedoutQueries = c.Coordinator.LogTimedOutQueries
// Initialize the monitor
s.Monitor.Version = s.buildInfo.Version
@ -465,8 +466,10 @@ func (s *Server) Open() error {
s.TSDBStore.WithLogger(s.Logger)
if s.config.Data.QueryLogEnabled {
s.QueryExecutor.WithLogger(s.Logger)
} else if s.config.Coordinator.LogQueriesAfter > 0 {
// Log long-running queries even if not logging all queries
} else if s.config.Coordinator.LogQueriesAfter > 0 || s.config.Coordinator.LogTimedOutQueries {
// If we need to do any logging, add a logger.
// The TaskManager properly handles both of the above configs
// so it only logs as is appropriate.
s.QueryExecutor.TaskManager.Logger = s.Logger
}
s.PointsWriter.WithLogger(s.Logger)

View File

@ -33,6 +33,7 @@ type Config struct {
MaxConcurrentQueries int `toml:"max-concurrent-queries"`
QueryTimeout toml.Duration `toml:"query-timeout"`
LogQueriesAfter toml.Duration `toml:"log-queries-after"`
LogTimedOutQueries bool `toml:"log-timedout-queries"`
MaxSelectPointN int `toml:"max-select-point"`
MaxSelectSeriesN int `toml:"max-select-series"`
MaxSelectBucketsN int `toml:"max-select-buckets"`
@ -48,6 +49,7 @@ func NewConfig() Config {
MaxSelectPointN: DefaultMaxSelectPointN,
MaxSelectSeriesN: DefaultMaxSelectSeriesN,
TerminationQueryLog: false,
LogTimedOutQueries: false,
}
}
@ -58,6 +60,7 @@ func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
"max-concurrent-queries": c.MaxConcurrentQueries,
"query-timeout": c.QueryTimeout,
"log-queries-after": c.LogQueriesAfter,
"log-timedout-queries": c.LogTimedOutQueries,
"max-select-point": c.MaxSelectPointN,
"max-select-series": c.MaxSelectSeriesN,
"max-select-buckets": c.MaxSelectBucketsN,

View File

@ -185,6 +185,9 @@
# discover slow or resource intensive queries. Setting the value to 0 disables the slow query logging.
# log-queries-after = "0s"
# Enables the logging of queries that are killed as a result of exceeding `query-timeout`
# log-timedout-queries = false
# The maximum number of points a SELECT can process. A value of 0 will make
# the maximum point count unlimited. This will only be checked every second so queries will not
# be aborted immediately when hitting the limit.

View File

@ -72,6 +72,9 @@ type TaskManager struct {
// If zero, slow queries will never be logged.
LogQueriesAfter time.Duration
// If true, queries that are killed due to `query-timeout` will be logged.
LogTimedoutQueries bool
// Maximum number of concurrent queries.
MaxConcurrentQueries int
@ -315,6 +318,14 @@ func (t *TaskManager) waitForQuery(qid uint64, interrupt <-chan struct{}, closin
t.queryError(qid, err)
case <-timerCh:
if t.LogTimedoutQueries {
t.Logger.Warn(
"query killed for exceeding timeout limit",
zap.String("query", t.queries[qid].query),
zap.String("database", t.queries[qid].database),
zap.String("timeout", prettyTime(t.QueryTimeout).String()),
)
}
t.queryError(qid, ErrQueryTimeoutLimitExceeded)
case <-interrupt:
// Query was manually closed so exit the select.