influxdb/task/backend/scheduler/scheduler_metrics.go

142 lines
3.9 KiB
Go

package scheduler
import (
"time"
"github.com/prometheus/client_golang/prometheus"
)
// SchedulerMetrics bundles the Prometheus instruments recorded by the task
// scheduler: counters for execute/schedule/release activity, a collector for
// the executing-tasks gauge, and summaries of schedule delay and run duration.
type SchedulerMetrics struct {
	// Execution counters: every reported execution, and those that returned an error.
	totalExecuteCalls, totalExecuteFailure prometheus.Counter

	// Scheduling counters: schedule requests, failed schedule requests, and
	// release requests.
	scheduleCalls, scheduleFails, releaseCalls prometheus.Counter

	// executingTasks is a custom collector backed by the TreeScheduler.
	executingTasks *executingTasks

	// Summaries observed in seconds: delay before execution is triggered, and
	// the duration reported for each execution.
	scheduleDelay, executeDelta prometheus.Summary
}
// executingTasks is a prometheus.Collector that exposes a single gauge whose
// value is derived from the state of a TreeScheduler at collection time.
type executingTasks struct {
// desc is the descriptor sent by Describe and used to build the metric in Collect.
desc *prometheus.Desc
// ts is the scheduler whose workchans length is reported by Collect.
ts *TreeScheduler
}
// NewSchedulerMetrics constructs the scheduler's metric set. All counters and
// summaries share the "task" namespace and "scheduler" subsystem; the
// executing-tasks collector is bound to the supplied TreeScheduler. Nothing is
// registered here; callers obtain the collectors via PrometheusCollectors.
func NewSchedulerMetrics(te *TreeScheduler) *SchedulerMetrics {
	const (
		namespace = "task"
		subsystem = "scheduler"
	)

	// newCounter builds a counter under the shared namespace and subsystem.
	newCounter := func(name, help string) prometheus.Counter {
		return prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      name,
			Help:      help,
		})
	}

	// newSummary builds a summary under the shared namespace and subsystem.
	// Each call gets its own objectives map (p50, p90, p99 with their allowed
	// errors) so the summaries never share mutable state.
	newSummary := func(name, help string) prometheus.Summary {
		return prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace:  namespace,
			Subsystem:  subsystem,
			Name:       name,
			Help:       help,
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		})
	}

	return &SchedulerMetrics{
		totalExecuteCalls: newCounter(
			"total_execution_calls",
			"Total number of executions across all tasks.",
		),
		scheduleCalls: newCounter(
			"total_schedule_calls",
			"Total number of schedule requests.",
		),
		scheduleFails: newCounter(
			"total_schedule_fails",
			"Total number of schedule requests that fail to schedule.",
		),
		totalExecuteFailure: newCounter(
			"total_execute_failure",
			"Total number of times an execution has failed.",
		),
		releaseCalls: newCounter(
			"total_release_calls",
			"Total number of release requests.",
		),
		executingTasks: newExecutingTasks(te),
		scheduleDelay: newSummary(
			"schedule_delay",
			"The duration between when a Item should be scheduled and when it is told to execute.",
		),
		executeDelta: newSummary(
			"execute_delta",
			"The duration in seconds between a run starting and finishing.",
		),
	}
}
// PrometheusCollectors satisfies the prom.PrometheusCollector interface.
// It returns every collector owned by the metrics set, in a fixed order, so
// the caller can register them with a Prometheus registry.
func (em *SchedulerMetrics) PrometheusCollectors() []prometheus.Collector {
	collectors := make([]prometheus.Collector, 0, 8)
	collectors = append(collectors,
		// Counters.
		em.totalExecuteCalls,
		em.totalExecuteFailure,
		em.scheduleCalls,
		em.scheduleFails,
		em.releaseCalls,
		// Custom gauge collector.
		em.executingTasks,
		// Summaries.
		em.scheduleDelay,
		em.executeDelta,
	)
	return collectors
}
// schedule records one schedule request. The task ID is accepted but not used
// by the counter, which carries no per-task labels.
func (em *SchedulerMetrics) schedule(_ ID) {
	em.scheduleCalls.Inc()
}
// scheduleFail records one failed schedule request. The task ID is accepted
// but not used by the counter, which carries no per-task labels.
func (em *SchedulerMetrics) scheduleFail(_ ID) {
	em.scheduleFails.Inc()
}
// release records one release request. The task ID is accepted but not used
// by the counter, which carries no per-task labels.
func (em *SchedulerMetrics) release(_ ID) {
	em.releaseCalls.Inc()
}
// reportScheduleDelay adds one observation to the schedule-delay summary,
// converting the duration to fractional seconds.
func (em *SchedulerMetrics) reportScheduleDelay(d time.Duration) {
	delaySeconds := d.Seconds()
	em.scheduleDelay.Observe(delaySeconds)
}
// reportExecution records the outcome of a single execution: it always counts
// the call and observes its duration in seconds, and additionally counts a
// failure when err is non-nil.
func (em *SchedulerMetrics) reportExecution(err error, d time.Duration) {
	runSeconds := d.Seconds()

	em.totalExecuteCalls.Inc()
	em.executeDelta.Observe(runSeconds)

	if err == nil {
		return
	}
	em.totalExecuteFailure.Inc()
}
// newExecutingTasks creates the collector for the
// "task_scheduler_current_execution" metric, bound to the given scheduler.
// The descriptor has no variable labels and an empty set of constant labels.
func newExecutingTasks(ts *TreeScheduler) *executingTasks {
	description := prometheus.NewDesc(
		"task_scheduler_current_execution",
		"Number of tasks currently being executed",
		nil,
		prometheus.Labels{},
	)

	collector := new(executingTasks)
	collector.desc = description
	collector.ts = ts
	return collector
}
// Describe implements prometheus.Collector by sending the collector's single
// metric descriptor on ch.
func (r *executingTasks) Describe(ch chan<- *prometheus.Desc) {
ch <- r.desc
}
// Collect implements prometheus.Collector by emitting one gauge sample whose
// value is the current length of the scheduler's workchans. It panics if the
// constant metric cannot be built, because it uses MustNewConstMetric.
//
// NOTE(review): the TODO below indicates this value may not accurately reflect
// the number of tasks being executed — confirm against TreeScheduler.
func (r *executingTasks) Collect(ch chan<- prometheus.Metric) {
	// TODO(docmerlin): fix this metric
	workerCount := float64(len(r.ts.workchans))
	sample := prometheus.MustNewConstMetric(r.desc, prometheus.GaugeValue, workerCount)
	ch <- sample
}