142 lines
3.9 KiB
Go
142 lines
3.9 KiB
Go
package scheduler
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
type SchedulerMetrics struct {
|
|
totalExecuteCalls prometheus.Counter
|
|
totalExecuteFailure prometheus.Counter
|
|
scheduleCalls prometheus.Counter
|
|
scheduleFails prometheus.Counter
|
|
releaseCalls prometheus.Counter
|
|
|
|
executingTasks *executingTasks
|
|
scheduleDelay prometheus.Summary
|
|
executeDelta prometheus.Summary
|
|
}
|
|
|
|
type executingTasks struct {
|
|
desc *prometheus.Desc
|
|
ts *TreeScheduler
|
|
}
|
|
|
|
func NewSchedulerMetrics(te *TreeScheduler) *SchedulerMetrics {
|
|
const namespace = "task"
|
|
const subsystem = "scheduler"
|
|
|
|
return &SchedulerMetrics{
|
|
totalExecuteCalls: prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "total_execution_calls",
|
|
Help: "Total number of executions across all tasks.",
|
|
}),
|
|
scheduleCalls: prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "total_schedule_calls",
|
|
Help: "Total number of schedule requests.",
|
|
}),
|
|
scheduleFails: prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "total_schedule_fails",
|
|
Help: "Total number of schedule requests that fail to schedule.",
|
|
}),
|
|
|
|
totalExecuteFailure: prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "total_execute_failure",
|
|
Help: "Total number of times an execution has failed.",
|
|
}),
|
|
|
|
releaseCalls: prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "total_release_calls",
|
|
Help: "Total number of release requests.",
|
|
}),
|
|
executingTasks: newExecutingTasks(te),
|
|
scheduleDelay: prometheus.NewSummary(prometheus.SummaryOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "schedule_delay",
|
|
Help: "The duration between when a Item should be scheduled and when it is told to execute.",
|
|
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
|
}),
|
|
|
|
executeDelta: prometheus.NewSummary(prometheus.SummaryOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "execute_delta",
|
|
Help: "The duration in seconds between a run starting and finishing.",
|
|
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
|
}),
|
|
}
|
|
}
|
|
|
|
// PrometheusCollectors satisfies the prom.PrometheusCollector interface.
|
|
func (em *SchedulerMetrics) PrometheusCollectors() []prometheus.Collector {
|
|
return []prometheus.Collector{
|
|
em.totalExecuteCalls,
|
|
em.totalExecuteFailure,
|
|
em.scheduleCalls,
|
|
em.scheduleFails,
|
|
em.releaseCalls,
|
|
em.executingTasks,
|
|
em.scheduleDelay,
|
|
em.executeDelta,
|
|
}
|
|
}
|
|
|
|
func (em *SchedulerMetrics) schedule(taskID ID) {
|
|
em.scheduleCalls.Inc()
|
|
}
|
|
|
|
func (em *SchedulerMetrics) scheduleFail(taskID ID) {
|
|
em.scheduleFails.Inc()
|
|
}
|
|
|
|
func (em *SchedulerMetrics) release(taskID ID) {
|
|
em.releaseCalls.Inc()
|
|
}
|
|
|
|
func (em *SchedulerMetrics) reportScheduleDelay(d time.Duration) {
|
|
em.scheduleDelay.Observe(d.Seconds())
|
|
}
|
|
|
|
func (em *SchedulerMetrics) reportExecution(err error, d time.Duration) {
|
|
em.totalExecuteCalls.Inc()
|
|
em.executeDelta.Observe(d.Seconds())
|
|
if err != nil {
|
|
em.totalExecuteFailure.Inc()
|
|
}
|
|
}
|
|
|
|
func newExecutingTasks(ts *TreeScheduler) *executingTasks {
|
|
return &executingTasks{
|
|
desc: prometheus.NewDesc(
|
|
"task_scheduler_current_execution",
|
|
"Number of tasks currently being executed",
|
|
nil,
|
|
prometheus.Labels{},
|
|
),
|
|
ts: ts,
|
|
}
|
|
}
|
|
|
|
// Describe returns all descriptions associated with the run collector.
|
|
func (r *executingTasks) Describe(ch chan<- *prometheus.Desc) {
|
|
ch <- r.desc
|
|
}
|
|
|
|
// Collect returns the current state of all metrics of the run collector.
|
|
func (r *executingTasks) Collect(ch chan<- prometheus.Metric) {
|
|
// TODO(docmerlin): fix this metric
|
|
ch <- prometheus.MustNewConstMetric(r.desc, prometheus.GaugeValue, float64(len(r.ts.workchans)))
|
|
}
|