feat(tasks): use influxdb errors for executor metrics (#14926)

pull/15110/head
Alirie Gray 2019-09-10 12:48:55 -07:00 committed by GitHub
parent cc84a43cea
commit 645df57102
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 45 additions and 14 deletions

View File

@ -13,7 +13,7 @@ type ExecutorMetrics struct {
activeRuns prometheus.Collector
queueDelta *prometheus.SummaryVec
runDuration *prometheus.SummaryVec
errorsCounter prometheus.Counter
errorsCounter *prometheus.CounterVec
manualRunsCounter *prometheus.CounterVec
resumeRunsCounter *prometheus.CounterVec
}
@ -55,12 +55,12 @@ func NewExecutorMetrics(te *TaskExecutor) *ExecutorMetrics {
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
}, []string{"taskID"}),
errorsCounter: prometheus.NewCounter(prometheus.CounterOpts{
errorsCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "errors_counter",
Help: "The number of errors thrown by the executor.",
}),
Help: "The number of errors thrown by the executor with the type of error (ex. Flux compile, query, etc).",
}, []string{"errorType"}),
manualRunsCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
@ -131,8 +131,8 @@ func (em *ExecutorMetrics) FinishRun(taskID influxdb.ID, status backend.RunStatu
}
// LogError increments the count of errors.
func (em *ExecutorMetrics) LogError() {
em.errorsCounter.Inc()
// LogError increments the error counter for the series labelled with the
// Code of err. A nil err is ignored.
func (em *ExecutorMetrics) LogError(err *influxdb.Error) {
	if err == nil {
		// Nothing to classify; avoid dereferencing a nil *influxdb.Error.
		return
	}
	// WithLabelValues only looks up (or creates) the child counter for the
	// given label value; Inc must be called on it for the error to be counted.
	em.errorsCounter.WithLabelValues(err.Code).Inc()
}
// Describe returns all descriptions associated with the run collector.

View File

@ -316,7 +316,7 @@ func (w *worker) start(p *Promise) {
w.te.metrics.StartRun(p.task.ID, time.Since(p.createdAt))
}
func (w *worker) finish(p *Promise, rs backend.RunStatus, err error) {
func (w *worker) finish(p *Promise, rs backend.RunStatus, err *influxdb.Error) {
// trace
span, ctx := tracing.StartSpanFromContext(p.ctx)
defer span.Finish()
@ -332,9 +332,9 @@ func (w *worker) finish(p *Promise, rs backend.RunStatus, err error) {
w.te.metrics.FinishRun(p.task.ID, rs, rd)
// log error
if err != nil {
if err.Err != nil {
w.te.logger.Debug("execution failed", zap.Error(err), zap.String("taskID", p.task.ID.String()))
w.te.metrics.LogError()
w.te.metrics.LogError(err)
p.err = err
} else {
w.te.logger.Debug("Completed successfully", zap.String("taskID", p.task.ID.String()))
@ -350,13 +350,13 @@ func (w *worker) executeQuery(p *Promise) {
pkg, err := flux.Parse(p.task.Flux)
if err != nil {
w.finish(p, backend.RunFail, err)
w.finish(p, backend.RunFail, influxdb.ErrFluxParseError(err))
return
}
sf, err := p.run.ScheduledForTime()
if err != nil {
w.finish(p, backend.RunFail, err)
w.finish(p, backend.RunFail, influxdb.ErrTaskTimeParse(err))
return
}
@ -372,7 +372,7 @@ func (w *worker) executeQuery(p *Promise) {
it, err := w.te.qs.Query(ctx, req)
if err != nil {
// Assume the error should not be part of the runResult.
w.finish(p, backend.RunFail, err)
w.finish(p, backend.RunFail, influxdb.ErrQueryError(err))
return
}
@ -400,7 +400,7 @@ func (w *worker) executeQuery(p *Promise) {
w.te.tcs.AddRunLog(p.ctx, p.task.ID, p.run.ID, time.Now(), string(b))
}
w.finish(p, backend.RunSuccess, runErr)
w.finish(p, backend.RunSuccess, influxdb.ErrResultIteratorError(runErr))
}
// RunsActive returns the current number of workers, which is equivalent to

View File

@ -267,6 +267,7 @@ func testLimitFunc(t *testing.T) {
t.Fatal(err)
}
forcedErr := errors.New("forced")
forcedQueryErr := influxdb.ErrQueryError(forcedErr)
tes.svc.FailNextQuery(forcedErr)
count := 0
@ -285,7 +286,7 @@ func testLimitFunc(t *testing.T) {
<-promise.Done()
if got := promise.Error(); got != forcedErr {
if got := promise.Error(); got.Error() != forcedQueryErr.Error() {
t.Fatal("failed to get failure from forced error")
}

View File

@ -86,6 +86,36 @@ var (
}
)
// ErrFluxParseError builds the *Error reported when Flux.Parse fails in the
// task executor. The result carries code EInvalid and keeps err in its Err
// field; err is also rendered into the message.
func ErrFluxParseError(err error) *Error {
	parseErr := new(Error)
	parseErr.Code = EInvalid
	parseErr.Msg = fmt.Sprintf("could not parse Flux script; Err: %v", err)
	parseErr.Op = "kv/taskExecutor"
	parseErr.Err = err
	return parseErr
}
// ErrQueryError builds the *Error reported when the Query service fails in
// the task executor. The result carries code EInternal and keeps err in its
// Err field; err is also rendered into the message.
func ErrQueryError(err error) *Error {
	queryErr := new(Error)
	queryErr.Code = EInternal
	queryErr.Msg = fmt.Sprintf("unexpected error from queryd; Err: %v", err)
	queryErr.Op = "kv/taskExecutor"
	queryErr.Err = err
	return queryErr
}
// ErrResultIteratorError builds the *Error reported when
// exhaustResultIterators fails in the executor. The result carries code
// EInternal and keeps err in its Err field.
//
// The returned pointer is never nil, even when err is nil; callers that need
// to know whether a failure occurred must inspect the Err field.
func ErrResultIteratorError(err error) *Error {
	iterErr := new(Error)
	iterErr.Code = EInternal
	iterErr.Msg = fmt.Sprintf("Error exhausting result iterator; Err: %v", err)
	iterErr.Op = "kv/taskExecutor"
	iterErr.Err = err
	return iterErr
}
func ErrInternalTaskServiceError(err error) *Error {
return &Error{
Code: EInternal,