influxdb/gather/scheduler.go

package gather

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"time"

	"github.com/influxdata/influxdb"
	"github.com/influxdata/influxdb/kit/tracing"
	"github.com/influxdata/influxdb/nats"
	"go.uber.org/zap"
)

// nats subjects
const (
	MetricsSubject    = "metrics"
	promTargetSubject = "promTarget"
)
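
// Rough message flow, as inferred from this file: the Scheduler JSON-encodes
// each scraper target and publishes it on promTargetSubject; the handlers
// subscribed in NewScheduler scrape those targets and publish the gathered
// metrics onward, presumably on the exported MetricsSubject.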

// Scheduler is a struct to run scrape jobs.
type Scheduler struct {
	Targets influxdb.ScraperTargetStoreService

	// Interval is the time between each metrics gathering event.
	Interval time.Duration

	// Timeout is the maximum time duration allowed for each TCP request.
	Timeout time.Duration

	// Publisher will send the gather requests and gathered metrics to the queue.
	Publisher nats.Publisher

	log *zap.Logger

	gather chan struct{}
}

// NewScheduler creates a new Scheduler and subscriptions for scraper jobs.
func NewScheduler(
	log *zap.Logger,
	numScrapers int,
	targets influxdb.ScraperTargetStoreService,
	p nats.Publisher,
	s nats.Subscriber,
	interval time.Duration,
	timeout time.Duration,
) (*Scheduler, error) {
	if interval == 0 {
		interval = 60 * time.Second
	}
	if timeout == 0 {
		timeout = 30 * time.Second
	}

	scheduler := &Scheduler{
		Targets:   targets,
		Interval:  interval,
		Timeout:   timeout,
		Publisher: p,
		log:       log,

		// Buffered so scheduled ticks can queue up (to a depth of 100)
		// rather than blocking the timer goroutine while a gather is
		// still in flight.
		gather: make(chan struct{}, 100),
	}

	for i := 0; i < numScrapers; i++ {
		err := s.Subscribe(promTargetSubject, "metrics", &handler{
			Scraper:   new(prometheusScraper),
			Publisher: p,
			log:       log,
		})
		if err != nil {
			return nil, err
		}
	}

	return scheduler, nil
}
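
// A hypothetical wiring sketch (the logger/publisher/subscriber/store values
// stand in for whatever the caller already has; this is not the canonical
// setup):
//
//	sched, err := NewScheduler(logger, 10, targetStore, publisher, subscriber, 0, 0)
//	if err != nil {
//		return err
//	}
//	// Run blocks until ctx is canceled, so it is typically started in its
//	// own goroutine with a cancelable context.
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	go sched.Run(ctx)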

// Run will retrieve scraper targets from the target storage,
// and publish them to the nats job queue for gathering.
func (s *Scheduler) Run(ctx context.Context) error {
	go func(s *Scheduler, ctx context.Context) {
		for {
			select {
			case <-ctx.Done():
				return
			case <-time.After(s.Interval): // TODO: change to ticker because of garbage collection
				s.gather <- struct{}{}
			}
		}
	}(s, ctx)
	return s.run(ctx)
}
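
// A sketch of the ticker variant the TODO above refers to: time.NewTicker
// reuses a single runtime timer, whereas time.After allocates a new timer
// (and its channel) on every iteration, leaving more work for the garbage
// collector.
//
//	ticker := time.NewTicker(s.Interval)
//	defer ticker.Stop()
//	for {
//		select {
//		case <-ctx.Done():
//			return
//		case <-ticker.C:
//			s.gather <- struct{}{}
//		}
//	}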

func (s *Scheduler) run(ctx context.Context) error {
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-s.gather:
			s.doGather(ctx)
		}
	}
}

func (s *Scheduler) doGather(ctx context.Context) {
	ctx, cancel := context.WithTimeout(ctx, s.Timeout)
	defer cancel()
	span, ctx := tracing.StartSpanFromContext(ctx)
	defer span.Finish()

	targets, err := s.Targets.ListTargets(ctx, influxdb.ScraperTargetFilter{})
	if err != nil {
		s.log.Error("Cannot list targets", zap.Error(err))
		tracing.LogError(span, err)
		return
	}
	for _, target := range targets {
		if err := requestScrape(target, s.Publisher); err != nil {
			s.log.Error("JSON encoding error", zap.Error(err))
			tracing.LogError(span, err)
		}
	}
}
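
// Each target published by doGather is consumed by one of the numScrapers
// handlers subscribed in NewScheduler (the "metrics" argument there is most
// likely a NATS queue-group name, so targets are load-balanced across the
// handlers rather than broadcast to all of them). A sketch of the payload
// for a Prometheus target, assuming the usual influxdb.ScraperTarget JSON
// tags:
//
//	{"name":"node","type":"prometheus","url":"http://localhost:9100/metrics"}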

func requestScrape(t influxdb.ScraperTarget, publisher nats.Publisher) error {
	buf := new(bytes.Buffer)
	err := json.NewEncoder(buf).Encode(t)
	if err != nil {
		return err
	}
	switch t.Type {
	case influxdb.PrometheusScraperType:
		return publisher.Publish(promTargetSubject, buf)
	}
	return fmt.Errorf("unsupported target scrape type: %s", t.Type)
}