diff --git a/Makefile b/Makefile index 78bffc3b8d..006273cac4 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ GO_ARGS=-tags '$(GO_TAGS)' # Test vars can be used by all recursive Makefiles export GOOS=$(shell go env GOOS) export GO_BUILD=env GO111MODULE=on go build $(GO_ARGS) -export GO_TEST=env GO111MODULE=on go test $(GO_ARGS) +export GO_TEST=env GOTRACEBACK=all GO111MODULE=on go test $(GO_ARGS) # Do not add GO111MODULE=on to the call to go generate so it doesn't pollute the environment. export GO_GENERATE=go generate $(GO_ARGS) export GO_VET=env GO111MODULE=on go vet $(GO_ARGS) @@ -120,7 +120,7 @@ test-integration: test: test-go test-js test-go-race: - $(GO_TEST) -race -count=1 ./... + $(GO_TEST) -v -race -count=1 ./... vet: $(GO_VET) -v ./... diff --git a/cmd/influxd/main.go b/cmd/influxd/main.go index 65be31b065..d0a5e402ae 100644 --- a/cmd/influxd/main.go +++ b/cmd/influxd/main.go @@ -265,12 +265,13 @@ func (m *Main) run(ctx context.Context) (err error) { { m.engine = storage.NewEngine(m.enginePath, storage.NewConfig(), storage.WithRetentionEnforcer(bucketSvc)) m.engine.WithLogger(m.logger) - reg.MustRegister(m.engine.PrometheusCollectors()...) if err := m.engine.Open(); err != nil { m.logger.Error("failed to open engine", zap.Error(err)) return err } + // The Engine's metrics must be registered after it opens. + reg.MustRegister(m.engine.PrometheusCollectors()...) pointsWriter = m.engine diff --git a/pkg/rhh/metrics.go b/pkg/rhh/metrics.go new file mode 100644 index 0000000000..947743d996 --- /dev/null +++ b/pkg/rhh/metrics.go @@ -0,0 +1,118 @@ +package rhh + +import ( + "sort" + + "github.com/prometheus/client_golang/prometheus" +) + +type Metrics struct { + LoadFactor *prometheus.GaugeVec // Load factor of the hashmap. + Size *prometheus.GaugeVec // Number of items in hashmap. + GetDuration *prometheus.HistogramVec // Sample of get times. + LastGetDuration *prometheus.GaugeVec // Sample of most recent get time. 
+ InsertDuration *prometheus.HistogramVec // Sample of insertion times. + LastInsertDuration *prometheus.GaugeVec // Sample of most recent insertion time. + LastGrowDuration *prometheus.GaugeVec // Most recent growth time. + MeanProbeCount *prometheus.GaugeVec // Average number of probes for each element. + + // These metrics have an extra label status = {"hit", "miss"} + Gets *prometheus.CounterVec // Number of times item retrieved. + Puts *prometheus.CounterVec // Number of times item inserted. +} + +// NewMetrics initialises prometheus metrics for tracking an RHH hashmap. +func NewMetrics(namespace, subsystem string, labels prometheus.Labels) *Metrics { + var names []string + for k := range labels { + names = append(names, k) + } + sort.Strings(names) + + getPutNames := append(append([]string(nil), names...), "status") + sort.Strings(getPutNames) + + return &Metrics{ + LoadFactor: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "load_percent", + Help: "Load factor of the hashmap.", + }, names), + Size: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "size", + Help: "Number of items in the hashmap.", + }, names), + GetDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "get_duration_ns", + Help: "Times taken to retrieve elements in nanoseconds (sampled every 10% of retrievals).", + // 15 buckets spaced exponentially between 100 and ~30,000. 
+ Buckets: prometheus.ExponentialBuckets(100., 1.5, 15), + }, names), + LastGetDuration: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "get_duration_last_ns", + Help: "Last retrieval duration in nanoseconds (sampled every 10% of retrievals)", + }, names), + InsertDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "put_duration_ns", + Help: "Times taken to insert elements in nanoseconds (sampled every 10% of insertions).", + // 15 buckets spaced exponentially between 100 and ~30,000. + Buckets: prometheus.ExponentialBuckets(100., 1.5, 15), + }, names), + LastInsertDuration: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "put_duration_last_ns", + Help: "Last insertion duration in nanoseconds (sampled every 10% of insertions)", + }, names), + LastGrowDuration: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "grow_duration_s", + Help: "Time in seconds to last grow the hashmap.", + }, names), + MeanProbeCount: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "mean_probes", + Help: "Average probe count of all elements (sampled every 0.5% of insertions).", + }, names), + + Gets: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "get_total", + Help: "Number of times elements retrieved.", + }, getPutNames), + Puts: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "put_total", + Help: "Number of times elements inserted.", + }, getPutNames), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. 
+func (m *Metrics) PrometheusCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.LoadFactor, + m.Size, + m.GetDuration, + m.LastGetDuration, + m.InsertDuration, + m.LastInsertDuration, + m.LastGrowDuration, + m.MeanProbeCount, + m.Gets, + m.Puts, + } +} diff --git a/pkg/rhh/metrics_test.go b/pkg/rhh/metrics_test.go new file mode 100644 index 0000000000..263534d50b --- /dev/null +++ b/pkg/rhh/metrics_test.go @@ -0,0 +1,108 @@ +package rhh + +import ( + "testing" + + "github.com/influxdata/platform/kit/prom/promtest" + "github.com/prometheus/client_golang/prometheus" +) + +func TestMetrics_Metrics(t *testing.T) { + // metrics to be shared by multiple file stores. + metrics := NewMetrics("test", "sub", prometheus.Labels{"engine_id": "", "node_id": ""}) + + t1 := newRHHTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) + t2 := newRHHTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) + + reg := prometheus.NewRegistry() + reg.MustRegister(metrics.PrometheusCollectors()...) + + base := "test_sub_" + + // All the metric names + gauges := []string{ + base + "load_percent", + base + "size", + base + "get_duration_last_ns", + base + "put_duration_last_ns", + base + "grow_duration_s", + base + "mean_probes", + } + + counters := []string{ + base + "get_total", + base + "put_total", + } + + histograms := []string{ + base + "get_duration_ns", + base + "put_duration_ns", + } + + // Generate some measurements. 
+ for i, tracker := range []*rhhTracker{t1, t2} { + tracker.SetLoadFactor(float64(i + len(gauges[0]))) + tracker.SetSize(uint64(i + len(gauges[1]))) + + labels := tracker.Labels() + tracker.metrics.LastGetDuration.With(labels).Set(float64(i + len(gauges[2]))) + tracker.metrics.LastInsertDuration.With(labels).Set(float64(i + len(gauges[3]))) + tracker.metrics.LastGrowDuration.With(labels).Set(float64(i + len(gauges[4]))) + tracker.SetProbeCount(float64(i + len(gauges[5]))) + + labels = tracker.Labels() + labels["status"] = "ok" + tracker.metrics.Gets.With(labels).Add(float64(i + len(counters[0]))) + tracker.metrics.Puts.With(labels).Add(float64(i + len(counters[1]))) + + labels = tracker.Labels() + tracker.metrics.GetDuration.With(labels).Observe(float64(i + len(histograms[0]))) + tracker.metrics.InsertDuration.With(labels).Observe(float64(i + len(histograms[1]))) + } + + // Test that all the correct metrics are present. + mfs, err := reg.Gather() + if err != nil { + t.Fatal(err) + } + + // The label variants for the two caches. 
+ labelVariants := []prometheus.Labels{ + prometheus.Labels{"engine_id": "0", "node_id": "0"}, + prometheus.Labels{"engine_id": "1", "node_id": "0"}, + } + + for i, labels := range labelVariants { + for _, name := range gauges { + exp := float64(i + len(name)) + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetGauge().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for _, name := range counters { + exp := float64(i + len(name)) + + // Make a copy since we need to add a label + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["status"] = "ok" + + metric := promtest.MustFindMetric(t, mfs, name, l) + if got := metric.GetCounter().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for _, name := range histograms { + exp := float64(i + len(name)) + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetHistogram().GetSampleSum(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + } +} diff --git a/pkg/rhh/rhh.go b/pkg/rhh/rhh.go index bb8db4be7c..bd986e86ee 100644 --- a/pkg/rhh/rhh.go +++ b/pkg/rhh/rhh.go @@ -3,9 +3,12 @@ package rhh import ( "bytes" "encoding/binary" + "math/rand" "sort" + "time" "github.com/cespare/xxhash" + "github.com/prometheus/client_golang/prometheus" ) // HashMap represents a hash map that implements Robin Hood Hashing. @@ -21,12 +24,20 @@ type HashMap struct { loadFactor int tmpKey []byte + + tracker *rhhTracker } +// NewHashMap initialises a new Hashmap with the provided options. func NewHashMap(opt Options) *HashMap { + if opt.Metrics == nil { + opt.Metrics = NewMetrics("", "", nil) + } + m := &HashMap{ capacity: pow2(opt.Capacity), // Limited to 2^64. 
loadFactor: opt.LoadFactor, + tracker: newRHHTracker(opt.Metrics, opt.Labels), } m.alloc() return m @@ -39,17 +50,41 @@ func (m *HashMap) Reset() { m.elems[i].reset() } m.n = 0 + m.tracker.SetSize(0) } +// Get returns the value for a key from the Hashmap, or nil if no key exists. func (m *HashMap) Get(key []byte) interface{} { + var now time.Time + var sample bool + if rand.Float64() < 0.1 { + now = time.Now() + sample = true + } + i := m.index(key) + + if sample { + m.tracker.ObserveGet(time.Since(now)) + } + if i == -1 { + m.tracker.IncGetMiss() return nil } + m.tracker.IncGetHit() return m.elems[i].value } -func (m *HashMap) Put(key []byte, val interface{}) { +func (m *HashMap) put(key []byte, val interface{}, instrument bool) { + var now time.Time + var samplePut bool + + if instrument && rand.Float64() < 0.1 { + now = time.Now() + samplePut = true + } + // Grow the map if we've run out of slots. m.n++ if m.n > m.threshold { @@ -58,11 +93,35 @@ func (m *HashMap) Put(key []byte, val interface{}) { // If the key was overwritten then decrement the size. overwritten := m.insert(HashKey(key), key, val) + if instrument && samplePut { + m.tracker.ObservePut(time.Since(now)) + } + if overwritten { m.n-- + if instrument { + m.tracker.IncPutHit() + } + } else if instrument { + m.tracker.SetSize(uint64(m.n)) + m.tracker.SetLoadFactor(float64(m.n) / float64(m.capacity) * 100.0) + m.tracker.IncPutMiss() } } +// Put stores the value at key in the Hashmap, overwriting an existing value if +// one exists. If the maximum load of the Hashmap is reached, the Hashmap will +// first resize itself. +func (m *HashMap) Put(key []byte, val interface{}) { + m.put(key, val, true) +} + +// PutQuiet is equivalent to Put, but no instrumentation code is executed. It can +// be faster when many keys are being inserted into the Hashmap. 
+func (m *HashMap) PutQuiet(key []byte, val interface{}) { + m.put(key, val, false) +} + func (m *HashMap) insert(hash int64, key []byte, val interface{}) (overwritten bool) { pos := hash & m.mask var dist int64 @@ -186,7 +245,7 @@ func (m *HashMap) AverageProbeCount() float64 { } sum += float64(Dist(hash, i, m.capacity)) } - return sum/float64(m.n) + 1.0 + return sum / (float64(m.n) + 1.0) } // Keys returns a list of sorted keys. @@ -203,6 +262,81 @@ func (m *HashMap) Keys() [][]byte { return a } +// PrometheusCollectors returns the metrics associated with this hashmap. +func (m *HashMap) PrometheusCollectors() []prometheus.Collector { + return m.tracker.metrics.PrometheusCollectors() +} + +type rhhTracker struct { + metrics *Metrics + labels prometheus.Labels +} + +// Labels returns a copy of the default labels used by the tracker's metrics. +// The returned map is safe for modification. +func (t *rhhTracker) Labels() prometheus.Labels { + labels := make(prometheus.Labels, len(t.labels)) + for k, v := range t.labels { + labels[k] = v + } + return labels +} + +func newRHHTracker(metrics *Metrics, defaultLabels prometheus.Labels) *rhhTracker { + return &rhhTracker{metrics: metrics, labels: defaultLabels} +} + +func (t *rhhTracker) SetLoadFactor(load float64) { + labels := t.Labels() + t.metrics.LoadFactor.With(labels).Set(load) +} + +func (t *rhhTracker) SetSize(sz uint64) { + labels := t.Labels() + t.metrics.Size.With(labels).Set(float64(sz)) +} + +func (t *rhhTracker) ObserveGet(d time.Duration) { + labels := t.Labels() + t.metrics.GetDuration.With(labels).Observe(float64(d.Nanoseconds())) + t.metrics.LastGetDuration.With(labels).Set(float64(d.Nanoseconds())) +} + +func (t *rhhTracker) ObservePut(d time.Duration) { + labels := t.Labels() + t.metrics.InsertDuration.With(labels).Observe(float64(d.Nanoseconds())) + t.metrics.LastInsertDuration.With(labels).Set(float64(d.Nanoseconds())) +} + +func (t *rhhTracker) SetGrowDuration(d time.Duration) { + labels := 
t.Labels() + t.metrics.LastGrowDuration.With(labels).Set(d.Seconds()) +} + +// TODO(edd): currently no safe way to calculate this concurrently. +func (t *rhhTracker) SetProbeCount(length float64) { + labels := t.Labels() + t.metrics.MeanProbeCount.With(labels).Set(length) +} + +func (t *rhhTracker) incGet(status string) { + labels := t.Labels() + labels["status"] = status + t.metrics.Gets.With(labels).Inc() +} + +func (t *rhhTracker) IncGetHit() { t.incGet("hit") } +func (t *rhhTracker) IncGetMiss() { t.incGet("miss") } + +func (t *rhhTracker) incPut(status string) { + labels := t.Labels() + labels["status"] = status + t.metrics.Puts.With(labels).Inc() +} + +func (t *rhhTracker) IncPutHit() { t.incPut("hit") } +func (t *rhhTracker) IncPutMiss() { t.incPut("miss") } + type hashElem struct { key []byte value interface{} @@ -225,6 +359,8 @@ func (e *hashElem) setKey(v []byte) { type Options struct { Capacity int64 LoadFactor int + Metrics *Metrics + Labels prometheus.Labels } // DefaultOptions represents a default set of options to pass to NewHashMap(). diff --git a/storage/engine.go b/storage/engine.go index 379af0ba81..535a177c17 100644 --- a/storage/engine.go +++ b/storage/engine.go @@ -39,6 +39,8 @@ type Engine struct { wal *tsm1.WAL retentionEnforcer *retentionEnforcer + defaultMetricLabels prometheus.Labels + // Tracks all goroutines started by the Engine. 
wg sync.WaitGroup @@ -61,6 +63,7 @@ func WithTSMFilenameFormatter(fn tsm1.FormatFileNameFunc) Option { func WithEngineID(id int) Option { return func(e *Engine) { e.engineID = &id + e.defaultMetricLabels["engine_id"] = fmt.Sprint(*e.engineID) } } @@ -69,6 +72,7 @@ func WithEngineID(id int) Option { func WithNodeID(id int) Option { return func(e *Engine) { e.nodeID = &id + e.defaultMetricLabels["node_id"] = fmt.Sprint(*e.nodeID) } } @@ -78,17 +82,6 @@ func WithNodeID(id int) Option { func WithRetentionEnforcer(finder BucketFinder) Option { return func(e *Engine) { e.retentionEnforcer = newRetentionEnforcer(e, finder) - - if e.engineID != nil { - e.retentionEnforcer.defaultMetricLabels["engine_id"] = fmt.Sprint(*e.engineID) - } - - if e.nodeID != nil { - e.retentionEnforcer.defaultMetricLabels["node_id"] = fmt.Sprint(*e.nodeID) - } - - // As new labels may have been set, set the new metrics on the enforcer. - e.retentionEnforcer.retentionMetrics = newRetentionMetrics(e.retentionEnforcer.defaultMetricLabels) } } @@ -110,9 +103,11 @@ func WithCompactionPlanner(planner tsm1.CompactionPlanner) Option { // TSM engine. func NewEngine(path string, c Config, options ...Option) *Engine { e := &Engine{ - config: c, - path: path, - logger: zap.NewNop(), + config: c, + path: path, + sfile: tsdb.NewSeriesFile(c.GetSeriesFilePath(path)), + defaultMetricLabels: prometheus.Labels{}, + logger: zap.NewNop(), } // Initialize series file. @@ -140,6 +135,11 @@ func NewEngine(path string, c Config, options ...Option) *Engine { for _, option := range options { option(e) } + // Set default metrics labels. 
+ e.engine.SetDefaultMetricLabels(e.defaultMetricLabels) + e.sfile.SetDefaultMetricLabels(e.defaultMetricLabels) + e.index.SetDefaultMetricLabels(e.defaultMetricLabels) + return e } @@ -151,7 +151,7 @@ func (e *Engine) WithLogger(log *zap.Logger) { } if e.engineID != nil { - fields = append(fields, zap.Int("engine_id", *e.nodeID)) + fields = append(fields, zap.Int("engine_id", *e.engineID)) } fields = append(fields, zap.String("service", "storage-engine")) @@ -166,9 +166,9 @@ func (e *Engine) WithLogger(log *zap.Logger) { // the engine and its components. func (e *Engine) PrometheusCollectors() []prometheus.Collector { var metrics []prometheus.Collector - // TODO(edd): Get prom metrics for TSM. - // TODO(edd): Get prom metrics for index. - // TODO(edd): Get prom metrics for series file. + metrics = append(metrics, tsdb.PrometheusCollectors()...) + metrics = append(metrics, tsi1.PrometheusCollectors()...) + metrics = append(metrics, tsm1.PrometheusCollectors()...) metrics = append(metrics, e.retentionEnforcer.PrometheusCollectors()...) return metrics } @@ -197,6 +197,7 @@ func (e *Engine) Open() error { e.engine.SetCompactionsEnabled(true) // TODO(edd):is this needed? e.closing = make(chan struct{}) + // TODO(edd) background tasks will be run in priority order via a scheduler. // For now we will just run on an interval as we only have the retention // policy enforcer. @@ -221,6 +222,11 @@ func (e *Engine) runRetentionEnforcer() { return } + if e.retentionEnforcer != nil { + // Set default metric labels on retention enforcer. 
+ e.retentionEnforcer.metrics = newRetentionMetrics(e.defaultMetricLabels) + } + l := e.logger.With(zap.String("component", "retention_enforcer"), logger.DurationLiteral("check_interval", interval)) l.Info("Starting") diff --git a/storage/metrics.go b/storage/metrics.go index 6bbe690a0a..277386ab40 100644 --- a/storage/metrics.go +++ b/storage/metrics.go @@ -1,6 +1,10 @@ package storage -import "github.com/prometheus/client_golang/prometheus" +import ( + "sort" + + "github.com/prometheus/client_golang/prometheus" +) // namespace is the leading part of all published metrics for the Storage service. const namespace = "storage" @@ -9,6 +13,7 @@ const retentionSubsystem = "retention" // sub-system associated with metrics for // retentionMetrics is a set of metrics concerned with tracking data about retention policies. type retentionMetrics struct { + labels prometheus.Labels Checks *prometheus.CounterVec CheckDuration *prometheus.HistogramVec Unprocessable *prometheus.CounterVec @@ -20,8 +25,11 @@ func newRetentionMetrics(labels prometheus.Labels) *retentionMetrics { for k := range labels { names = append(names, k) } + names = append(names, "status") // All metrics include status + sort.Strings(names) return &retentionMetrics{ + labels: labels, Checks: prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: retentionSubsystem, @@ -54,6 +62,15 @@ func newRetentionMetrics(labels prometheus.Labels) *retentionMetrics { } } +// Labels returns a copy of labels for use with retention metrics. +func (m *retentionMetrics) Labels() prometheus.Labels { + l := make(map[string]string, len(m.labels)) + for k, v := range m.labels { + l[k] = v + } + return l +} + // PrometheusCollectors satisfies the prom.PrometheusCollector interface. 
func (rm *retentionMetrics) PrometheusCollectors() []prometheus.Collector { return []prometheus.Collector{ diff --git a/storage/retention.go b/storage/retention.go index 6bae09b3b4..8b11e855b1 100644 --- a/storage/retention.go +++ b/storage/retention.go @@ -48,8 +48,7 @@ type retentionEnforcer struct { logger *zap.Logger - retentionMetrics *retentionMetrics - defaultMetricLabels prometheus.Labels // N.B this must not be mutated after Open is called. + metrics *retentionMetrics } // newRetentionEnforcer returns a new enforcer that ensures expired data is @@ -57,24 +56,14 @@ type retentionEnforcer struct { // disabling the service. func newRetentionEnforcer(engine Deleter, bucketService BucketFinder) *retentionEnforcer { s := &retentionEnforcer{ - Engine: engine, - BucketService: bucketService, - logger: zap.NewNop(), - defaultMetricLabels: prometheus.Labels{"status": ""}, + Engine: engine, + BucketService: bucketService, + logger: zap.NewNop(), } - s.retentionMetrics = newRetentionMetrics(s.defaultMetricLabels) + s.metrics = newRetentionMetrics(nil) return s } -// metricLabels returns a new copy of the default metric labels. -func (s *retentionEnforcer) metricLabels() prometheus.Labels { - labels := make(map[string]string, len(s.defaultMetricLabels)) - for k, v := range s.defaultMetricLabels { - labels[k] = v - } - return labels -} - // WithLogger sets the logger l on the service. It must be called before Open. 
func (s *retentionEnforcer) WithLogger(l *zap.Logger) { if s == nil { @@ -96,15 +85,15 @@ func (s *retentionEnforcer) run() { } now := time.Now().UTC() - labels := s.metricLabels() + labels := s.metrics.Labels() labels["status"] = "ok" if err := s.expireData(rpByBucketID, now); err != nil { log.Error("Deletion not successful", zap.Error(err)) labels["status"] = "error" } - s.retentionMetrics.CheckDuration.With(labels).Observe(time.Since(now).Seconds()) - s.retentionMetrics.Checks.With(labels).Inc() + s.metrics.CheckDuration.With(labels).Observe(time.Since(now).Seconds()) + s.metrics.Checks.With(labels).Inc() } // expireData runs a delete operation on the storage engine. @@ -162,21 +151,21 @@ func (s *retentionEnforcer) expireData(rpByBucketID map[platform.ID]time.Duratio } defer func() { - if s.retentionMetrics == nil { + if s.metrics == nil { return } - labels := s.metricLabels() + labels := s.metrics.Labels() labels["status"] = "bad_measurement" - s.retentionMetrics.Unprocessable.With(labels).Add(float64(len(badMSketch))) + s.metrics.Unprocessable.With(labels).Add(float64(len(badMSketch))) labels["status"] = "missing_bucket" - s.retentionMetrics.Unprocessable.With(labels).Add(float64(len(missingBSketch))) + s.metrics.Unprocessable.With(labels).Add(float64(len(missingBSketch))) labels["status"] = "ok" - s.retentionMetrics.Series.With(labels).Add(float64(atomic.LoadUint64(&seriesDeleted))) + s.metrics.Series.With(labels).Add(float64(atomic.LoadUint64(&seriesDeleted))) labels["status"] = "skipped" - s.retentionMetrics.Series.With(labels).Add(float64(atomic.LoadUint64(&seriesSkipped))) + s.metrics.Series.With(labels).Add(float64(atomic.LoadUint64(&seriesSkipped))) }() return s.Engine.DeleteSeriesRangeWithPredicate(newSeriesIteratorAdapter(cur), fn) @@ -200,7 +189,7 @@ func (s *retentionEnforcer) getRetentionPeriodPerBucket() (map[platform.ID]time. // PrometheusCollectors satisfies the prom.PrometheusCollector interface. 
func (s *retentionEnforcer) PrometheusCollectors() []prometheus.Collector { - return s.retentionMetrics.PrometheusCollectors() + return s.metrics.PrometheusCollectors() } // A BucketService is an platform.BucketService that the retentionEnforcer can open, diff --git a/tsdb/metrics.go b/tsdb/metrics.go new file mode 100644 index 0000000000..47429ab94b --- /dev/null +++ b/tsdb/metrics.go @@ -0,0 +1,127 @@ +package tsdb + +import ( + "sort" + "sync" + + "github.com/influxdata/platform/pkg/rhh" + + "github.com/prometheus/client_golang/prometheus" +) + +// The following package variables act as singletons, to be shared by all +// storage.Engine instantiations. This allows multiple Series Files to be +// monitored within the same process. +var ( + sms *seriesFileMetrics // main metrics + ims *rhh.Metrics // hashmap specific metrics + mmu sync.RWMutex +) + +// PrometheusCollectors returns all the metrics associated with the tsdb package. +func PrometheusCollectors() []prometheus.Collector { + mmu.RLock() + defer mmu.RUnlock() + + var collectors []prometheus.Collector + if sms != nil { + collectors = append(collectors, sms.PrometheusCollectors()...) + } + + if ims != nil { + collectors = append(collectors, ims.PrometheusCollectors()...) + } + return collectors +} + +// namespace is the leading part of all published metrics for the Storage service. +const namespace = "storage" + +const seriesFileSubsystem = "series_file" // sub-system associated with metrics for the Series File. + +type seriesFileMetrics struct { + SeriesCreated *prometheus.CounterVec // Number of series created in Series File. + Series *prometheus.GaugeVec // Number of series. + DiskSize *prometheus.GaugeVec // Size occupied on disk. + Segments *prometheus.GaugeVec // Number of segment files. + + CompactionsActive *prometheus.GaugeVec // Number of active compactions. + CompactionDuration *prometheus.HistogramVec // Duration of compactions. 
+ // The following metrics include a ``"status" = {ok, error}` label + Compactions *prometheus.CounterVec // Total number of compactions. +} + +// newSeriesFileMetrics initialises the prometheus metrics for tracking the Series File. +func newSeriesFileMetrics(labels prometheus.Labels) *seriesFileMetrics { + names := []string{"series_file_partition"} // All metrics have this label. + for k := range labels { + names = append(names, k) + } + sort.Strings(names) + + totalCompactions := append(append([]string(nil), names...), "status") + sort.Strings(totalCompactions) + + durationCompaction := append(append([]string(nil), names...), "component") + sort.Strings(durationCompaction) + + return &seriesFileMetrics{ + SeriesCreated: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: seriesFileSubsystem, + Name: "series_created", + Help: "Number of series created in Series File.", + }, names), + Series: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: seriesFileSubsystem, + Name: "series_total", + Help: "Number of series in Series File.", + }, names), + DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: seriesFileSubsystem, + Name: "disk_bytes", + Help: "Number of bytes Series File is using on disk.", + }, names), + Segments: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: seriesFileSubsystem, + Name: "segments_total", + Help: "Number of segment files in Series File.", + }, names), + CompactionsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: seriesFileSubsystem, + Name: "index_compactions_active", + Help: "Number of active index compactions.", + }, durationCompaction), + CompactionDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: seriesFileSubsystem, + Name: "index_compactions_duration_seconds", + Help: "Time taken for a successful compaction of 
index.", + // 30 buckets spaced exponentially between 5s and ~53 minutes. + Buckets: prometheus.ExponentialBuckets(5.0, 1.25, 30), + }, durationCompaction), + Compactions: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: seriesFileSubsystem, + Name: "compactions_total", + Help: "Number of compactions.", + }, totalCompactions), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. +func (m *seriesFileMetrics) PrometheusCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.SeriesCreated, + m.Series, + m.DiskSize, + m.Segments, + m.CompactionsActive, + m.CompactionDuration, + m.Compactions, + } +} diff --git a/tsdb/metrics_test.go b/tsdb/metrics_test.go new file mode 100644 index 0000000000..8f84099466 --- /dev/null +++ b/tsdb/metrics_test.go @@ -0,0 +1,132 @@ +package tsdb + +import ( + "testing" + + "github.com/influxdata/platform/kit/prom/promtest" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +func TestMetrics_SeriesPartition(t *testing.T) { + // metrics to be shared by multiple file stores. + metrics := newSeriesFileMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) + + t1 := newSeriesPartitionTracker(metrics, prometheus.Labels{"series_file_partition": "0", "engine_id": "0", "node_id": "0"}) + t2 := newSeriesPartitionTracker(metrics, prometheus.Labels{"series_file_partition": "0", "engine_id": "1", "node_id": "0"}) + + reg := prometheus.NewRegistry() + reg.MustRegister(metrics.PrometheusCollectors()...) 
+ + base := namespace + "_" + seriesFileSubsystem + "_" + + // All the metric names + gauges := []string{ + base + "series_total", + base + "disk_bytes", + base + "segments_total", + base + "index_compactions_active", + } + + counters := []string{ + base + "series_created", + base + "compactions_total", + } + + histograms := []string{ + base + "index_compactions_duration_seconds", + } + + // Generate some measurements. + for i, tracker := range []*seriesPartitionTracker{t1, t2} { + tracker.SetSeries(uint64(i + len(gauges[0]))) + tracker.SetDiskSize(uint64(i + len(gauges[1]))) + tracker.SetSegments(uint64(i + len(gauges[2]))) + + labels := tracker.Labels() + labels["component"] = "index" + tracker.metrics.CompactionsActive.With(labels).Add(float64(i + len(gauges[3]))) + + tracker.AddSeriesCreated(uint64(i + len(counters[0]))) + labels = tracker.Labels() + labels["status"] = "ok" + tracker.metrics.Compactions.With(labels).Add(float64(i + len(counters[1]))) + + labels = tracker.Labels() + labels["component"] = "index" + tracker.metrics.CompactionDuration.With(labels).Observe(float64(i + len(histograms[0]))) + } + + // Test that all the correct metrics are present. + mfs, err := reg.Gather() + if err != nil { + t.Fatal(err) + } + + // The label variants for the two caches. 
+ labelVariants := []prometheus.Labels{ + prometheus.Labels{"engine_id": "0", "node_id": "0"}, + prometheus.Labels{"engine_id": "1", "node_id": "0"}, + } + + for i, labels := range labelVariants { + labels["series_file_partition"] = "0" + var metric *dto.Metric + + for _, name := range gauges { + exp := float64(i + len(name)) + + if name == base+"index_compactions_active" { + // Make a copy since we need to add a label + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["component"] = "index" + metric = promtest.MustFindMetric(t, mfs, name, l) + } else { + metric = promtest.MustFindMetric(t, mfs, name, labels) + } + + if got := metric.GetGauge().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for _, name := range counters { + exp := float64(i + len(name)) + + if name == base+"compactions_total" { + // Make a copy since we need to add a label + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["status"] = "ok" + + metric = promtest.MustFindMetric(t, mfs, name, l) + } else { + metric = promtest.MustFindMetric(t, mfs, name, labels) + } + + if got := metric.GetCounter().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for _, name := range histograms { + // Make a copy since we need to add a label + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["component"] = "index" + + exp := float64(i + len(name)) + metric := promtest.MustFindMetric(t, mfs, name, l) + if got := metric.GetHistogram().GetSampleSum(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + } +} diff --git a/tsdb/series_file.go b/tsdb/series_file.go index eeb384128f..7ed8703935 100644 --- a/tsdb/series_file.go +++ b/tsdb/series_file.go @@ -10,9 +10,13 @@ import ( "sort" "sync" + "github.com/influxdata/platform/logger" + 
"github.com/influxdata/platform/pkg/rhh" + "github.com/cespare/xxhash" "github.com/influxdata/platform/models" "github.com/influxdata/platform/pkg/binaryutil" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" "golang.org/x/sync/errgroup" ) @@ -35,6 +39,12 @@ type SeriesFile struct { path string partitions []*SeriesPartition + // N.B we have many partitions, but they must share the same metrics, so the + // metrics are managed in a single shared package variable and + // each partition decorates the same metric measurements with different + // partition id label values. + defaultMetricLabels prometheus.Labels + refs sync.RWMutex // RWMutex to track references to the SeriesFile that are in use. Logger *zap.Logger @@ -43,7 +53,9 @@ type SeriesFile struct { // NewSeriesFile returns a new instance of SeriesFile. func NewSeriesFile(path string) *SeriesFile { return &SeriesFile{ - path: path, + path: path, + // partitionMetrics: newSeriesFileMetrics(nil), + // indexMetrics: rhh.NewMetrics(namespace, seriesFileSubsystem+"_index", nil), Logger: zap.NewNop(), } } @@ -53,8 +65,20 @@ func (f *SeriesFile) WithLogger(log *zap.Logger) { f.Logger = log.With(zap.String("service", "series-file")) } +// SetDefaultMetricLabels sets the default labels for metrics on the Series File. +// It must be called before the SeriesFile is opened. +func (f *SeriesFile) SetDefaultMetricLabels(labels prometheus.Labels) { + f.defaultMetricLabels = make(prometheus.Labels, len(labels)) + for k, v := range labels { + f.defaultMetricLabels[k] = v + } +} + // Open memory maps the data file at the file's path. func (f *SeriesFile) Open() error { + _, logEnd := logger.NewOperation(f.Logger, "Opening Series File", "series_file_open", zap.String("path", f.path)) + defer logEnd() + // Wait for all references to be released and prevent new ones from being acquired. 
f.refs.Lock() defer f.refs.Unlock() @@ -64,12 +88,44 @@ func (f *SeriesFile) Open() error { return err } + // Initialise metrics for trackers. + mmu.Lock() + if sms == nil { + sms = newSeriesFileMetrics(f.defaultMetricLabels) + } + if ims == nil { + // Make a copy of the default labels so that another label can be provided. + labels := make(prometheus.Labels, len(f.defaultMetricLabels)) + for k, v := range f.defaultMetricLabels { + labels[k] = v + } + labels["series_file_partition"] = "" // All partitions have this label. + ims = rhh.NewMetrics(namespace, seriesFileSubsystem+"_index", labels) + } + mmu.Unlock() + // Open partitions. f.partitions = make([]*SeriesPartition, 0, SeriesFilePartitionN) for i := 0; i < SeriesFilePartitionN; i++ { // TODO(edd): These partition initialisation should be moved up to NewSeriesFile. p := NewSeriesPartition(i, f.SeriesPartitionPath(i)) p.Logger = f.Logger.With(zap.Int("partition", p.ID())) + + // For each series file index, rhh trackers are used to track the RHH Hashmap. + // Each of the trackers needs to be given slightly different default + // labels to ensure the correct partition_ids are set as labels. + labels := make(prometheus.Labels, len(f.defaultMetricLabels)) + for k, v := range f.defaultMetricLabels { + labels[k] = v + } + labels["series_file_partition"] = fmt.Sprint(p.ID()) + + p.index.rhhMetrics = ims + p.index.rhhLabels = labels + + // Set the metric trackers on the partition with any injected default labels. + p.tracker = newSeriesPartitionTracker(sms, labels) + if err := p.Open(); err != nil { f.Close() return err diff --git a/tsdb/series_file_test.go b/tsdb/series_file_test.go index 8dff5df24e..14fa6620d4 100644 --- a/tsdb/series_file_test.go +++ b/tsdb/series_file_test.go @@ -119,7 +119,7 @@ func TestSeriesFileCompactor(t *testing.T) { // Compact in-place for each partition. 
for _, p := range sfile.Partitions() { compactor := tsdb.NewSeriesPartitionCompactor() - if err := compactor.Compact(p); err != nil { + if _, err := compactor.Compact(p); err != nil { t.Fatal(err) } } @@ -267,7 +267,7 @@ func (f *SeriesFile) Reopen() error { // ForceCompact executes an immediate compaction across all partitions. func (f *SeriesFile) ForceCompact() error { for _, p := range f.Partitions() { - if err := tsdb.NewSeriesPartitionCompactor().Compact(p); err != nil { + if _, err := tsdb.NewSeriesPartitionCompactor().Compact(p); err != nil { return err } } diff --git a/tsdb/series_index.go b/tsdb/series_index.go index 1dab0ac095..170d0cc01c 100644 --- a/tsdb/series_index.go +++ b/tsdb/series_index.go @@ -10,6 +10,7 @@ import ( "github.com/influxdata/platform/models" "github.com/influxdata/platform/pkg/mmap" "github.com/influxdata/platform/pkg/rhh" + "github.com/prometheus/client_golang/prometheus" ) const ( @@ -43,6 +44,11 @@ type SeriesIndex struct { maxSeriesID SeriesID maxOffset int64 + // metrics stores a shared instance of some Prometheus metrics. metrics + // must be set before Open is called. + rhhMetrics *rhh.Metrics + rhhLabels prometheus.Labels + data []byte // mmap data keyIDData []byte // key/id mmap data idOffsetData []byte // id/offset mmap data @@ -86,7 +92,11 @@ func (idx *SeriesIndex) Open() (err error) { return err } - idx.keyIDMap = rhh.NewHashMap(rhh.DefaultOptions) + options := rhh.DefaultOptions + options.Metrics = idx.rhhMetrics + options.Labels = idx.rhhLabels + + idx.keyIDMap = rhh.NewHashMap(options) idx.idOffsetMap = make(map[SeriesID]int64) idx.tombstones = make(map[SeriesID]struct{}) return nil @@ -109,7 +119,11 @@ func (idx *SeriesIndex) Close() (err error) { // Recover rebuilds the in-memory index for all new entries. func (idx *SeriesIndex) Recover(segments []*SeriesSegment) error { // Allocate new in-memory maps. 
- idx.keyIDMap = rhh.NewHashMap(rhh.DefaultOptions) + options := rhh.DefaultOptions + options.Metrics = idx.rhhMetrics + options.Labels = idx.rhhLabels + + idx.keyIDMap = rhh.NewHashMap(options) idx.idOffsetMap = make(map[SeriesID]int64) idx.tombstones = make(map[SeriesID]struct{}) @@ -144,6 +158,16 @@ func (idx *SeriesIndex) OnDiskCount() uint64 { return idx.count } // InMemCount returns the number of series in the in-memory index. func (idx *SeriesIndex) InMemCount() uint64 { return uint64(len(idx.idOffsetMap)) } +// OnDiskSize returns the on-disk size of the index in bytes. +func (idx *SeriesIndex) OnDiskSize() uint64 { return uint64(len(idx.data)) } + +// InMemSize returns the heap size of the index in bytes. The returned value is +// an estimation and does not include all allocated memory. +func (idx *SeriesIndex) InMemSize() uint64 { + n := len(idx.idOffsetMap) + return uint64(2*8*n) + uint64(len(idx.tombstones)*8) +} + func (idx *SeriesIndex) Insert(key []byte, id SeriesIDTyped, offset int64) { idx.execEntry(SeriesEntryInsertFlag, id, offset, key) } @@ -166,7 +190,7 @@ func (idx *SeriesIndex) execEntry(flag uint8, id SeriesIDTyped, offset int64, ke untypedID := id.SeriesID() switch flag { case SeriesEntryInsertFlag: - idx.keyIDMap.Put(key, id) + idx.keyIDMap.PutQuiet(key, id) idx.idOffsetMap[untypedID] = offset if untypedID.Greater(idx.maxSeriesID) { diff --git a/tsdb/series_partition.go b/tsdb/series_partition.go index abde072f85..4ef1178be0 100644 --- a/tsdb/series_partition.go +++ b/tsdb/series_partition.go @@ -8,10 +8,12 @@ import ( "os" "path/filepath" "sync" + "time" "github.com/influxdata/platform/logger" "github.com/influxdata/platform/models" "github.com/influxdata/platform/pkg/rhh" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) @@ -44,19 +46,23 @@ type SeriesPartition struct { CompactThreshold int - Logger *zap.Logger + tracker *seriesPartitionTracker + Logger *zap.Logger } // NewSeriesPartition returns a new 
instance of SeriesPartition. func NewSeriesPartition(id int, path string) *SeriesPartition { - return &SeriesPartition{ + p := &SeriesPartition{ id: id, path: path, closing: make(chan struct{}), CompactThreshold: DefaultSeriesPartitionCompactThreshold, + tracker: newSeriesPartitionTracker(newSeriesFileMetrics(nil), nil), Logger: zap.NewNop(), seq: uint64(id) + 1, } + p.index = NewSeriesIndex(p.IndexPath()) + return p } // Open memory maps the data file at the partition's path. @@ -75,25 +81,24 @@ func (p *SeriesPartition) Open() error { if err := p.openSegments(); err != nil { return err } - // Init last segment for writes. if err := p.activeSegment().InitForWrite(); err != nil { return err } - p.index = NewSeriesIndex(p.IndexPath()) if err := p.index.Open(); err != nil { return err } else if p.index.Recover(p.segments); err != nil { return err } - return nil }(); err != nil { p.Close() return err } + p.tracker.SetSeries(p.index.Count()) // Set series count metric. + p.tracker.SetDiskSize(p.DiskSize()) // Set on-disk size metric. return nil } @@ -134,6 +139,7 @@ func (p *SeriesPartition) openSegments() error { p.segments = append(p.segments, segment) } + p.tracker.SetSegments(uint64(len(p.segments))) return nil } @@ -170,7 +176,7 @@ func (p *SeriesPartition) ID() int { return p.id } // Path returns the path to the partition. func (p *SeriesPartition) Path() string { return p.path } -// Path returns the path to the series index. +// IndexPath returns the path to the series index. func (p *SeriesPartition) IndexPath() string { return filepath.Join(p.path, "index") } // CreateSeriesListIfNotExists creates a list of series in bulk if they don't exist. @@ -283,6 +289,8 @@ func (p *SeriesPartition) CreateSeriesListIfNotExists(collection *SeriesCollecti for _, keyRange := range newKeyRanges { p.index.Insert(p.seriesKeyByOffset(keyRange.offset), keyRange.id, keyRange.offset) } + p.tracker.AddSeriesCreated(uint64(len(newKeyRanges))) // Track new series in metric. 
+ p.tracker.AddSeries(uint64(len(newKeyRanges))) // Check if we've crossed the compaction threshold. if p.compactionsEnabled() && !p.compacting && p.CompactThreshold != 0 && p.index.InMemCount() >= uint64(p.CompactThreshold) { @@ -290,13 +298,18 @@ func (p *SeriesPartition) CreateSeriesListIfNotExists(collection *SeriesCollecti log, logEnd := logger.NewOperation(p.Logger, "Series partition compaction", "series_partition_compaction", zap.String("path", p.path)) p.wg.Add(1) + p.tracker.IncCompactionsActive() go func() { defer p.wg.Done() compactor := NewSeriesPartitionCompactor() compactor.cancel = p.closing - if err := compactor.Compact(p); err != nil { + duration, err := compactor.Compact(p) + if err != nil { + p.tracker.IncCompactionErr() log.Error("series partition compaction failed", zap.Error(err)) + } else { + p.tracker.IncCompactionOK(duration) } logEnd() @@ -305,6 +318,10 @@ func (p *SeriesPartition) CreateSeriesListIfNotExists(collection *SeriesCollecti p.mu.Lock() p.compacting = false p.mu.Unlock() + p.tracker.DecCompactionsActive() + + // Disk size may have changed due to compaction. + p.tracker.SetDiskSize(p.DiskSize()) }() } @@ -348,7 +365,7 @@ func (p *SeriesPartition) DeleteSeriesID(id SeriesID) error { // Mark tombstone in memory. p.index.Delete(id) - + p.tracker.SubSeries(1) return nil } @@ -417,6 +434,21 @@ func (p *SeriesPartition) SeriesCount() uint64 { return n } +// DiskSize returns the number of bytes taken up on disk by the partition. 
+func (p *SeriesPartition) DiskSize() uint64 { + p.mu.RLock() + defer p.mu.RUnlock() + return p.diskSize() +} + +func (p *SeriesPartition) diskSize() uint64 { + totalSize := p.index.OnDiskSize() + for _, segment := range p.segments { + totalSize += uint64(len(segment.Data())) + } + return totalSize +} + func (p *SeriesPartition) DisableCompactions() { p.mu.Lock() defer p.mu.Unlock() @@ -503,7 +535,8 @@ func (p *SeriesPartition) createSegment() (*SeriesSegment, error) { if err := segment.InitForWrite(); err != nil { return nil, err } - + p.tracker.SetSegments(uint64(len(p.segments))) + p.tracker.SetDiskSize(p.diskSize()) // Disk size will change with new segment. return segment, nil } @@ -525,6 +558,101 @@ func (p *SeriesPartition) seriesKeyByOffset(offset int64) []byte { return nil } +type seriesPartitionTracker struct { + metrics *seriesFileMetrics + labels prometheus.Labels +} + +func newSeriesPartitionTracker(metrics *seriesFileMetrics, defaultLabels prometheus.Labels) *seriesPartitionTracker { + return &seriesPartitionTracker{ + metrics: metrics, + labels: defaultLabels, + } +} + +// Labels returns a copy of labels for use with Series File metrics. +func (t *seriesPartitionTracker) Labels() prometheus.Labels { + l := make(map[string]string, len(t.labels)) + for k, v := range t.labels { + l[k] = v + } + return l +} + +// AddSeriesCreated increases the number of series created in the partition by n. +func (t *seriesPartitionTracker) AddSeriesCreated(n uint64) { + labels := t.Labels() + t.metrics.SeriesCreated.With(labels).Add(float64(n)) +} + +// SetSeries sets the number of series in the partition. +func (t *seriesPartitionTracker) SetSeries(n uint64) { + labels := t.Labels() + t.metrics.Series.With(labels).Set(float64(n)) +} + +// AddSeries increases the number of series in the partition by n. 
+func (t *seriesPartitionTracker) AddSeries(n uint64) { + labels := t.Labels() + t.metrics.Series.With(labels).Add(float64(n)) +} + +// SubSeries decreases the number of series in the partition by n. +func (t *seriesPartitionTracker) SubSeries(n uint64) { + labels := t.Labels() + t.metrics.Series.With(labels).Sub(float64(n)) +} + +// SetDiskSize sets the number of bytes used by files in the partition. +func (t *seriesPartitionTracker) SetDiskSize(sz uint64) { + labels := t.Labels() + t.metrics.DiskSize.With(labels).Set(float64(sz)) +} + +// SetSegments sets the number of segment files for the partition. +func (t *seriesPartitionTracker) SetSegments(n uint64) { + labels := t.Labels() + t.metrics.Segments.With(labels).Set(float64(n)) +} + +// IncCompactionsActive increments the number of active compactions for the +// components of a partition (index and segments). +func (t *seriesPartitionTracker) IncCompactionsActive() { + labels := t.Labels() + labels["component"] = "index" // TODO(edd): when we add segment compactions we will add a new label value. + t.metrics.CompactionsActive.With(labels).Inc() +} + +// DecCompactionsActive decrements the number of active compactions for the +// components of a partition (index and segments). +func (t *seriesPartitionTracker) DecCompactionsActive() { + labels := t.Labels() + labels["component"] = "index" // TODO(edd): when we add segment compactions we will add a new label value. + t.metrics.CompactionsActive.With(labels).Dec() +} + +// incCompactions increments the number of compactions for the partition. +// Callers should use IncCompactionOK and IncCompactionErr. 
+func (t *seriesPartitionTracker) incCompactions(status string, duration time.Duration) { + if duration > 0 { + labels := t.Labels() + labels["component"] = "index" + t.metrics.CompactionDuration.With(labels).Observe(duration.Seconds()) + } + + labels := t.Labels() + labels["status"] = status + t.metrics.Compactions.With(labels).Inc() +} + +// IncCompactionOK increments the number of successful compactions for the partition. +func (t *seriesPartitionTracker) IncCompactionOK(duration time.Duration) { + t.incCompactions("ok", duration) +} + +// IncCompactionErr increments the number of failed compactions for the partition. +func (t *seriesPartitionTracker) IncCompactionErr() { t.incCompactions("error", 0) } + // SeriesPartitionCompactor represents an object reindexes a series partition and optionally compacts segments. type SeriesPartitionCompactor struct { cancel <-chan struct{} @@ -536,7 +664,7 @@ func NewSeriesPartitionCompactor() *SeriesPartitionCompactor { } // Compact rebuilds the series partition index. -func (c *SeriesPartitionCompactor) Compact(p *SeriesPartition) error { +func (c *SeriesPartitionCompactor) Compact(p *SeriesPartition) (time.Duration, error) { // Snapshot the partitions and index so we can check tombstones and replay at the end under lock. p.mu.RLock() segments := CloneSeriesSegments(p.segments) @@ -544,11 +672,14 @@ func (c *SeriesPartitionCompactor) Compact(p *SeriesPartition) error { seriesN := p.index.Count() p.mu.RUnlock() + now := time.Now() + // Compact index to a temporary location. indexPath := index.path + ".compacting" if err := c.compactIndexTo(index, seriesN, segments, indexPath); err != nil { - return err + return 0, err } + duration := time.Since(now) // Swap compacted index under lock & replay since compaction. 
if err := func() error { @@ -570,10 +701,10 @@ func (c *SeriesPartitionCompactor) Compact(p *SeriesPartition) error { } return nil }(); err != nil { - return err + return 0, err } - return nil + return duration, nil } func (c *SeriesPartitionCompactor) compactIndexTo(index *SeriesIndex, seriesN uint64, segments []*SeriesSegment, path string) error { diff --git a/tsdb/tsi1/cache.go b/tsdb/tsi1/cache.go index 5a2cb88401..f995cac9e5 100644 --- a/tsdb/tsi1/cache.go +++ b/tsdb/tsi1/cache.go @@ -5,6 +5,7 @@ import ( "sync" "github.com/influxdata/platform/tsdb" + "github.com/prometheus/client_golang/prometheus" ) // TagValueSeriesIDCache is an LRU cache for series id sets associated with @@ -24,6 +25,7 @@ type TagValueSeriesIDCache struct { cache map[string]map[string]map[string]*list.Element evictor *list.List + tracker *cacheTracker capacity int } @@ -32,6 +34,7 @@ func NewTagValueSeriesIDCache(c int) *TagValueSeriesIDCache { return &TagValueSeriesIDCache{ cache: map[string]map[string]map[string]*list.Element{}, evictor: list.New(), + tracker: newCacheTracker(newCacheMetrics(nil), nil), capacity: c, } } @@ -48,11 +51,13 @@ func (c *TagValueSeriesIDCache) get(name, key, value []byte) *tsdb.SeriesIDSet { if mmap, ok := c.cache[string(name)]; ok { if tkmap, ok := mmap[string(key)]; ok { if ele, ok := tkmap[string(value)]; ok { + c.tracker.IncGetHit() c.evictor.MoveToFront(ele) // This now becomes most recently used. return ele.Value.(*seriesIDCacheElement).SeriesIDSet } } } + c.tracker.IncGetMiss() return nil } @@ -100,6 +105,7 @@ func (c *TagValueSeriesIDCache) Put(name, key, value []byte, ss *tsdb.SeriesIDSe // Check under the write lock if the relevant item is now in the cache. 
if c.exists(name, key, value) { c.Unlock() + c.tracker.IncPutHit() return } defer c.Unlock() @@ -136,6 +142,7 @@ func (c *TagValueSeriesIDCache) Put(name, key, value []byte, ss *tsdb.SeriesIDSe EVICT: c.checkEviction() + c.tracker.IncPutMiss() } // Delete removes x from the tuple {name, key, value} if it exists. @@ -153,16 +160,21 @@ func (c *TagValueSeriesIDCache) delete(name, key, value []byte, x tsdb.SeriesID) if ele, ok := tkmap[string(value)]; ok { if ss := ele.Value.(*seriesIDCacheElement).SeriesIDSet; ss != nil { ele.Value.(*seriesIDCacheElement).SeriesIDSet.Remove(x) + c.tracker.IncDeletesHit() + return } } } } + c.tracker.IncDeletesMiss() } // checkEviction checks if the cache is too big, and evicts the least recently used // item if it is. func (c *TagValueSeriesIDCache) checkEviction() { - if c.evictor.Len() <= c.capacity { + l := c.evictor.Len() + c.tracker.SetSize(uint64(l)) + if l <= c.capacity { return } @@ -184,6 +196,13 @@ func (c *TagValueSeriesIDCache) checkEviction() { if len(c.cache[string(name)]) == 0 { delete(c.cache, string(name)) } + c.tracker.IncEvictions() +} + +func (c *TagValueSeriesIDCache) PrometheusCollectors() []prometheus.Collector { + var collectors []prometheus.Collector + collectors = append(collectors, c.tracker.metrics.PrometheusCollectors()...) + return collectors } // seriesIDCacheElement is an item stored within a cache. @@ -193,3 +212,58 @@ type seriesIDCacheElement struct { value []byte SeriesIDSet *tsdb.SeriesIDSet } + +type cacheTracker struct { + metrics *cacheMetrics + labels prometheus.Labels +} + +func newCacheTracker(metrics *cacheMetrics, defaultLabels prometheus.Labels) *cacheTracker { + return &cacheTracker{metrics: metrics, labels: defaultLabels} +} + +// Labels returns a copy of labels for use with index cache metrics. 
+func (t *cacheTracker) Labels() prometheus.Labels { + l := make(map[string]string, len(t.labels)) + for k, v := range t.labels { + l[k] = v + } + return l +} + +func (t *cacheTracker) SetSize(sz uint64) { + labels := t.Labels() + t.metrics.Size.With(labels).Set(float64(sz)) +} + +func (t *cacheTracker) incGet(status string) { + labels := t.Labels() + labels["status"] = status + t.metrics.Gets.With(labels).Inc() +} + +func (t *cacheTracker) IncGetHit() { t.incGet("hit") } +func (t *cacheTracker) IncGetMiss() { t.incGet("miss") } + +func (t *cacheTracker) incPut(status string) { + labels := t.Labels() + labels["status"] = status + t.metrics.Puts.With(labels).Inc() +} + +func (t *cacheTracker) IncPutHit() { t.incPut("hit") } +func (t *cacheTracker) IncPutMiss() { t.incPut("miss") } + +func (t *cacheTracker) incDeletes(status string) { + labels := t.Labels() + labels["status"] = status + t.metrics.Deletes.With(labels).Inc() +} + +func (t *cacheTracker) IncDeletesHit() { t.incDeletes("hit") } +func (t *cacheTracker) IncDeletesMiss() { t.incDeletes("miss") } + +func (t *cacheTracker) IncEvictions() { + labels := t.Labels() + t.metrics.Evictions.With(labels).Inc() +} diff --git a/tsdb/tsi1/index.go b/tsdb/tsi1/index.go index 7411e04c31..3baf428a82 100644 --- a/tsdb/tsi1/index.go +++ b/tsdb/tsi1/index.go @@ -13,6 +13,8 @@ import ( "sync/atomic" "unsafe" + "github.com/prometheus/client_golang/prometheus" + "bytes" "sort" @@ -109,7 +111,10 @@ type Index struct { partitions []*Partition opened bool - tagValueCache *TagValueSeriesIDCache + defaultLabels prometheus.Labels + + tagValueCache *TagValueSeriesIDCache + partitionMetrics *partitionMetrics // Maintain a single set of partition metrics to be shared by partition. // The following may be set when initializing an Index. path string // Root directory of the index partitions. @@ -136,12 +141,13 @@ func (i *Index) UniqueReferenceID() uintptr { // NewIndex returns a new instance of Index. 
func NewIndex(sfile *tsdb.SeriesFile, c Config, options ...IndexOption) *Index { idx := &Index{ - tagValueCache: NewTagValueSeriesIDCache(DefaultSeriesIDSetCacheSize), - maxLogFileSize: int64(c.MaxIndexLogFileSize), - logger: zap.NewNop(), - version: Version, - sfile: sfile, - PartitionN: DefaultPartitionN, + tagValueCache: NewTagValueSeriesIDCache(DefaultSeriesIDSetCacheSize), + partitionMetrics: newPartitionMetrics(nil), + maxLogFileSize: int64(c.MaxIndexLogFileSize), + logger: zap.NewNop(), + version: Version, + sfile: sfile, + PartitionN: DefaultPartitionN, } for _, option := range options { @@ -151,6 +157,14 @@ func NewIndex(sfile *tsdb.SeriesFile, c Config, options ...IndexOption) *Index { return idx } +// SetDefaultMetricLabels sets the default labels on the trackers. +func (i *Index) SetDefaultMetricLabels(labels prometheus.Labels) { + i.defaultLabels = make(prometheus.Labels, len(labels)) + for k, v := range labels { + i.defaultLabels[k] = v + } +} + // Bytes estimates the memory footprint of this Index, in bytes. func (i *Index) Bytes() int { var b int @@ -210,6 +224,18 @@ func (i *Index) Open() error { return err } + mmu.Lock() + if cms == nil { + cms = newCacheMetrics(i.defaultLabels) + } + if pms == nil { + pms = newPartitionMetrics(i.defaultLabels) + } + mmu.Unlock() + + // Set the correct shared metrics on the cache + i.tagValueCache.tracker = newCacheTracker(cms, i.defaultLabels) + // Initialize index partitions. i.partitions = make([]*Partition, i.PartitionN) for j := 0; j < len(i.partitions); j++ { @@ -218,6 +244,15 @@ func (i *Index) Open() error { p.nosync = i.disableFsync p.logbufferSize = i.logfileBufferSize p.logger = i.logger.With(zap.String("tsi1_partition", fmt.Sprint(j+1))) + + // Each of the trackers needs to be given slightly different default + // labels to ensure the correct partition ids are set as labels. 
+ labels := make(prometheus.Labels, len(i.defaultLabels)) + for k, v := range i.defaultLabels { + labels[k] = v + } + labels["index_partition"] = fmt.Sprint(j) + p.tracker = newPartitionTracker(pms, labels) i.partitions[j] = p } diff --git a/tsdb/tsi1/metrics.go b/tsdb/tsi1/metrics.go new file mode 100644 index 0000000000..84ac5aab56 --- /dev/null +++ b/tsdb/tsi1/metrics.go @@ -0,0 +1,228 @@ +package tsi1 + +import ( + "sort" + "sync" + + "github.com/prometheus/client_golang/prometheus" +) + +// The following package variables act as singletons, to be shared by all +// storage.Engine instantiations. This allows multiple TSI indexes to be +// monitored within the same process. +var ( + cms *cacheMetrics // TSI index cache metrics + pms *partitionMetrics // TSI partition metrics + mmu sync.RWMutex +) + +// PrometheusCollectors returns all prometheus metrics for the tsi1 package. +func PrometheusCollectors() []prometheus.Collector { + mmu.RLock() + defer mmu.RUnlock() + + var collectors []prometheus.Collector + if cms != nil { + collectors = append(collectors, cms.PrometheusCollectors()...) + } + if pms != nil { + collectors = append(collectors, pms.PrometheusCollectors()...) + } + return collectors +} + +// namespace is the leading part of all published metrics for the Storage service. +const namespace = "storage" + +const cacheSubsystem = "tsi_cache" // sub-system associated with TSI index cache. +const partitionSubsystem = "tsi_index" // sub-system associated with the TSI index. + +type cacheMetrics struct { + Size *prometheus.GaugeVec // Size of the cache. + + // These metrics have an extra label status = {"hit", "miss"} + Gets *prometheus.CounterVec // Number of times item retrieved. + Puts *prometheus.CounterVec // Number of times item inserted. + Deletes *prometheus.CounterVec // Number of times item deleted. + Evictions *prometheus.CounterVec // Number of times item evicted. 
+} + +// newCacheMetrics initialises the prometheus metrics for tracking the TSI index cache. +func newCacheMetrics(labels prometheus.Labels) *cacheMetrics { + var names []string + for k := range labels { + names = append(names, k) + } + sort.Strings(names) + + statusNames := append(append([]string(nil), names...), "status") + sort.Strings(statusNames) + + return &cacheMetrics{ + Size: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "size", + Help: "Number of items residing in the cache.", + }, names), + Gets: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "get_total", + Help: "Total number of gets on cache.", + }, statusNames), + Puts: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "put_total", + Help: "Total number of insertions in cache.", + }, statusNames), + Deletes: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "deletes_total", + Help: "Total number of deletions in cache.", + }, statusNames), + Evictions: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "evictions_total", + Help: "Total number of cache evictions.", + }, names), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. +func (m *cacheMetrics) PrometheusCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.Size, + m.Gets, + m.Puts, + m.Deletes, + m.Evictions, + } +} + +type partitionMetrics struct { + SeriesCreated *prometheus.CounterVec // Number of series created in the partition. + SeriesCreatedDuration *prometheus.HistogramVec // Distribution of time to insert series. + SeriesDropped *prometheus.CounterVec // Number of series removed from index. + Series *prometheus.GaugeVec // Number of series. 
+ Measurements *prometheus.GaugeVec // Number of measurements. + DiskSize *prometheus.GaugeVec // Size occupied on disk. + + // This metric has a "type" label = {index, log} + FilesTotal *prometheus.GaugeVec // files on disk. + + // This metric has a "level" label. + CompactionsActive *prometheus.GaugeVec // Number of active compactions. + + // These metrics have a "level" label. + // The following metrics include a "status" = {ok, error} label + CompactionDuration *prometheus.HistogramVec // Duration of compactions. + Compactions *prometheus.CounterVec // Total number of compactions. +} + +// newPartitionMetrics initialises the prometheus metrics for tracking the TSI partitions. +func newPartitionMetrics(labels prometheus.Labels) *partitionMetrics { + names := []string{"index_partition"} // All metrics have a partition + for k := range labels { + names = append(names, k) + } + sort.Strings(names) + + // type = {"index", "log"} + fileNames := append(append([]string(nil), names...), "type") + sort.Strings(fileNames) + + // level = [0, 7] + compactionNames := append(append([]string(nil), names...), "level") + sort.Strings(compactionNames) + + // status = {"ok", "error"} + attemptedCompactionNames := append(append([]string(nil), compactionNames...), "status") + sort.Strings(attemptedCompactionNames) + + return &partitionMetrics{ + SeriesCreated: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "series_created", + Help: "Number of series created in the partition.", + }, names), + SeriesCreatedDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "series_created_duration_ns", + Help: "Time taken in nanoseconds to create a single series.", + // 30 buckets spaced exponentially between 100ns and ~19 us. 
+ Buckets: prometheus.ExponentialBuckets(100.0, 1.2, 30), + }, names), + SeriesDropped: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "series_dropped", + Help: "Number of series dropped from the partition.", + }, names), + Series: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "series_total", + Help: "Number of series in the partition.", + }, names), + Measurements: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "measurements_total", + Help: "Number of measurements in the partition.", + }, names), + FilesTotal: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "files_total", + Help: "Number of files in the partition.", + }, fileNames), + DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "disk_bytes", + Help: "Number of bytes TSI partition is using on disk.", + }, names), + CompactionsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "compactions_active", + Help: "Number of active partition compactions.", + }, compactionNames), + CompactionDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "compactions_duration_seconds", + Help: "Time taken for a successful compaction of the partition.", + // 30 buckets spaced exponentially between 1s and ~10 minutes. 
+ Buckets: prometheus.ExponentialBuckets(1.0, 1.25, 30), + }, compactionNames), + Compactions: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: partitionSubsystem, + Name: "compactions_total", + Help: "Number of compactions.", + }, attemptedCompactionNames), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. +func (m *partitionMetrics) PrometheusCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.SeriesCreated, + m.SeriesCreatedDuration, + m.SeriesDropped, + m.Series, + m.Measurements, + m.FilesTotal, + m.DiskSize, + m.CompactionsActive, + m.CompactionDuration, + m.Compactions, + } +} diff --git a/tsdb/tsi1/metrics_test.go b/tsdb/tsi1/metrics_test.go new file mode 100644 index 0000000000..2f054a74fd --- /dev/null +++ b/tsdb/tsi1/metrics_test.go @@ -0,0 +1,232 @@ +package tsi1 + +import ( + "testing" + + "github.com/influxdata/platform/kit/prom/promtest" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +func TestMetrics_Cache(t *testing.T) { + // metrics to be shared by multiple file stores. + metrics := newCacheMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) + + t1 := newCacheTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) + t2 := newCacheTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) + + reg := prometheus.NewRegistry() + reg.MustRegister(metrics.PrometheusCollectors()...) + + base := namespace + "_" + cacheSubsystem + "_" + + // All the metric names + gauges := []string{base + "size"} + + counters := []string{ + base + "get_total", + base + "put_total", + base + "deletes_total", + base + "evictions_total", + } + + // Generate some measurements. 
+ for i, tracker := range []*cacheTracker{t1, t2} { + tracker.SetSize(uint64(i + len(gauges[0]))) + + labels := tracker.Labels() + labels["status"] = "hit" + tracker.metrics.Gets.With(labels).Add(float64(i + len(counters[0]))) + tracker.metrics.Puts.With(labels).Add(float64(i + len(counters[1]))) + tracker.metrics.Deletes.With(labels).Add(float64(i + len(counters[2]))) + + tracker.metrics.Evictions.With(tracker.Labels()).Add(float64(i + len(counters[3]))) + } + + // Test that all the correct metrics are present. + mfs, err := reg.Gather() + if err != nil { + t.Fatal(err) + } + + // The label variants for the two caches. + labelVariants := []prometheus.Labels{ + prometheus.Labels{"engine_id": "0", "node_id": "0"}, + prometheus.Labels{"engine_id": "1", "node_id": "0"}, + } + + for i, labels := range labelVariants { + for _, name := range gauges { + exp := float64(i + len(name)) + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetGauge().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + var metric *dto.Metric + for _, name := range counters { + exp := float64(i + len(name)) + + if name != counters[3] { + // Make a copy since we need to add a label + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["status"] = "hit" + + metric = promtest.MustFindMetric(t, mfs, name, l) + } else { + metric = promtest.MustFindMetric(t, mfs, name, labels) + } + + if got := metric.GetCounter().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + } +} + +func TestMetrics_Partition(t *testing.T) { + // metrics to be shared by multiple file stores. 
+ metrics := newPartitionMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) + + t1 := newPartitionTracker(metrics, prometheus.Labels{"engine_id": "0", "index_partition": "0", "node_id": "0"}) + t2 := newPartitionTracker(metrics, prometheus.Labels{"engine_id": "1", "index_partition": "0", "node_id": "0"}) + + reg := prometheus.NewRegistry() + reg.MustRegister(metrics.PrometheusCollectors()...) + + base := namespace + "_" + partitionSubsystem + "_" + + // All the metric names + gauges := []string{ + base + "series_total", + base + "measurements_total", + base + "files_total", + base + "disk_bytes", + base + "compactions_active", + } + + counters := []string{ + base + "series_created", + base + "series_dropped", + base + "compactions_total", + } + + histograms := []string{ + base + "series_created_duration_ns", + base + "compactions_duration_seconds", + } + + // Generate some measurements. + for i, tracker := range []*partitionTracker{t1, t2} { + tracker.SetSeries(uint64(i + len(gauges[0]))) + tracker.SetMeasurements(uint64(i + len(gauges[1]))) + labels := tracker.Labels() + labels["type"] = "index" + tracker.metrics.FilesTotal.With(labels).Add(float64(i + len(gauges[2]))) + tracker.SetDiskSize(uint64(i + len(gauges[3]))) + labels = tracker.Labels() + labels["level"] = "2" + tracker.metrics.CompactionsActive.With(labels).Add(float64(i + len(gauges[4]))) + + tracker.metrics.SeriesCreated.With(tracker.Labels()).Add(float64(i + len(counters[0]))) + tracker.AddSeriesDropped(uint64(i + len(counters[1]))) + labels = tracker.Labels() + labels["level"] = "2" + labels["status"] = "ok" + tracker.metrics.Compactions.With(labels).Add(float64(i + len(counters[2]))) + + tracker.metrics.SeriesCreatedDuration.With(tracker.Labels()).Observe(float64(i + len(histograms[0]))) + labels = tracker.Labels() + labels["level"] = "2" + tracker.metrics.CompactionDuration.With(labels).Observe(float64(i + len(histograms[1]))) + } + + // Test that all the correct metrics are present. 
+ mfs, err := reg.Gather() + if err != nil { + t.Fatal(err) + } + + // The label variants for the two caches. + labelVariants := []prometheus.Labels{ + prometheus.Labels{"engine_id": "0", "index_partition": "0", "node_id": "0"}, + prometheus.Labels{"engine_id": "1", "index_partition": "0", "node_id": "0"}, + } + + for j, labels := range labelVariants { + var metric *dto.Metric + + for i, name := range gauges { + exp := float64(j + len(name)) + + if i == 2 { + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["type"] = "index" + metric = promtest.MustFindMetric(t, mfs, name, l) + } else if i == 4 { + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["level"] = "2" + metric = promtest.MustFindMetric(t, mfs, name, l) + } else { + metric = promtest.MustFindMetric(t, mfs, name, labels) + } + + if got := metric.GetGauge().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for i, name := range counters { + exp := float64(j + len(name)) + + if i == 2 { + // Make a copy since we need to add a label + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["status"] = "ok" + l["level"] = "2" + + metric = promtest.MustFindMetric(t, mfs, name, l) + } else { + metric = promtest.MustFindMetric(t, mfs, name, labels) + } + + if got := metric.GetCounter().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for i, name := range histograms { + exp := float64(j + len(name)) + + if i == 1 { + // Make a copy since we need to add a label + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["level"] = "2" + + metric = promtest.MustFindMetric(t, mfs, name, l) + } else { + metric = promtest.MustFindMetric(t, mfs, name, labels) + } + + if got := metric.GetHistogram().GetSampleSum(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, 
exp) + } + } + } +} diff --git a/tsdb/tsi1/partition.go b/tsdb/tsi1/partition.go index fbfe1b7e8e..57e3809b7c 100644 --- a/tsdb/tsi1/partition.go +++ b/tsdb/tsi1/partition.go @@ -19,6 +19,7 @@ import ( "github.com/influxdata/platform/logger" "github.com/influxdata/platform/pkg/bytesutil" "github.com/influxdata/platform/tsdb" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) @@ -54,6 +55,8 @@ type Partition struct { // Measurement stats stats MeasurementCardinalityStats + tracker *partitionTracker + // Fast series lookup of series IDs in the series file that have been present // in this partition. This set tracks both insertions and deletions of a series. seriesIDSet *tsdb.SeriesIDSet @@ -92,7 +95,7 @@ type Partition struct { // NewPartition returns a new instance of Partition. func NewPartition(sfile *tsdb.SeriesFile, path string) *Partition { - return &Partition{ + partition := &Partition{ closing: make(chan struct{}), path: path, sfile: sfile, @@ -106,6 +109,10 @@ func NewPartition(sfile *tsdb.SeriesFile, path string) *Partition { logger: zap.NewNop(), version: Version, } + + defaultLabels := prometheus.Labels{"index_partition": ""} + partition.tracker = newPartitionTracker(newPartitionMetrics(nil), defaultLabels) + return partition } // bytes estimates the memory footprint of this Partition, in bytes. @@ -244,6 +251,10 @@ func (p *Partition) Open() error { if err := p.buildSeriesSet(); err != nil { return err } + p.tracker.SetSeries(p.seriesIDSet.Cardinality()) + p.tracker.SetFiles(uint64(len(p.fileSet.IndexFiles())), "index") + p.tracker.SetFiles(uint64(len(p.fileSet.LogFiles())), "log") + p.tracker.SetDiskSize(uint64(p.fileSet.Size())) // Mark opened. p.opened = true @@ -472,6 +483,11 @@ func (p *Partition) prependActiveLogFile() error { if err := p.writeStatsFile(); err != nil { return err } + + // Set the file metrics again. 
+ p.tracker.SetFiles(uint64(len(p.fileSet.IndexFiles())), "index") + p.tracker.SetFiles(uint64(len(p.fileSet.LogFiles())), "log") + p.tracker.SetDiskSize(uint64(p.fileSet.Size())) return nil } @@ -663,6 +679,7 @@ func (p *Partition) createSeriesListIfNotExists(collection *tsdb.SeriesCollectio defer fs.Release() // Ensure fileset cannot change during insert. + now := time.Now() p.mu.RLock() // Insert series into log file. ids, err := p.activeLogFile.AddSeriesList(p.seriesIDSet, collection) @@ -675,9 +692,28 @@ func (p *Partition) createSeriesListIfNotExists(collection *tsdb.SeriesCollectio if err := p.CheckLogFile(); err != nil { return nil, err } + + // NOTE(edd): if this becomes expensive then we can move the count into the + // log file. + var totalNew uint64 + for _, id := range ids { + if !id.IsZero() { + totalNew++ + } + } + if totalNew > 0 { + p.tracker.AddSeriesCreated(totalNew, time.Since(now)) + p.tracker.AddSeries(totalNew) + p.mu.RLock() + p.tracker.SetDiskSize(uint64(p.fileSet.Size())) + p.mu.RUnlock() + } return ids, nil } +// DropSeries removes the provided series id from the index. +// +// TODO(edd): We should support a bulk drop here. func (p *Partition) DropSeries(seriesID tsdb.SeriesID) error { // Ignore if the series is already deleted. if !p.seriesIDSet.Contains(seriesID) { @@ -691,6 +727,8 @@ func (p *Partition) DropSeries(seriesID tsdb.SeriesID) error { // Update series set. p.seriesIDSet.Remove(seriesID) + p.tracker.AddSeriesDropped(1) + p.tracker.SubSeries(1) // Swap log file, if necessary. return p.CheckLogFile() @@ -924,6 +962,23 @@ func (p *Partition) compactToLevel(files []*IndexFile, level int, interrupt <-ch assert(len(files) >= 2, "at least two index files are required for compaction") assert(level > 0, "cannot compact level zero") + var err error + var start time.Time + + p.tracker.IncActiveCompaction(level) + // Set the relevant metrics at the end of any compaction. 
+ defer func() { + p.mu.RLock() + defer p.mu.RUnlock() + p.tracker.SetFiles(uint64(len(p.fileSet.IndexFiles())), "index") + p.tracker.SetFiles(uint64(len(p.fileSet.LogFiles())), "log") + p.tracker.SetDiskSize(uint64(p.fileSet.Size())) + p.tracker.DecActiveCompaction(level) + + success := err == nil + p.tracker.CompactionAttempted(level, success, time.Since(start)) + }() + // Build a logger for this compaction. log, logEnd := logger.NewOperation(p.logger, "TSI level compaction", "tsi1_compact_to_level", zap.Int("tsi1_level", level)) defer logEnd() @@ -942,12 +997,12 @@ func (p *Partition) compactToLevel(files []*IndexFile, level int, interrupt <-ch defer once.Do(func() { IndexFiles(files).Release() }) // Track time to compact. - start := time.Now() + start = time.Now() // Create new index file. path := filepath.Join(p.path, FormatIndexFileName(p.NextSequence(), level)) - f, err := os.Create(path) - if err != nil { + var f *os.File + if f, err = os.Create(path); err != nil { log.Error("Cannot create compaction files", zap.Error(err)) return } @@ -960,14 +1015,14 @@ func (p *Partition) compactToLevel(files []*IndexFile, level int, interrupt <-ch // Compact all index files to new index file. lvl := p.levels[level] - n, err := IndexFiles(files).CompactTo(f, p.sfile, lvl.M, lvl.K, interrupt) - if err != nil { + var n int64 + if n, err = IndexFiles(files).CompactTo(f, p.sfile, lvl.M, lvl.K, interrupt); err != nil { log.Error("Cannot compact index files", zap.Error(err)) return } // Close file. - if err := f.Close(); err != nil { + if err = f.Close(); err != nil { log.Error("Error closing index file", zap.Error(err)) return } @@ -975,13 +1030,13 @@ func (p *Partition) compactToLevel(files []*IndexFile, level int, interrupt <-ch // Reopen as an index file. 
file := NewIndexFile(p.sfile) file.SetPath(path) - if err := file.Open(); err != nil { + if err = file.Open(); err != nil { log.Error("Cannot open new index file", zap.Error(err)) return } // Obtain lock to swap in index file and write manifest. - if err := func() error { + if err = func() error { p.mu.Lock() defer p.mu.Unlock() @@ -1021,10 +1076,10 @@ func (p *Partition) compactToLevel(files []*IndexFile, level int, interrupt <-ch for _, f := range files { log.Info("Removing index file", zap.String("path", f.Path())) - if err := f.Close(); err != nil { + if err = f.Close(); err != nil { log.Error("Cannot close index file", zap.Error(err)) return - } else if err := os.Remove(f.Path()); err != nil { + } else if err = os.Remove(f.Path()); err != nil { log.Error("Cannot remove index file", zap.Error(err)) return } @@ -1081,6 +1136,14 @@ func (p *Partition) compactLogFile(logFile *LogFile) { return } + defer func() { + p.mu.RLock() + defer p.mu.RUnlock() + p.tracker.SetFiles(uint64(len(p.fileSet.IndexFiles())), "index") + p.tracker.SetFiles(uint64(len(p.fileSet.LogFiles())), "log") + p.tracker.SetDiskSize(uint64(p.fileSet.Size())) + }() + p.mu.Lock() interrupt := p.compactionInterrupt p.mu.Unlock() @@ -1228,6 +1291,128 @@ func (p *Partition) MeasurementCardinalityStats() MeasurementCardinalityStats { return stats } +type partitionTracker struct { + metrics *partitionMetrics + labels prometheus.Labels +} + +func newPartitionTracker(metrics *partitionMetrics, defaultLabels prometheus.Labels) *partitionTracker { + return &partitionTracker{ + metrics: metrics, + labels: defaultLabels, + } +} + +// Labels returns a copy of labels for use with index partition metrics. +func (t *partitionTracker) Labels() prometheus.Labels { + l := make(map[string]string, len(t.labels)) + for k, v := range t.labels { + l[k] = v + } + return l +} + +// AddSeriesCreated increases the number of series created in the partition by n +// and sets a sample of the time taken to create a series. 
+func (t *partitionTracker) AddSeriesCreated(n uint64, d time.Duration) { + labels := t.Labels() + t.metrics.SeriesCreated.With(labels).Add(float64(n)) + + if n == 0 { + return // Nothing to record + } + + perseries := d.Seconds() / float64(n) + t.metrics.SeriesCreatedDuration.With(labels).Observe(perseries) +} + +// AddSeriesDropped increases the number of series dropped in the partition by n. +func (t *partitionTracker) AddSeriesDropped(n uint64) { + labels := t.Labels() + t.metrics.SeriesDropped.With(labels).Add(float64(n)) +} + +// SetSeries sets the number of series in the partition. +func (t *partitionTracker) SetSeries(n uint64) { + labels := t.Labels() + t.metrics.Series.With(labels).Set(float64(n)) +} + +// AddSeries increases the number of series in the partition by n. +func (t *partitionTracker) AddSeries(n uint64) { + labels := t.Labels() + t.metrics.Series.With(labels).Add(float64(n)) +} + +// SubSeries decreases the number of series in the partition by n. +func (t *partitionTracker) SubSeries(n uint64) { + labels := t.Labels() + t.metrics.Series.With(labels).Sub(float64(n)) +} + +// SetMeasurements sets the number of measurements in the partition. +func (t *partitionTracker) SetMeasurements(n uint64) { + labels := t.Labels() + t.metrics.Measurements.With(labels).Set(float64(n)) +} + +// AddMeasurements increases the number of measurements in the partition by n. +func (t *partitionTracker) AddMeasurements(n uint64) { + labels := t.Labels() + t.metrics.Measurements.With(labels).Add(float64(n)) +} + +// SubMeasurements decreases the number of measurements in the partition by n. +func (t *partitionTracker) SubMeasurements(n uint64) { + labels := t.Labels() + t.metrics.Measurements.With(labels).Sub(float64(n)) +} + +// SetFiles sets the number of files in the partition. 
+func (t *partitionTracker) SetFiles(n uint64, typ string) { + labels := t.Labels() + labels["type"] = typ + t.metrics.FilesTotal.With(labels).Set(float64(n)) +} + +// SetDiskSize sets the size of files in the partition. +func (t *partitionTracker) SetDiskSize(n uint64) { + labels := t.Labels() + t.metrics.DiskSize.With(labels).Set(float64(n)) +} + +// IncActiveCompaction increments the number of active compactions for the provided level. +func (t *partitionTracker) IncActiveCompaction(level int) { + labels := t.Labels() + labels["level"] = fmt.Sprint(level) + + t.metrics.CompactionsActive.With(labels).Inc() +} + +// DecActiveCompaction decrements the number of active compactions for the provided level. +func (t *partitionTracker) DecActiveCompaction(level int) { + labels := t.Labels() + labels["level"] = fmt.Sprint(level) + + t.metrics.CompactionsActive.With(labels).Dec() +} + +// CompactionAttempted updates the number of compactions attempted for the provided level. +func (t *partitionTracker) CompactionAttempted(level int, success bool, d time.Duration) { + labels := t.Labels() + labels["level"] = fmt.Sprint(level) + if success { + t.metrics.CompactionDuration.With(labels).Observe(d.Seconds()) + + labels["status"] = "ok" + t.metrics.Compactions.With(labels).Inc() + return + } + + labels["status"] = "error" + t.metrics.Compactions.With(labels).Inc() +} + // unionStringSets returns the union of two sets func unionStringSets(a, b map[string]struct{}) map[string]struct{} { other := make(map[string]struct{}) diff --git a/tsdb/tsm1/bit_reader_test.go b/tsdb/tsm1/bit_reader_test.go index 0360a13531..f5d3150f1e 100644 --- a/tsdb/tsm1/bit_reader_test.go +++ b/tsdb/tsm1/bit_reader_test.go @@ -9,7 +9,7 @@ import ( "testing" "testing/quick" - "github.com/dgryski/go-bitstream" + bitstream "github.com/dgryski/go-bitstream" "github.com/influxdata/platform/tsdb/tsm1" ) diff --git a/tsdb/tsm1/cache.go b/tsdb/tsm1/cache.go index a075ed21e8..1302b5b3a5 100644 --- 
a/tsdb/tsm1/cache.go +++ b/tsdb/tsm1/cache.go @@ -11,6 +11,7 @@ import ( "github.com/influxdata/influxql" "github.com/influxdata/platform/models" "github.com/influxdata/platform/tsdb" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) @@ -143,25 +144,6 @@ func (e *entry) InfluxQLType() (influxql.DataType, error) { return e.values.InfluxQLType() } -// Statistics gathered by the Cache. -const ( - // levels - point in time measures - - statCacheMemoryBytes = "memBytes" // level: Size of in-memory cache in bytes - statCacheDiskBytes = "diskBytes" // level: Size of on-disk snapshots in bytes - statSnapshots = "snapshotCount" // level: Number of active snapshots. - statCacheAgeMs = "cacheAgeMs" // level: Number of milliseconds since cache was last snapshoted at sample time - - // counters - accumulative measures - - statCachedBytes = "cachedBytes" // counter: Total number of bytes written into snapshots. - statWALCompactionTimeMs = "WALCompactionTimeMs" // counter: Total number of milliseconds spent compacting snapshots - - statCacheWriteOK = "writeOk" - statCacheWriteErr = "writeErr" - statCacheWriteDropped = "writeDropped" -) - // storer is the interface that descibes a cache's store. type storer interface { entry(key []byte) *entry // Get an entry by its key. @@ -178,12 +160,7 @@ type storer interface { // Cache maintains an in-memory store of Values for a set of keys. type Cache struct { - // Due to a bug in atomic size needs to be the first word in the struct, as - // that's the only place where you're guaranteed to be 64-bit aligned on a - // 32 bit system. See: https://golang.org/pkg/sync/atomic/#pkg-note-BUG - size uint64 - snapshotSize uint64 - + _ uint64 // Padding for 32 bit struct alignment mu sync.RWMutex store storer maxSize uint64 @@ -194,10 +171,7 @@ type Cache struct { snapshot *Cache snapshotting bool - // This number is the number of pending or failed WriteSnaphot attempts since the last successful one. 
- snapshotAttempts int - - stats *CacheStatistics + tracker *cacheTracker lastSnapshot time.Time lastWriteTime time.Time @@ -213,50 +187,13 @@ func NewCache(maxSize uint64) *Cache { c := &Cache{ maxSize: maxSize, store: emptyStore{}, - stats: &CacheStatistics{}, lastSnapshot: time.Now(), + tracker: newCacheTracker(newCacheMetrics(nil), nil), } c.initialize.Store(&sync.Once{}) - c.UpdateAge() - c.UpdateCompactTime(0) - c.updateCachedBytes(0) - c.updateMemSize(0) - c.updateSnapshots() return c } -// CacheStatistics hold statistics related to the cache. -type CacheStatistics struct { - MemSizeBytes int64 - DiskSizeBytes int64 - SnapshotCount int64 - CacheAgeMs int64 - CachedBytes int64 - WALCompactionTimeMs int64 - WriteOK int64 - WriteErr int64 - WriteDropped int64 -} - -// Statistics returns statistics for periodic monitoring. -func (c *Cache) Statistics(tags map[string]string) []models.Statistic { - return []models.Statistic{{ - Name: "tsm1_cache", - Tags: tags, - Values: map[string]interface{}{ - statCacheMemoryBytes: atomic.LoadInt64(&c.stats.MemSizeBytes), - statCacheDiskBytes: atomic.LoadInt64(&c.stats.DiskSizeBytes), - statSnapshots: atomic.LoadInt64(&c.stats.SnapshotCount), - statCacheAgeMs: atomic.LoadInt64(&c.stats.CacheAgeMs), - statCachedBytes: atomic.LoadInt64(&c.stats.CachedBytes), - statWALCompactionTimeMs: atomic.LoadInt64(&c.stats.WALCompactionTimeMs), - statCacheWriteOK: atomic.LoadInt64(&c.stats.WriteOK), - statCacheWriteErr: atomic.LoadInt64(&c.stats.WriteErr), - statCacheWriteDropped: atomic.LoadInt64(&c.stats.WriteDropped), - }, - }} -} - // init initializes the cache and allocates the underlying store. Once initialized, // the store re-used until Freed. 
func (c *Cache) init() { @@ -291,13 +228,15 @@ func (c *Cache) Write(key []byte, values []Value) error { n := c.Size() + addedSize if limit > 0 && n > limit { - atomic.AddInt64(&c.stats.WriteErr, 1) + c.tracker.IncWritesErr() + c.tracker.AddWrittenBytesDrop(uint64(addedSize)) return ErrCacheMemorySizeLimitExceeded(n, limit) } newKey, err := c.store.write(key, values) if err != nil { - atomic.AddInt64(&c.stats.WriteErr, 1) + c.tracker.IncWritesErr() + c.tracker.AddWrittenBytesErr(uint64(addedSize)) return err } @@ -305,9 +244,10 @@ func (c *Cache) Write(key []byte, values []Value) error { addedSize += uint64(len(key)) } // Update the cache size and the memory size stat. - c.increaseSize(addedSize) - c.updateMemSize(int64(addedSize)) - atomic.AddInt64(&c.stats.WriteOK, 1) + c.tracker.IncCacheSize(addedSize) + c.tracker.AddMemBytes(addedSize) + c.tracker.AddWrittenBytesOK(uint64(addedSize)) + c.tracker.IncWritesOK() return nil } @@ -328,7 +268,8 @@ func (c *Cache) WriteMulti(values map[string][]Value) error { limit := c.maxSize // maxSize is safe for reading without a lock. n := c.Size() + addedSize if limit > 0 && n > limit { - atomic.AddInt64(&c.stats.WriteErr, 1) + c.tracker.IncWritesErr() + c.tracker.AddWrittenBytesDrop(uint64(addedSize)) return ErrCacheMemorySizeLimitExceeded(n, limit) } @@ -337,32 +278,36 @@ func (c *Cache) WriteMulti(values map[string][]Value) error { store := c.store c.mu.RUnlock() - // We'll optimistially set size here, and then decrement it for write errors. - c.increaseSize(addedSize) + var bytesWrittenErr uint64 + + // We'll optimistically set size here, and then decrement it for write errors. for k, v := range values { newKey, err := store.write([]byte(k), v) if err != nil { // The write failed, hold onto the error and adjust the size delta. 
werr = err addedSize -= uint64(Values(v).Size()) - c.decreaseSize(uint64(Values(v).Size())) + bytesWrittenErr += uint64(Values(v).Size()) } + if newKey { addedSize += uint64(len(k)) - c.increaseSize(uint64(len(k))) } } // Some points in the batch were dropped. An error is returned so // error stat is incremented as well. if werr != nil { - atomic.AddInt64(&c.stats.WriteDropped, 1) - atomic.AddInt64(&c.stats.WriteErr, 1) + c.tracker.IncWritesErr() + c.tracker.IncWritesDrop() + c.tracker.AddWrittenBytesErr(bytesWrittenErr) } // Update the memory size stat - c.updateMemSize(int64(addedSize)) - atomic.AddInt64(&c.stats.WriteOK, 1) + c.tracker.IncCacheSize(addedSize) + c.tracker.AddMemBytes(addedSize) + c.tracker.IncWritesOK() + c.tracker.AddWrittenBytesOK(addedSize) c.mu.Lock() c.lastWriteTime = time.Now() @@ -384,7 +329,7 @@ func (c *Cache) Snapshot() (*Cache, error) { } c.snapshotting = true - c.snapshotAttempts++ // increment the number of times we tried to do this + c.tracker.IncSnapshotsActive() // increment the number of times we tried to do this // If no snapshot exists, create a new one, otherwise update the existing snapshot if c.snapshot == nil { @@ -394,7 +339,8 @@ func (c *Cache) Snapshot() (*Cache, error) { } c.snapshot = &Cache{ - store: store, + store: store, + tracker: newCacheTracker(c.tracker.metrics, c.tracker.labels), } } @@ -407,18 +353,17 @@ func (c *Cache) Snapshot() (*Cache, error) { c.snapshot.store, c.store = c.store, c.snapshot.store snapshotSize := c.Size() - // Save the size of the snapshot on the snapshot cache - atomic.StoreUint64(&c.snapshot.size, snapshotSize) - // Save the size of the snapshot on the live cache - atomic.StoreUint64(&c.snapshotSize, snapshotSize) + c.snapshot.tracker.SetSnapshotSize(snapshotSize) // Save the size of the snapshot on the snapshot cache + c.tracker.SetSnapshotSize(snapshotSize) // Save the size of the snapshot on the live cache // Reset the cache's store. 
c.store.reset() - atomic.StoreUint64(&c.size, 0) + c.tracker.SetCacheSize(0) c.lastSnapshot = time.Now() - c.updateCachedBytes(snapshotSize) // increment the number of bytes added to the snapshot - c.updateSnapshots() + c.tracker.AddSnapshottedBytes(snapshotSize) // increment the number of bytes added to the snapshot + c.tracker.SetDiskBytes(0) + c.tracker.SetSnapshotsActive(0) return c.snapshot, nil } @@ -455,33 +400,25 @@ func (c *Cache) ClearSnapshot(success bool) { c.snapshotting = false if success { - c.snapshotAttempts = 0 - c.updateMemSize(-int64(atomic.LoadUint64(&c.snapshotSize))) // decrement the number of bytes in cache + snapshotSize := c.tracker.SnapshotSize() + c.tracker.SetSnapshotsActive(0) + c.tracker.SubMemBytes(snapshotSize) // decrement the number of bytes in cache // Reset the snapshot to a fresh Cache. c.snapshot = &Cache{ - store: c.snapshot.store, + store: c.snapshot.store, + tracker: newCacheTracker(c.tracker.metrics, c.tracker.labels), } - atomic.StoreUint64(&c.snapshotSize, 0) - c.updateSnapshots() + c.tracker.SetSnapshotSize(0) + c.tracker.SetDiskBytes(0) + c.tracker.SetSnapshotsActive(0) } } // Size returns the number of point-calcuated bytes the cache currently uses. func (c *Cache) Size() uint64 { - return atomic.LoadUint64(&c.size) + atomic.LoadUint64(&c.snapshotSize) -} - -// increaseSize increases size by delta. -func (c *Cache) increaseSize(delta uint64) { - atomic.AddUint64(&c.size, delta) -} - -// decreaseSize decreases size by delta. -func (c *Cache) decreaseSize(delta uint64) { - // Per sync/atomic docs, bit-flip delta minus one to perform subtraction within AddUint64. - atomic.AddUint64(&c.size, ^(delta - 1)) + return c.tracker.CacheSize() + c.tracker.SnapshotSize() } // MaxSize returns the maximum number of bytes the cache may consume. 
@@ -623,6 +560,7 @@ func (c *Cache) DeleteRange(keys [][]byte, min, max int64) { c.mu.Lock() defer c.mu.Unlock() + var total uint64 for _, k := range keys { // Make sure key exist in the cache, skip if it does not e := c.store.entry(k) @@ -630,23 +568,28 @@ func (c *Cache) DeleteRange(keys [][]byte, min, max int64) { continue } - origSize := uint64(e.size()) + total += uint64(e.size()) + // Everything is being deleted. if min == math.MinInt64 && max == math.MaxInt64 { - c.decreaseSize(origSize + uint64(len(k))) + total += uint64(len(k)) // all entries and the key. c.store.remove(k) continue } + // Filter what to delete by time range. e.filter(min, max) if e.count() == 0 { + // Nothing left in cache for that key + total += uint64(len(k)) // all entries and the key. c.store.remove(k) - c.decreaseSize(origSize + uint64(len(k))) continue } - c.decreaseSize(origSize - uint64(e.size())) + // Just update what is being deleted by the size of the filtered entries. + total -= uint64(e.size()) } - atomic.StoreInt64(&c.stats.MemSizeBytes, int64(c.Size())) + c.tracker.DecCacheSize(total) // Decrease the live cache size. + c.tracker.SetMemBytes(uint64(c.Size())) } // SetMaxSize updates the memory limit of the cache. @@ -777,23 +720,167 @@ func (c *Cache) LastWriteTime() time.Time { func (c *Cache) UpdateAge() { c.mu.RLock() defer c.mu.RUnlock() - ageStat := int64(time.Since(c.lastSnapshot) / time.Millisecond) - atomic.StoreInt64(&c.stats.CacheAgeMs, ageStat) + c.tracker.SetAge(time.Since(c.lastSnapshot)) } -// UpdateCompactTime updates WAL compaction time statistic based on d. -func (c *Cache) UpdateCompactTime(d time.Duration) { - atomic.AddInt64(&c.stats.WALCompactionTimeMs, int64(d/time.Millisecond)) +// cacheTracker tracks writes to the cache and snapshots. +// +// As well as being responsible for providing atomic reads and writes to the +// statistics, cacheTracker also mirrors any changes to the external prometheus +// metrics, which the Engine exposes. 
+// +// *NOTE* - cacheTracker fields should not be directory modified. Doing so +// could result in the Engine exposing inaccurate metrics. +type cacheTracker struct { + metrics *cacheMetrics + labels prometheus.Labels + snapshotsActive uint64 + snapshotSize uint64 + cacheSize uint64 + + // Used in testing. + memSizeBytes uint64 + snapshottedBytes uint64 + writesDropped uint64 + writesErr uint64 } -// updateCachedBytes increases the cachedBytes counter by b. -func (c *Cache) updateCachedBytes(b uint64) { - atomic.AddInt64(&c.stats.CachedBytes, int64(b)) +func newCacheTracker(metrics *cacheMetrics, defaultLabels prometheus.Labels) *cacheTracker { + return &cacheTracker{metrics: metrics, labels: defaultLabels} } -// updateMemSize updates the memSize level by b. -func (c *Cache) updateMemSize(b int64) { - atomic.AddInt64(&c.stats.MemSizeBytes, b) +// Labels returns a copy of the default labels used by the tracker's metrics. +// The returned map is safe for modification. +func (t *cacheTracker) Labels() prometheus.Labels { + labels := make(prometheus.Labels, len(t.labels)) + for k, v := range t.labels { + labels[k] = v + } + return labels +} + +// AddMemBytes increases the number of in-memory cache bytes. +func (t *cacheTracker) AddMemBytes(bytes uint64) { + atomic.AddUint64(&t.memSizeBytes, bytes) + + labels := t.labels + t.metrics.MemSize.With(labels).Add(float64(bytes)) +} + +// SubMemBytes decreases the number of in-memory cache bytes. +func (t *cacheTracker) SubMemBytes(bytes uint64) { + atomic.AddUint64(&t.memSizeBytes, ^(bytes - 1)) + + labels := t.labels + t.metrics.MemSize.With(labels).Sub(float64(bytes)) +} + +// SetMemBytes sets the number of in-memory cache bytes. +func (t *cacheTracker) SetMemBytes(bytes uint64) { + atomic.StoreUint64(&t.memSizeBytes, bytes) + + labels := t.labels + t.metrics.MemSize.With(labels).Set(float64(bytes)) +} + +// AddBytesWritten increases the number of bytes written to the cache. 
+func (t *cacheTracker) AddBytesWritten(bytes uint64) { + labels := t.labels + t.metrics.MemSize.With(labels).Add(float64(bytes)) +} + +// AddSnapshottedBytes increases the number of bytes snapshotted. +func (t *cacheTracker) AddSnapshottedBytes(bytes uint64) { + atomic.AddUint64(&t.snapshottedBytes, bytes) + + labels := t.labels + t.metrics.SnapshottedBytes.With(labels).Add(float64(bytes)) +} + +// SetDiskBytes sets the number of bytes on disk used by snapshot data. +func (t *cacheTracker) SetDiskBytes(bytes uint64) { + labels := t.labels + t.metrics.DiskSize.With(labels).Set(float64(bytes)) +} + +// IncSnapshotsActive increases the number of active snapshots. +func (t *cacheTracker) IncSnapshotsActive() { + atomic.AddUint64(&t.snapshotsActive, 1) + + labels := t.labels + t.metrics.SnapshotsActive.With(labels).Inc() +} + +// SetSnapshotsActive sets the number of bytes on disk used by snapshot data. +func (t *cacheTracker) SetSnapshotsActive(n uint64) { + atomic.StoreUint64(&t.snapshotsActive, n) + + labels := t.labels + t.metrics.SnapshotsActive.With(labels).Set(float64(n)) +} + +// AddWrittenBytes increases the number of bytes written to the cache, with a required status. +func (t *cacheTracker) AddWrittenBytes(status string, bytes uint64) { + labels := t.Labels() + labels["status"] = status + t.metrics.WrittenBytes.With(labels).Add(float64(bytes)) +} + +// AddWrittenBytesOK increments the number of successful writes. +func (t *cacheTracker) AddWrittenBytesOK(bytes uint64) { t.AddWrittenBytes("ok", bytes) } + +// AddWrittenBytesError increments the number of writes that encountered an error. +func (t *cacheTracker) AddWrittenBytesErr(bytes uint64) { t.AddWrittenBytes("error", bytes) } + +// AddWrittenBytesDrop increments the number of writes that were dropped. +func (t *cacheTracker) AddWrittenBytesDrop(bytes uint64) { t.AddWrittenBytes("dropped", bytes) } + +// IncWrites increments the number of writes to the cache, with a required status. 
+func (t *cacheTracker) IncWrites(status string) { + labels := t.Labels() + labels["status"] = status + t.metrics.Writes.With(labels).Inc() +} + +// IncWritesOK increments the number of successful writes. +func (t *cacheTracker) IncWritesOK() { t.IncWrites("ok") } + +// IncWritesError increments the number of writes that encountered an error. +func (t *cacheTracker) IncWritesErr() { + atomic.AddUint64(&t.writesErr, 1) + + t.IncWrites("error") +} + +// IncWritesDrop increments the number of writes that were dropped. +func (t *cacheTracker) IncWritesDrop() { + atomic.AddUint64(&t.writesDropped, 1) + + t.IncWrites("dropped") +} + +// CacheSize returns the live cache size. +func (t *cacheTracker) CacheSize() uint64 { return atomic.LoadUint64(&t.cacheSize) } + +// IncCacheSize increases the live cache size by sz bytes. +func (t *cacheTracker) IncCacheSize(sz uint64) { atomic.AddUint64(&t.cacheSize, sz) } + +// DecCacheSize decreases the live cache size by sz bytes. +func (t *cacheTracker) DecCacheSize(sz uint64) { atomic.AddUint64(&t.cacheSize, ^(sz - 1)) } + +// SetCacheSize sets the live cache size to sz. +func (t *cacheTracker) SetCacheSize(sz uint64) { atomic.StoreUint64(&t.cacheSize, sz) } + +// SetSnapshotSize sets the last successful snapshot size. +func (t *cacheTracker) SetSnapshotSize(sz uint64) { atomic.StoreUint64(&t.snapshotSize, sz) } + +// SnapshotSize returns the last successful snapshot size. +func (t *cacheTracker) SnapshotSize() uint64 { return atomic.LoadUint64(&t.snapshotSize) } + +// SetAge sets the time since the last successful snapshot +func (t *cacheTracker) SetAge(d time.Duration) { + labels := t.Labels() + t.metrics.Age.With(labels).Set(d.Seconds()) } func valueType(v Value) byte { @@ -811,13 +898,6 @@ func valueType(v Value) byte { } } -// updateSnapshots updates the snapshotsCount and the diskSize levels. 
-func (c *Cache) updateSnapshots() { - // Update disk stats - atomic.StoreInt64(&c.stats.DiskSizeBytes, int64(atomic.LoadUint64(&c.snapshotSize))) - atomic.StoreInt64(&c.stats.SnapshotCount, int64(c.snapshotAttempts)) -} - type emptyStore struct{} func (e emptyStore) entry(key []byte) *entry { return nil } diff --git a/tsdb/tsm1/cache_test.go b/tsdb/tsm1/cache_test.go index 0f0dff9673..a5f107529c 100644 --- a/tsdb/tsm1/cache_test.go +++ b/tsdb/tsm1/cache_test.go @@ -138,9 +138,9 @@ func TestCache_WriteMulti_Stats(t *testing.T) { } // Write stats updated - if got, exp := c.stats.WriteDropped, int64(1); got != exp { + if got, exp := atomic.LoadUint64(&c.tracker.writesDropped), uint64(1); got != exp { t.Fatalf("got %v, expected %v", got, exp) - } else if got, exp := c.stats.WriteErr, int64(1); got != exp { + } else if got, exp := atomic.LoadUint64(&c.tracker.writesErr), uint64(1); got != exp { t.Fatalf("got %v, expected %v", got, exp) } } @@ -190,11 +190,11 @@ func TestCache_Cache_DeleteRange(t *testing.T) { c.DeleteRange([][]byte{[]byte("bar")}, 2, math.MaxInt64) if exp, keys := [][]byte{[]byte("bar"), []byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) + t.Fatalf("cache keys incorrect after delete, exp %v, got %v", exp, keys) } if got, exp := c.Size(), valuesSize+uint64(v0.Size())+6; exp != got { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", exp, got) + t.Fatalf("cache size incorrect after delete, exp %d, got %d", exp, got) } if got, exp := len(c.Values([]byte("bar"))), 1; got != exp { @@ -479,7 +479,7 @@ func TestCache_Snapshot_Stats(t *testing.T) { t.Fatal(err) } - if got, exp := c.stats.MemSizeBytes, int64(16)+3; got != exp { + if got, exp := atomic.LoadUint64(&c.tracker.memSizeBytes), uint64(16)+3; got != exp { t.Fatalf("got %v, expected %v", got, exp) } @@ -494,11 +494,11 @@ func TestCache_Snapshot_Stats(t *testing.T) { } // Cached bytes should have been 
increased. - if got, exp := c.stats.CachedBytes, int64(16)+3; got != exp { + if got, exp := atomic.LoadUint64(&c.tracker.snapshottedBytes), uint64(16)+3; got != exp { t.Fatalf("got %v, expected %v", got, exp) } - if got, exp := c.stats.MemSizeBytes, int64(16)+3; got != exp { + if got, exp := atomic.LoadUint64(&c.tracker.memSizeBytes), uint64(16)+3; got != exp { t.Fatalf("got %v, expected %v", got, exp) } } diff --git a/tsdb/tsm1/engine.go b/tsdb/tsm1/engine.go index 87e1280de1..cef63dd471 100644 --- a/tsdb/tsm1/engine.go +++ b/tsdb/tsm1/engine.go @@ -6,6 +6,7 @@ import ( "context" "errors" "fmt" + "io" "io/ioutil" "math" "os" @@ -26,6 +27,7 @@ import ( "github.com/influxdata/platform/query" "github.com/influxdata/platform/tsdb" "github.com/influxdata/platform/tsdb/tsi1" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) @@ -35,7 +37,8 @@ import ( //go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@compact.gen.go.tmpldata compact.gen.go.tmpl //go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@reader.gen.go.tmpldata reader.gen.go.tmpl -var ( // Static objects to prevent small allocs. +var ( + // Static objects to prevent small allocs. keyFieldSeparatorBytes = []byte(keyFieldSeparator) emptyBytes = []byte{} ) @@ -70,44 +73,6 @@ const ( MaxPointsPerBlock = 1000 ) -// Statistics gathered by the engine. 
-const ( - statCacheCompactions = "cacheCompactions" - statCacheCompactionsActive = "cacheCompactionsActive" - statCacheCompactionError = "cacheCompactionErr" - statCacheCompactionDuration = "cacheCompactionDuration" - - statTSMLevel1Compactions = "tsmLevel1Compactions" - statTSMLevel1CompactionsActive = "tsmLevel1CompactionsActive" - statTSMLevel1CompactionError = "tsmLevel1CompactionErr" - statTSMLevel1CompactionDuration = "tsmLevel1CompactionDuration" - statTSMLevel1CompactionQueue = "tsmLevel1CompactionQueue" - - statTSMLevel2Compactions = "tsmLevel2Compactions" - statTSMLevel2CompactionsActive = "tsmLevel2CompactionsActive" - statTSMLevel2CompactionError = "tsmLevel2CompactionErr" - statTSMLevel2CompactionDuration = "tsmLevel2CompactionDuration" - statTSMLevel2CompactionQueue = "tsmLevel2CompactionQueue" - - statTSMLevel3Compactions = "tsmLevel3Compactions" - statTSMLevel3CompactionsActive = "tsmLevel3CompactionsActive" - statTSMLevel3CompactionError = "tsmLevel3CompactionErr" - statTSMLevel3CompactionDuration = "tsmLevel3CompactionDuration" - statTSMLevel3CompactionQueue = "tsmLevel3CompactionQueue" - - statTSMOptimizeCompactions = "tsmOptimizeCompactions" - statTSMOptimizeCompactionsActive = "tsmOptimizeCompactionsActive" - statTSMOptimizeCompactionError = "tsmOptimizeCompactionErr" - statTSMOptimizeCompactionDuration = "tsmOptimizeCompactionDuration" - statTSMOptimizeCompactionQueue = "tsmOptimizeCompactionQueue" - - statTSMFullCompactions = "tsmFullCompactions" - statTSMFullCompactionsActive = "tsmFullCompactionsActive" - statTSMFullCompactionError = "tsmFullCompactionErr" - statTSMFullCompactionDuration = "tsmFullCompactionDuration" - statTSMFullCompactionQueue = "tsmFullCompactionQueue" -) - // An EngineOption is a functional option for changing the configuration of // an Engine. 
type EngineOption func(i *Engine) @@ -190,7 +155,8 @@ type Engine struct { // Controls whether to enabled compactions when the engine is open enableCompactionsOnOpen bool - stats *EngineStatistics + compactionTracker *compactionTracker // Used to track state of compactions. + defaultMetricLabels prometheus.Labels // N.B this must not be mutated after Open is called. // Limiter for concurrent compactions. compactionLimiter limiter.Fixed @@ -234,7 +200,6 @@ func NewEngine(path string, idx *tsi1.Index, config Config, options ...EngineOpt } logger := zap.NewNop() - stats := &EngineStatistics{} e := &Engine{ path: path, index: idx, @@ -254,9 +219,8 @@ func NewEngine(path string, idx *tsi1.Index, config Config, options ...EngineOpt CacheFlushWriteColdDuration: time.Duration(config.Cache.SnapshotWriteColdDuration), enableCompactionsOnOpen: true, formatFileName: DefaultFormatFileName, - stats: stats, compactionLimiter: limiter.NewFixed(maxCompactions), - scheduler: newScheduler(stats, maxCompactions), + scheduler: newScheduler(maxCompactions), } for _, option := range options { @@ -285,6 +249,12 @@ func (e *Engine) WithCompactionPlanner(planner CompactionPlanner) { e.CompactionPlan = planner } +// SetDefaultMetricLabels sets the default labels for metrics on the engine. +// It must be called before the Engine is opened. +func (e *Engine) SetDefaultMetricLabels(labels prometheus.Labels) { + e.defaultMetricLabels = labels +} + // SetEnabled sets whether the engine is enabled. func (e *Engine) SetEnabled(enabled bool) { e.enableCompactionsOnOpen = enabled @@ -522,89 +492,37 @@ func (e *Engine) MeasurementStats() (MeasurementStats, error) { return e.FileStore.MeasurementStats() } -// EngineStatistics maintains statistics for the engine. -type EngineStatistics struct { - CacheCompactions int64 // Counter of cache compactions that have ever run. - CacheCompactionsActive int64 // Gauge of cache compactions currently running. 
- CacheCompactionErrors int64 // Counter of cache compactions that have failed due to error. - CacheCompactionDuration int64 // Counter of number of wall nanoseconds spent in cache compactions. - - TSMCompactions [3]int64 // Counter of TSM compactions (by level) that have ever run. - TSMCompactionsActive [3]int64 // Gauge of TSM compactions (by level) currently running. - TSMCompactionErrors [3]int64 // Counter of TSM compcations (by level) that have failed due to error. - TSMCompactionDuration [3]int64 // Counter of number of wall nanoseconds spent in TSM compactions (by level). - TSMCompactionsQueue [3]int64 // Gauge of TSM compactions queues (by level). - - TSMOptimizeCompactions int64 // Counter of optimize compactions that have ever run. - TSMOptimizeCompactionsActive int64 // Gauge of optimize compactions currently running. - TSMOptimizeCompactionErrors int64 // Counter of optimize compactions that have failed due to error. - TSMOptimizeCompactionDuration int64 // Counter of number of wall nanoseconds spent in optimize compactions. - TSMOptimizeCompactionsQueue int64 // Gauge of optimize compactions queue. - - TSMFullCompactions int64 // Counter of full compactions that have ever run. - TSMFullCompactionsActive int64 // Gauge of full compactions currently running. - TSMFullCompactionErrors int64 // Counter of full compactions that have failed due to error. - TSMFullCompactionDuration int64 // Counter of number of wall nanoseconds spent in full compactions. - TSMFullCompactionsQueue int64 // Gauge of full compactions queue. -} - -// Statistics returns statistics for periodic monitoring. 
-func (e *Engine) Statistics(tags map[string]string) []models.Statistic { - statistics := make([]models.Statistic, 0, 4) - statistics = append(statistics, models.Statistic{ - Name: "tsm1_engine", - Tags: tags, - Values: map[string]interface{}{ - statCacheCompactions: atomic.LoadInt64(&e.stats.CacheCompactions), - statCacheCompactionsActive: atomic.LoadInt64(&e.stats.CacheCompactionsActive), - statCacheCompactionError: atomic.LoadInt64(&e.stats.CacheCompactionErrors), - statCacheCompactionDuration: atomic.LoadInt64(&e.stats.CacheCompactionDuration), - - statTSMLevel1Compactions: atomic.LoadInt64(&e.stats.TSMCompactions[0]), - statTSMLevel1CompactionsActive: atomic.LoadInt64(&e.stats.TSMCompactionsActive[0]), - statTSMLevel1CompactionError: atomic.LoadInt64(&e.stats.TSMCompactionErrors[0]), - statTSMLevel1CompactionDuration: atomic.LoadInt64(&e.stats.TSMCompactionDuration[0]), - statTSMLevel1CompactionQueue: atomic.LoadInt64(&e.stats.TSMCompactionsQueue[0]), - - statTSMLevel2Compactions: atomic.LoadInt64(&e.stats.TSMCompactions[1]), - statTSMLevel2CompactionsActive: atomic.LoadInt64(&e.stats.TSMCompactionsActive[1]), - statTSMLevel2CompactionError: atomic.LoadInt64(&e.stats.TSMCompactionErrors[1]), - statTSMLevel2CompactionDuration: atomic.LoadInt64(&e.stats.TSMCompactionDuration[1]), - statTSMLevel2CompactionQueue: atomic.LoadInt64(&e.stats.TSMCompactionsQueue[1]), - - statTSMLevel3Compactions: atomic.LoadInt64(&e.stats.TSMCompactions[2]), - statTSMLevel3CompactionsActive: atomic.LoadInt64(&e.stats.TSMCompactionsActive[2]), - statTSMLevel3CompactionError: atomic.LoadInt64(&e.stats.TSMCompactionErrors[2]), - statTSMLevel3CompactionDuration: atomic.LoadInt64(&e.stats.TSMCompactionDuration[2]), - statTSMLevel3CompactionQueue: atomic.LoadInt64(&e.stats.TSMCompactionsQueue[2]), - - statTSMOptimizeCompactions: atomic.LoadInt64(&e.stats.TSMOptimizeCompactions), - statTSMOptimizeCompactionsActive: atomic.LoadInt64(&e.stats.TSMOptimizeCompactionsActive), - 
statTSMOptimizeCompactionError: atomic.LoadInt64(&e.stats.TSMOptimizeCompactionErrors), - statTSMOptimizeCompactionDuration: atomic.LoadInt64(&e.stats.TSMOptimizeCompactionDuration), - statTSMOptimizeCompactionQueue: atomic.LoadInt64(&e.stats.TSMOptimizeCompactionsQueue), - - statTSMFullCompactions: atomic.LoadInt64(&e.stats.TSMFullCompactions), - statTSMFullCompactionsActive: atomic.LoadInt64(&e.stats.TSMFullCompactionsActive), - statTSMFullCompactionError: atomic.LoadInt64(&e.stats.TSMFullCompactionErrors), - statTSMFullCompactionDuration: atomic.LoadInt64(&e.stats.TSMFullCompactionDuration), - statTSMFullCompactionQueue: atomic.LoadInt64(&e.stats.TSMFullCompactionsQueue), - }, - }) - - statistics = append(statistics, e.Cache.Statistics(tags)...) - statistics = append(statistics, e.FileStore.Statistics(tags)...) - return statistics -} - // DiskSize returns the total size in bytes of all TSM and WAL segments on disk. func (e *Engine) DiskSize() int64 { walDiskSizeBytes := e.WAL.DiskSizeBytes() return e.FileStore.DiskSizeBytes() + walDiskSizeBytes } +func (e *Engine) initTrackers() { + mmu.Lock() + defer mmu.Unlock() + + if bms == nil { + // Initialise metrics if an engine has not done so already. + bms = newBlockMetrics(e.defaultMetricLabels) + } + + // Propagate prometheus metrics down into trackers. + e.compactionTracker = newCompactionTracker(bms.compactionMetrics, e.defaultMetricLabels) + e.FileStore.tracker = newFileTracker(bms.fileMetrics, e.defaultMetricLabels) + e.Cache.tracker = newCacheTracker(bms.cacheMetrics, e.defaultMetricLabels) + + // Set default metrics on WAL if enabled. + if wal, ok := e.WAL.(*WAL); ok { + wal.tracker = newWALTracker(bms.walMetrics, e.defaultMetricLabels) + } + e.scheduler.setCompactionTracker(e.compactionTracker) +} + // Open opens and initializes the engine. 
func (e *Engine) Open() error { + e.initTrackers() + if err := os.MkdirAll(e.path, 0777); err != nil { return err } @@ -668,15 +586,7 @@ func (e *Engine) WithLogger(log *zap.Logger) { // shard is fully compacted. func (e *Engine) IsIdle() bool { cacheEmpty := e.Cache.Size() == 0 - - runningCompactions := atomic.LoadInt64(&e.stats.CacheCompactionsActive) - runningCompactions += atomic.LoadInt64(&e.stats.TSMCompactionsActive[0]) - runningCompactions += atomic.LoadInt64(&e.stats.TSMCompactionsActive[1]) - runningCompactions += atomic.LoadInt64(&e.stats.TSMCompactionsActive[2]) - runningCompactions += atomic.LoadInt64(&e.stats.TSMFullCompactionsActive) - runningCompactions += atomic.LoadInt64(&e.stats.TSMOptimizeCompactionsActive) - - return cacheEmpty && runningCompactions == 0 && e.CompactionPlan.FullyCompacted() + return cacheEmpty && e.compactionTracker.AllActive() == 0 && e.CompactionPlan.FullyCompacted() } // Free releases any resources held by the engine to free up memory or CPU. @@ -1106,6 +1016,163 @@ func (e *Engine) CreateSeriesListIfNotExists(collection *tsdb.SeriesCollection) return e.index.CreateSeriesListIfNotExists(collection) } +// WriteTo is not implemented. +func (e *Engine) WriteTo(w io.Writer) (n int64, err error) { panic("not implemented") } + +// compactionLevel describes a snapshot or levelled compaction. +type compactionLevel int + +func (l compactionLevel) String() string { + switch l { + case 0: + return "snapshot" + case 1, 2, 3: + return fmt.Sprint(int(l)) + case 4: + return "optimize" + case 5: + return "full" + default: + panic("unsupported compaction level") + } +} + +// compactionTracker tracks compactions and snapshots within the Engine. +// +// As well as being responsible for providing atomic reads and writes to the +// statistics tracking the various compaction operations, compactionTracker also +// mirrors any writes to the prometheus block metrics, which the Engine exposes. 
+// +// *NOTE* - compactionTracker fields should not be directly modified. Doing so +// could result in the Engine exposing inaccurate metrics. +type compactionTracker struct { + metrics *compactionMetrics + labels prometheus.Labels + // Note: Compactions are levelled as follows: + // 0 – Snapshots + // 1-3 – Levelled compactions + // 4 – Optimize compactions + // 5 – Full compactions + + ok [6]uint64 // Counter of TSM compactions (by level) that have successfully completed. + active [6]uint64 // Gauge of TSM compactions (by level) currently running. + errors [6]uint64 // Counter of TSM compactions (by level) that have failed due to error. + queue [6]uint64 // Gauge of TSM compactions queues (by level). +} + +func newCompactionTracker(metrics *compactionMetrics, defaultLables prometheus.Labels) *compactionTracker { + return &compactionTracker{metrics: metrics, labels: defaultLables} +} + +// Labels returns a copy of the default labels used by the tracker's metrics. +// The returned map is safe for modification. +func (t *compactionTracker) Labels(level compactionLevel) prometheus.Labels { + labels := make(prometheus.Labels, len(t.labels)) + for k, v := range t.labels { + labels[k] = v + } + + // All metrics have a level label. + labels["level"] = fmt.Sprint(level) + return labels +} + +// Completed returns the total number of compactions for the provided level. +func (t *compactionTracker) Completed(level int) uint64 { return atomic.LoadUint64(&t.ok[level]) } + +// Active returns the number of active snapshots (level 0), +// level 1, 2 or 3 compactions, optimize compactions (level 4), or full +// compactions (level 5). +func (t *compactionTracker) Active(level int) uint64 { + return atomic.LoadUint64(&t.active[level]) +} + +// AllActive returns the number of active snapshots and compactions. 
+func (t *compactionTracker) AllActive() uint64 { + var total uint64 + for i := 0; i < len(t.active); i++ { + total += atomic.LoadUint64(&t.active[i]) + } + return total +} + +// ActiveOptimise returns the number of active Optimise compactions. +// +// ActiveOptimise is a helper for Active(4). +func (t *compactionTracker) ActiveOptimise() uint64 { return t.Active(4) } + +// ActiveFull returns the number of active Full compactions. +// +// ActiveFull is a helper for Active(5). +func (t *compactionTracker) ActiveFull() uint64 { return t.Active(5) } + +// Errors returns the total number of errors encountered attempting compactions +// for the provided level. +func (t *compactionTracker) Errors(level int) uint64 { return atomic.LoadUint64(&t.errors[level]) } + +// IncActive increments the number of active compactions for the provided level. +func (t *compactionTracker) IncActive(level compactionLevel) { + atomic.AddUint64(&t.active[level], 1) + + labels := t.Labels(level) + t.metrics.CompactionsActive.With(labels).Inc() +} + +// IncFullActive increments the number of active Full compactions. +func (t *compactionTracker) IncFullActive() { t.IncActive(5) } + +// DecActive decrements the number of active compactions for the provided level. +func (t *compactionTracker) DecActive(level compactionLevel) { + atomic.AddUint64(&t.active[level], ^uint64(0)) + + labels := t.Labels(level) + t.metrics.CompactionsActive.With(labels).Dec() +} + +// DecFullActive decrements the number of active Full compactions. +func (t *compactionTracker) DecFullActive() { t.DecActive(5) } + +// Attempted updates the number of compactions attempted for the provided level. 
+func (t *compactionTracker) Attempted(level compactionLevel, success bool, duration time.Duration) { + if success { + atomic.AddUint64(&t.ok[level], 1) + + labels := t.Labels(level) + + t.metrics.CompactionDuration.With(labels).Observe(duration.Seconds()) + + labels["status"] = "ok" + t.metrics.Compactions.With(labels).Inc() + + return + } + + atomic.AddUint64(&t.errors[level], 1) + + labels := t.Labels(level) + labels["status"] = "error" + t.metrics.Compactions.With(labels).Inc() +} + +// SnapshotAttempted updates the number of snapshots attempted. +func (t *compactionTracker) SnapshotAttempted(success bool, duration time.Duration) { + t.Attempted(0, success, duration) +} + +// SetQueue sets the compaction queue depth for the provided level. +func (t *compactionTracker) SetQueue(level compactionLevel, length uint64) { + atomic.StoreUint64(&t.queue[level], length) + + labels := t.Labels(level) + t.metrics.CompactionQueue.With(labels).Set(float64(length)) +} + +// SetOptimiseQueue sets the queue depth for Optimisation compactions. +func (t *compactionTracker) SetOptimiseQueue(length uint64) { t.SetQueue(4, length) } + +// SetFullQueue sets the queue depth for Full compactions. +func (t *compactionTracker) SetFullQueue(length uint64) { t.SetQueue(5, length) } + // WriteSnapshot will snapshot the cache and write a new TSM file with its contents, releasing the snapshot when done. 
func (e *Engine) WriteSnapshot() error { // Lock and grab the cache snapshot along with all the closed WAL @@ -1116,7 +1183,6 @@ func (e *Engine) WriteSnapshot() error { log, logEnd := logger.NewOperation(e.logger, "Cache snapshot", "tsm1_cache_snapshot") defer func() { elapsed := time.Since(started) - e.Cache.UpdateCompactTime(elapsed) log.Info("Snapshot for path written", zap.String("path", e.path), zap.Duration("duration", elapsed)) @@ -1216,11 +1282,8 @@ func (e *Engine) compactCache() { err := e.WriteSnapshot() if err != nil && err != errCompactionsDisabled { e.logger.Info("Error writing snapshot", zap.Error(err)) - atomic.AddInt64(&e.stats.CacheCompactionErrors, 1) - } else { - atomic.AddInt64(&e.stats.CacheCompactions, 1) } - atomic.AddInt64(&e.stats.CacheCompactionDuration, time.Since(start).Nanoseconds()) + e.compactionTracker.SnapshotAttempted(err == nil || err == errCompactionsDisabled, time.Since(start)) } } } @@ -1262,18 +1325,18 @@ func (e *Engine) compact(wg *sync.WaitGroup) { level2Groups := e.CompactionPlan.PlanLevel(2) level3Groups := e.CompactionPlan.PlanLevel(3) level4Groups := e.CompactionPlan.Plan(e.FileStore.LastModified()) - atomic.StoreInt64(&e.stats.TSMOptimizeCompactionsQueue, int64(len(level4Groups))) + e.compactionTracker.SetOptimiseQueue(uint64(len(level4Groups))) // If no full compactions are need, see if an optimize is needed if len(level4Groups) == 0 { level4Groups = e.CompactionPlan.PlanOptimize() - atomic.StoreInt64(&e.stats.TSMOptimizeCompactionsQueue, int64(len(level4Groups))) + e.compactionTracker.SetOptimiseQueue(uint64(len(level4Groups))) } // Update the level plan queue stats - atomic.StoreInt64(&e.stats.TSMCompactionsQueue[0], int64(len(level1Groups))) - atomic.StoreInt64(&e.stats.TSMCompactionsQueue[1], int64(len(level2Groups))) - atomic.StoreInt64(&e.stats.TSMCompactionsQueue[2], int64(len(level3Groups))) + e.compactionTracker.SetQueue(1, uint64(len(level1Groups))) + e.compactionTracker.SetQueue(2, 
uint64(len(level2Groups))) + e.compactionTracker.SetQueue(3, uint64(len(level3Groups))) // Set the queue depths on the scheduler e.scheduler.setDepth(1, len(level1Groups)) @@ -1314,7 +1377,7 @@ func (e *Engine) compact(wg *sync.WaitGroup) { // compactHiPriorityLevel kicks off compactions using the high priority policy. It returns // true if the compaction was started -func (e *Engine) compactHiPriorityLevel(grp CompactionGroup, level int, fast bool, wg *sync.WaitGroup) bool { +func (e *Engine) compactHiPriorityLevel(grp CompactionGroup, level compactionLevel, fast bool, wg *sync.WaitGroup) bool { s := e.levelCompactionStrategy(grp, fast, level) if s == nil { return false @@ -1322,13 +1385,12 @@ func (e *Engine) compactHiPriorityLevel(grp CompactionGroup, level int, fast boo // Try hi priority limiter, otherwise steal a little from the low priority if we can. if e.compactionLimiter.TryTake() { - atomic.AddInt64(&e.stats.TSMCompactionsActive[level-1], 1) + e.compactionTracker.IncActive(level) wg.Add(1) go func() { defer wg.Done() - defer atomic.AddInt64(&e.stats.TSMCompactionsActive[level-1], -1) - + defer e.compactionTracker.DecActive(level) defer e.compactionLimiter.Release() s.Apply() // Release the files in the compaction plan @@ -1343,7 +1405,7 @@ func (e *Engine) compactHiPriorityLevel(grp CompactionGroup, level int, fast boo // compactLoPriorityLevel kicks off compactions using the lo priority policy. It returns // the plans that were not able to be started -func (e *Engine) compactLoPriorityLevel(grp CompactionGroup, level int, fast bool, wg *sync.WaitGroup) bool { +func (e *Engine) compactLoPriorityLevel(grp CompactionGroup, level compactionLevel, fast bool, wg *sync.WaitGroup) bool { s := e.levelCompactionStrategy(grp, fast, level) if s == nil { return false @@ -1351,11 +1413,11 @@ func (e *Engine) compactLoPriorityLevel(grp CompactionGroup, level int, fast boo // Try the lo priority limiter, otherwise steal a little from the high priority if we can. 
if e.compactionLimiter.TryTake() { - atomic.AddInt64(&e.stats.TSMCompactionsActive[level-1], 1) + e.compactionTracker.IncActive(level) wg.Add(1) go func() { defer wg.Done() - defer atomic.AddInt64(&e.stats.TSMCompactionsActive[level-1], -1) + defer e.compactionTracker.DecActive(level) defer e.compactionLimiter.Release() s.Apply() // Release the files in the compaction plan @@ -1376,11 +1438,11 @@ func (e *Engine) compactFull(grp CompactionGroup, wg *sync.WaitGroup) bool { // Try the lo priority limiter, otherwise steal a little from the high priority if we can. if e.compactionLimiter.TryTake() { - atomic.AddInt64(&e.stats.TSMFullCompactionsActive, 1) + e.compactionTracker.IncFullActive() wg.Add(1) go func() { defer wg.Done() - defer atomic.AddInt64(&e.stats.TSMFullCompactionsActive, -1) + defer e.compactionTracker.DecFullActive() defer e.compactionLimiter.Release() s.Apply() // Release the files in the compaction plan @@ -1396,12 +1458,9 @@ type compactionStrategy struct { group CompactionGroup fast bool - level int + level compactionLevel - durationStat *int64 - activeStat *int64 - successStat *int64 - errorStat *int64 + tracker *compactionTracker logger *zap.Logger compactor *Compactor @@ -1412,13 +1471,12 @@ type compactionStrategy struct { // Apply concurrently compacts all the groups in a compaction strategy. func (s *compactionStrategy) Apply() { - start := time.Now() s.compactGroup() - atomic.AddInt64(s.durationStat, time.Since(start).Nanoseconds()) } // compactGroup executes the compaction strategy against a single CompactionGroup. 
func (s *compactionStrategy) compactGroup() { + now := time.Now() group := s.group log, logEnd := logger.NewOperation(s.logger, "TSM compaction", "tsm1_compact_group") defer logEnd() @@ -1451,14 +1509,14 @@ func (s *compactionStrategy) compactGroup() { } log.Info("Error compacting TSM files", zap.Error(err)) - atomic.AddInt64(s.errorStat, 1) + s.tracker.Attempted(s.level, false, 0) time.Sleep(time.Second) return } if err := s.fileStore.ReplaceWithCallback(group, files, nil); err != nil { log.Info("Error replacing new TSM files", zap.Error(err)) - atomic.AddInt64(s.errorStat, 1) + s.tracker.Attempted(s.level, false, 0) time.Sleep(time.Second) return } @@ -1466,27 +1524,22 @@ func (s *compactionStrategy) compactGroup() { for i, f := range files { log.Info("Compacted file", zap.Int("tsm1_index", i), zap.String("tsm1_file", f)) } - log.Info("Finished compacting files", - zap.Int("tsm1_files_n", len(files))) - atomic.AddInt64(s.successStat, 1) + log.Info("Finished compacting files", zap.Int("tsm1_files_n", len(files))) + s.tracker.Attempted(s.level, true, time.Since(now)) } // levelCompactionStrategy returns a compactionStrategy for the given level. // It returns nil if there are no TSM files to compact. 
-func (e *Engine) levelCompactionStrategy(group CompactionGroup, fast bool, level int) *compactionStrategy { +func (e *Engine) levelCompactionStrategy(group CompactionGroup, fast bool, level compactionLevel) *compactionStrategy { return &compactionStrategy{ group: group, - logger: e.logger.With(zap.Int("tsm1_level", level), zap.String("tsm1_strategy", "level")), + logger: e.logger.With(zap.Int("tsm1_level", int(level)), zap.String("tsm1_strategy", "level")), fileStore: e.FileStore, compactor: e.Compactor, fast: fast, engine: e, level: level, - - activeStat: &e.stats.TSMCompactionsActive[level-1], - successStat: &e.stats.TSMCompactions[level-1], - errorStat: &e.stats.TSMCompactionErrors[level-1], - durationStat: &e.stats.TSMCompactionDuration[level-1], + tracker: e.compactionTracker, } } @@ -1500,21 +1553,12 @@ func (e *Engine) fullCompactionStrategy(group CompactionGroup, optimize bool) *c compactor: e.Compactor, fast: optimize, engine: e, - level: 4, + level: 5, } if optimize { - s.activeStat = &e.stats.TSMOptimizeCompactionsActive - s.successStat = &e.stats.TSMOptimizeCompactions - s.errorStat = &e.stats.TSMOptimizeCompactionErrors - s.durationStat = &e.stats.TSMOptimizeCompactionDuration - } else { - s.activeStat = &e.stats.TSMFullCompactionsActive - s.successStat = &e.stats.TSMFullCompactions - s.errorStat = &e.stats.TSMFullCompactionErrors - s.durationStat = &e.stats.TSMFullCompactionDuration + s.level = 4 } - return s } diff --git a/tsdb/tsm1/file_store.go b/tsdb/tsm1/file_store.go index 8934949731..eb81c93ab8 100644 --- a/tsdb/tsm1/file_store.go +++ b/tsdb/tsm1/file_store.go @@ -17,12 +17,12 @@ import ( "sync/atomic" "time" - "github.com/influxdata/platform/models" "github.com/influxdata/platform/pkg/file" "github.com/influxdata/platform/pkg/limiter" "github.com/influxdata/platform/pkg/metrics" "github.com/influxdata/platform/query" "github.com/influxdata/platform/tsdb" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) @@ -160,12 +160,6 
@@ type FileStoreObserver interface { FileUnlinking(path string) error } -// Statistics gathered by the FileStore. -const ( - statFileStoreBytes = "diskBytes" - statFileStoreCount = "numFiles" -) - var ( floatBlocksDecodedCounter = metrics.MustRegisterCounter("float_blocks_decoded", metrics.WithGroup(tsmGroup)) floatBlocksSizeCounter = metrics.MustRegisterCounter("float_blocks_size_bytes", metrics.WithGroup(tsmGroup)) @@ -198,8 +192,8 @@ type FileStore struct { traceLogger *zap.Logger // Logger to be used when trace-logging is on. traceLogging bool - stats *FileStoreStatistics - purger *purger + tracker *fileTracker + purger *purger currentTempDirID int @@ -242,13 +236,13 @@ func NewFileStore(dir string) *FileStore { logger: logger, traceLogger: logger, openLimiter: limiter.NewFixed(runtime.GOMAXPROCS(0)), - stats: &FileStoreStatistics{}, purger: &purger{ files: map[string]TSMFile{}, logger: logger, }, obs: noFileStoreObserver{}, parseFileName: DefaultParseFileName, + tracker: newFileTracker(newFileMetrics(nil), nil), } fs.purger.fileStore = fs return fs @@ -290,20 +284,58 @@ func (f *FileStore) WithLogger(log *zap.Logger) { // FileStoreStatistics keeps statistics about the file store. type FileStoreStatistics struct { - DiskBytes int64 - FileCount int64 + SDiskBytes int64 + SFileCount int64 } -// Statistics returns statistics for periodic monitoring. -func (f *FileStore) Statistics(tags map[string]string) []models.Statistic { - return []models.Statistic{{ - Name: "tsm1_filestore", - Tags: tags, - Values: map[string]interface{}{ - statFileStoreBytes: atomic.LoadInt64(&f.stats.DiskBytes), - statFileStoreCount: atomic.LoadInt64(&f.stats.FileCount), - }, - }} +// fileTracker tracks file counts and sizes within the FileStore. +// +// As well as being responsible for providing atomic reads and writes to the +// statistics, fileTracker also mirrors any changes to the external prometheus +// metrics, which the Engine exposes. 
+// +// *NOTE* - fileTracker fields should not be directly modified. Doing so +// could result in the Engine exposing inaccurate metrics. +type fileTracker struct { + metrics *fileMetrics + labels prometheus.Labels + diskBytes uint64 + fileCount uint64 +} + +func newFileTracker(metrics *fileMetrics, defaultLabels prometheus.Labels) *fileTracker { + return &fileTracker{metrics: metrics, labels: defaultLabels} +} + +func (t *fileTracker) Labels() prometheus.Labels { + return t.labels +} + +// Bytes returns the number of bytes in use on disk. +func (t *fileTracker) Bytes() uint64 { return atomic.LoadUint64(&t.diskBytes) } + +// SetBytes sets the number of bytes in use on disk. +func (t *fileTracker) SetBytes(bytes uint64) { + atomic.StoreUint64(&t.diskBytes, bytes) + + labels := t.Labels() + t.metrics.DiskSize.With(labels).Set(float64(bytes)) +} + +// AddBytes increases the number of bytes. +func (t *fileTracker) AddBytes(bytes uint64) { + atomic.AddUint64(&t.diskBytes, bytes) + + labels := t.Labels() + t.metrics.DiskSize.With(labels).Add(float64(bytes)) +} + +// SetFileCount sets the number of files in the FileStore. +func (t *fileTracker) SetFileCount(files uint64) { + atomic.StoreUint64(&t.fileCount, files) + + labels := t.Labels() + t.metrics.Files.With(labels).Set(float64(files)) } // Count returns the number of TSM files currently loaded. 
@@ -581,10 +613,11 @@ func (f *FileStore) Open() error { f.files = append(f.files, res.r) // Accumulate file store size stats - atomic.AddInt64(&f.stats.DiskBytes, int64(res.r.Size())) + totalSize := uint64(res.r.Size()) for _, ts := range res.r.TombstoneFiles() { - atomic.AddInt64(&f.stats.DiskBytes, int64(ts.Size)) + totalSize += uint64(ts.Size) } + f.tracker.AddBytes(totalSize) // Re-initialize the lastModified time for the file store if res.r.LastModified() > lm { @@ -596,7 +629,7 @@ func (f *FileStore) Open() error { close(readerC) sort.Sort(tsmReaders(f.files)) - atomic.StoreInt64(&f.stats.FileCount, int64(len(f.files))) + f.tracker.SetFileCount(uint64(len(f.files))) return nil } @@ -609,7 +642,7 @@ func (f *FileStore) Close() error { f.lastFileStats = nil f.files = nil - atomic.StoreInt64(&f.stats.FileCount, 0) + f.tracker.SetFileCount(uint64(0)) // Let other methods access this closed object while we do the actual closing. f.mu.Unlock() @@ -624,9 +657,8 @@ func (f *FileStore) Close() error { return nil } -func (f *FileStore) DiskSizeBytes() int64 { - return atomic.LoadInt64(&f.stats.DiskBytes) -} +// DiskSizeBytes returns the total number of bytes consumed by the files in the FileStore. +func (f *FileStore) DiskSizeBytes() int64 { return int64(f.tracker.Bytes()) } // Read returns the slice of values for the given key and the given timestamp, // if any file matches those constraints. 
@@ -878,18 +910,18 @@ func (f *FileStore) replace(oldFiles, newFiles []string, updatedFn func(r []TSMF f.lastFileStats = nil f.files = active sort.Sort(tsmReaders(f.files)) - atomic.StoreInt64(&f.stats.FileCount, int64(len(f.files))) + f.tracker.SetFileCount(uint64(len(f.files))) // Recalculate the disk size stat - var totalSize int64 + var totalSize uint64 for _, file := range f.files { - totalSize += int64(file.Size()) + totalSize += uint64(file.Size()) for _, ts := range file.TombstoneFiles() { - totalSize += int64(ts.Size) + totalSize += uint64(ts.Size) } } - atomic.StoreInt64(&f.stats.DiskBytes, totalSize) + f.tracker.SetBytes(totalSize) return nil } diff --git a/tsdb/tsm1/float.go b/tsdb/tsm1/float.go index bf1e65447d..ad8f43b7d5 100644 --- a/tsdb/tsm1/float.go +++ b/tsdb/tsm1/float.go @@ -15,7 +15,7 @@ import ( "math" "math/bits" - "github.com/dgryski/go-bitstream" + bitstream "github.com/dgryski/go-bitstream" ) // Note: an uncompressed format is not yet implemented. diff --git a/tsdb/tsm1/metrics.go b/tsdb/tsm1/metrics.go new file mode 100644 index 0000000000..0d2299ffd6 --- /dev/null +++ b/tsdb/tsm1/metrics.go @@ -0,0 +1,308 @@ +package tsm1 + +import ( + "sort" + "sync" + + "github.com/prometheus/client_golang/prometheus" +) + +// The following package variables act as singletons, to be shared by all Engine +// instantiations. This allows multiple Engines to be instantiated within the +// same process. +var ( + bms *blockMetrics + mmu sync.RWMutex +) + +// PrometheusCollectors returns all prometheus metrics for the tsm1 package. +func PrometheusCollectors() []prometheus.Collector { + mmu.RLock() + defer mmu.RUnlock() + + var collectors []prometheus.Collector + if bms != nil { + collectors = append(collectors, bms.compactionMetrics.PrometheusCollectors()...) + collectors = append(collectors, bms.fileMetrics.PrometheusCollectors()...) + collectors = append(collectors, bms.cacheMetrics.PrometheusCollectors()...) 
+ collectors = append(collectors, bms.walMetrics.PrometheusCollectors()...) + } + return collectors +} + +// namespace is the leading part of all published metrics for the Storage service. +const namespace = "storage" + +const compactionSubsystem = "compactions" // sub-system associated with metrics for compactions. +const fileStoreSubsystem = "tsm_files" // sub-system associated with metrics for TSM files. +const cacheSubsystem = "cache" // sub-system associated with metrics for the cache. +const walSubsystem = "wal" // sub-system associated with metrics for the WAL. + +// blockMetrics are a set of metrics concerned with tracking data about block storage. +type blockMetrics struct { + labels prometheus.Labels + *compactionMetrics + *fileMetrics + *cacheMetrics + *walMetrics +} + +// newBlockMetrics initialises the prometheus metrics for the block subsystem. +func newBlockMetrics(labels prometheus.Labels) *blockMetrics { + return &blockMetrics{ + labels: labels, + compactionMetrics: newCompactionMetrics(labels), + fileMetrics: newFileMetrics(labels), + cacheMetrics: newCacheMetrics(labels), + walMetrics: newWALMetrics(labels), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. +func (m *blockMetrics) PrometheusCollectors() []prometheus.Collector { + var metrics []prometheus.Collector + metrics = append(metrics, m.compactionMetrics.PrometheusCollectors()...) + metrics = append(metrics, m.fileMetrics.PrometheusCollectors()...) + metrics = append(metrics, m.cacheMetrics.PrometheusCollectors()...) + metrics = append(metrics, m.walMetrics.PrometheusCollectors()...) + return metrics +} + +// compactionMetrics are a set of metrics concerned with tracking data about compactions. 
+type compactionMetrics struct { + CompactionsActive *prometheus.GaugeVec + CompactionDuration *prometheus.HistogramVec + CompactionQueue *prometheus.GaugeVec + + // The following metrics include a ``"status" = {ok, error}` label + Compactions *prometheus.CounterVec +} + +// newCompactionMetrics initialises the prometheus metrics for compactions. +func newCompactionMetrics(labels prometheus.Labels) *compactionMetrics { + names := []string{"level"} // All compaction metrics have a `level` label. + for k := range labels { + names = append(names, k) + } + sort.Strings(names) + + totalCompactionsNames := append(append([]string(nil), names...), "status") + sort.Strings(totalCompactionsNames) + + return &compactionMetrics{ + Compactions: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: compactionSubsystem, + Name: "total", + Help: "Number of times cache snapshotted or TSM compaction attempted.", + }, totalCompactionsNames), + CompactionsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: compactionSubsystem, + Name: "active", + Help: "Number of active compactions.", + }, names), + CompactionDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: compactionSubsystem, + Name: "duration_seconds", + Help: "Time taken for a successful compaction or snapshot.", + // 30 buckets spaced exponentially between 5s and ~53 minutes. + Buckets: prometheus.ExponentialBuckets(5.0, 1.25, 30), + }, names), + CompactionQueue: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: compactionSubsystem, + Name: "queued", + Help: "Number of queued compactions.", + }, names), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. 
+func (m *compactionMetrics) PrometheusCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.Compactions, + m.CompactionsActive, + m.CompactionDuration, + m.CompactionQueue, + } +} + +// fileMetrics are a set of metrics concerned with tracking data about compactions. +type fileMetrics struct { + DiskSize *prometheus.GaugeVec + Files *prometheus.GaugeVec +} + +// newFileMetrics initialises the prometheus metrics for tracking files on disk. +func newFileMetrics(labels prometheus.Labels) *fileMetrics { + var names []string + for k := range labels { + names = append(names, k) + } + sort.Strings(names) + + return &fileMetrics{ + DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: fileStoreSubsystem, + Name: "disk_bytes", + Help: "Number of bytes TSM files using on disk.", + }, names), + Files: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: fileStoreSubsystem, + Name: "total", + Help: "Number of files.", + }, names), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. +func (m *fileMetrics) PrometheusCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.DiskSize, + m.Files, + } +} + +// cacheMetrics are a set of metrics concerned with tracking data about the TSM Cache. +type cacheMetrics struct { + MemSize *prometheus.GaugeVec + DiskSize *prometheus.GaugeVec + SnapshotsActive *prometheus.GaugeVec + Age *prometheus.GaugeVec + SnapshottedBytes *prometheus.CounterVec + + // The following metrics include a ``"status" = {ok, error, dropped}` label + WrittenBytes *prometheus.CounterVec + Writes *prometheus.CounterVec +} + +// newCacheMetrics initialises the prometheus metrics for compactions. 
+func newCacheMetrics(labels prometheus.Labels) *cacheMetrics { + var names []string + for k := range labels { + names = append(names, k) + } + sort.Strings(names) + + writeNames := append(append([]string(nil), names...), "status") + sort.Strings(writeNames) + + return &cacheMetrics{ + MemSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "inuse_bytes", + Help: "In-memory size of cache.", + }, names), + DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "disk_bytes", + Help: "Number of bytes on disk used by snapshot data.", + }, names), + SnapshotsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "snapshots_active", + Help: "Number of active concurrent snapshots (>1 when splitting the cache).", + }, names), + Age: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "age_seconds", + Help: "Age in seconds of the current cache (time since last snapshot or initialisation).", + }, names), + SnapshottedBytes: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "snapshot_bytes", + Help: "Number of bytes snapshotted.", + }, names), + WrittenBytes: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "written_bytes", + Help: "Number of bytes successfully written to the Cache.", + }, writeNames), + Writes: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cacheSubsystem, + Name: "writes_total", + Help: "Number of writes to the Cache.", + }, writeNames), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. 
+func (m *cacheMetrics) PrometheusCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.MemSize, + m.DiskSize, + m.SnapshotsActive, + m.Age, + m.SnapshottedBytes, + m.WrittenBytes, + m.Writes, + } +} + +// walMetrics are a set of metrics concerned with tracking data about compactions. +type walMetrics struct { + OldSegmentBytes *prometheus.GaugeVec + CurrentSegmentBytes *prometheus.GaugeVec + Segments *prometheus.GaugeVec + Writes *prometheus.CounterVec +} + +// newWALMetrics initialises the prometheus metrics for tracking the WAL. +func newWALMetrics(labels prometheus.Labels) *walMetrics { + var names []string + for k := range labels { + names = append(names, k) + } + sort.Strings(names) + + writeNames := append(append([]string(nil), names...), "status") + sort.Strings(writeNames) + + return &walMetrics{ + OldSegmentBytes: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: walSubsystem, + Name: "old_segment_bytes", + Help: "Number of bytes old WAL segments using on disk.", + }, names), + CurrentSegmentBytes: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: walSubsystem, + Name: "current_segment_bytes", + Help: "Number of bytes TSM files using on disk.", + }, names), + Segments: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: walSubsystem, + Name: "segments_total", + Help: "Number of WAL segment files on disk.", + }, names), + Writes: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: walSubsystem, + Name: "writes_total", + Help: "Number of writes to the WAL.", + }, writeNames), + } +} + +// PrometheusCollectors satisfies the prom.PrometheusCollector interface. 
+func (m *walMetrics) PrometheusCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.OldSegmentBytes, + m.CurrentSegmentBytes, + m.Segments, + m.Writes, + } +} diff --git a/tsdb/tsm1/metrics_test.go b/tsdb/tsm1/metrics_test.go new file mode 100644 index 0000000000..8aafb1aa7a --- /dev/null +++ b/tsdb/tsm1/metrics_test.go @@ -0,0 +1,282 @@ +package tsm1 + +import ( + "testing" + + "github.com/influxdata/platform/kit/prom/promtest" + "github.com/prometheus/client_golang/prometheus" +) + +func TestMetrics_Filestore(t *testing.T) { + // metrics to be shared by multiple file stores. + metrics := newFileMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) + + t1 := newFileTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) + t2 := newFileTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) + + reg := prometheus.NewRegistry() + reg.MustRegister(metrics.PrometheusCollectors()...) + + // Generate some measurements. + t1.AddBytes(100) + t1.SetFileCount(3) + + t2.AddBytes(200) + t2.SetFileCount(4) + + // Test that all the correct metrics are present. 
+ mfs, err := reg.Gather() + if err != nil { + t.Fatal(err) + } + + base := namespace + "_" + fileStoreSubsystem + "_" + m1Bytes := promtest.MustFindMetric(t, mfs, base+"disk_bytes", prometheus.Labels{"engine_id": "0", "node_id": "0"}) + m2Bytes := promtest.MustFindMetric(t, mfs, base+"disk_bytes", prometheus.Labels{"engine_id": "1", "node_id": "0"}) + m1Files := promtest.MustFindMetric(t, mfs, base+"total", prometheus.Labels{"engine_id": "0", "node_id": "0"}) + m2Files := promtest.MustFindMetric(t, mfs, base+"total", prometheus.Labels{"engine_id": "1", "node_id": "0"}) + + if m, got, exp := m1Bytes, m1Bytes.GetGauge().GetValue(), 100.0; got != exp { + t.Errorf("[%s] got %v, expected %v", m, got, exp) + } + + if m, got, exp := m1Files, m1Files.GetGauge().GetValue(), 3.0; got != exp { + t.Errorf("[%s] got %v, expected %v", m, got, exp) + } + + if m, got, exp := m2Bytes, m2Bytes.GetGauge().GetValue(), 200.0; got != exp { + t.Errorf("[%s] got %v, expected %v", m, got, exp) + } + + if m, got, exp := m2Files, m2Files.GetGauge().GetValue(), 4.0; got != exp { + t.Errorf("[%s] got %v, expected %v", m, got, exp) + } + +} + +func TestMetrics_Cache(t *testing.T) { + // metrics to be shared by multiple file stores. + metrics := newCacheMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) + + t1 := newCacheTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) + t2 := newCacheTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) + + reg := prometheus.NewRegistry() + reg.MustRegister(metrics.PrometheusCollectors()...) + + base := namespace + "_" + cacheSubsystem + "_" + + // All the metric names + gauges := []string{ + base + "inuse_bytes", + base + "disk_bytes", + base + "age_seconds", + base + "snapshots_active", + } + + counters := []string{ + base + "snapshot_bytes", + base + "written_bytes", + base + "writes_total", + } + + // Generate some measurements. 
+ for i, tracker := range []*cacheTracker{t1, t2} { + tracker.SetMemBytes(uint64(i + len(gauges[0]))) + tracker.SetDiskBytes(uint64(i + len(gauges[1]))) + tracker.metrics.Age.With(tracker.Labels()).Set(float64(i + len(gauges[2]))) + tracker.SetSnapshotsActive(uint64(i + len(gauges[3]))) + + tracker.AddSnapshottedBytes(uint64(i + len(counters[0]))) + tracker.AddWrittenBytesOK(uint64(i + len(counters[1]))) + + labels := tracker.Labels() + labels["status"] = "ok" + tracker.metrics.Writes.With(labels).Add(float64(i + len(counters[2]))) + } + + // Test that all the correct metrics are present. + mfs, err := reg.Gather() + if err != nil { + t.Fatal(err) + } + + // The label variants for the two caches. + labelVariants := []prometheus.Labels{ + prometheus.Labels{"engine_id": "0", "node_id": "0"}, + prometheus.Labels{"engine_id": "1", "node_id": "0"}, + } + + for i, labels := range labelVariants { + for _, name := range gauges { + exp := float64(i + len(name)) + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetGauge().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for _, name := range counters { + exp := float64(i + len(name)) + + if name == counters[1] || name == counters[2] { + labels["status"] = "ok" + } + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetCounter().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + } +} + +func TestMetrics_WAL(t *testing.T) { + // metrics to be shared by multiple file stores. + metrics := newWALMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) + + t1 := newWALTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) + t2 := newWALTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) + + reg := prometheus.NewRegistry() + reg.MustRegister(metrics.PrometheusCollectors()...) 
+ + base := namespace + "_" + walSubsystem + "_" + + // All the metric names + gauges := []string{ + base + "old_segment_bytes", + base + "current_segment_bytes", + base + "segments_total", + } + + counters := []string{ + base + "writes_total", + } + + // Generate some measurements. + for i, tracker := range []*walTracker{t1, t2} { + tracker.SetOldSegmentSize(uint64(i + len(gauges[0]))) + tracker.SetCurrentSegmentSize(uint64(i + len(gauges[1]))) + tracker.SetSegments(uint64(i + len(gauges[2]))) + + labels := tracker.Labels() + labels["status"] = "ok" + tracker.metrics.Writes.With(labels).Add(float64(i + len(counters[0]))) + } + + // Test that all the correct metrics are present. + mfs, err := reg.Gather() + if err != nil { + t.Fatal(err) + } + + // The label variants for the two caches. + labelVariants := []prometheus.Labels{ + prometheus.Labels{"engine_id": "0", "node_id": "0"}, + prometheus.Labels{"engine_id": "1", "node_id": "0"}, + } + + for i, labels := range labelVariants { + for _, name := range gauges { + exp := float64(i + len(name)) + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetGauge().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for _, name := range counters { + exp := float64(i + len(name)) + + labels["status"] = "ok" + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetCounter().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + } +} + +func TestMetrics_Compactions(t *testing.T) { + // metrics to be shared by multiple file stores. + metrics := newCompactionMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) + + t1 := newCompactionTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) + t2 := newCompactionTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) + + reg := prometheus.NewRegistry() + reg.MustRegister(metrics.PrometheusCollectors()...) 
+ + base := namespace + "_" + compactionSubsystem + "_" + + // All the metric names + gauges := []string{ + base + "active", + base + "queued", + } + + counters := []string{base + "total"} + histograms := []string{base + "duration_seconds"} + + // Generate some measurements. + for i, tracker := range []*compactionTracker{t1, t2} { + labels := tracker.Labels(2) + tracker.metrics.CompactionsActive.With(labels).Add(float64(i + len(gauges[0]))) + tracker.SetQueue(2, uint64(i+len(gauges[1]))) + + labels = tracker.Labels(2) + labels["status"] = "ok" + tracker.metrics.Compactions.With(labels).Add(float64(i + len(counters[0]))) + + labels = tracker.Labels(2) + tracker.metrics.CompactionDuration.With(labels).Observe(float64(i + len(histograms[0]))) + } + + // Test that all the correct metrics are present. + mfs, err := reg.Gather() + if err != nil { + t.Fatal(err) + } + + // The label variants for the two caches. + labelVariants := []prometheus.Labels{ + prometheus.Labels{"engine_id": "0", "node_id": "0"}, + prometheus.Labels{"engine_id": "1", "node_id": "0"}, + } + + for i, labels := range labelVariants { + labels["level"] = "2" + + for _, name := range gauges { + exp := float64(i + len(name)) + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetGauge().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for _, name := range counters { + exp := float64(i + len(name)) + + // Make a copy since we need to add a label + l := make(prometheus.Labels, len(labels)) + for k, v := range labels { + l[k] = v + } + l["status"] = "ok" + + metric := promtest.MustFindMetric(t, mfs, name, l) + if got := metric.GetCounter().GetValue(); got != exp { + t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) + } + } + + for _, name := range histograms { + exp := float64(i + len(name)) + metric := promtest.MustFindMetric(t, mfs, name, labels) + if got := metric.GetHistogram().GetSampleSum(); got != exp { + t.Errorf("[%s 
%d] got %v, expected %v", name, i, got, exp) + } + } + } +} diff --git a/tsdb/tsm1/scheduler.go b/tsdb/tsm1/scheduler.go index d360afc3e7..141077a8dd 100644 --- a/tsdb/tsm1/scheduler.go +++ b/tsdb/tsm1/scheduler.go @@ -1,28 +1,29 @@ package tsm1 -import ( - "sync/atomic" -) - var defaultWeights = [4]float64{0.4, 0.3, 0.2, 0.1} type scheduler struct { - maxConcurrency int - stats *EngineStatistics + maxConcurrency int + compactionTracker *compactionTracker // queues is the depth of work pending for each compaction level queues [4]int weights [4]float64 } -func newScheduler(stats *EngineStatistics, maxConcurrency int) *scheduler { +func newScheduler(maxConcurrency int) *scheduler { return &scheduler{ - stats: stats, - maxConcurrency: maxConcurrency, - weights: defaultWeights, + maxConcurrency: maxConcurrency, + weights: defaultWeights, + compactionTracker: newCompactionTracker(newCompactionMetrics(nil), nil), } } +// setCompactionTracker sets the metrics on the scheduler. It must be called before next. 
+func (s *scheduler) setCompactionTracker(tracker *compactionTracker) { + s.compactionTracker = tracker +} + func (s *scheduler) setDepth(level, depth int) { level = level - 1 if level < 0 || level > len(s.queues) { @@ -33,10 +34,10 @@ func (s *scheduler) setDepth(level, depth int) { } func (s *scheduler) next() (int, bool) { - level1Running := int(atomic.LoadInt64(&s.stats.TSMCompactionsActive[0])) - level2Running := int(atomic.LoadInt64(&s.stats.TSMCompactionsActive[1])) - level3Running := int(atomic.LoadInt64(&s.stats.TSMCompactionsActive[2])) - level4Running := int(atomic.LoadInt64(&s.stats.TSMFullCompactionsActive) + atomic.LoadInt64(&s.stats.TSMOptimizeCompactionsActive)) + level1Running := int(s.compactionTracker.Active(1)) + level2Running := int(s.compactionTracker.Active(2)) + level3Running := int(s.compactionTracker.Active(3)) + level4Running := int(s.compactionTracker.ActiveFull() + s.compactionTracker.ActiveOptimise()) if level1Running+level2Running+level3Running+level4Running >= s.maxConcurrency { return 0, false diff --git a/tsdb/tsm1/scheduler_test.go b/tsdb/tsm1/scheduler_test.go index 9ff40b0e5f..97871def85 100644 --- a/tsdb/tsm1/scheduler_test.go +++ b/tsdb/tsm1/scheduler_test.go @@ -3,7 +3,7 @@ package tsm1 import "testing" func TestScheduler_Runnable_Empty(t *testing.T) { - s := newScheduler(&EngineStatistics{}, 1) + s := newScheduler(1) for i := 1; i < 5; i++ { s.setDepth(i, 1) @@ -20,11 +20,10 @@ func TestScheduler_Runnable_Empty(t *testing.T) { } func TestScheduler_Runnable_MaxConcurrency(t *testing.T) { - s := newScheduler(&EngineStatistics{}, 1) + s := newScheduler(1) // level 1 - s.stats = &EngineStatistics{} - s.stats.TSMCompactionsActive[0] = 1 + s.compactionTracker.active[1] = 1 for i := 0; i <= 4; i++ { _, runnable := s.next() if exp, got := false, runnable; exp != got { @@ -33,8 +32,7 @@ func TestScheduler_Runnable_MaxConcurrency(t *testing.T) { } // level 2 - s.stats = &EngineStatistics{} - s.stats.TSMCompactionsActive[1] = 1 + 
s.compactionTracker.active[2] = 1 for i := 0; i <= 4; i++ { _, runnable := s.next() if exp, got := false, runnable; exp != got { @@ -43,8 +41,7 @@ func TestScheduler_Runnable_MaxConcurrency(t *testing.T) { } // level 3 - s.stats = &EngineStatistics{} - s.stats.TSMCompactionsActive[2] = 1 + s.compactionTracker.active[3] = 1 for i := 0; i <= 4; i++ { _, runnable := s.next() if exp, got := false, runnable; exp != got { @@ -53,8 +50,7 @@ func TestScheduler_Runnable_MaxConcurrency(t *testing.T) { } // optimize - s.stats = &EngineStatistics{} - s.stats.TSMOptimizeCompactionsActive++ + s.compactionTracker.active[4] = 1 for i := 0; i <= 4; i++ { _, runnable := s.next() if exp, got := false, runnable; exp != got { @@ -63,8 +59,7 @@ func TestScheduler_Runnable_MaxConcurrency(t *testing.T) { } // full - s.stats = &EngineStatistics{} - s.stats.TSMFullCompactionsActive++ + s.compactionTracker.active[5] = 1 for i := 0; i <= 4; i++ { _, runnable := s.next() if exp, got := false, runnable; exp != got { diff --git a/tsdb/tsm1/wal.go b/tsdb/tsm1/wal.go index 7d0ae9c7fc..04be15c2fb 100644 --- a/tsdb/tsm1/wal.go +++ b/tsdb/tsm1/wal.go @@ -18,9 +18,9 @@ import ( "time" "github.com/golang/snappy" - "github.com/influxdata/platform/models" "github.com/influxdata/platform/pkg/limiter" "github.com/influxdata/platform/pkg/pool" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) @@ -88,14 +88,6 @@ var ( bytesPool = pool.NewLimitedBytes(256, walEncodeBufSize*2) ) -// Statistics gathered by the WAL. -const ( - statWALOldBytes = "oldSegmentsDiskBytes" - statWALCurrentBytes = "currentSegmentDiskBytes" - statWriteOk = "writeOk" - statWriteErr = "writeErr" -) - // WAL represents the write-ahead log used for writing TSM files. 
type WAL struct { // goroutines waiting for the next fsync @@ -128,8 +120,7 @@ type WAL struct { // SegmentSize is the file size at which a segment file will be rotated SegmentSize int - // statistics for the WAL - stats *WALStatistics + tracker *walTracker limiter limiter.Fixed } @@ -143,10 +134,10 @@ func NewWAL(path string) *WAL { SegmentSize: DefaultSegmentSize, closing: make(chan struct{}), syncWaiters: make(chan chan error, 1024), - stats: &WALStatistics{}, limiter: limiter.NewFixed(defaultWaitingWALWrites), logger: logger, traceLogger: logger, + tracker: newWALTracker(newWALMetrics(nil), nil), } } @@ -172,28 +163,6 @@ func (l *WAL) WithLogger(log *zap.Logger) { } } -// WALStatistics maintains statistics about the WAL. -type WALStatistics struct { - OldBytes int64 - CurrentBytes int64 - WriteOK int64 - WriteErr int64 -} - -// Statistics returns statistics for periodic monitoring. -func (l *WAL) Statistics(tags map[string]string) []models.Statistic { - return []models.Statistic{{ - Name: "tsm1_wal", - Tags: tags, - Values: map[string]interface{}{ - statWALOldBytes: atomic.LoadInt64(&l.stats.OldBytes), - statWALCurrentBytes: atomic.LoadInt64(&l.stats.CurrentBytes), - statWriteOk: atomic.LoadInt64(&l.stats.WriteOK), - statWriteErr: atomic.LoadInt64(&l.stats.WriteErr), - }, - }} -} - // Path returns the directory the log was initialized with. 
func (l *WAL) Path() string { l.mu.RLock() @@ -217,6 +186,7 @@ func (l *WAL) Open() error { if err != nil { return err } + l.tracker.SetSegments(uint64(len(segments))) if len(segments) > 0 { lastSegment := segments[len(segments)-1] @@ -234,6 +204,7 @@ func (l *WAL) Open() error { if stat.Size() == 0 { os.Remove(lastSegment) segments = segments[:len(segments)-1] + l.tracker.DecSegments() } else { fd, err := os.OpenFile(lastSegment, os.O_RDWR, 0666) if err != nil { @@ -245,7 +216,7 @@ func (l *WAL) Open() error { l.currentSegmentWriter = NewWALSegmentWriter(fd) // Reset the current segment size stat - atomic.StoreInt64(&l.stats.CurrentBytes, stat.Size()) + l.tracker.SetCurrentSegmentSize(uint64(stat.Size())) } } @@ -263,7 +234,7 @@ func (l *WAL) Open() error { } } } - atomic.StoreInt64(&l.stats.OldBytes, totalOldDiskSize) + l.tracker.SetOldSegmentSize(uint64(totalOldDiskSize)) l.closing = make(chan struct{}) @@ -336,10 +307,10 @@ func (l *WAL) WriteMulti(values map[string][]Value) (int, error) { id, err := l.writeToLog(entry) if err != nil { - atomic.AddInt64(&l.stats.WriteErr, 1) + l.tracker.IncWritesErr() return -1, err } - atomic.AddInt64(&l.stats.WriteOK, 1) + l.tracker.IncWritesOK() return id, nil } @@ -390,6 +361,7 @@ func (l *WAL) Remove(files []string) error { if err != nil { return err } + l.tracker.SetSegments(uint64(len(segments))) var totalOldDiskSize int64 for _, seg := range segments { @@ -400,8 +372,7 @@ func (l *WAL) Remove(files []string) error { totalOldDiskSize += stat.Size() } - atomic.StoreInt64(&l.stats.OldBytes, totalOldDiskSize) - + l.tracker.SetOldSegmentSize(uint64(totalOldDiskSize)) return nil } @@ -412,8 +383,9 @@ func (l *WAL) LastWriteTime() time.Time { return l.lastWriteTime } +// DiskSizeBytes returns the on-disk size of the WAL. 
func (l *WAL) DiskSizeBytes() int64 { - return atomic.LoadInt64(&l.stats.OldBytes) + atomic.LoadInt64(&l.stats.CurrentBytes) + return int64(l.tracker.OldSegmentSize() + l.tracker.CurrentSegmentSize()) } func (l *WAL) writeToLog(entry WALEntry) (int, error) { @@ -464,8 +436,7 @@ func (l *WAL) writeToLog(entry WALEntry) (int, error) { l.scheduleSync() // Update stats for current segment size - atomic.StoreInt64(&l.stats.CurrentBytes, int64(l.currentSegmentWriter.size)) - + l.tracker.SetCurrentSegmentSize(uint64(l.currentSegmentWriter.size)) l.lastWriteTime = time.Now().UTC() return l.currentSegmentID, nil @@ -586,7 +557,7 @@ func (l *WAL) newSegmentFile() error { if err := l.currentSegmentWriter.close(); err != nil { return err } - atomic.StoreInt64(&l.stats.OldBytes, int64(l.currentSegmentWriter.size)) + l.tracker.SetOldSegmentSize(uint64(l.currentSegmentWriter.size)) } fileName := filepath.Join(l.path, fmt.Sprintf("%s%05d.%s", WALFilePrefix, l.currentSegmentID, WALFileExtension)) @@ -595,13 +566,95 @@ return err } l.currentSegmentWriter = NewWALSegmentWriter(fd) + l.tracker.IncSegments() // Reset the current segment size stat - atomic.StoreInt64(&l.stats.CurrentBytes, 0) - + l.tracker.SetCurrentSegmentSize(0) return nil } +// walTracker tracks writes to the WAL. +// +// As well as being responsible for providing atomic reads and writes to the +// statistics, walTracker also mirrors any changes to the external prometheus +// metrics, which the Engine exposes. +// +// *NOTE* - walTracker fields should not be directly modified. Doing so +// could result in the Engine exposing inaccurate metrics. +type walTracker struct { + metrics *walMetrics + labels prometheus.Labels + oldSegmentBytes uint64 + currentSegmentBytes uint64 +} + +func newWALTracker(metrics *walMetrics, defaultLabels prometheus.Labels) *walTracker { + return &walTracker{metrics: metrics, labels: defaultLabels} +} + +// Labels returns a copy of the default labels used by the tracker's metrics. 
+// The returned map is safe for modification. +func (t *walTracker) Labels() prometheus.Labels { + labels := make(prometheus.Labels, len(t.labels)) + for k, v := range t.labels { + labels[k] = v + } + return labels +} + +// IncWrites increments the number of writes to the WAL, with a required status. +func (t *walTracker) IncWrites(status string) { + labels := t.Labels() + labels["status"] = status + t.metrics.Writes.With(labels).Inc() +} + +// IncWritesOK increments the number of successful writes. +func (t *walTracker) IncWritesOK() { t.IncWrites("ok") } + +// IncWritesErr increments the number of writes that encountered an error. +func (t *walTracker) IncWritesErr() { t.IncWrites("error") } + +// SetOldSegmentSize sets the size of all old segments on disk. +func (t *walTracker) SetOldSegmentSize(sz uint64) { + atomic.StoreUint64(&t.oldSegmentBytes, sz) + + labels := t.labels + t.metrics.OldSegmentBytes.With(labels).Set(float64(sz)) +} + +// OldSegmentSize returns the on-disk size of all old segments. +func (t *walTracker) OldSegmentSize() uint64 { return atomic.LoadUint64(&t.oldSegmentBytes) } + +// SetCurrentSegmentSize sets the size of the current segment on disk. +func (t *walTracker) SetCurrentSegmentSize(sz uint64) { + atomic.StoreUint64(&t.currentSegmentBytes, sz) + + labels := t.labels + t.metrics.CurrentSegmentBytes.With(labels).Set(float64(sz)) +} + +// CurrentSegmentSize returns the on-disk size of the current segment. +func (t *walTracker) CurrentSegmentSize() uint64 { return atomic.LoadUint64(&t.currentSegmentBytes) } + +// SetSegments sets the number of segment files on disk. +func (t *walTracker) SetSegments(sz uint64) { + labels := t.labels + t.metrics.Segments.With(labels).Set(float64(sz)) +} + +// IncSegments increases the number of segment files by one. +func (t *walTracker) IncSegments() { + labels := t.labels + t.metrics.Segments.With(labels).Inc() +} + +// DecSegments decreases the number of segment files by one. 
+func (t *walTracker) DecSegments() { + labels := t.labels + t.metrics.Segments.With(labels).Dec() +} + // WALEntry is record stored in each WAL segment. Each entry has a type // and an opaque, type dependent byte slice data attribute. type WALEntry interface {