Include the number of scanned cached values in the iterator cost

2017-09-05 14:15:20 -05:00 · 2017-09-05 14:15:20 -05:00 · 590be193e5
parent 091ea5f9a5
commit 590be193e5
4 changed files with 27 additions and 14 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -24,6 +24,7 @@
 - [#8752](https://github.com/influxdata/influxdb/pull/8752): Use system cursors for measurement, series, and tag key meta queries.
 - [#6563](https://github.com/influxdata/influxdb/issues/6563): Support Ctrl+C to cancel a running query in the Influx CLI. Thanks @emluque!
 - [#8776](https://github.com/influxdata/influxdb/pull/8776): Initial implementation of explain plan.
+- [#8791](https://github.com/influxdata/influxdb/pull/8791): Include the number of scanned cached values in the iterator cost.

 ### Bugfixes

--- a/query/explain.go
+++ b/query/explain.go
@ -41,6 +41,7 @@ func (p *preparedStatement) Explain() (string, error) {
 		}
 		fmt.Fprintf(&buf, "NUMBER OF SHARDS: %d\n", node.Cost.NumShards)
 		fmt.Fprintf(&buf, "NUMBER OF SERIES: %d\n", node.Cost.NumSeries)
+		fmt.Fprintf(&buf, "CACHED VALUES: %d\n", node.Cost.CachedValues)
 		fmt.Fprintf(&buf, "NUMBER OF FILES: %d\n", node.Cost.NumFiles)
 		fmt.Fprintf(&buf, "NUMBER OF BLOCKS: %d\n", node.Cost.BlocksRead)
 		fmt.Fprintf(&buf, "SIZE OF BLOCKS: %d\n", node.Cost.BlockSize)
--- a/query/iterator.go
+++ b/query/iterator.go
@ -1355,6 +1355,10 @@ type IteratorCost struct {
 	// one cursor is created for every series.
 	NumSeries int64

+	// CachedValues is the number of cached values that may be read by this
+	// query.
+	CachedValues int64
+
 	// The total number of non-unique files that may be accessed by this query.
 	// This will count the number of files accessed by each series so files
 	// will likely be double counted.
@ -1370,11 +1374,12 @@ type IteratorCost struct {
 // Combine combines the results of two IteratorCost structures into one.
 func (c IteratorCost) Combine(other IteratorCost) IteratorCost {
 	return IteratorCost{
-		NumShards:  c.NumShards + other.NumShards,
-		NumSeries:  c.NumSeries + other.NumSeries,
-		NumFiles:   c.NumFiles + other.NumFiles,
-		BlocksRead: c.BlocksRead + other.BlocksRead,
-		BlockSize:  c.BlockSize + other.BlockSize,
+		NumShards:    c.NumShards + other.NumShards,
+		NumSeries:    c.NumSeries + other.NumSeries,
+		CachedValues: c.CachedValues + other.CachedValues,
+		NumFiles:     c.NumFiles + other.NumFiles,
+		BlocksRead:   c.BlocksRead + other.BlocksRead,
+		BlockSize:    c.BlockSize + other.BlockSize,
 	}
 }

--- a/tsdb/engine/tsm1/engine.go
+++ b/tsdb/engine/tsm1/engine.go
@ -17,16 +17,15 @@ import (
 	"sync/atomic"
 	"time"

-	"github.com/influxdata/influxdb/query"
-	"github.com/influxdata/influxdb/tsdb/index/inmem"
-
 	"github.com/influxdata/influxdb/influxql"
 	"github.com/influxdata/influxdb/models"
 	"github.com/influxdata/influxdb/pkg/bytesutil"
 	"github.com/influxdata/influxdb/pkg/estimator"
 	"github.com/influxdata/influxdb/pkg/limiter"
+	"github.com/influxdata/influxdb/query"
 	"github.com/influxdata/influxdb/tsdb"
 	_ "github.com/influxdata/influxdb/tsdb/index"
+	"github.com/influxdata/influxdb/tsdb/index/inmem"
 	"github.com/uber-go/zap"
 )

@ -2173,8 +2172,7 @@ func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (qu
 		for i, key := range t.SeriesKeys {
 			// Retrieve the cost for the main expression (if it exists).
 			if ref != nil {
-				k := SeriesFieldKey(key, ref.Val)
-				c := e.FileStore.Cost([]byte(k), opt.StartTime, opt.EndTime)
+				c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime)
 				cost = cost.Combine(c)
 			}

@ -2184,8 +2182,7 @@ func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (qu
 			// anywhere close to the full costs of the auxiliary iterators because
 			// many of the selected values are usually skipped.
 			for _, ref := range opt.Aux {
-				k := SeriesFieldKey(key, ref.Val)
-				c := e.FileStore.Cost([]byte(k), opt.StartTime, opt.EndTime)
+				c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime)
 				cost = cost.Combine(c)
 			}

@ -2194,8 +2191,7 @@ func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (qu
 			if t.Filters[i] != nil {
 				refs := influxql.ExprNames(t.Filters[i])
 				for _, ref := range refs {
-					k := SeriesFieldKey(key, ref.Val)
-					c := e.FileStore.Cost([]byte(k), opt.StartTime, opt.EndTime)
+					c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime)
 					cost = cost.Combine(c)
 				}
 			}
@ -2204,6 +2200,16 @@ func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (qu
 	return cost, nil
 }

+func (e *Engine) seriesCost(seriesKey, field string, tmin, tmax int64) query.IteratorCost {
+	key := SeriesFieldKeyBytes(seriesKey, field)
+	c := e.FileStore.Cost(key, tmin, tmax)
+
+	// Retrieve the range of values within the cache.
+	cacheValues := e.Cache.Values(key)
+	c.CachedValues = int64(len(cacheValues.Include(tmin, tmax)))
+	return c
+}
+
 func (e *Engine) SeriesPointIterator(opt query.IteratorOptions) (query.Iterator, error) {
 	return e.index.SeriesPointIterator(opt)
 }