diff --git a/CHANGELOG.md b/CHANGELOG.md index ba1f26befd..dc6b58cc50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - [#8752](https://github.com/influxdata/influxdb/pull/8752): Use system cursors for measurement, series, and tag key meta queries. - [#6563](https://github.com/influxdata/influxdb/issues/6563): Support Ctrl+C to cancel a running query in the Influx CLI. Thanks @emluque! - [#8776](https://github.com/influxdata/influxdb/pull/8776): Initial implementation of explain plan. +- [#8791](https://github.com/influxdata/influxdb/pull/8791): Include the number of scanned cached values in the iterator cost. ### Bugfixes diff --git a/query/explain.go b/query/explain.go index 71f99ed24f..ee0fb8bb0f 100644 --- a/query/explain.go +++ b/query/explain.go @@ -41,6 +41,7 @@ func (p *preparedStatement) Explain() (string, error) { } fmt.Fprintf(&buf, "NUMBER OF SHARDS: %d\n", node.Cost.NumShards) fmt.Fprintf(&buf, "NUMBER OF SERIES: %d\n", node.Cost.NumSeries) + fmt.Fprintf(&buf, "CACHED VALUES: %d\n", node.Cost.CachedValues) fmt.Fprintf(&buf, "NUMBER OF FILES: %d\n", node.Cost.NumFiles) fmt.Fprintf(&buf, "NUMBER OF BLOCKS: %d\n", node.Cost.BlocksRead) fmt.Fprintf(&buf, "SIZE OF BLOCKS: %d\n", node.Cost.BlockSize) diff --git a/query/iterator.go b/query/iterator.go index 07767a529c..00417a9637 100644 --- a/query/iterator.go +++ b/query/iterator.go @@ -1355,6 +1355,10 @@ type IteratorCost struct { // one cursor is created for every series. NumSeries int64 + // CachedValues is the number of cached values that may be read by this + // query. + CachedValues int64 + // The total number of non-unique files that may be accessed by this query. // This will count the number of files accessed by each series so files // will likely be double counted. @@ -1370,11 +1374,12 @@ type IteratorCost struct { // Combine combines the results of two IteratorCost structures into one. 
func (c IteratorCost) Combine(other IteratorCost) IteratorCost { return IteratorCost{ - NumShards: c.NumShards + other.NumShards, - NumSeries: c.NumSeries + other.NumSeries, - NumFiles: c.NumFiles + other.NumFiles, - BlocksRead: c.BlocksRead + other.BlocksRead, - BlockSize: c.BlockSize + other.BlockSize, + NumShards: c.NumShards + other.NumShards, + NumSeries: c.NumSeries + other.NumSeries, + CachedValues: c.CachedValues + other.CachedValues, + NumFiles: c.NumFiles + other.NumFiles, + BlocksRead: c.BlocksRead + other.BlocksRead, + BlockSize: c.BlockSize + other.BlockSize, } } diff --git a/tsdb/engine/tsm1/engine.go b/tsdb/engine/tsm1/engine.go index ce8edf60cd..7518853ae1 100644 --- a/tsdb/engine/tsm1/engine.go +++ b/tsdb/engine/tsm1/engine.go @@ -17,16 +17,15 @@ import ( "sync/atomic" "time" - "github.com/influxdata/influxdb/query" - "github.com/influxdata/influxdb/tsdb/index/inmem" - "github.com/influxdata/influxdb/influxql" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/pkg/bytesutil" "github.com/influxdata/influxdb/pkg/estimator" "github.com/influxdata/influxdb/pkg/limiter" + "github.com/influxdata/influxdb/query" "github.com/influxdata/influxdb/tsdb" _ "github.com/influxdata/influxdb/tsdb/index" + "github.com/influxdata/influxdb/tsdb/index/inmem" "github.com/uber-go/zap" ) @@ -2173,8 +2172,7 @@ func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (qu for i, key := range t.SeriesKeys { // Retrieve the cost for the main expression (if it exists). if ref != nil { - k := SeriesFieldKey(key, ref.Val) - c := e.FileStore.Cost([]byte(k), opt.StartTime, opt.EndTime) + c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime) cost = cost.Combine(c) } @@ -2184,8 +2182,7 @@ func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (qu // anywhere close to the full costs of the auxiliary iterators because // many of the selected values are usually skipped. 
for _, ref := range opt.Aux { - k := SeriesFieldKey(key, ref.Val) - c := e.FileStore.Cost([]byte(k), opt.StartTime, opt.EndTime) + c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime) cost = cost.Combine(c) } @@ -2194,8 +2191,7 @@ func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (qu if t.Filters[i] != nil { refs := influxql.ExprNames(t.Filters[i]) for _, ref := range refs { - k := SeriesFieldKey(key, ref.Val) - c := e.FileStore.Cost([]byte(k), opt.StartTime, opt.EndTime) + c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime) cost = cost.Combine(c) } } @@ -2204,6 +2200,16 @@ func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (qu return cost, nil } +func (e *Engine) seriesCost(seriesKey, field string, tmin, tmax int64) query.IteratorCost { + key := SeriesFieldKeyBytes(seriesKey, field) + c := e.FileStore.Cost(key, tmin, tmax) + + // Retrieve the range of values within the cache. + cacheValues := e.Cache.Values(key) + c.CachedValues = int64(len(cacheValues.Include(tmin, tmax))) + return c +} + func (e *Engine) SeriesPointIterator(opt query.IteratorOptions) (query.Iterator, error) { return e.index.SeriesPointIterator(opt) }