// influxdb/tsdb/store.go

package tsdb // import "github.com/influxdata/influxdb/tsdb"
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/estimator"
"github.com/influxdata/influxdb/pkg/estimator/hll"
"github.com/influxdata/influxdb/pkg/limiter"
"github.com/influxdata/influxdb/query"
"github.com/influxdata/influxql"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
var (
// ErrShardNotFound is returned when trying to get a non-existent shard.
ErrShardNotFound = fmt.Errorf("shard not found")
// ErrStoreClosed is returned when trying to use a closed Store.
ErrStoreClosed = fmt.Errorf("store is closed")
// ErrShardDeletion is returned when trying to create a shard that is being deleted
ErrShardDeletion = errors.New("shard is being deleted")
)
// Statistics gathered by the store.
const (
statDatabaseSeries = "numSeries" // number of series in a database
statDatabaseMeasurements = "numMeasurements" // number of measurements in a database
)
// SeriesFileDirectory is the name of the directory containing series files for
// a database.
const SeriesFileDirectory = "_series"
// Store manages shards and indexes for databases.
type Store struct {
mu sync.RWMutex
shards map[uint64]*Shard
databases map[string]struct{}
sfiles map[string]*SeriesFile
SeriesFileMaxSize int64 // Determines size of series file mmap. Can be altered in tests.
path string
// shared per-database indexes, only if using "inmem".
indexes map[string]interface{}
// Maintains a set of shards that are in the process of deletion.
// This prevents new shards from being created while old ones are being deleted.
pendingShardDeletes map[uint64]struct{}
EngineOptions EngineOptions
baseLogger *zap.Logger
Logger *zap.Logger
closing chan struct{}
wg sync.WaitGroup
opened bool
}
// NewStore returns a new store with the given path and a default configuration.
// The returned store must be initialized by calling Open before using it.
func NewStore(path string) *Store {
logger := zap.NewNop()
return &Store{
databases: make(map[string]struct{}),
path: path,
sfiles: make(map[string]*SeriesFile),
indexes: make(map[string]interface{}),
pendingShardDeletes: make(map[uint64]struct{}),
EngineOptions: NewEngineOptions(),
Logger: logger,
baseLogger: logger,
}
}
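// Example usage (an illustrative sketch; the data and WAL paths are
// assumptions, not defaults of this package):
//
//	store := NewStore("/var/lib/influxdb/data")
//	store.EngineOptions.Config.WALDir = "/var/lib/influxdb/wal"
//	if err := store.Open(); err != nil {
//		// handle error
//	}
//	defer store.Close()
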
// WithLogger sets the logger for the store.
func (s *Store) WithLogger(log *zap.Logger) {
s.baseLogger = log
s.Logger = log.With(zap.String("service", "store"))
for _, sh := range s.shards {
sh.WithLogger(s.baseLogger)
}
}
// Statistics returns statistics for periodic monitoring.
func (s *Store) Statistics(tags map[string]string) []models.Statistic {
s.mu.RLock()
shards := s.shardsSlice()
s.mu.RUnlock()
// Add all the series and measurements cardinality estimations.
databases := s.Databases()
statistics := make([]models.Statistic, 0, len(databases))
for _, database := range databases {
sc, err := s.SeriesCardinality(database)
if err != nil {
s.Logger.Info("Cannot retrieve series cardinality", zap.Error(err))
continue
}
mc, err := s.MeasurementsCardinality(database)
if err != nil {
s.Logger.Info("Cannot retrieve measurement cardinality", zap.Error(err))
continue
}
statistics = append(statistics, models.Statistic{
Name: "database",
Tags: models.StatisticTags{"database": database}.Merge(tags),
Values: map[string]interface{}{
statDatabaseSeries: sc,
statDatabaseMeasurements: mc,
},
})
}
// Gather all statistics for all shards.
for _, shard := range shards {
statistics = append(statistics, shard.Statistics(tags)...)
}
return statistics
}
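// IndexBytes returns the number of bytes of memory used by the index data
// structures of all shards in the store, counting each shared inmem index
// only once.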
func (s *Store) IndexBytes() int {
// Build index set to work on.
is := IndexSet{Indexes: make([]Index, 0, len(s.shardIDs()))}
s.mu.RLock()
for _, sid := range s.shardIDs() {
shard, ok := s.shards[sid]
if !ok {
continue
}
if is.SeriesFile == nil {
is.SeriesFile = shard.sfile
}
is.Indexes = append(is.Indexes, shard.index)
}
s.mu.RUnlock()
is = is.DedupeInmemIndexes()
var b int
for _, idx := range is.Indexes {
b += idx.Bytes()
}
return b
}
// Path returns the store's root path.
func (s *Store) Path() string { return s.path }
// Open initializes the store, creating all necessary directories, loading all
// shards as well as initializing periodic maintenance of them.
func (s *Store) Open() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.opened {
// Already open
return nil
}
s.closing = make(chan struct{})
s.shards = map[uint64]*Shard{}
s.Logger.Info("Using data dir", zap.String("path", s.Path()))
// Create directory.
if err := os.MkdirAll(s.path, 0777); err != nil {
return err
}
if err := s.loadShards(); err != nil {
return err
}
s.opened = true
if !s.EngineOptions.MonitorDisabled {
s.wg.Add(1)
go func() {
defer s.wg.Done()
s.monitorShards()
}()
}
return nil
}
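// loadShards walks the store's data directory and opens every shard it finds,
// using a bounded number of goroutines. The expected on-disk layout is
//
//	<data-dir>/<database>/_series          (series file for the database)
//	<data-dir>/<database>/<rp>/<shard-id>  (one directory per shard)
//
// with a matching WAL tree rooted at EngineOptions.Config.WALDir.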
func (s *Store) loadShards() error {
// res holds the result from opening each shard in a goroutine
type res struct {
s *Shard
err error
}
// Limit the number of concurrent TSM files to be opened to the number of cores.
s.EngineOptions.OpenLimiter = limiter.NewFixed(runtime.GOMAXPROCS(0))
// Setup a shared limiter for compactions
lim := s.EngineOptions.Config.MaxConcurrentCompactions
if lim == 0 {
lim = runtime.GOMAXPROCS(0) / 2 // Default to 50% of cores for compactions
// On systems with more cores, cap at 4 to reduce disk utilization
if lim > 4 {
lim = 4
}
if lim < 1 {
lim = 1
}
}
// Don't allow more compactions to run than cores.
if lim > runtime.GOMAXPROCS(0) {
lim = runtime.GOMAXPROCS(0)
}
s.EngineOptions.CompactionLimiter = limiter.NewFixed(lim)
// Env var to disable throughput limiter. This will be moved to a config option in 1.5.
if os.Getenv("INFLUXDB_DATA_COMPACTION_THROUGHPUT") == "" {
s.EngineOptions.CompactionThroughputLimiter = limiter.NewRate(48*1024*1024, 48*1024*1024)
} else {
s.Logger.Info("Compaction throughput limit disabled")
}
log, logEnd := logger.NewOperation(s.Logger, "Open store", "tsdb_open")
defer logEnd()
t := limiter.NewFixed(runtime.GOMAXPROCS(0))
resC := make(chan *res)
var n int
// Determine how many shards we need to open by checking the store path.
dbDirs, err := ioutil.ReadDir(s.path)
if err != nil {
return err
}
for _, db := range dbDirs {
dbPath := filepath.Join(s.path, db.Name())
if !db.IsDir() {
log.Info("Skipping database dir", zap.String("name", db.Name()), zap.String("reason", "not a directory"))
continue
}
if s.EngineOptions.DatabaseFilter != nil && !s.EngineOptions.DatabaseFilter(db.Name()) {
log.Info("Skipping database dir", logger.Database(db.Name()), zap.String("reason", "failed database filter"))
continue
}
// Load series file.
sfile, err := s.openSeriesFile(db.Name())
if err != nil {
return err
}
// Retrieve database index.
idx, err := s.createIndexIfNotExists(db.Name())
if err != nil {
return err
}
// Load each retention policy within the database directory.
rpDirs, err := ioutil.ReadDir(dbPath)
if err != nil {
return err
}
for _, rp := range rpDirs {
rpPath := filepath.Join(s.path, db.Name(), rp.Name())
if !rp.IsDir() {
log.Info("Skipping retention policy dir", zap.String("name", rp.Name()), zap.String("reason", "not a directory"))
continue
}
// The _series directory is not a retention policy.
if rp.Name() == SeriesFileDirectory {
continue
}
if s.EngineOptions.RetentionPolicyFilter != nil && !s.EngineOptions.RetentionPolicyFilter(db.Name(), rp.Name()) {
log.Info("Skipping retention policy dir", logger.RetentionPolicy(rp.Name()), zap.String("reason", "failed retention policy filter"))
continue
}
shardDirs, err := ioutil.ReadDir(rpPath)
if err != nil {
return err
}
for _, sh := range shardDirs {
n++
go func(db, rp, sh string) {
t.Take()
defer t.Release()
start := time.Now()
path := filepath.Join(s.path, db, rp, sh)
walPath := filepath.Join(s.EngineOptions.Config.WALDir, db, rp, sh)
// Shard directory names are numeric shard IDs.
shardID, err := strconv.ParseUint(sh, 10, 64)
if err != nil {
log.Info("invalid shard ID found at path", zap.String("path", path))
resC <- &res{err: fmt.Errorf("%s is not a valid ID. Skipping shard.", sh)}
return
}
if s.EngineOptions.ShardFilter != nil && !s.EngineOptions.ShardFilter(db, rp, shardID) {
log.Info("skipping shard", zap.String("path", path), logger.Shard(shardID))
resC <- &res{}
return
}
// Copy options and assign shared index.
opt := s.EngineOptions
opt.InmemIndex = idx
// Provide an implementation of the SeriesIDSets interface.
opt.SeriesIDSets = shardSet{store: s, db: db}
// Existing shards should continue to use inmem index.
if _, err := os.Stat(filepath.Join(path, "index")); os.IsNotExist(err) {
opt.IndexVersion = "inmem"
}
// Open engine.
shard := NewShard(shardID, path, walPath, sfile, opt)
// Disable compactions, writes and queries until all shards are loaded
shard.EnableOnOpen = false
shard.CompactionDisabled = s.EngineOptions.CompactionDisabled
shard.WithLogger(s.baseLogger)
err = shard.Open()
if err != nil {
log.Info("Failed to open shard", logger.Shard(shardID), zap.Error(err))
resC <- &res{err: fmt.Errorf("Failed to open shard: %d: %s", shardID, err)}
return
}
resC <- &res{s: shard}
log.Info("Opened shard", zap.String("index_version", shard.IndexType()), zap.String("path", path), zap.Duration("duration", time.Since(start)))
}(db.Name(), rp.Name(), sh.Name())
}
}
}
// indexVersions tracks counts of the number of different types of index
// being used within each database.
indexVersions := make(map[string]map[string]int)
// Gather results of opening shards concurrently, keeping track of how
// many databases we are managing.
for i := 0; i < n; i++ {
res := <-resC
if res.s == nil || res.err != nil {
continue
}
s.shards[res.s.id] = res.s
s.databases[res.s.database] = struct{}{}
if _, ok := indexVersions[res.s.database]; !ok {
indexVersions[res.s.database] = make(map[string]int, 2)
}
indexVersions[res.s.database][res.s.IndexType()]++
}
close(resC)
// Check if any databases are running multiple index types.
for db, idxVersions := range indexVersions {
if len(idxVersions) > 1 {
var fields []zapcore.Field
for idx, cnt := range idxVersions {
fields = append(fields, zap.Int(fmt.Sprintf("%s_count", idx), cnt))
}
s.Logger.Warn("Mixed shard index types", append(fields, logger.Database(db))...)
}
}
// Enable all shards
for _, sh := range s.shards {
sh.SetEnabled(true)
if sh.IsIdle() {
if err := sh.Free(); err != nil {
return err
}
}
}
return nil
}
// Close closes the store and all associated shards. After calling Close accessing
// shards through the Store will result in ErrStoreClosed being returned.
func (s *Store) Close() error {
s.mu.Lock()
if s.opened {
close(s.closing)
}
s.mu.Unlock()
s.wg.Wait()
// No other goroutines accessing the store, so no need for a Lock.
// Close all the shards in parallel.
if err := s.walkShards(s.shardsSlice(), func(sh *Shard) error {
return sh.Close()
}); err != nil {
return err
}
s.mu.Lock()
for _, sfile := range s.sfiles {
// Close out the series files.
if err := sfile.Close(); err != nil {
s.mu.Unlock()
return err
}
}
s.databases = make(map[string]struct{})
s.sfiles = map[string]*SeriesFile{}
s.indexes = make(map[string]interface{})
s.pendingShardDeletes = make(map[uint64]struct{})
s.shards = nil
s.opened = false // Store may now be opened again.
s.mu.Unlock()
return nil
}
// openSeriesFile either returns or creates a series file for the provided
// database. It must be called under a full lock.
func (s *Store) openSeriesFile(database string) (*SeriesFile, error) {
if sfile := s.sfiles[database]; sfile != nil {
return sfile, nil
}
sfile := NewSeriesFile(filepath.Join(s.path, database, SeriesFileDirectory))
sfile.Logger = s.baseLogger
if err := sfile.Open(); err != nil {
return nil, err
}
s.sfiles[database] = sfile
return sfile, nil
}
func (s *Store) seriesFile(database string) *SeriesFile {
s.mu.RLock()
defer s.mu.RUnlock()
return s.sfiles[database]
}
// createIndexIfNotExists returns a shared index for a database, if the inmem
// index is being used. If the TSI index is being used, then this method is
// basically a no-op.
func (s *Store) createIndexIfNotExists(name string) (interface{}, error) {
if idx := s.indexes[name]; idx != nil {
return idx, nil
}
sfile, err := s.openSeriesFile(name)
if err != nil {
return nil, err
}
idx, err := NewInmemIndex(name, sfile)
if err != nil {
return nil, err
}
s.indexes[name] = idx
return idx, nil
}
// Shard returns a shard by id.
func (s *Store) Shard(id uint64) *Shard {
s.mu.RLock()
defer s.mu.RUnlock()
sh, ok := s.shards[id]
if !ok {
return nil
}
return sh
}
// Shards returns a list of shards by id.
func (s *Store) Shards(ids []uint64) []*Shard {
s.mu.RLock()
defer s.mu.RUnlock()
a := make([]*Shard, 0, len(ids))
for _, id := range ids {
sh, ok := s.shards[id]
if !ok {
continue
}
a = append(a, sh)
}
return a
}
// ShardGroup returns a ShardGroup with a list of shards by id.
func (s *Store) ShardGroup(ids []uint64) ShardGroup {
return Shards(s.Shards(ids))
}
// ShardN returns the number of shards in the store.
func (s *Store) ShardN() int {
s.mu.RLock()
defer s.mu.RUnlock()
return len(s.shards)
}
// ShardDigest returns a digest of the shard with the specified ID.
func (s *Store) ShardDigest(id uint64) (io.ReadCloser, int64, error) {
sh := s.Shard(id)
if sh == nil {
return nil, 0, ErrShardNotFound
}
return sh.Digest()
}
// CreateShard creates a shard with the given id and retention policy on a database.
func (s *Store) CreateShard(database, retentionPolicy string, shardID uint64, enabled bool) error {
s.mu.Lock()
defer s.mu.Unlock()
select {
case <-s.closing:
return ErrStoreClosed
default:
}
// Shard already exists.
if _, ok := s.shards[shardID]; ok {
return nil
}
// Shard may be undergoing a pending deletion. While the shard can be
// recreated, it must wait for the pending delete to finish.
if _, ok := s.pendingShardDeletes[shardID]; ok {
return ErrShardDeletion
}
// Create the db and retention policy directories if they don't exist.
if err := os.MkdirAll(filepath.Join(s.path, database, retentionPolicy), 0700); err != nil {
return err
}
// Create the WAL directory.
walPath := filepath.Join(s.EngineOptions.Config.WALDir, database, retentionPolicy, fmt.Sprintf("%d", shardID))
if err := os.MkdirAll(walPath, 0700); err != nil {
return err
}
// Retrieve database series file.
sfile, err := s.openSeriesFile(database)
if err != nil {
return err
}
// Retrieve shared index, if needed.
idx, err := s.createIndexIfNotExists(database)
if err != nil {
return err
}
// Copy index options and pass in shared index.
opt := s.EngineOptions
opt.InmemIndex = idx
opt.SeriesIDSets = shardSet{store: s, db: database}
path := filepath.Join(s.path, database, retentionPolicy, strconv.FormatUint(shardID, 10))
shard := NewShard(shardID, path, walPath, sfile, opt)
shard.WithLogger(s.baseLogger)
shard.EnableOnOpen = enabled
if err := shard.Open(); err != nil {
return err
}
s.shards[shardID] = shard
s.databases[database] = struct{}{} // Ensure we are tracking any new db.
return nil
}
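// Example (an illustrative sketch; the database name, retention policy and
// shard ID are placeholders):
//
//	if err := store.CreateShard("db0", "autogen", 1, true); err != nil {
//		// handle error
//	}
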
// CreateShardSnapshot will create a hard link to the underlying shard and return a path.
// The caller is responsible for cleaning up (removing) the file path returned.
func (s *Store) CreateShardSnapshot(id uint64) (string, error) {
sh := s.Shard(id)
if sh == nil {
return "", ErrShardNotFound
}
return sh.CreateSnapshot()
}
// SetShardEnabled enables or disables a shard for read and writes.
func (s *Store) SetShardEnabled(shardID uint64, enabled bool) error {
sh := s.Shard(shardID)
if sh == nil {
return ErrShardNotFound
}
sh.SetEnabled(enabled)
return nil
}
// DeleteShard removes a shard from disk.
func (s *Store) DeleteShard(shardID uint64) error {
sh := s.Shard(shardID)
if sh == nil {
return nil
}
// Remove the shard from Store so it's not returned to callers requesting
// shards. Also mark that this shard is currently being deleted in a separate
// map so that we do not have to retain the global store lock while deleting
// files.
s.mu.Lock()
if _, ok := s.pendingShardDeletes[shardID]; ok {
// The shard may already be in the process of being deleted. This is possible if DeleteShard
// was called twice in sequence before the shard could be removed from
// the mapping.
// This is not an error because deleting a shard twice is not an error.
s.mu.Unlock()
return nil
}
delete(s.shards, shardID)
s.pendingShardDeletes[shardID] = struct{}{}
s.mu.Unlock()
// Ensure the pending deletion flag is cleared on exit.
defer func() {
s.mu.Lock()
defer s.mu.Unlock()
delete(s.pendingShardDeletes, shardID)
}()
// Get the shard's local bitset of series IDs.
index, err := sh.Index()
if err != nil {
return err
}
ss := index.SeriesIDSet()
db := sh.Database()
if err := sh.Close(); err != nil {
return err
}
// Determine if the shard contained any series that are not present in any
// other shards in the database.
shards := s.filterShards(byDatabase(db))
s.walkShards(shards, func(sh *Shard) error {
index, err := sh.Index()
if err != nil {
return err
}
ss.Diff(index.SeriesIDSet())
return nil
})
// Remove any remaining series in the set from the series file, as they don't
// exist in any of the database's remaining shards.
if ss.Cardinality() > 0 {
sfile := s.seriesFile(db)
if sfile != nil {
ss.ForEach(func(id uint64) {
sfile.DeleteSeriesID(id)
})
}
}
// Remove the on-disk shard data.
if err := os.RemoveAll(sh.path); err != nil {
return err
}
return os.RemoveAll(sh.walPath)
}
// DeleteDatabase will close all shards associated with a database and remove the directory and files from disk.
func (s *Store) DeleteDatabase(name string) error {
s.mu.RLock()
if _, ok := s.databases[name]; !ok {
s.mu.RUnlock()
// no files locally, so nothing to do
return nil
}
shards := s.filterShards(func(sh *Shard) bool {
return sh.database == name
})
s.mu.RUnlock()
if err := s.walkShards(shards, func(sh *Shard) error {
if sh.database != name {
return nil
}
return sh.Close()
}); err != nil {
return err
}
dbPath := filepath.Clean(filepath.Join(s.path, name))
s.mu.Lock()
defer s.mu.Unlock()
sfile := s.sfiles[name]
delete(s.sfiles, name)
// Close series file.
if sfile != nil {
if err := sfile.Close(); err != nil {
return err
}
}
// Extra sanity check to make sure that, even if someone names their database "../..",
// we don't delete everything above the data directory; they'll just have extra files forever.
if filepath.Clean(s.path) != filepath.Dir(dbPath) {
return fmt.Errorf("invalid database directory location for database '%s': %s", name, dbPath)
}
if err := os.RemoveAll(dbPath); err != nil {
return err
}
if err := os.RemoveAll(filepath.Join(s.EngineOptions.Config.WALDir, name)); err != nil {
return err
}
for _, sh := range shards {
delete(s.shards, sh.id)
}
// Remove database from store list of databases
delete(s.databases, name)
// Remove shared index for database if using inmem index.
delete(s.indexes, name)
return nil
}
// DeleteRetentionPolicy will close all shards associated with the
// provided retention policy, remove the retention policy directories on
// both the DB and WAL, and remove all shard files from disk.
func (s *Store) DeleteRetentionPolicy(database, name string) error {
s.mu.RLock()
if _, ok := s.databases[database]; !ok {
s.mu.RUnlock()
// unknown database, nothing to do
return nil
}
shards := s.filterShards(func(sh *Shard) bool {
return sh.database == database && sh.retentionPolicy == name
})
s.mu.RUnlock()
// Close and delete all shards under the retention policy on the
// database.
if err := s.walkShards(shards, func(sh *Shard) error {
if sh.database != database || sh.retentionPolicy != name {
return nil
}
return sh.Close()
}); err != nil {
return err
}
// Determine the on-disk path of the retention policy folder.
rpPath := filepath.Clean(filepath.Join(s.path, database, name))
// ensure Store's path is the grandparent of the retention policy
if filepath.Clean(s.path) != filepath.Dir(filepath.Dir(rpPath)) {
return fmt.Errorf("invalid path for database '%s', retention policy '%s': %s", database, name, rpPath)
}
// Remove the retention policy folder.
if err := os.RemoveAll(filepath.Join(s.path, database, name)); err != nil {
return err
}
// Remove the retention policy folder from the WAL.
if err := os.RemoveAll(filepath.Join(s.EngineOptions.Config.WALDir, database, name)); err != nil {
return err
}
s.mu.Lock()
for _, sh := range shards {
delete(s.shards, sh.id)
}
s.mu.Unlock()
return nil
}
// DeleteMeasurement removes a measurement and all associated series from a database.
func (s *Store) DeleteMeasurement(database, name string) error {
s.mu.RLock()
shards := s.filterShards(byDatabase(database))
s.mu.RUnlock()
// Limit to 1 delete for each shard since expanding the measurement into the list
// of series keys can be very memory intensive if run concurrently.
limit := limiter.NewFixed(1)
return s.walkShards(shards, func(sh *Shard) error {
limit.Take()
defer limit.Release()
return sh.DeleteMeasurement([]byte(name))
})
}
// filterShards returns a slice of shards where fn returns true
// for the shard. If the provided predicate is nil then all shards are returned.
func (s *Store) filterShards(fn func(sh *Shard) bool) []*Shard {
var shards []*Shard
if fn == nil {
shards = make([]*Shard, 0, len(s.shards))
fn = func(*Shard) bool { return true }
} else {
shards = make([]*Shard, 0)
}
for _, sh := range s.shards {
if fn(sh) {
shards = append(shards, sh)
}
}
return shards
}
// byDatabase provides a predicate for filterShards that matches on the name of
// the database passed in.
func byDatabase(name string) func(sh *Shard) bool {
return func(sh *Shard) bool {
return sh.database == name
}
}
// walkShards applies a function to each shard in parallel. fn must be safe for
// concurrent use. If any of the functions return an error, the first error is
// returned.
func (s *Store) walkShards(shards []*Shard, fn func(sh *Shard) error) error {
// res holds the result of applying fn to a single shard in a goroutine.
type res struct {
err error
}
resC := make(chan res)
var n int
for _, sh := range shards {
n++
go func(sh *Shard) {
if err := fn(sh); err != nil {
resC <- res{err: fmt.Errorf("shard %d: %s", sh.id, err)}
return
}
resC <- res{}
}(sh)
}
var err error
for i := 0; i < n; i++ {
res := <-resC
if res.err != nil {
err = res.err
}
}
close(resC)
return err
}
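// Example (an illustrative sketch, mirroring how DeleteDatabase uses these
// helpers): close every shard belonging to database "db0".
//
//	s.mu.RLock()
//	shards := s.filterShards(byDatabase("db0"))
//	s.mu.RUnlock()
//	err := s.walkShards(shards, func(sh *Shard) error {
//		return sh.Close()
//	})
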
// ShardIDs returns a slice of all ShardIDs under management.
func (s *Store) ShardIDs() []uint64 {
s.mu.RLock()
defer s.mu.RUnlock()
return s.shardIDs()
}
func (s *Store) shardIDs() []uint64 {
a := make([]uint64, 0, len(s.shards))
for shardID := range s.shards {
a = append(a, shardID)
}
return a
}
// shardsSlice returns an ordered list of shards.
func (s *Store) shardsSlice() []*Shard {
a := make([]*Shard, 0, len(s.shards))
for _, sh := range s.shards {
a = append(a, sh)
}
sort.Sort(Shards(a))
return a
}
// Databases returns the names of all databases managed by the store.
func (s *Store) Databases() []string {
s.mu.RLock()
defer s.mu.RUnlock()
databases := make([]string, 0, len(s.databases))
for k := range s.databases {
databases = append(databases, k)
}
return databases
}
// DiskSize returns the size of all the shard files in bytes.
// This size does not include the WAL size.
func (s *Store) DiskSize() (int64, error) {
var size int64
s.mu.RLock()
allShards := s.filterShards(nil)
s.mu.RUnlock()
for _, sh := range allShards {
sz, err := sh.DiskSize()
if err != nil {
return 0, err
}
size += sz
}
return size, nil
}
// sketchesForDatabase returns merged sketches for the provided database, by
// walking each shard in the database and merging the sketches found there.
func (s *Store) sketchesForDatabase(dbName string, getSketches func(*Shard) (estimator.Sketch, estimator.Sketch, error)) (estimator.Sketch, estimator.Sketch, error) {
var (
ss estimator.Sketch // Sketch estimating number of items.
ts estimator.Sketch // Sketch estimating number of tombstoned items.
)
s.mu.RLock()
shards := s.filterShards(byDatabase(dbName))
s.mu.RUnlock()
// Never return nil sketches. If the database exists but no data has been
// written, return empty sketches.
if len(shards) == 0 {
ss, ts = hll.NewDefaultPlus(), hll.NewDefaultPlus()
}
// Iterate over all shards for the database and combine all of the sketches.
for _, shard := range shards {
s, t, err := getSketches(shard)
if err != nil {
return nil, nil, err
}
if ss == nil {
ss, ts = s, t
} else if err = ss.Merge(s); err != nil {
return nil, nil, err
} else if err = ts.Merge(t); err != nil {
return nil, nil, err
}
}
return ss, ts, nil
}
// SeriesCardinality returns the exact series cardinality for the provided
// database.
//
// Cardinality is calculated exactly by unioning all shards' bitsets of series
// IDs. The result of this method cannot be combined with any other results.
//
func (s *Store) SeriesCardinality(database string) (int64, error) {
s.mu.RLock()
shards := s.filterShards(byDatabase(database))
s.mu.RUnlock()
var setMu sync.Mutex
others := make([]*SeriesIDSet, 0, len(shards))
s.walkShards(shards, func(sh *Shard) error {
index, err := sh.Index()
if err != nil {
return err
}
seriesIDs := index.SeriesIDSet()
setMu.Lock()
others = append(others, seriesIDs)
setMu.Unlock()
return nil
})
ss := NewSeriesIDSet()
ss.Merge(others...)
return int64(ss.Cardinality()), nil
}
// SeriesSketches returns the sketches associated with the series data in all
// the shards in the provided database.
//
// The returned sketches can be combined with other sketches to provide an
// estimation across distributed databases.
func (s *Store) SeriesSketches(database string) (estimator.Sketch, estimator.Sketch, error) {
return s.sketchesForDatabase(database, func(sh *Shard) (estimator.Sketch, estimator.Sketch, error) {
if sh == nil {
return nil, nil, errors.New("shard nil, can't get cardinality")
}
return sh.SeriesSketches()
})
}
// MeasurementsCardinality returns an estimation of the measurement cardinality
// for the provided database.
//
// Cardinality is calculated using a sketch-based estimation. The result of this
// method cannot be combined with any other results.
func (s *Store) MeasurementsCardinality(database string) (int64, error) {
ss, ts, err := s.sketchesForDatabase(database, func(sh *Shard) (estimator.Sketch, estimator.Sketch, error) {
if sh == nil {
return nil, nil, errors.New("shard nil, can't get cardinality")
}
return sh.MeasurementsSketches()
})
if err != nil {
return 0, err
}
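// The estimate is the sketch count of all measurements ever written minus the
// sketch count of tombstoned measurements, so it may drift slightly from the
// true cardinality.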
return int64(ss.Count() - ts.Count()), nil
}
// MeasurementsSketches returns the sketches associated with the measurement
// data in all the shards in the provided database.
//
// The returned sketches can be combined with other sketches to provide an
// estimation across distributed databases.
func (s *Store) MeasurementsSketches(database string) (estimator.Sketch, estimator.Sketch, error) {
return s.sketchesForDatabase(database, func(sh *Shard) (estimator.Sketch, estimator.Sketch, error) {
if sh == nil {
return nil, nil, errors.New("shard nil, can't get cardinality")
}
return sh.MeasurementsSketches()
})
}
// BackupShard will get the shard and have the engine back up all data modified
// since the passed-in time to the writer.
func (s *Store) BackupShard(id uint64, since time.Time, w io.Writer) error {
shard := s.Shard(id)
if shard == nil {
return fmt.Errorf("shard %d doesn't exist on this server", id)
}
path, err := relativePath(s.path, shard.path)
if err != nil {
return err
}
return shard.Backup(w, path, since)
}
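// ExportShard will get the shard and have the engine export all of its data
// between the start and end times to the writer.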
func (s *Store) ExportShard(id uint64, start time.Time, end time.Time, w io.Writer) error {
shard := s.Shard(id)
if shard == nil {
return fmt.Errorf("shard %d doesn't exist on this server", id)
}
path, err := relativePath(s.path, shard.path)
if err != nil {
return err
}
return shard.Export(w, path, start, end)
}
// RestoreShard restores a backup from r to a given shard.
// This will only overwrite files included in the backup.
func (s *Store) RestoreShard(id uint64, r io.Reader) error {
shard := s.Shard(id)
if shard == nil {
return fmt.Errorf("shard %d doesn't exist on this server", id)
}
path, err := relativePath(s.path, shard.path)
if err != nil {
return err
}
return shard.Restore(r, path)
}
// ImportShard imports the contents of r to a given shard.
// All files in the backup are added as new files which may
// cause duplicated data to occur requiring more expensive
// compactions.
func (s *Store) ImportShard(id uint64, r io.Reader) error {
shard := s.Shard(id)
if shard == nil {
return fmt.Errorf("shard %d doesn't exist on this server", id)
}
path, err := relativePath(s.path, shard.path)
if err != nil {
return err
}
return shard.Import(r, path)
}
// ShardRelativePath will return the relative path to the shard, i.e.,
// <database>/<retention>/<id>.
func (s *Store) ShardRelativePath(id uint64) (string, error) {
shard := s.Shard(id)
if shard == nil {
return "", fmt.Errorf("shard %d doesn't exist on this server", id)
}
return relativePath(s.path, shard.path)
}
// DeleteSeries loops through the local shards and deletes the series data for
// the passed in series keys.
func (s *Store) DeleteSeries(database string, sources []influxql.Source, condition influxql.Expr) error {
// Expand regex expressions in the FROM clause.
a, err := s.ExpandSources(sources)
if err != nil {
return err
} else if len(sources) > 0 && len(a) == 0 {
return nil
}
sources = a
// Determine deletion time range.
condition, timeRange, err := influxql.ConditionExpr(condition, nil)
if err != nil {
return err
}
var min, max int64
if !timeRange.Min.IsZero() {
min = timeRange.Min.UnixNano()
} else {
min = influxql.MinTime
}
if !timeRange.Max.IsZero() {
max = timeRange.Max.UnixNano()
} else {
max = influxql.MaxTime
}
s.mu.RLock()
sfile := s.sfiles[database]
if sfile == nil {
s.mu.RUnlock()
// No series file means nothing has been written to this DB and thus nothing to delete.
return nil
}
shards := s.filterShards(byDatabase(database))
s.mu.RUnlock()
// Limit to 1 delete for each shard since expanding the measurement into the list
// of series keys can be very memory intensive if run concurrently.
limit := limiter.NewFixed(1)
return s.walkShards(shards, func(sh *Shard) error {
// Determine list of measurements from sources.
// Use all measurements if no FROM clause was provided.
var names []string
if len(sources) > 0 {
for _, source := range sources {
names = append(names, source.(*influxql.Measurement).Name)
}
} else {
if err := sh.ForEachMeasurementName(func(name []byte) error {
names = append(names, string(name))
return nil
}); err != nil {
return err
}
}
sort.Strings(names)
limit.Take()
defer limit.Release()
index, err := sh.Index()
if err != nil {
return err
}
indexSet := IndexSet{Indexes: []Index{index}, SeriesFile: sfile}
// Find matching series keys for each measurement.
for _, name := range names {
itr, err := indexSet.MeasurementSeriesByExprIterator([]byte(name), condition)
if err != nil {
return err
} else if itr == nil {
continue
}
defer itr.Close()
if err := sh.DeleteSeriesRange(NewSeriesIteratorAdapter(sfile, itr), min, max); err != nil {
return err
}
}
return nil
})
}
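// Example (an illustrative sketch; the database and measurement names are
// placeholders): delete all series of measurement "cpu" in database "db0".
//
//	sources := []influxql.Source{&influxql.Measurement{Name: "cpu"}}
//	err := store.DeleteSeries("db0", sources, nil)
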
// ExpandSources expands sources against all local shards.
func (s *Store) ExpandSources(sources influxql.Sources) (influxql.Sources, error) {
shards := func() Shards {
s.mu.RLock()
defer s.mu.RUnlock()
return Shards(s.shardsSlice())
}()
return shards.ExpandSources(sources)
}
// WriteToShard writes a list of points to a shard identified by its ID.
func (s *Store) WriteToShard(shardID uint64, points []models.Point) error {
s.mu.RLock()
select {
case <-s.closing:
s.mu.RUnlock()
return ErrStoreClosed
default:
}
sh := s.shards[shardID]
if sh == nil {
s.mu.RUnlock()
return ErrShardNotFound
}
s.mu.RUnlock()
// Ensure snapshot compactions are enabled since the shard might have been cold
// and disabled by the monitor.
if sh.IsIdle() {
sh.SetCompactionsEnabled(true)
}
return sh.WritePoints(points)
}
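// Example (an illustrative sketch; the shard ID, measurement, tags and field
// values are placeholders):
//
//	pt, err := models.NewPoint("cpu",
//		models.NewTags(map[string]string{"host": "server01"}),
//		models.Fields{"value": 0.64}, time.Now())
//	if err == nil {
//		err = store.WriteToShard(1, []models.Point{pt})
//	}
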
// MeasurementNames returns a slice of all measurement names for the provided
// database. It accepts an optional condition expression; if cond is nil, all
// measurements for the database are returned.
func (s *Store) MeasurementNames(auth query.Authorizer, database string, cond influxql.Expr) ([][]byte, error) {
s.mu.RLock()
shards := s.filterShards(byDatabase(database))
s.mu.RUnlock()
sfile := s.seriesFile(database)
if sfile == nil {
return nil, nil
}
// Build indexset.
is := IndexSet{Indexes: make([]Index, 0, len(shards)), SeriesFile: sfile}
for _, sh := range shards {
index, err := sh.Index()
if err != nil {
return nil, err
}
is.Indexes = append(is.Indexes, index)
}
is = is.DedupeInmemIndexes()
return is.MeasurementNamesByExpr(auth, cond)
}
// MeasurementSeriesCounts returns the number of measurements and series in all
// the shards' indices.
func (s *Store) MeasurementSeriesCounts(database string) (measurements int, series int) {
// TODO: implement me
return 0, 0
}
type TagKeys struct {
Measurement string
Keys []string
}
type TagKeysSlice []TagKeys
func (a TagKeysSlice) Len() int { return len(a) }
func (a TagKeysSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a TagKeysSlice) Less(i, j int) bool { return a[i].Measurement < a[j].Measurement }
// TagKeys returns the tag keys for the provided shards, matching the condition.
func (s *Store) TagKeys(auth query.Authorizer, shardIDs []uint64, cond influxql.Expr) ([]TagKeys, error) {
if len(shardIDs) == 0 {
return nil, nil
}
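// cond is split into two expressions: measurementExpr retains only predicates
// on the measurement name (_name), which select the measurements to inspect,
// while filterExpr retains only predicates on regular tags, which filter the
// series (and therefore the tag keys) within each measurement.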
measurementExpr := influxql.CloneExpr(cond)
measurementExpr = influxql.Reduce(influxql.RewriteExpr(measurementExpr, func(e influxql.Expr) influxql.Expr {
switch e := e.(type) {
case *influxql.BinaryExpr:
switch e.Op {
case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
tag, ok := e.LHS.(*influxql.VarRef)
if !ok || tag.Val != "_name" {
return nil
}
}
}
return e
}), nil)
filterExpr := influxql.CloneExpr(cond)
filterExpr = influxql.Reduce(influxql.RewriteExpr(filterExpr, func(e influxql.Expr) influxql.Expr {
switch e := e.(type) {
case *influxql.BinaryExpr:
switch e.Op {
case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
tag, ok := e.LHS.(*influxql.VarRef)
if !ok || strings.HasPrefix(tag.Val, "_") {
return nil
}
}
}
return e
}), nil)
// Get all the shards we're interested in.
is := IndexSet{Indexes: make([]Index, 0, len(shardIDs))}
s.mu.RLock()
for _, sid := range shardIDs {
shard, ok := s.shards[sid]
if !ok {
continue
}
if is.SeriesFile == nil {
is.SeriesFile = shard.sfile
}
is.Indexes = append(is.Indexes, shard.index)
}
s.mu.RUnlock()
// Determine list of measurements.
is = is.DedupeInmemIndexes()
names, err := is.MeasurementNamesByExpr(nil, measurementExpr)
if err != nil {
return nil, err
}
// Iterate over each measurement.
var results []TagKeys
for _, name := range names {
// Build keyset over all indexes for measurement.
tagKeySet, err := is.MeasurementTagKeysByExpr(name, nil)
if err != nil {
return nil, err
} else if len(tagKeySet) == 0 {
continue
}
keys := make([]string, 0, len(tagKeySet))
// If no tag value filter is present then all the tag keys can be returned
// if they have authorized series associated with them.
if filterExpr == nil {
for tagKey := range tagKeySet {
ok, err := is.TagKeyHasAuthorizedSeries(auth, []byte(name), []byte(tagKey))
if err != nil {
return nil, err
} else if ok {
keys = append(keys, tagKey)
}
}
sort.Strings(keys)
// Add to resultset.
results = append(results, TagKeys{
Measurement: string(name),
Keys: keys,
})
continue
}
// Tag filter provided so filter keys first.
// Sort the tag keys.
for k := range tagKeySet {
keys = append(keys, k)
}
sort.Strings(keys)
// Filter against tag values, skip if no values exist.
values, err := is.MeasurementTagKeyValuesByExpr(auth, name, keys, filterExpr, true)
if err != nil {
return nil, err
}
// Filter final tag keys using the matching values. If a key has one or
// more matching values then it will be included in the final set.
finalKeys := keys[:0] // Use same backing array as keys to save allocation.
for i, k := range keys {
if len(values[i]) > 0 {
// Tag key k has one or more matching tag values.
finalKeys = append(finalKeys, k)
}
}
// Add to resultset.
results = append(results, TagKeys{
Measurement: string(name),
Keys: finalKeys,
})
}
return results, nil
}
type TagValues struct {
Measurement string
Values []KeyValue
}
type TagValuesSlice []TagValues
func (a TagValuesSlice) Len() int { return len(a) }
func (a TagValuesSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a TagValuesSlice) Less(i, j int) bool { return a[i].Measurement < a[j].Measurement }
// tagValues is a temporary representation of a TagValues. Rather than allocating
// KeyValues as we build up a TagValues object, we hold off allocating KeyValues
// until we have merged multiple tagValues together.
type tagValues struct {
name []byte
keys []string
values [][]string
}
// tagValuesSlice is a slice of tagValues that can be sorted by measurement.
type tagValuesSlice []tagValues
func (a tagValuesSlice) Len() int { return len(a) }
func (a tagValuesSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a tagValuesSlice) Less(i, j int) bool { return bytes.Compare(a[i].name, a[j].name) == -1 }
// TagValues returns the tag keys and values for the provided shards, where the
// tag values satisfy the provided condition.
func (s *Store) TagValues(auth query.Authorizer, shardIDs []uint64, cond influxql.Expr) ([]TagValues, error) {
if cond == nil {
return nil, errors.New("a condition is required")
}
measurementExpr := influxql.CloneExpr(cond)
measurementExpr = influxql.Reduce(influxql.RewriteExpr(measurementExpr, func(e influxql.Expr) influxql.Expr {
switch e := e.(type) {
case *influxql.BinaryExpr:
switch e.Op {
case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
tag, ok := e.LHS.(*influxql.VarRef)
if !ok || tag.Val != "_name" {
return nil
}
}
}
return e
}), nil)
filterExpr := influxql.CloneExpr(cond)
filterExpr = influxql.Reduce(influxql.RewriteExpr(filterExpr, func(e influxql.Expr) influxql.Expr {
switch e := e.(type) {
case *influxql.BinaryExpr:
switch e.Op {
case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
tag, ok := e.LHS.(*influxql.VarRef)
if !ok || strings.HasPrefix(tag.Val, "_") {
return nil
}
}
}
return e
}), nil)
// Build index set to work on.
is := IndexSet{Indexes: make([]Index, 0, len(shardIDs))}
s.mu.RLock()
for _, sid := range shardIDs {
shard, ok := s.shards[sid]
if !ok {
continue
}
if is.SeriesFile == nil {
is.SeriesFile = shard.sfile
}
is.Indexes = append(is.Indexes, shard.index)
}
s.mu.RUnlock()
is = is.DedupeInmemIndexes()
// Stores each list of TagValues for each measurement.
var allResults []tagValues
var maxMeasurements int // Hint as to lower bound on number of measurements.
// names will be sorted by MeasurementNamesByExpr.
// Authorisation can be done later on, when series may have been filtered
// out by other conditions.
names, err := is.MeasurementNamesByExpr(nil, measurementExpr)
if err != nil {
return nil, err
}
if len(names) > maxMeasurements {
maxMeasurements = len(names)
}
if allResults == nil {
allResults = make([]tagValues, 0, len(is.Indexes)*len(names)) // Assuming all series in all shards.
}
// Iterate over each matching measurement in the shard. For each
// measurement we'll get the matching tag keys (e.g., when a WITH KEYS)
// statement is used, and we'll then use those to fetch all the relevant
// values from matching series. Series may be filtered using a WHERE
// filter.
for _, name := range names {
// Determine a list of keys from condition.
keySet, err := is.MeasurementTagKeysByExpr(name, cond)
if err != nil {
return nil, err
}
if len(keySet) == 0 {
// No matching tag keys for this measurement
continue
}
result := tagValues{
name: name,
keys: make([]string, 0, len(keySet)),
}
// Add the keys to the tagValues and sort them.
for k := range keySet {
result.keys = append(result.keys, k)
}
sort.Sort(sort.StringSlice(result.keys))
// Get all the tag values for each key in the key set. Each slice in the
// results contains the sorted values associated with each tag key for the
// measurement from the key set.
if result.values, err = is.MeasurementTagKeyValuesByExpr(auth, name, result.keys, filterExpr, true); err != nil {
return nil, err
}
// remove any tag keys that didn't have any authorized values
j := 0
for i := range result.keys {
if len(result.values[i]) == 0 {
continue
}
result.keys[j] = result.keys[i]
result.values[j] = result.values[i]
j++
}
result.keys = result.keys[:j]
result.values = result.values[:j]
// only include result if there are keys with values
if len(result.keys) > 0 {
allResults = append(allResults, result)
}
}
result := make([]TagValues, 0, maxMeasurements)
// We need to sort all results by measurement name.
if len(is.Indexes) > 1 {
sort.Sort(tagValuesSlice(allResults))
}
// The next stage is to merge the tagValue results for each shard's measurements.
var i, j int
// Used as a temporary buffer in mergeTagValues. There can be at most len(shards)
// instances of tagValues for a given measurement.
idxBuf := make([][2]int, 0, len(is.Indexes))
for i < len(allResults) {
// Gather all occurrences of the same measurement for merging.
for j+1 < len(allResults) && bytes.Equal(allResults[j+1].name, allResults[i].name) {
j++
}
// An invariant is that there can't be more than n instances of tag
// key value pairs for a given measurement, where n is the number of
// shards.
if got, exp := j-i+1, len(is.Indexes); got > exp {
return nil, fmt.Errorf("unexpected results returned from engine. Got %d measurement sets for %d shards", got, exp)
}
nextResult := mergeTagValues(idxBuf, allResults[i:j+1]...)
i = j + 1
if len(nextResult.Values) > 0 {
result = append(result, nextResult)
}
}
return result, nil
}
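// Example (an illustrative sketch; the condition string and the nil authorizer
// are assumptions):
//
//	cond, _ := influxql.ParseExpr(`"_name" = 'cpu'`)
//	tagValues, err := store.TagValues(nil, store.ShardIDs(), cond)
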
// mergeTagValues merges multiple sorted sets of temporary tagValues using a
// direct k-way merge whilst also removing duplicated entries. The result is a
// single TagValue type.
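//
// For example, merging {("host","a"), ("host","b")} from one shard with
// {("host","b"), ("region","us")} from another yields
// {("host","a"), ("host","b"), ("region","us")}; the duplicate ("host","b") is
// emitted only once.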
//
// TODO(edd): a Tournament based merge (see: Knuth's TAOCP 5.4.1) might be more
// appropriate at some point.
//
func mergeTagValues(valueIdxs [][2]int, tvs ...tagValues) TagValues {
var result TagValues
if len(tvs) == 0 {
return TagValues{}
} else if len(tvs) == 1 {
result.Measurement = string(tvs[0].name)
// TODO(edd): will be too small likely. Find a hint?
result.Values = make([]KeyValue, 0, len(tvs[0].values))
for ki, key := range tvs[0].keys {
for _, value := range tvs[0].values[ki] {
result.Values = append(result.Values, KeyValue{Key: key, Value: value})
}
}
return result
}
result.Measurement = string(tvs[0].name)
var maxSize int
for _, tv := range tvs {
if len(tv.values) > maxSize {
maxSize = len(tv.values)
}
}
result.Values = make([]KeyValue, 0, maxSize) // This will likely be too small but it's a start.
// Resize and reset to the number of TagValues we're merging.
valueIdxs = valueIdxs[:len(tvs)]
for i := 0; i < len(valueIdxs); i++ {
valueIdxs[i][0], valueIdxs[i][1] = 0, 0
}
var (
j int
keyCmp, valCmp int
)
for {
// Determine which of the provided tagValues sets currently holds the smallest
// element; j is the candidate we're going to pick next for the result set.
j = -1
// Find the smallest element
for i := 0; i < len(tvs); i++ {
if valueIdxs[i][0] >= len(tvs[i].keys) {
continue // We have completely drained all tag keys and values for this shard.
} else if len(tvs[i].values[valueIdxs[i][0]]) == 0 {
// There are no tag values for these keys.
valueIdxs[i][0]++
valueIdxs[i][1] = 0
continue
} else if j == -1 {
// We haven't picked a best TagValues set yet. Pick this one.
j = i
continue
}
// If this tag key is lower than the candidate's tag key, it becomes the new candidate.
keyCmp = strings.Compare(tvs[i].keys[valueIdxs[i][0]], tvs[j].keys[valueIdxs[j][0]])
if keyCmp == -1 {
j = i
} else if keyCmp == 0 {
valCmp = strings.Compare(tvs[i].values[valueIdxs[i][0]][valueIdxs[i][1]], tvs[j].values[valueIdxs[j][0]][valueIdxs[j][1]])
// Same tag key but this tag value is lower than the candidate.
if valCmp == -1 {
j = i
} else if valCmp == 0 {
// Duplicate tag key/value pair. Skip it and move on to the next value
// for shard i.
valueIdxs[i][1]++
if valueIdxs[i][1] >= len(tvs[i].values[valueIdxs[i][0]]) {
// Drained all these tag values, move onto next key.
valueIdxs[i][0]++
valueIdxs[i][1] = 0
}
}
}
}
// We could have drained all of the TagValue sets and be done...
if j == -1 {
break
}
// Append the smallest KeyValue
result.Values = append(result.Values, KeyValue{
Key: string(tvs[j].keys[valueIdxs[j][0]]),
Value: tvs[j].values[valueIdxs[j][0]][valueIdxs[j][1]],
})
// Increment the indexes for the chosen TagValue.
valueIdxs[j][1]++
if valueIdxs[j][1] >= len(tvs[j].values[valueIdxs[j][0]]) {
// Drained all these tag values, move onto next key.
valueIdxs[j][0]++
valueIdxs[j][1] = 0
}
}
return result
}
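// monitorShards runs in its own goroutine, started from Open, and performs two
// periodic jobs: every 10 seconds it frees resources held by cold (idle)
// shards, and every minute it samples tag-value cardinality on inmem-indexed
// shards and warns when a tag approaches the max-values-per-tag limit.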
func (s *Store) monitorShards() {
t := time.NewTicker(10 * time.Second)
defer t.Stop()
t2 := time.NewTicker(time.Minute)
defer t2.Stop()
for {
select {
case <-s.closing:
return
case <-t.C:
s.mu.RLock()
for _, sh := range s.shards {
if sh.IsIdle() {
if err := sh.Free(); err != nil {
s.Logger.Warn("Error while freeing cold shard resources", zap.Error(err))
}
} else {
sh.SetCompactionsEnabled(true)
}
}
s.mu.RUnlock()
case <-t2.C:
if s.EngineOptions.Config.MaxValuesPerTag == 0 {
continue
}
s.mu.RLock()
shards := s.filterShards(func(sh *Shard) bool {
return sh.IndexType() == "inmem"
})
s.mu.RUnlock()
// No inmem shards...
if len(shards) == 0 {
continue
}
var dbLock sync.Mutex
databases := make(map[string]struct{}, len(shards))
s.walkShards(shards, func(sh *Shard) error {
db := sh.database
// Only process 1 shard from each database
dbLock.Lock()
if _, ok := databases[db]; ok {
dbLock.Unlock()
return nil
}
databases[db] = struct{}{}
dbLock.Unlock()
sfile := s.seriesFile(sh.database)
if sfile == nil {
return nil
}
firstShardIndex, err := sh.Index()
if err != nil {
return err
}
index, err := sh.Index()
if err != nil {
return err
}
// inmem shards share the same index instance so just use the first one to avoid
// allocating the same measurements repeatedly
indexSet := IndexSet{Indexes: []Index{firstShardIndex}, SeriesFile: sfile}
names, err := indexSet.MeasurementNamesByExpr(nil, nil)
if err != nil {
s.Logger.Warn("Cannot retrieve measurement names", zap.Error(err))
return nil
}
indexSet.Indexes = []Index{index}
for _, name := range names {
indexSet.ForEachMeasurementTagKey(name, func(k []byte) error {
n := sh.TagKeyCardinality(name, k)
perc := int(float64(n) / float64(s.EngineOptions.Config.MaxValuesPerTag) * 100)
if perc > 100 {
perc = 100
}
// Log at 80, 85, 90-100% levels
if perc == 80 || perc == 85 || perc >= 90 {
s.Logger.Warn("max-values-per-tag limit may be exceeded soon",
zap.String("perc", fmt.Sprintf("%d%%", perc)),
zap.Int("n", n),
zap.Int("max", s.EngineOptions.Config.MaxValuesPerTag),
logger.Database(db),
zap.ByteString("measurement", name),
zap.ByteString("tag", k))
}
return nil
})
}
return nil
})
}
}
}
// KeyValue holds a string key and a string value.
type KeyValue struct {
Key, Value string
}
// KeyValues is a sortable slice of KeyValue.
type KeyValues []KeyValue
// Len implements sort.Interface.
func (a KeyValues) Len() int { return len(a) }
// Swap implements sort.Interface.
func (a KeyValues) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// Less implements sort.Interface. Keys are compared before values.
func (a KeyValues) Less(i, j int) bool {
ki, kj := a[i].Key, a[j].Key
if ki == kj {
return a[i].Value < a[j].Value
}
return ki < kj
}
// decodeStorePath extracts the database and retention policy names
// from a given shard or WAL path.
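// For example (the path below is illustrative),
// decodeStorePath("/var/lib/influxdb/data/db0/autogen/1") returns
// ("db0", "autogen").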
func decodeStorePath(shardOrWALPath string) (database, retentionPolicy string) {
// shardOrWALPath format: /maybe/absolute/base/then/:database/:retentionPolicy/:nameOfShardOrWAL
// Discard the last part of the path (the shard name or the wal name).
path, _ := filepath.Split(filepath.Clean(shardOrWALPath))
// Extract the database and retention policy.
path, rp := filepath.Split(filepath.Clean(path))
_, db := filepath.Split(filepath.Clean(path))
return db, rp
}
// relativePath will expand out the full paths passed in and return
// the relative shard path from the store
func relativePath(storePath, shardPath string) (string, error) {
path, err := filepath.Abs(storePath)
if err != nil {
return "", fmt.Errorf("store abs path: %s", err)
}
fp, err := filepath.Abs(shardPath)
if err != nil {
return "", fmt.Errorf("file abs path: %s", err)
}
name, err := filepath.Rel(path, fp)
if err != nil {
return "", fmt.Errorf("file rel path: %s", err)
}
return name, nil
}
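// shardSet provides a view over the series ID sets of every shard in a single
// database; it is handed to engines via EngineOptions.SeriesIDSets. ForEach
// applies f to each shard's series ID set.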
type shardSet struct {
store *Store
db string
}
func (s shardSet) ForEach(f func(ids *SeriesIDSet)) error {
s.store.mu.RLock()
shards := s.store.filterShards(byDatabase(s.db))
s.store.mu.RUnlock()
for _, sh := range shards {
idx, err := sh.Index()
if err != nil {
return err
}
f(idx.SeriesIDSet())
}
return nil
}