// influxdb/tsdb/tsm1/engine_delete_prefix.go

package tsm1

import (
	"bytes"
	"math"
	"sync"

	"github.com/influxdata/influxdb/models"
	"github.com/influxdata/influxdb/pkg/bytesutil"
	"github.com/influxdata/influxdb/tsdb"

	"github.com/influxdata/influxql"
)

// Predicate is something that can match on a series key. It also exports some other
// methods that can be used in order to more efficiently walk indexes.
type Predicate interface {
	Matches(key []byte) bool
	Measurement() []byte // if non-nil, specifies a specific measurement to match
}
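
// containsTagPairPredicate is an illustrative sketch, not part of the original
// implementation: a minimal Predicate that matches any series key containing a given
// tag pair. The type and field names here are hypothetical.
type containsTagPairPredicate struct {
	pair []byte // e.g. []byte("region=west")
}

// Matches reports whether the serialized series key contains the tag pair.
func (p *containsTagPairPredicate) Matches(key []byte) bool { return bytes.Contains(key, p.pair) }

// Measurement returns nil because this predicate does not target a single measurement.
func (p *containsTagPairPredicate) Measurement() []byte { return nil }
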
// DeletePrefixRange removes all TSM data belonging to a bucket, and removes all index
// and series file data associated with the bucket. The provided time range ensures
// that only bucket data for that range is removed.
func (e *Engine) DeletePrefixRange(name []byte, min, max int64, pred Predicate) error {
	// TODO(jeff): we need to block writes to this prefix while deletes are in progress
	// otherwise we can end up in a situation where we have staged data in the cache or
	// WAL that was deleted from the index, or worse. This needs to happen at a higher
	// layer.

	// TODO(jeff): ensure the engine is not closed while we're running this. At least
	// now we know that the series file or index won't be closed out from underneath
	// of us.

	// Ensure that the index does not compact away the measurement or series we're
	// going to delete before we're done with them.
	e.index.DisableCompactions()
	defer e.index.EnableCompactions()
	e.index.Wait()

	// Disable and abort running compactions so that tombstones added to existing tsm
	// files don't get removed. This would cause deleted measurements/series to
	// re-appear once the compaction completed. We only disable the level compactions
	// so that snapshotting does not stop while writing out tombstones. If it is stopped,
	// and writing tombstones takes a long time, writes can get rejected due to the cache
	// filling up.
	e.disableLevelCompactions(true)
	defer e.enableLevelCompactions(true)

	e.sfile.DisableCompactions()
	defer e.sfile.EnableCompactions()

	// TODO(jeff): are the query language values still a thing?
	// Min and max time in the engine are slightly different from the query language values.
	if min == influxql.MinTime {
		min = math.MinInt64
	}
	if max == influxql.MaxTime {
		max = math.MaxInt64
	}

	// Run the delete on each TSM file in parallel and keep track of possibly dead keys.
	// TODO(jeff): keep a set of keys for each file to avoid contention.
	// TODO(jeff): come up with a better way to figure out what keys we need to delete
	// from the index.
	var possiblyDead struct {
		sync.RWMutex
		keys map[string]struct{}
	}
	possiblyDead.keys = make(map[string]struct{})

	if err := e.FileStore.Apply(func(r TSMFile) error {
		return r.DeletePrefix(name, min, max, func(key []byte) {
			possiblyDead.Lock()
			possiblyDead.keys[string(key)] = struct{}{}
			possiblyDead.Unlock()
		})
	}); err != nil {
		return err
	}

	var deleteKeys [][]byte

	// ApplyEntryFn cannot return an error in this invocation.
	_ = e.Cache.ApplyEntryFn(func(k []byte, _ *entry) error {
		if bytes.HasPrefix(k, name) {
			if deleteKeys == nil {
				deleteKeys = make([][]byte, 0, 10000)
			}
			deleteKeys = append(deleteKeys, k)

			// We have to double check every key in the cache because it may
			// exist in the index but not yet be on disk.
			possiblyDead.keys[string(k)] = struct{}{}
		}
		return nil
	})

	// Sort the series keys because ApplyEntryFn iterates over the keys randomly.
	bytesutil.Sort(deleteKeys)

	// Delete from the cache.
	e.Cache.DeleteBucketRange(name, min, max)

	// Now that all of the data is purged, we need to find out whether some keys are
	// fully deleted and, if so, remove them from the index.
	if err := e.FileStore.Apply(func(r TSMFile) error {
		possiblyDead.RLock()
		defer possiblyDead.RUnlock()

		iter := r.Iterator(name)
		for i := 0; iter.Next(); i++ {
			key := iter.Key()
			if !bytes.HasPrefix(key, name) {
				break
			}

			// TODO(jeff): benchmark the locking here.
			if i%1024 == 0 { // Periodically drop the read lock to allow writes to proceed.
				possiblyDead.RUnlock()
				possiblyDead.RLock()
			}

			if _, ok := possiblyDead.keys[string(key)]; ok {
				possiblyDead.RUnlock()
				possiblyDead.Lock()
				delete(possiblyDead.keys, string(key))
				possiblyDead.Unlock()
				possiblyDead.RLock()
			}
		}

		return iter.Err()
	}); err != nil {
		return err
	}

	// Any key that still has entries in the cache is not fully dead, so drop it from
	// the candidate set.
	// ApplyEntryFn cannot return an error in this invocation.
	_ = e.Cache.ApplyEntryFn(func(k []byte, _ *entry) error {
		if bytes.HasPrefix(k, name) {
			delete(possiblyDead.keys, string(k))
		}
		return nil
	})

	if len(possiblyDead.keys) > 0 {
		buf := make([]byte, 1024)

		// TODO(jeff): all of these methods have possible errors which opens us to partial
		// failure scenarios. we need to either ensure that partial errors here are ok or
		// do something to fix it.
		// TODO(jeff): it's also important that all of the deletes happen atomically with
		// the deletes of the data in the tsm files.

		// In this case the entire measurement (bucket) can be removed from the index.
		if min == math.MinInt64 && max == math.MaxInt64 {
			// The TSI index and Series File do not store series data in escaped form.
			name = models.UnescapeMeasurement(name)

			// Build up a set of series IDs that we need to remove from the series file.
			set := tsdb.NewSeriesIDSet()
			itr, err := e.index.MeasurementSeriesIDIterator(name)
			if err != nil {
				return err
			}

			var elem tsdb.SeriesIDElem
			for elem, err = itr.Next(); err == nil; elem, err = itr.Next() {
				if elem.SeriesID.IsZero() {
					break
				}
				set.AddNoLock(elem.SeriesID)
			}
			if err != nil {
				return err
			} else if err := itr.Close(); err != nil {
				return err
			}

			// Remove the measurement from the index before the series file.
			if err := e.index.DropMeasurement(name); err != nil {
				return err
			}

			// Iterate over the series ids we previously extracted from the index
			// and remove from the series file.
			set.ForEachNoLock(func(id tsdb.SeriesID) {
				if err = e.sfile.DeleteSeriesID(id); err != nil {
					return
				}
			})

			return err
		}

		// This is the slow path, taken when we are not dropping the entire bucket (measurement).
		for key := range possiblyDead.keys {
			// TODO(jeff): ugh reduce copies here
			keyb := []byte(key)
			keyb, _ = SeriesAndFieldFromCompositeKey(keyb)

			name, tags := models.ParseKeyBytes(keyb)
			sid := e.sfile.SeriesID(name, tags, buf)
			if sid.IsZero() {
				continue
			}

			if err := e.index.DropSeries(sid, keyb, true); err != nil {
				return err
			}

			if err := e.sfile.DeleteSeriesID(sid); err != nil {
				return err
			}
		}
	}

	return nil
}
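
// Example usage (an illustrative sketch, not part of the original implementation):
// dropping every series for a bucket by passing the full time range and a nil
// predicate. How the prefix is derived (tsdb.EncodeName) and the variable names
// below are assumptions.
//
//	name := tsdb.EncodeName(orgID, bucketID) // org+bucket ID encoded as the series prefix
//	if err := engine.DeletePrefixRange(name[:], math.MinInt64, math.MaxInt64, nil); err != nil {
//		return err
//	}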