influxdb/tsdb/tsi1/file_set.go

package tsi1

import (
	"bytes"
	"fmt"
	"regexp"
	"sync"
	"unsafe"

	"github.com/influxdata/influxql"
	"github.com/influxdata/platform/pkg/estimator"
	"github.com/influxdata/platform/pkg/estimator/hll"
	"github.com/influxdata/platform/tsdb"
)

// FileSet represents a collection of files.
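//
// A FileSet is treated as an immutable snapshot: operations such as
// PrependLogFile and MustReplace return a new set rather than mutating the
// receiver. Callers retain the set before use and release it afterwards,
// e.g. (a minimal sketch, assuming fs is a *FileSet obtained from the index):
//
//	fs.Retain()
//	defer fs.Release()
//	itr := fs.MeasurementIterator()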
type FileSet struct {
	levels       []CompactionLevel
	sfile        *tsdb.SeriesFile
	files        []File
	manifestSize int64 // Size of the manifest file in bytes.
}

// NewFileSet returns a new instance of FileSet.
func NewFileSet(levels []CompactionLevel, sfile *tsdb.SeriesFile, files []File) (*FileSet, error) {
	return &FileSet{
		levels: levels,
		sfile:  sfile,
		files:  files,
	}, nil
}

// bytes estimates the memory footprint of this FileSet, in bytes.
func (fs *FileSet) bytes() int {
	var b int
	for _, level := range fs.levels {
		b += int(unsafe.Sizeof(level))
	}
	// Do not count SeriesFile because it belongs to the code that constructed this FileSet.
	for _, file := range fs.files {
		b += file.bytes()
	}
	b += int(unsafe.Sizeof(*fs))
	return b
}

// Close closes all the files in the file set.
func (fs *FileSet) Close() error {
	var err error
	for _, f := range fs.files {
		if e := f.Close(); e != nil && err == nil {
			err = e
		}
	}
	return err
}

// Retain adds a reference count to all files.
func (fs *FileSet) Retain() {
	for _, f := range fs.files {
		f.Retain()
	}
}

// Release removes a reference count from all files.
func (fs *FileSet) Release() {
	for _, f := range fs.files {
		f.Release()
	}
}

// SeriesFile returns the attached series file.
func (fs *FileSet) SeriesFile() *tsdb.SeriesFile { return fs.sfile }

// PrependLogFile returns a new file set with f added at the beginning.
// Filters do not need to be rebuilt because log files have no bloom filter.
func (fs *FileSet) PrependLogFile(f *LogFile) *FileSet {
	return &FileSet{
		levels: fs.levels,
		sfile:  fs.sfile,
		files:  append([]File{f}, fs.files...),
	}
}

// Size returns the on-disk size of the FileSet.
func (fs *FileSet) Size() int64 {
	var total int64
	for _, f := range fs.files {
		total += f.Size()
	}
	return total + fs.manifestSize
}

// MustReplace swaps a list of files for a single file and returns a new file set.
// The caller should always guarantee that the files exist and are contiguous.
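// For example, with hypothetical file IDs, replacing the contiguous files
// [2 3] in the set [1 2 3 4] with a single compacted file 5 yields the set
// [1 5 4].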
func (fs *FileSet) MustReplace(oldFiles []File, newFile File) *FileSet {
	assert(len(oldFiles) > 0, "cannot replace empty files")

	// Find index of first old file.
	var i int
	for ; i < len(fs.files); i++ {
		if fs.files[i] == oldFiles[0] {
			break
		} else if i == len(fs.files)-1 {
			panic("first replacement file not found")
		}
	}

	// Ensure all old files are contiguous.
	for j := range oldFiles {
		if fs.files[i+j] != oldFiles[j] {
			panic(fmt.Sprintf("cannot replace non-contiguous files: subset=%+v, fileset=%+v", Files(oldFiles).IDs(), Files(fs.files).IDs()))
		}
	}

	// Copy to new fileset.
	other := make([]File, len(fs.files)-len(oldFiles)+1)
	copy(other[:i], fs.files[:i])
	other[i] = newFile
	copy(other[i+1:], fs.files[i+len(oldFiles):])

	// Build new fileset and rebuild changed filters.
	return &FileSet{
		levels: fs.levels,
		sfile:  fs.sfile, // Carry the series file reference forward.
		files:  other,
	}
}

// MaxID returns the highest file identifier.
func (fs *FileSet) MaxID() int {
	var max int
	for _, f := range fs.files {
		if i := f.ID(); i > max {
			max = i
		}
	}
	return max
}

// Files returns all files in the set.
func (fs *FileSet) Files() []File {
	return fs.files
}

// LogFiles returns all log files from the file set.
func (fs *FileSet) LogFiles() []*LogFile {
	var a []*LogFile
	for _, f := range fs.files {
		if f, ok := f.(*LogFile); ok {
			a = append(a, f)
		}
	}
	return a
}

// IndexFiles returns all index files from the file set.
func (fs *FileSet) IndexFiles() []*IndexFile {
	var a []*IndexFile
	for _, f := range fs.files {
		if f, ok := f.(*IndexFile); ok {
			a = append(a, f)
		}
	}
	return a
}

// LastContiguousIndexFilesByLevel returns the last contiguous files by level.
// These can be used by the compaction scheduler.
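// For example, if the set's files have levels [0 1 1 2 2 3], ordered newest
// to oldest, then requesting level 2 returns only the trailing run of
// level-2 files.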
func (fs *FileSet) LastContiguousIndexFilesByLevel(level int) []*IndexFile {
	if level == 0 {
		return nil
	}

	var a []*IndexFile
	for i := len(fs.files) - 1; i >= 0; i-- {
		f := fs.files[i]

		// Ignore files above level, stop on files below level.
		if level < f.Level() {
			continue
		} else if level > f.Level() {
			break
		}

		a = append([]*IndexFile{f.(*IndexFile)}, a...)
	}
	return a
}

// Measurement returns a measurement by name.
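// Files are searched from newest to oldest, so a measurement tombstoned in a
// newer file shadows any copies of it in older files.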
func (fs *FileSet) Measurement(name []byte) MeasurementElem {
	for _, f := range fs.files {
		if e := f.Measurement(name); e == nil {
			continue
		} else if e.Deleted() {
			return nil
		} else {
			return e
		}
	}
	return nil
}

// MeasurementIterator returns an iterator over all measurements in the index.
func (fs *FileSet) MeasurementIterator() MeasurementIterator {
	a := make([]MeasurementIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.MeasurementIterator()
		if itr != nil {
			a = append(a, itr)
		}
	}
	return MergeMeasurementIterators(a...)
}

// TagKeyIterator returns an iterator over all tag keys for a measurement.
func (fs *FileSet) TagKeyIterator(name []byte) TagKeyIterator {
	a := make([]TagKeyIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.TagKeyIterator(name)
		if itr != nil {
			a = append(a, itr)
		}
	}
	return MergeTagKeyIterators(a...)
}

// MeasurementSeriesIDIterator returns a series iterator for a measurement.
func (fs *FileSet) MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator {
	a := make([]tsdb.SeriesIDIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.MeasurementSeriesIDIterator(name)
		if itr != nil {
			a = append(a, itr)
		}
	}
	return tsdb.MergeSeriesIDIterators(a...)
}

// MeasurementTagKeysByExpr extracts the tag keys wanted by the expression.
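// The only key reference recognized on the left-hand side is the special
// `_tagKey` variable, so conditions take forms such as `_tagKey = 'region'`
// (exact match) or `_tagKey =~ /^host/` (regex match).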
func (fs *FileSet) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) {
	// Return all keys if no condition was passed in.
	if expr == nil {
		m := make(map[string]struct{})
		if itr := fs.TagKeyIterator(name); itr != nil {
			for e := itr.Next(); e != nil; e = itr.Next() {
				m[string(e.Key())] = struct{}{}
			}
		}
		return m, nil
	}

	switch e := expr.(type) {
	case *influxql.BinaryExpr:
		switch e.Op {
		case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
			tag, ok := e.LHS.(*influxql.VarRef)
			if !ok {
				return nil, fmt.Errorf("left side of '%s' must be a tag key", e.Op.String())
			} else if tag.Val != "_tagKey" {
				return nil, nil
			}

			if influxql.IsRegexOp(e.Op) {
				re, ok := e.RHS.(*influxql.RegexLiteral)
				if !ok {
					return nil, fmt.Errorf("right side of '%s' must be a regular expression", e.Op.String())
				}
				return fs.tagKeysByFilter(name, e.Op, nil, re.Val), nil
			}

			s, ok := e.RHS.(*influxql.StringLiteral)
			if !ok {
				return nil, fmt.Errorf("right side of '%s' must be a tag value string", e.Op.String())
			}
			return fs.tagKeysByFilter(name, e.Op, []byte(s.Val), nil), nil

		case influxql.AND, influxql.OR:
			lhs, err := fs.MeasurementTagKeysByExpr(name, e.LHS)
			if err != nil {
				return nil, err
			}
			rhs, err := fs.MeasurementTagKeysByExpr(name, e.RHS)
			if err != nil {
				return nil, err
			}

			if lhs != nil && rhs != nil {
				if e.Op == influxql.OR {
					return unionStringSets(lhs, rhs), nil
				}
				return intersectStringSets(lhs, rhs), nil
			} else if lhs != nil {
				return lhs, nil
			} else if rhs != nil {
				return rhs, nil
			}
			return nil, nil

		default:
			return nil, fmt.Errorf("invalid operator")
		}

	case *influxql.ParenExpr:
		return fs.MeasurementTagKeysByExpr(name, e.Expr)
	}

	return nil, fmt.Errorf("invalid measurement tag keys expression: %#v", expr)
}

// tagKeysByFilter will filter the tag keys for the measurement.
func (fs *FileSet) tagKeysByFilter(name []byte, op influxql.Token, val []byte, regex *regexp.Regexp) map[string]struct{} {
	ss := make(map[string]struct{})
	itr := fs.TagKeyIterator(name)
	if itr != nil {
		for e := itr.Next(); e != nil; e = itr.Next() {
			var matched bool
			switch op {
			case influxql.EQ:
				matched = bytes.Equal(e.Key(), val)
			case influxql.NEQ:
				matched = !bytes.Equal(e.Key(), val)
			case influxql.EQREGEX:
				matched = regex.Match(e.Key())
			case influxql.NEQREGEX:
				matched = !regex.Match(e.Key())
			}

			if !matched {
				continue
			}
			ss[string(e.Key())] = struct{}{}
		}
	}
	return ss
}

// TagKeySeriesIDIterator returns a series iterator for all values across a single key.
func (fs *FileSet) TagKeySeriesIDIterator(name, key []byte) tsdb.SeriesIDIterator {
	a := make([]tsdb.SeriesIDIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.TagKeySeriesIDIterator(name, key)
		if itr != nil {
			a = append(a, itr)
		}
	}
	return tsdb.MergeSeriesIDIterators(a...)
}

// HasTagKey returns true if the tag key exists.
func (fs *FileSet) HasTagKey(name, key []byte) bool {
	for _, f := range fs.files {
		if e := f.TagKey(name, key); e != nil {
			return !e.Deleted()
		}
	}
	return false
}

// HasTagValue returns true if the tag value exists.
func (fs *FileSet) HasTagValue(name, key, value []byte) bool {
	for _, f := range fs.files {
		if e := f.TagValue(name, key, value); e != nil {
			return !e.Deleted()
		}
	}
	return false
}

// TagValueIterator returns a value iterator for a tag key.
func (fs *FileSet) TagValueIterator(name, key []byte) TagValueIterator {
	a := make([]TagValueIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.TagValueIterator(name, key)
		if itr != nil {
			a = append(a, itr)
		}
	}
	return MergeTagValueIterators(a...)
}

// TagValueSeriesIDIterator returns a series iterator for a single tag value.
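// Files are merged from oldest to newest so that tombstones in newer files
// remove matching series contributed by older files, while series re-added
// after a tombstone survive.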
func (fs *FileSet) TagValueSeriesIDIterator(name, key, value []byte) (tsdb.SeriesIDIterator, error) {
	ss := tsdb.NewSeriesIDSet()

	var ftss *tsdb.SeriesIDSet
	for i := len(fs.files) - 1; i >= 0; i-- {
		f := fs.files[i]

		// Remove tombstones set in the previously processed (older) file.
		if ftss != nil && ftss.Cardinality() > 0 {
			ss = ss.AndNot(ftss)
		}

		// Fetch tag value series set for this file and merge into overall set.
		fss, err := f.TagValueSeriesIDSet(name, key, value)
		if err != nil {
			return nil, err
		} else if fss != nil {
			ss.Merge(fss)
		}

		// Fetch tombstone set to be processed on next file.
		if ftss, err = f.TombstoneSeriesIDSet(); err != nil {
			return nil, err
		}
	}
	return tsdb.NewSeriesIDSetIterator(ss), nil
}

// MeasurementsSketches returns the merged measurement sketches for the FileSet.
func (fs *FileSet) MeasurementsSketches() (estimator.Sketch, estimator.Sketch, error) {
	sketch, tSketch := hll.NewDefaultPlus(), hll.NewDefaultPlus()
	for _, f := range fs.files {
		if s, t, err := f.MeasurementsSketches(); err != nil {
			return nil, nil, err
		} else if err := sketch.Merge(s); err != nil {
			return nil, nil, err
		} else if err := tSketch.Merge(t); err != nil {
			return nil, nil, err
		}
	}
	return sketch, tSketch, nil
}

// SeriesSketches returns the merged series sketches for the FileSet.
func (fs *FileSet) SeriesSketches() (estimator.Sketch, estimator.Sketch, error) {
	sketch, tSketch := hll.NewDefaultPlus(), hll.NewDefaultPlus()
	for _, f := range fs.files {
		if s, t, err := f.SeriesSketches(); err != nil {
			return nil, nil, err
		} else if err := sketch.Merge(s); err != nil {
			return nil, nil, err
		} else if err := tSketch.Merge(t); err != nil {
			return nil, nil, err
		}
	}
	return sketch, tSketch, nil
}

// File represents a log or index file.
type File interface {
	Close() error
	Path() string

	ID() int
	Level() int

	Measurement(name []byte) MeasurementElem
	MeasurementIterator() MeasurementIterator
	MeasurementHasSeries(ss *tsdb.SeriesIDSet, name []byte) bool

	TagKey(name, key []byte) TagKeyElem
	TagKeyIterator(name []byte) TagKeyIterator

	TagValue(name, key, value []byte) TagValueElem
	TagValueIterator(name, key []byte) TagValueIterator

	// Series iteration.
	MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator
	TagKeySeriesIDIterator(name, key []byte) tsdb.SeriesIDIterator
	TagValueSeriesIDSet(name, key, value []byte) (*tsdb.SeriesIDSet, error)

	// Sketches for cardinality estimation.
	MeasurementsSketches() (s, t estimator.Sketch, err error)
	SeriesSketches() (s, t estimator.Sketch, err error)

	// Bitmap series existence.
	SeriesIDSet() (*tsdb.SeriesIDSet, error)
	TombstoneSeriesIDSet() (*tsdb.SeriesIDSet, error)

	// Reference counting.
	Retain()
	Release()

	// Size of the file on disk.
	Size() int64

	// Estimated memory footprint.
	bytes() int
}

// Files represents a list of files.
type Files []File

// IDs returns the identifier of each file in the list.
func (a Files) IDs() []int {
	ids := make([]int, len(a))
	for i := range a {
		ids[i] = a[i].ID()
	}
	return ids
}

// fileSetSeriesIDIterator attaches a fileset to an iterator that is released on close.
type fileSetSeriesIDIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.SeriesIDIterator
}
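
// newFileSetSeriesIDIterator wraps itr so that fs is released exactly once
// when the iterator is closed. A nil itr releases fs immediately, and
// set-based iterators are wrapped so the SeriesIDSet fast path is preserved.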
func newFileSetSeriesIDIterator(fs *FileSet, itr tsdb.SeriesIDIterator) tsdb.SeriesIDIterator {
	if itr == nil {
		fs.Release()
		return nil
	}
	if itr, ok := itr.(tsdb.SeriesIDSetIterator); ok {
		return &fileSetSeriesIDSetIterator{fs: fs, itr: itr}
	}
	return &fileSetSeriesIDIterator{fs: fs, itr: itr}
}

func (itr *fileSetSeriesIDIterator) Next() (tsdb.SeriesIDElem, error) {
	return itr.itr.Next()
}

func (itr *fileSetSeriesIDIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}

// fileSetSeriesIDSetIterator attaches a fileset to an iterator that is released on close.
type fileSetSeriesIDSetIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.SeriesIDSetIterator
}

func (itr *fileSetSeriesIDSetIterator) Next() (tsdb.SeriesIDElem, error) {
	return itr.itr.Next()
}

func (itr *fileSetSeriesIDSetIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}

func (itr *fileSetSeriesIDSetIterator) SeriesIDSet() *tsdb.SeriesIDSet {
	return itr.itr.SeriesIDSet()
}

// fileSetMeasurementIterator attaches a fileset to an iterator that is released on close.
type fileSetMeasurementIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.MeasurementIterator
}
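
// newFileSetMeasurementIterator returns an iterator that releases fs exactly
// once when it is closed.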
func newFileSetMeasurementIterator(fs *FileSet, itr tsdb.MeasurementIterator) *fileSetMeasurementIterator {
	return &fileSetMeasurementIterator{fs: fs, itr: itr}
}

func (itr *fileSetMeasurementIterator) Next() ([]byte, error) {
	return itr.itr.Next()
}

func (itr *fileSetMeasurementIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}

// fileSetTagKeyIterator attaches a fileset to an iterator that is released on close.
type fileSetTagKeyIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.TagKeyIterator
}
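
// newFileSetTagKeyIterator returns an iterator that releases fs exactly
// once when it is closed.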
func newFileSetTagKeyIterator(fs *FileSet, itr tsdb.TagKeyIterator) *fileSetTagKeyIterator {
	return &fileSetTagKeyIterator{fs: fs, itr: itr}
}

func (itr *fileSetTagKeyIterator) Next() ([]byte, error) {
	return itr.itr.Next()
}

func (itr *fileSetTagKeyIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}

// fileSetTagValueIterator attaches a fileset to an iterator that is released on close.
type fileSetTagValueIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.TagValueIterator
}
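
// newFileSetTagValueIterator returns an iterator that releases fs exactly
// once when it is closed.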
func newFileSetTagValueIterator(fs *FileSet, itr tsdb.TagValueIterator) *fileSetTagValueIterator {
	return &fileSetTagValueIterator{fs: fs, itr: itr}
}

func (itr *fileSetTagValueIterator) Next() ([]byte, error) {
	return itr.itr.Next()
}

func (itr *fileSetTagValueIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}