package tsi1

import (
	"bytes"
	"fmt"
	"regexp"
	"sync"
	"unsafe"

	"github.com/influxdata/influxql"
	"github.com/influxdata/platform/pkg/estimator"
	"github.com/influxdata/platform/pkg/estimator/hll"
	"github.com/influxdata/platform/tsdb"
)

// FileSet represents a collection of files.
type FileSet struct {
	levels       []CompactionLevel
	sfile        *tsdb.SeriesFile
	files        []File
	manifestSize int64 // Size of the manifest file in bytes.
}

// NewFileSet returns a new instance of FileSet.
func NewFileSet(levels []CompactionLevel, sfile *tsdb.SeriesFile, files []File) (*FileSet, error) {
	return &FileSet{
		levels: levels,
		sfile:  sfile,
		files:  files,
	}, nil
}

// bytes estimates the memory footprint of this FileSet, in bytes.
func (fs *FileSet) bytes() int {
	var b int
	for _, level := range fs.levels {
		b += int(unsafe.Sizeof(level))
	}
	// Do not count SeriesFile because it belongs to the code that constructed this FileSet.
	for _, file := range fs.files {
		b += file.bytes()
	}
	b += int(unsafe.Sizeof(*fs))
	return b
}

// Close closes all the files in the file set.
func (fs *FileSet) Close() error {
	var err error
	for _, f := range fs.files {
		if e := f.Close(); e != nil && err == nil {
			err = e
		}
	}
	return err
}

// Retain adds a reference count to all files.
func (fs *FileSet) Retain() {
	for _, f := range fs.files {
		f.Retain()
	}
}

// Release removes a reference count from all files.
func (fs *FileSet) Release() {
	for _, f := range fs.files {
		f.Release()
	}
}
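
// The Retain/Release pair guards the lifetime of the underlying files while a
// caller reads from the set. A minimal usage sketch (illustrative only; the
// process function is a hypothetical placeholder):
//
//	fs.Retain()
//	defer fs.Release()
//	if itr := fs.MeasurementIterator(); itr != nil {
//	    for e := itr.Next(); e != nil; e = itr.Next() {
//	        process(e.Name())
//	    }
//	}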

// SeriesFile returns the attached series file.
func (fs *FileSet) SeriesFile() *tsdb.SeriesFile { return fs.sfile }

// PrependLogFile returns a new file set with f added at the beginning.
// Filters do not need to be rebuilt because log files have no bloom filter.
func (fs *FileSet) PrependLogFile(f *LogFile) *FileSet {
	return &FileSet{
		levels: fs.levels,
		sfile:  fs.sfile,
		files:  append([]File{f}, fs.files...),
	}
}

// Size returns the on-disk size of the FileSet.
func (fs *FileSet) Size() int64 {
	var total int64
	for _, f := range fs.files {
		total += f.Size()
	}
	return total + fs.manifestSize
}

// MustReplace swaps a list of files for a single file and returns a new file set.
// The caller should always guarantee that the files exist and are contiguous.
func (fs *FileSet) MustReplace(oldFiles []File, newFile File) *FileSet {
	assert(len(oldFiles) > 0, "cannot replace empty files")

	// Find index of first old file.
	var i int
	for ; i < len(fs.files); i++ {
		if fs.files[i] == oldFiles[0] {
			break
		} else if i == len(fs.files)-1 {
			panic("first replacement file not found")
		}
	}

	// Ensure all old files are contiguous.
	for j := range oldFiles {
		if fs.files[i+j] != oldFiles[j] {
			panic(fmt.Sprintf("cannot replace non-contiguous files: subset=%+v, fileset=%+v", Files(oldFiles).IDs(), Files(fs.files).IDs()))
		}
	}

	// Copy to new fileset.
	other := make([]File, len(fs.files)-len(oldFiles)+1)
	copy(other[:i], fs.files[:i])
	other[i] = newFile
	copy(other[i+1:], fs.files[i+len(oldFiles):])

	// Build the new file set, carrying over the series file reference.
	return &FileSet{
		levels: fs.levels,
		sfile:  fs.sfile,
		files:  other,
	}
}
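
// As a sketch of the replacement semantics (hypothetical files and IDs): if
// the set holds [log, f3, f2, f1] and index files f2 and f1 have been
// compacted into f4, then
//
//	fs2 := fs.MustReplace([]File{f2, f1}, f4) // fs2 holds [log, f3, f4]
//
// The old files must appear contiguously and in the given order, or the
// method panics.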

// MaxID returns the highest file identifier.
func (fs *FileSet) MaxID() int {
	var max int
	for _, f := range fs.files {
		if i := f.ID(); i > max {
			max = i
		}
	}
	return max
}

// Files returns all files in the set.
func (fs *FileSet) Files() []File {
	return fs.files
}

// LogFiles returns all log files from the file set.
func (fs *FileSet) LogFiles() []*LogFile {
	var a []*LogFile
	for _, f := range fs.files {
		if f, ok := f.(*LogFile); ok {
			a = append(a, f)
		}
	}
	return a
}

// IndexFiles returns all index files from the file set.
func (fs *FileSet) IndexFiles() []*IndexFile {
	var a []*IndexFile
	for _, f := range fs.files {
		if f, ok := f.(*IndexFile); ok {
			a = append(a, f)
		}
	}
	return a
}

// LastContiguousIndexFilesByLevel returns the last contiguous files by level.
// These can be used by the compaction scheduler.
func (fs *FileSet) LastContiguousIndexFilesByLevel(level int) []*IndexFile {
	if level == 0 {
		return nil
	}

	var a []*IndexFile
	for i := len(fs.files) - 1; i >= 0; i-- {
		f := fs.files[i]

		// Ignore files above level, stop on files below level.
		if level < f.Level() {
			continue
		} else if level > f.Level() {
			break
		}

		a = append([]*IndexFile{f.(*IndexFile)}, a...)
	}
	return a
}
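
// For example (hypothetical levels, files listed newest first): with file
// levels [1, 1, 2, 2, 3], LastContiguousIndexFilesByLevel(2) walks backward
// from the oldest file, skips the level-3 file, collects the two level-2
// files, and stops at the first level-1 file it meets, so only the trailing
// run of level-2 files is returned, in set order.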

// Measurement returns a measurement by name. Files are searched in order,
// newest first, so the first file with an entry wins; a tombstoned entry in a
// newer file hides any older entries and nil is returned.
func (fs *FileSet) Measurement(name []byte) MeasurementElem {
	for _, f := range fs.files {
		if e := f.Measurement(name); e == nil {
			continue
		} else if e.Deleted() {
			return nil
		} else {
			return e
		}
	}
	return nil
}

// MeasurementIterator returns an iterator over all measurements in the index.
func (fs *FileSet) MeasurementIterator() MeasurementIterator {
	a := make([]MeasurementIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.MeasurementIterator()
		if itr != nil {
			a = append(a, itr)
		}
	}
	return MergeMeasurementIterators(a...)
}

// TagKeyIterator returns an iterator over all tag keys for a measurement.
func (fs *FileSet) TagKeyIterator(name []byte) TagKeyIterator {
	a := make([]TagKeyIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.TagKeyIterator(name)
		if itr != nil {
			a = append(a, itr)
		}
	}
	return MergeTagKeyIterators(a...)
}

// MeasurementSeriesIDIterator returns a series iterator for a measurement.
func (fs *FileSet) MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator {
	a := make([]tsdb.SeriesIDIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.MeasurementSeriesIDIterator(name)
		if itr != nil {
			a = append(a, itr)
		}
	}
	return tsdb.MergeSeriesIDIterators(a...)
}

// MeasurementTagKeysByExpr extracts the tag keys wanted by the expression.
func (fs *FileSet) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) {
	// Return all keys if no condition was passed in.
	if expr == nil {
		m := make(map[string]struct{})
		if itr := fs.TagKeyIterator(name); itr != nil {
			for e := itr.Next(); e != nil; e = itr.Next() {
				m[string(e.Key())] = struct{}{}
			}
		}
		return m, nil
	}

	switch e := expr.(type) {
	case *influxql.BinaryExpr:
		switch e.Op {
		case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
			tag, ok := e.LHS.(*influxql.VarRef)
			if !ok {
				return nil, fmt.Errorf("left side of '%s' must be a tag key", e.Op.String())
			} else if tag.Val != "_tagKey" {
				return nil, nil
			}

			if influxql.IsRegexOp(e.Op) {
				re, ok := e.RHS.(*influxql.RegexLiteral)
				if !ok {
					return nil, fmt.Errorf("right side of '%s' must be a regular expression", e.Op.String())
				}
				return fs.tagKeysByFilter(name, e.Op, nil, re.Val), nil
			}

			s, ok := e.RHS.(*influxql.StringLiteral)
			if !ok {
				return nil, fmt.Errorf("right side of '%s' must be a tag value string", e.Op.String())
			}
			return fs.tagKeysByFilter(name, e.Op, []byte(s.Val), nil), nil

		case influxql.AND, influxql.OR:
			lhs, err := fs.MeasurementTagKeysByExpr(name, e.LHS)
			if err != nil {
				return nil, err
			}

			rhs, err := fs.MeasurementTagKeysByExpr(name, e.RHS)
			if err != nil {
				return nil, err
			}

			if lhs != nil && rhs != nil {
				if e.Op == influxql.OR {
					return unionStringSets(lhs, rhs), nil
				}
				return intersectStringSets(lhs, rhs), nil
			} else if lhs != nil {
				return lhs, nil
			} else if rhs != nil {
				return rhs, nil
			}
			return nil, nil

		default:
			return nil, fmt.Errorf("invalid operator for tag keys condition: %s", e.Op.String())
		}

	case *influxql.ParenExpr:
		return fs.MeasurementTagKeysByExpr(name, e.Expr)
	}

	return nil, fmt.Errorf("invalid measurement tag keys expression: %#v", expr)
}
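
// A minimal usage sketch (assuming the influxql parser; the measurement name
// and condition below are hypothetical):
//
//	expr, err := influxql.ParseExpr(`_tagKey =~ /host.*/`)
//	if err != nil {
//	    return err
//	}
//	keys, err := fs.MeasurementTagKeysByExpr([]byte("cpu"), expr)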

// tagKeysByFilter will filter the tag keys for the measurement.
func (fs *FileSet) tagKeysByFilter(name []byte, op influxql.Token, val []byte, regex *regexp.Regexp) map[string]struct{} {
	ss := make(map[string]struct{})
	itr := fs.TagKeyIterator(name)
	if itr != nil {
		for e := itr.Next(); e != nil; e = itr.Next() {
			var matched bool
			switch op {
			case influxql.EQ:
				matched = bytes.Equal(e.Key(), val)
			case influxql.NEQ:
				matched = !bytes.Equal(e.Key(), val)
			case influxql.EQREGEX:
				matched = regex.Match(e.Key())
			case influxql.NEQREGEX:
				matched = !regex.Match(e.Key())
			}

			if !matched {
				continue
			}
			ss[string(e.Key())] = struct{}{}
		}
	}
	return ss
}

// TagKeySeriesIDIterator returns a series iterator for all values across a single key.
func (fs *FileSet) TagKeySeriesIDIterator(name, key []byte) tsdb.SeriesIDIterator {
	a := make([]tsdb.SeriesIDIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.TagKeySeriesIDIterator(name, key)
		if itr != nil {
			a = append(a, itr)
		}
	}
	return tsdb.MergeSeriesIDIterators(a...)
}

// HasTagKey returns true if the tag key exists.
func (fs *FileSet) HasTagKey(name, key []byte) bool {
	for _, f := range fs.files {
		if e := f.TagKey(name, key); e != nil {
			return !e.Deleted()
		}
	}
	return false
}

// HasTagValue returns true if the tag value exists.
func (fs *FileSet) HasTagValue(name, key, value []byte) bool {
	for _, f := range fs.files {
		if e := f.TagValue(name, key, value); e != nil {
			return !e.Deleted()
		}
	}
	return false
}

// TagValueIterator returns a value iterator for a tag key.
func (fs *FileSet) TagValueIterator(name, key []byte) TagValueIterator {
	a := make([]TagValueIterator, 0, len(fs.files))
	for _, f := range fs.files {
		itr := f.TagValueIterator(name, key)
		if itr != nil {
			a = append(a, itr)
		}
	}
	return MergeTagValueIterators(a...)
}

// TagValueSeriesIDIterator returns a series iterator for a single tag value.
func (fs *FileSet) TagValueSeriesIDIterator(name, key, value []byte) (tsdb.SeriesIDIterator, error) {
	ss := tsdb.NewSeriesIDSet()

	var ftss *tsdb.SeriesIDSet
	for i := len(fs.files) - 1; i >= 0; i-- {
		f := fs.files[i]

		// Remove tombstones set in previous file.
		if ftss != nil && ftss.Cardinality() > 0 {
			ss = ss.AndNot(ftss)
		}

		// Fetch tag value series set for this file and merge into overall set.
		fss, err := f.TagValueSeriesIDSet(name, key, value)
		if err != nil {
			return nil, err
		} else if fss != nil {
			ss.Merge(fss)
		}

		// Fetch tombstone set to be processed on next file.
		if ftss, err = f.TombstoneSeriesIDSet(); err != nil {
			return nil, err
		}
	}
	return tsdb.NewSeriesIDSetIterator(ss), nil
}
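
// The loop above walks files oldest to newest: a file's tombstones are fetched
// after its own set is merged and subtracted at the start of the next (newer)
// file's iteration, so tombstones hide matching series from older files while
// newer files may re-add them. A compact sketch of that shadowing rule
// (hypothetical files and IDs):
//
//	// oldest -> newest: file A adds {1, 2}, file B tombstones {2}, file C adds {2}
//	// result: {1, 2}, because C re-adds the series after B's tombstone.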

// MeasurementsSketches returns the merged measurement sketches for the FileSet.
func (fs *FileSet) MeasurementsSketches() (estimator.Sketch, estimator.Sketch, error) {
	sketch, tSketch := hll.NewDefaultPlus(), hll.NewDefaultPlus()
	for _, f := range fs.files {
		if s, t, err := f.MeasurementsSketches(); err != nil {
			return nil, nil, err
		} else if err := sketch.Merge(s); err != nil {
			return nil, nil, err
		} else if err := tSketch.Merge(t); err != nil {
			return nil, nil, err
		}
	}
	return sketch, tSketch, nil
}

// SeriesSketches returns the merged series sketches for the FileSet.
func (fs *FileSet) SeriesSketches() (estimator.Sketch, estimator.Sketch, error) {
	sketch, tSketch := hll.NewDefaultPlus(), hll.NewDefaultPlus()
	for _, f := range fs.files {
		if s, t, err := f.SeriesSketches(); err != nil {
			return nil, nil, err
		} else if err := sketch.Merge(s); err != nil {
			return nil, nil, err
		} else if err := tSketch.Merge(t); err != nil {
			return nil, nil, err
		}
	}
	return sketch, tSketch, nil
}
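
// Each method returns a creation sketch and a tombstone sketch; an approximate
// live-cardinality figure can be derived by differencing their counts. A
// minimal sketch of that use (assuming estimator.Sketch exposes a Count
// method, as the hll implementation does):
//
//	s, t, err := fs.SeriesSketches()
//	if err != nil {
//	    return 0, err
//	}
//	estimated := int64(s.Count()) - int64(t.Count())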

// File represents a log or index file.
type File interface {
	Close() error
	Path() string

	ID() int
	Level() int

	Measurement(name []byte) MeasurementElem
	MeasurementIterator() MeasurementIterator
	MeasurementHasSeries(ss *tsdb.SeriesIDSet, name []byte) bool

	TagKey(name, key []byte) TagKeyElem
	TagKeyIterator(name []byte) TagKeyIterator

	TagValue(name, key, value []byte) TagValueElem
	TagValueIterator(name, key []byte) TagValueIterator

	// Series iteration.
	MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator
	TagKeySeriesIDIterator(name, key []byte) tsdb.SeriesIDIterator
	TagValueSeriesIDSet(name, key, value []byte) (*tsdb.SeriesIDSet, error)

	// Sketches for cardinality estimation.
	MeasurementsSketches() (s, t estimator.Sketch, err error)
	SeriesSketches() (s, t estimator.Sketch, err error)

	// Bitmap series existence.
	SeriesIDSet() (*tsdb.SeriesIDSet, error)
	TombstoneSeriesIDSet() (*tsdb.SeriesIDSet, error)

	// Reference counting.
	Retain()
	Release()

	// Size of file on disk.
	Size() int64

	// Estimated memory footprint.
	bytes() int
}

// Files represents a list of files.
type Files []File

// IDs returns the identifiers for each file in the list.
func (a Files) IDs() []int {
	ids := make([]int, len(a))
	for i := range a {
		ids[i] = a[i].ID()
	}
	return ids
}

// fileSetSeriesIDIterator attaches a fileset to an iterator that is released on close.
type fileSetSeriesIDIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.SeriesIDIterator
}

// newFileSetSeriesIDIterator wraps itr so that fs is released when the
// iterator is closed. If itr is nil, fs is released immediately and nil is
// returned.
func newFileSetSeriesIDIterator(fs *FileSet, itr tsdb.SeriesIDIterator) tsdb.SeriesIDIterator {
	if itr == nil {
		fs.Release()
		return nil
	}
	if itr, ok := itr.(tsdb.SeriesIDSetIterator); ok {
		return &fileSetSeriesIDSetIterator{fs: fs, itr: itr}
	}
	return &fileSetSeriesIDIterator{fs: fs, itr: itr}
}
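
// A minimal usage sketch (illustrative only): the caller retains the file set
// before wrapping, and the returned iterator's Close releases it exactly once.
//
//	fs.Retain()
//	itr := newFileSetSeriesIDIterator(fs, fs.MeasurementSeriesIDIterator(name))
//	if itr == nil {
//	    return nil // fs has already been released
//	}
//	defer itr.Close()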

func (itr *fileSetSeriesIDIterator) Next() (tsdb.SeriesIDElem, error) {
	return itr.itr.Next()
}

func (itr *fileSetSeriesIDIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}

// fileSetSeriesIDSetIterator attaches a fileset to an iterator that is released on close.
type fileSetSeriesIDSetIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.SeriesIDSetIterator
}

func (itr *fileSetSeriesIDSetIterator) Next() (tsdb.SeriesIDElem, error) {
	return itr.itr.Next()
}

func (itr *fileSetSeriesIDSetIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}

func (itr *fileSetSeriesIDSetIterator) SeriesIDSet() *tsdb.SeriesIDSet {
	return itr.itr.SeriesIDSet()
}

// fileSetMeasurementIterator attaches a fileset to an iterator that is released on close.
type fileSetMeasurementIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.MeasurementIterator
}

func newFileSetMeasurementIterator(fs *FileSet, itr tsdb.MeasurementIterator) *fileSetMeasurementIterator {
	return &fileSetMeasurementIterator{fs: fs, itr: itr}
}

func (itr *fileSetMeasurementIterator) Next() ([]byte, error) {
	return itr.itr.Next()
}

func (itr *fileSetMeasurementIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}

// fileSetTagKeyIterator attaches a fileset to an iterator that is released on close.
type fileSetTagKeyIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.TagKeyIterator
}

func newFileSetTagKeyIterator(fs *FileSet, itr tsdb.TagKeyIterator) *fileSetTagKeyIterator {
	return &fileSetTagKeyIterator{fs: fs, itr: itr}
}

func (itr *fileSetTagKeyIterator) Next() ([]byte, error) {
	return itr.itr.Next()
}

func (itr *fileSetTagKeyIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}

// fileSetTagValueIterator attaches a fileset to an iterator that is released on close.
type fileSetTagValueIterator struct {
	once sync.Once
	fs   *FileSet
	itr  tsdb.TagValueIterator
}

func newFileSetTagValueIterator(fs *FileSet, itr tsdb.TagValueIterator) *fileSetTagValueIterator {
	return &fileSetTagValueIterator{fs: fs, itr: itr}
}

func (itr *fileSetTagValueIterator) Next() ([]byte, error) {
	return itr.itr.Next()
}

func (itr *fileSetTagValueIterator) Close() error {
	itr.once.Do(func() { itr.fs.Release() })
	return itr.itr.Close()
}