package tsi1

import (
	"bufio"
	"io"
	"os"
	"sort"
	"time"

	"github.com/influxdata/influxdb/pkg/bytesutil"
	"github.com/influxdata/influxdb/pkg/lifecycle"
	"github.com/influxdata/influxdb/tsdb"
)

// IndexFiles represents a layered set of index files.
type IndexFiles []*IndexFile

// IDs returns the ids for all index files.
func (p IndexFiles) IDs() []int {
	a := make([]int, len(p))
	for i, f := range p {
		a[i] = f.ID()
	}
	return a
}

// Acquire acquires a reference to each file in the index files.
func (p IndexFiles) Acquire() (lifecycle.References, error) {
	refs := make(lifecycle.References, 0, len(p))
	for _, f := range p {
		ref, err := f.Acquire()
		if err != nil {
			for _, ref := range refs {
				ref.Release()
			}
			return nil, err
		}
		refs = append(refs, ref)
	}
	return refs, nil
}
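
// Usage sketch (not part of the original file; variable names are
// illustrative): callers are expected to hold the acquired references while
// reading from the files and to release every reference when done.
//
//	refs, err := files.Acquire()
//	if err != nil {
//		return err
//	}
//	defer func() {
//		for _, ref := range refs {
//			ref.Release()
//		}
//	}()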

// Files returns p as a list of File objects.
func (p IndexFiles) Files() []File {
	other := make([]File, len(p))
	for i, f := range p {
		other[i] = f
	}
	return other
}

// buildSeriesIDSets merges the series existence and tombstone sets across all
// index files, starting from the last file and layering each earlier element
// of p on top of the accumulated result.
func (p IndexFiles) buildSeriesIDSets() (seriesIDSet, tombstoneSeriesIDSet *tsdb.SeriesIDSet, err error) {
	if len(p) == 0 {
		return tsdb.NewSeriesIDSet(), tsdb.NewSeriesIDSet(), nil
	}

	// Start with sets from last file.
	if seriesIDSet, err = p[len(p)-1].SeriesIDSet(); err != nil {
		return nil, nil, err
	} else if tombstoneSeriesIDSet, err = p[len(p)-1].TombstoneSeriesIDSet(); err != nil {
		return nil, nil, err
	}

	// Build sets in reverse order.
	// This assumes that bits in both sets are mutually exclusive.
	for i := len(p) - 2; i >= 0; i-- {
		ss, err := p[i].SeriesIDSet()
		if err != nil {
			return nil, nil, err
		}

		ts, err := p[i].TombstoneSeriesIDSet()
		if err != nil {
			return nil, nil, err
		}

		// Add tombstones and remove from old series existence set.
		seriesIDSet.Diff(ts)
		tombstoneSeriesIDSet.Merge(ts)

		// Add new series and remove from old series tombstone set.
		tombstoneSeriesIDSet.Diff(ss)
		seriesIDSet.Merge(ss)
	}

	return seriesIDSet, tombstoneSeriesIDSet, nil
}
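
// Illustrative trace of buildSeriesIDSets (the series IDs are made up):
// suppose p[len(p)-1] holds series {3, 4} with no tombstones, and p[0] holds
// series {1, 2} plus a tombstone for 3. The reverse-order loop applies p[0]
// last, so its tombstone removes 3 from the existence set and {1, 2} are
// merged in, leaving existence set {1, 2, 4} and tombstone set {3}.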

// MeasurementNames returns a sorted list of all measurement names for all files.
func (p *IndexFiles) MeasurementNames() [][]byte {
	itr := p.MeasurementIterator()
	if itr == nil {
		return nil
	}

	var names [][]byte
	for e := itr.Next(); e != nil; e = itr.Next() {
		names = append(names, bytesutil.Clone(e.Name()))
	}
	sort.Sort(byteSlices(names))
	return names
}

// MeasurementIterator returns an iterator that merges measurements across all files.
func (p IndexFiles) MeasurementIterator() MeasurementIterator {
	a := make([]MeasurementIterator, 0, len(p))
	for i := range p {
		itr := p[i].MeasurementIterator()
		if itr == nil {
			continue
		}
		a = append(a, itr)
	}
	return MergeMeasurementIterators(a...)
}

// TagKeyIterator returns an iterator that merges tag keys across all files.
func (p *IndexFiles) TagKeyIterator(name []byte) (TagKeyIterator, error) {
	a := make([]TagKeyIterator, 0, len(*p))
	for _, f := range *p {
		itr := f.TagKeyIterator(name)
		if itr == nil {
			continue
		}
		a = append(a, itr)
	}
	return MergeTagKeyIterators(a...), nil
}

// MeasurementSeriesIDIterator returns an iterator that merges series across all files.
func (p IndexFiles) MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator {
	a := make([]tsdb.SeriesIDIterator, 0, len(p))
	for _, f := range p {
		itr := f.MeasurementSeriesIDIterator(name)
		if itr == nil {
			continue
		}
		a = append(a, itr)
	}
	return tsdb.MergeSeriesIDIterators(a...)
}

// TagValueSeriesIDSet returns the merged set of series IDs matching the
// measurement name, tag key, and tag value across all files.
func (p IndexFiles) TagValueSeriesIDSet(name, key, value []byte) (*tsdb.SeriesIDSet, error) {
	ss := tsdb.NewSeriesIDSet()
	for i := range p {
		if fss, err := p[i].TagValueSeriesIDSet(name, key, value); err != nil {
			return nil, err
		} else if fss != nil {
			ss.Merge(fss)
		}
	}
	return ss, nil
}
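
// Usage sketch (not part of the original file; the measurement, tag key, and
// tag value are assumptions):
//
//	ss, err := files.TagValueSeriesIDSet([]byte("cpu"), []byte("host"), []byte("server01"))
//	if err != nil {
//		return err
//	}
//	// ss is the union of matching series IDs across every index file.
//	_ = ss.Cardinality()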

// CompactTo merges all index files and writes them to w.
func (p IndexFiles) CompactTo(w io.Writer, sfile *tsdb.SeriesFile, m, k uint64, cancel <-chan struct{}) (n int64, err error) {
	var t IndexFileTrailer

	// Check for cancellation.
	select {
	case <-cancel:
		return n, ErrCompactionInterrupted
	default:
	}

	// Wrap writer in buffered I/O.
	bw := bufio.NewWriter(w)

	// Setup context object to track shared data for this compaction.
	var info indexCompactInfo
	info.cancel = cancel
	info.tagSets = make(map[string]indexTagSetPos)

	// Write magic number.
	if err := writeTo(bw, []byte(FileSignature), &n); err != nil {
		return n, err
	}

	// Flush buffer before re-mapping.
	if err := bw.Flush(); err != nil {
		return n, err
	}

	// Write tagset blocks in measurement order.
	if err := p.writeTagsetsTo(bw, &info, &n); err != nil {
		return n, err
	}

	// Ensure block is word aligned.
	// if offset := n % 8; offset != 0 {
	// 	if err := writeTo(bw, make([]byte, 8-offset), &n); err != nil {
	// 		return n, err
	// 	}
	// }

	// Write measurement block.
	t.MeasurementBlock.Offset = n
	if err := p.writeMeasurementBlockTo(bw, &info, &n); err != nil {
		return n, err
	}
	t.MeasurementBlock.Size = n - t.MeasurementBlock.Offset

	// Build series sets.
	seriesIDSet, tombstoneSeriesIDSet, err := p.buildSeriesIDSets()
	if err != nil {
		return n, err
	}

	// Write series set.
	t.SeriesIDSet.Offset = n
	nn, err := seriesIDSet.WriteTo(bw)
	if n += nn; err != nil {
		return n, err
	}
	t.SeriesIDSet.Size = n - t.SeriesIDSet.Offset

	// Write tombstone series set.
	t.TombstoneSeriesIDSet.Offset = n
	nn, err = tombstoneSeriesIDSet.WriteTo(bw)
	if n += nn; err != nil {
		return n, err
	}
	t.TombstoneSeriesIDSet.Size = n - t.TombstoneSeriesIDSet.Offset

	// Write trailer.
	nn, err = t.WriteTo(bw)
	n += nn
	if err != nil {
		return n, err
	}

	// Flush file.
	if err := bw.Flush(); err != nil {
		return n, err
	}

	return n, nil
}
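
// Usage sketch (not part of the original file): a caller would typically
// compact into a temporary file and swap it in once the write succeeds. The
// destination path, series file, and the m/k arguments are assumptions; m and
// k are accepted but not referenced inside CompactTo above.
//
//	f, err := os.Create("index.tsi.compacting")
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//
//	cancel := make(chan struct{})
//	if _, err := files.CompactTo(f, sfile, 0, 0, cancel); err != nil {
//		return err
//	}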

// writeTagsetsTo writes a tag block for each measurement to w.
func (p IndexFiles) writeTagsetsTo(w io.Writer, info *indexCompactInfo, n *int64) error {
	mitr := p.MeasurementIterator()
	if mitr == nil {
		return nil
	}

	for m := mitr.Next(); m != nil; m = mitr.Next() {
		if err := p.writeTagsetTo(w, m.Name(), info, n); err != nil {
			return err
		}
	}
	return nil
}

// writeTagsetTo writes a single tagset to w and saves the tagset offset.
func (p IndexFiles) writeTagsetTo(w io.Writer, name []byte, info *indexCompactInfo, n *int64) error {
	var seriesIDs []tsdb.SeriesID

	// Check for cancellation.
	select {
	case <-info.cancel:
		return ErrCompactionInterrupted
	default:
	}

	// Ensure block is word aligned.
	// if offset := (*n) % 8; offset != 0 {
	// 	if err := writeTo(w, make([]byte, 8-offset), n); err != nil {
	// 		return err
	// 	}
	// }

	kitr, err := p.TagKeyIterator(name)
	if err != nil {
		return err
	}

	enc := NewTagBlockEncoder(w)
	for ke := kitr.Next(); ke != nil; ke = kitr.Next() {
		// Encode key.
		if err := enc.EncodeKey(ke.Key(), ke.Deleted()); err != nil {
			return err
		}

		// Iterate over tag values.
		vitr := ke.TagValueIterator()
		for ve := vitr.Next(); ve != nil; ve = vitr.Next() {
			seriesIDs = seriesIDs[:0]

			// Merge all series together.
			if err := func() error {
				ss, err := p.TagValueSeriesIDSet(name, ke.Key(), ve.Value())
				if err != nil {
					return err
				}
				return enc.EncodeValue(ve.Value(), ve.Deleted(), ss)
			}(); err != nil {
				return err // Propagate the error instead of silently dropping it.
			}
		}
	}

	// Save tagset offset to measurement.
	pos := info.tagSets[string(name)]
	pos.offset = *n

	// Flush data to writer.
	err = enc.Close()
	*n += enc.N()
	if err != nil {
		return err
	}

	// Save tagset size to measurement.
	pos.size = *n - pos.offset

	info.tagSets[string(name)] = pos

	return nil
}

// writeMeasurementBlockTo writes the measurement block, including each
// measurement's sorted series IDs and its tagset offset/size, to w.
func (p IndexFiles) writeMeasurementBlockTo(w io.Writer, info *indexCompactInfo, n *int64) error {
	mw := NewMeasurementBlockWriter()

	// Check for cancellation.
	select {
	case <-info.cancel:
		return ErrCompactionInterrupted
	default:
	}

	// Add measurement data & compute sketches.
	mitr := p.MeasurementIterator()
	if mitr != nil {
		var seriesN int
		for m := mitr.Next(); m != nil; m = mitr.Next() {
			name := m.Name()

			// Look-up series ids.
			if err := func() error {
				itr := p.MeasurementSeriesIDIterator(name)
				defer itr.Close()

				var seriesIDs []tsdb.SeriesID
				for {
					e, err := itr.Next()
					if err != nil {
						return err
					} else if e.SeriesID.IsZero() {
						break
					}
					seriesIDs = append(seriesIDs, e.SeriesID)

					// Check for cancellation periodically.
					if seriesN++; seriesN%1000 == 0 {
						select {
						case <-info.cancel:
							return ErrCompactionInterrupted
						default:
						}
					}
				}
				sort.Slice(seriesIDs, func(i, j int) bool { return seriesIDs[i].Less(seriesIDs[j]) })

				// Add measurement to writer.
				pos := info.tagSets[string(name)]
				mw.Add(name, m.Deleted(), pos.offset, pos.size, seriesIDs)

				return nil
			}(); err != nil {
				return err
			}
		}
	}

	// Flush data to writer.
	nn, err := mw.WriteTo(w)
	*n += nn
	return err
}

// Stat returns the max index file size and the total file size for all index files.
func (p IndexFiles) Stat() (*IndexFilesInfo, error) {
	var info IndexFilesInfo
	for _, f := range p {
		fi, err := os.Stat(f.Path())
		if os.IsNotExist(err) {
			continue
		} else if err != nil {
			return nil, err
		}

		if fi.Size() > info.MaxSize {
			info.MaxSize = fi.Size()
		}
		if fi.ModTime().After(info.ModTime) {
			info.ModTime = fi.ModTime()
		}

		info.Size += fi.Size()
	}
	return &info, nil
}
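
// Usage sketch (not part of the original file):
//
//	info, err := files.Stat()
//	if err != nil {
//		return err
//	}
//	log.Printf("index files: total=%d bytes, largest=%d bytes, modified=%s",
//		info.Size, info.MaxSize, info.ModTime)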

type IndexFilesInfo struct {
	MaxSize int64     // largest file size
	Size    int64     // total file size
	ModTime time.Time // last modified
}

// indexCompactInfo is a context object used for tracking position information
// during the compaction of index files.
type indexCompactInfo struct {
	cancel <-chan struct{}

	// Tracks offset/size for each measurement's tagset.
	tagSets map[string]indexTagSetPos
}

// indexTagSetPos stores the offset/size of tagsets.
type indexTagSetPos struct {
	offset int64
	size   int64
}