271 lines
5.9 KiB
Go
271 lines
5.9 KiB
Go
// Package b1 reads data from b1 shards.
|
|
package b1 // import "github.com/influxdata/influxdb/cmd/influx_tsm/b1"
|
|
|
|
import (
	"encoding/binary"
	"fmt"
	"math"
	"sort"
	"time"

	"github.com/boltdb/bolt"
	"github.com/influxdata/influxdb/cmd/influx_tsm/stats"
	"github.com/influxdata/influxdb/cmd/influx_tsm/tsdb"
	"github.com/influxdata/influxdb/tsdb/engine/tsm1"
)
|
|
|
|
// DefaultChunkSize is the size of chunks read from the b1 shard.
// It caps how many values Next() buffers before Read() must drain them.
const DefaultChunkSize int = 1000
|
|
|
|
// excludedBuckets names the bolt buckets in a b1 shard that hold metadata
// rather than series data; they are skipped when scanning for series.
var excludedBuckets = map[string]bool{
	"fields": true,
	"meta":   true,
	"series": true,
	"wal":    true,
}
|
|
|
|
// Reader is used to read all data from a b1 shard.
type Reader struct {
	path string   // filesystem path of the b1 (bolt) shard file
	db   *bolt.DB // underlying bolt database, opened by Open()
	tx   *bolt.Tx // long-lived read-only transaction backing all cursors; released in Close()

	cursors    []*cursor // one cursor per (series, field), sorted for TSM write order
	currCursor int       // index into cursors of the cursor currently being drained

	keyBuf   string       // series+field key for the values currently buffered
	values   []tsm1.Value // chunk buffer filled by Next(), exposed by Read()
	valuePos int          // number of valid entries in values

	fields map[string]*tsdb.MeasurementFields // measurement name -> its field metadata
	codecs map[string]*tsdb.FieldCodec        // measurement name -> codec used to decode values

	stats *stats.Stats // conversion statistics (filtered series, NaN/Inf points, ...)
}
|
|
|
|
// NewReader returns a reader for the b1 shard at path.
|
|
func NewReader(path string, stats *stats.Stats, chunkSize int) *Reader {
|
|
r := &Reader{
|
|
path: path,
|
|
fields: make(map[string]*tsdb.MeasurementFields),
|
|
codecs: make(map[string]*tsdb.FieldCodec),
|
|
stats: stats,
|
|
}
|
|
|
|
if chunkSize <= 0 {
|
|
chunkSize = DefaultChunkSize
|
|
}
|
|
|
|
r.values = make([]tsm1.Value, chunkSize)
|
|
|
|
return r
|
|
}
|
|
|
|
// Open opens the reader.
|
|
func (r *Reader) Open() error {
|
|
// Open underlying storage.
|
|
db, err := bolt.Open(r.path, 0666, &bolt.Options{Timeout: 1 * time.Second})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
r.db = db
|
|
|
|
// Load fields.
|
|
if err := r.db.View(func(tx *bolt.Tx) error {
|
|
meta := tx.Bucket([]byte("fields"))
|
|
c := meta.Cursor()
|
|
|
|
for k, v := c.First(); k != nil; k, v = c.Next() {
|
|
mf := &tsdb.MeasurementFields{}
|
|
if err := mf.UnmarshalBinary(v); err != nil {
|
|
return err
|
|
}
|
|
r.fields[string(k)] = mf
|
|
r.codecs[string(k)] = tsdb.NewFieldCodec(mf.Fields)
|
|
}
|
|
return nil
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
seriesSet := make(map[string]bool)
|
|
|
|
// ignore series index and find all series in this shard
|
|
if err := r.db.View(func(tx *bolt.Tx) error {
|
|
tx.ForEach(func(name []byte, _ *bolt.Bucket) error {
|
|
key := string(name)
|
|
if !excludedBuckets[key] {
|
|
seriesSet[key] = true
|
|
}
|
|
return nil
|
|
})
|
|
return nil
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
r.tx, err = r.db.Begin(false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Create cursor for each field of each series.
|
|
for s := range seriesSet {
|
|
measurement := tsdb.MeasurementFromSeriesKey(s)
|
|
fields := r.fields[measurement]
|
|
if fields == nil {
|
|
r.stats.IncrFiltered()
|
|
continue
|
|
}
|
|
for _, f := range fields.Fields {
|
|
c := newCursor(r.tx, s, f.Name, r.codecs[measurement])
|
|
c.SeekTo(0)
|
|
r.cursors = append(r.cursors, c)
|
|
}
|
|
}
|
|
sort.Sort(cursors(r.cursors))
|
|
|
|
return nil
|
|
}
|
|
|
|
// Next returns whether any data remains to be read. It must be called before
// the next call to Read().
func (r *Reader) Next() bool {
	// Reset the chunk buffer; anything from the previous Read() is stale.
	r.valuePos = 0
OUTER:
	for {
		if r.currCursor >= len(r.cursors) {
			// All cursors drained. No more data remains.
			return false
		}

		cc := r.cursors[r.currCursor]
		// Key under which the buffered values will be reported by Read().
		r.keyBuf = tsm1.SeriesFieldKey(cc.series, cc.field)

		for {
			k, v := cc.Next()
			if k == -1 {
				// Current cursor is exhausted. Go to next cursor and try again.
				r.currCursor++
				if r.valuePos == 0 {
					// The previous cursor had no data. Instead of returning
					// just go immediately to the next cursor.
					continue OUTER
				}
				// There is some data available. Indicate that it should be read.
				return true
			}

			// Filter out +/-Inf and NaN float values: they are counted as
			// read in the stats but never emitted.
			if f, ok := v.(float64); ok {
				if math.IsInf(f, 0) {
					r.stats.AddPointsRead(1)
					r.stats.IncrInf()
					continue
				}

				if math.IsNaN(f) {
					r.stats.AddPointsRead(1)
					r.stats.IncrNaN()
					continue
				}
			}

			r.values[r.valuePos] = tsm1.NewValue(k, v)
			r.valuePos++

			// Chunk buffer full: hand it to Read() before buffering more.
			if r.valuePos >= len(r.values) {
				return true
			}
		}
	}
}
|
|
|
|
// Read returns the next chunk of data in the shard, converted to tsm1 values. Data is
|
|
// emitted completely for every field, in every series, before the next field is processed.
|
|
// Data from Read() adheres to the requirements for writing to tsm1 shards
|
|
func (r *Reader) Read() (string, []tsm1.Value, error) {
|
|
return r.keyBuf, r.values[:r.valuePos], nil
|
|
}
|
|
|
|
// Close closes the reader.
|
|
func (r *Reader) Close() error {
|
|
r.tx.Rollback()
|
|
return r.db.Close()
|
|
}
|
|
|
|
// cursor provides ordered iteration across a series.
type cursor struct {
	// Bolt cursor and readahead buffer.
	cursor *bolt.Cursor
	keyBuf int64       // buffered timestamp; -2 means "buffer empty" (primed by SeekTo, consumed by Next)
	valBuf interface{} // buffered decoded value paired with keyBuf

	series string // series key; also the name of the bolt bucket holding its data
	field  string // the single field this cursor decodes from each point
	dec    *tsdb.FieldCodec
}
|
|
|
|
// Cursor returns an iterator for a key over a single field.
|
|
func newCursor(tx *bolt.Tx, series string, field string, dec *tsdb.FieldCodec) *cursor {
|
|
cur := &cursor{
|
|
keyBuf: -2,
|
|
series: series,
|
|
field: field,
|
|
dec: dec,
|
|
}
|
|
|
|
// Retrieve series bucket.
|
|
b := tx.Bucket([]byte(series))
|
|
if b != nil {
|
|
cur.cursor = b.Cursor()
|
|
}
|
|
|
|
return cur
|
|
}
|
|
|
|
// Seek moves the cursor to a position.
|
|
func (c *cursor) SeekTo(seek int64) {
|
|
var seekBytes [8]byte
|
|
binary.BigEndian.PutUint64(seekBytes[:], uint64(seek))
|
|
k, v := c.cursor.Seek(seekBytes[:])
|
|
c.keyBuf, c.valBuf = tsdb.DecodeKeyValue(c.field, c.dec, k, v)
|
|
}
|
|
|
|
// Next returns the next key/value pair from the cursor.
|
|
func (c *cursor) Next() (key int64, value interface{}) {
|
|
for {
|
|
k, v := func() (int64, interface{}) {
|
|
if c.keyBuf != -2 {
|
|
k, v := c.keyBuf, c.valBuf
|
|
c.keyBuf = -2
|
|
return k, v
|
|
}
|
|
|
|
k, v := c.cursor.Next()
|
|
if k == nil {
|
|
return -1, nil
|
|
}
|
|
return tsdb.DecodeKeyValue(c.field, c.dec, k, v)
|
|
}()
|
|
|
|
if k != -1 && v == nil {
|
|
// There is a point in the series at the next timestamp,
|
|
// but not for this cursor's field. Go to the next point.
|
|
continue
|
|
}
|
|
return k, v
|
|
}
|
|
}
|
|
|
|
// Sort b1 cursors in correct order for writing to TSM files.
|
|
|
|
type cursors []*cursor
|
|
|
|
func (a cursors) Len() int { return len(a) }
|
|
func (a cursors) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
|
func (a cursors) Less(i, j int) bool {
|
|
if a[i].series == a[j].series {
|
|
return a[i].field < a[j].field
|
|
}
|
|
return a[i].series < a[j].series
|
|
}
|