influxdb/tsdb/engine/tsi1/series.go

456 lines
11 KiB
Go

package tsi1
import (
"bytes"
"encoding/binary"
"errors"
"io"
"math"
"sort"
"github.com/influxdata/influxdb/models"
)
// ErrSeriesOverflow is returned when too many series are added to a series writer.
// The writer reports it once it already holds math.MaxInt32 series.
var ErrSeriesOverflow = errors.New("series overflow")
// Field sizes, in bytes, used by the series list encoding.
const (
	// Trailer fields: an (offset, size) pair for the term list followed by
	// an (offset, size) pair for the series data.
	TermListOffsetSize   = 8
	TermListSizeSize     = 8
	SeriesDataOffsetSize = 8
	SeriesDataSizeSize   = 8

	// SeriesListTrailerSize is the total encoded size of the trailer.
	SeriesListTrailerSize = TermListOffsetSize + TermListSizeSize + SeriesDataOffsetSize + SeriesDataSizeSize

	// Other fixed-width fields.
	TermCountSize   = 4
	SeriesCountSize = 4
	SeriesIDSize    = 4
)
// Bits stored in the flag byte that precedes each encoded series entry.
const (
	// SeriesTombstoneFlag is set when the series entry is marked as deleted.
	SeriesTombstoneFlag = 1 << 0
)
// SeriesList represents the section of the index which holds the term
// dictionary and a sorted list of series keys.
//
// Both fields are sub-slices of the buffer handed to UnmarshalBinary; no data
// is copied.
type SeriesList struct {
// termData holds a big-endian uint32 term count followed by the terms, each
// stored as a uvarint length and then the term bytes.
termData []byte
// seriesData holds a big-endian uint32 series count followed by the series
// entries, each stored as a flag byte, a uvarint length and the encoded key.
seriesData []byte
}
// SeriesOffset returns the offset of the encoded series key and whether the
// entry carries the tombstone flag.
// Returns 0 and false if the key does not exist in the series list.
//
// The offset points at the entry's flag byte and is computed as if the term
// data starts at position zero and the series data follows it directly, which
// is the layout SeriesListWriter.WriteTo produces.
//
// The series data is scanned linearly and is not bounds checked; malformed
// data causes a panic.
func (l *SeriesList) SeriesOffset(key []byte) (offset uint32, deleted bool) {
	offset = uint32(len(l.termData) + SeriesCountSize)
	data := l.seriesData[SeriesCountSize:]

	for i, n := uint32(0), l.SeriesCount(); i < n; i++ {
		// Read series flag.
		flag := data[0]
		data = data[1:]

		// Read series length.
		ln, sz := binary.Uvarint(data)
		data = data[sz:]

		// Return offset if the series key matches.
		if bytes.Equal(key, data[:ln]) {
			return offset, (flag & SeriesTombstoneFlag) != 0
		}

		// Move to the next entry. An entry occupies the flag byte, the
		// varint length and the key itself; all three must be counted or
		// every offset after the first entry drifts from what the writer
		// recorded.
		data = data[ln:]
		offset += 1 + uint32(sz) + uint32(ln)
	}
	return 0, false
}
// EncodeSeries returns a dictionary-encoded series key built from name and
// tags using the terms held by this series list.
func (l *SeriesList) EncodeSeries(name string, tags models.Tags) []byte {
	// Reserve the smallest possible encoding up front: one byte each for the
	// name and the tag count, plus one byte for every tag key and one for
	// every tag value. Larger varints simply grow the slice.
	dst := make([]byte, 0, 2+2*len(tags))
	return l.AppendEncodeSeries(dst, name, tags)
}
// AppendEncodeSeries appends the dictionary encoding of a series to dst and
// returns the extended slice.
//
// The encoding is a sequence of uvarints: the term offset of the name, the
// number of tags, and then the term offsets of each tag key and tag value.
func (l *SeriesList) AppendEncodeSeries(dst []byte, name string, tags models.Tags) []byte {
	var scratch [binary.MaxVarintLen32]byte

	// Measurement name.
	nameOffset := l.EncodeTerm([]byte(name))
	sz := binary.PutUvarint(scratch[:], uint64(nameOffset))
	dst = append(dst, scratch[:sz]...)

	// Number of tags that follow.
	sz = binary.PutUvarint(scratch[:], uint64(len(tags)))
	dst = append(dst, scratch[:sz]...)

	// Key/value pairs, in the order given.
	for _, tag := range tags {
		keyOffset := l.EncodeTerm(tag.Key)
		sz = binary.PutUvarint(scratch[:], uint64(keyOffset))
		dst = append(dst, scratch[:sz]...)

		valueOffset := l.EncodeTerm(tag.Value)
		sz = binary.PutUvarint(scratch[:], uint64(valueOffset))
		dst = append(dst, scratch[:sz]...)
	}
	return dst
}
// DecodeSeries decodes a dictionary encoded series into a name and tagset.
//
// v must hold the uvarint term offset of the name, the uvarint tag count and
// then a key offset and value offset for each tag. Offsets are resolved with
// DecodeTerm. The input is not validated; malformed data causes a panic.
func (l *SeriesList) DecodeSeries(v []byte) (name string, tags models.Tags) {
	// Read name.
	offset, n := binary.Uvarint(v)
	name, v = string(l.DecodeTerm(uint32(offset))), v[n:]

	// Read tag count.
	tagN, n := binary.Uvarint(v)
	v = v[n:]

	// Loop over tag key/values. The cursor v is advanced with plain
	// assignment: declaring it with := inside the loop would shadow it and
	// make every iteration re-read the first tag.
	for i := 0; i < int(tagN); i++ {
		// Read key.
		offset, n = binary.Uvarint(v)
		key := l.DecodeTerm(uint32(offset))
		v = v[n:]

		// Read value.
		offset, n = binary.Uvarint(v)
		value := l.DecodeTerm(uint32(offset))
		v = v[n:]

		// Add to tagset.
		tags.Set(key, value)
	}
	return name, tags
}
// DecodeTerm returns the term stored at the given offset in the term data.
//
// The record at offset is a uvarint length followed by that many bytes. The
// returned slice aliases the term data; it is not a copy.
func (l *SeriesList) DecodeTerm(offset uint32) []byte {
	record := l.termData[offset:]
	size, width := binary.Uvarint(record)
	return record[width:][:size]
}
// EncodeTerm returns the offset of v within the term data, or 0 when v is not
// in the dictionary. The dictionary is scanned linearly from the first term.
func (l *SeriesList) EncodeTerm(v []byte) uint32 {
	pos := uint32(TermCountSize)
	rest := l.termData[pos:]

	for left := l.TermCount(); left > 0; left-- {
		// Each record is a uvarint length followed by the term bytes.
		size, width := binary.Uvarint(rest)
		body := rest[width:]

		if bytes.Equal(v, body[:size]) {
			return pos
		}

		// Step over this record.
		rest = body[size:]
		pos += uint32(width) + uint32(size)
	}
	return 0
}
// TermCount returns the number of terms within the dictionary, read from the
// big-endian count at the start of the term data.
func (l *SeriesList) TermCount() uint32 {
	header := l.termData[:TermCountSize]
	return binary.BigEndian.Uint32(header)
}
// SeriesCount returns the number of series, read from the big-endian count at
// the start of the series data.
func (l *SeriesList) SeriesCount() uint32 {
	header := l.seriesData[:SeriesCountSize]
	return binary.BigEndian.Uint32(header)
}
// UnmarshalBinary unpacks data into the series list.
//
// The trailer at the end of data gives the position and size of the term list
// and of the series data; both are kept as sub-slices of data rather than
// copied. If data is an mmap it must therefore stay open for as long as the
// series list is in use.
//
// The current implementation always returns nil; offsets that fall outside
// data cause a panic.
func (l *SeriesList) UnmarshalBinary(data []byte) error {
	trailer := ReadSeriesListTrailer(data)

	// Term dictionary section.
	l.termData = data[trailer.TermList.Offset:][:trailer.TermList.Size]

	// Series section.
	l.seriesData = data[trailer.SeriesData.Offset:][:trailer.SeriesData.Size]

	return nil
}
// SeriesListWriter writes a SeriesDictionary block.
//
// Series are collected with Add and Delete and then serialized with WriteTo.
type SeriesListWriter struct {
// terms counts how often each name, tag key and tag value has been appended.
terms map[string]int // term frequency
// series holds every appended series; it is sorted when the list is written.
series []serie // series list
// Term list is available after writer has been written.
termList *TermList
}
// NewSeriesListWriter returns an empty SeriesListWriter that is ready to
// accept series.
func NewSeriesListWriter() *SeriesListWriter {
	sw := new(SeriesListWriter)
	sw.terms = make(map[string]int)
	return sw
}
// Add appends a live (non-tombstoned) series to the writer's set.
// Returns ErrSeriesOverflow if no more series can be held in the writer.
func (sw *SeriesListWriter) Add(name string, tags models.Tags) error {
	const deleted = false
	return sw.append(name, tags, deleted)
}
// Delete appends a series to the writer's set with its deleted marker set.
// Returns ErrSeriesOverflow if no more series can be held in the writer.
func (sw *SeriesListWriter) Delete(name string, tags models.Tags) error {
	const deleted = true
	return sw.append(name, tags, deleted)
}
// append records a series, together with its deleted marker, and counts each
// of its terms. It returns ErrSeriesOverflow when the writer already holds
// math.MaxInt32 series.
func (sw *SeriesListWriter) append(name string, tags models.Tags, deleted bool) error {
	// Refuse to grow past the maximum number of series.
	if len(sw.series) == math.MaxInt32 {
		return ErrSeriesOverflow
	}

	// Bump the frequency of the name and of every tag key and value.
	sw.terms[name] += 1
	for _, tag := range tags {
		sw.terms[string(tag.Key)] += 1
		sw.terms[string(tag.Value)] += 1
	}

	// Remember the series itself.
	entry := serie{name: name, tags: tags, deleted: deleted}
	sw.series = append(sw.series, entry)
	return nil
}
// WriteTo computes the dictionary encoding of the series and writes to w.
//
// The output is the term dictionary, then the dictionary-encoded series list,
// then a trailer recording the offset and size of both sections. It returns
// the number of bytes written and the first error encountered. On success the
// term list is retained on the writer for later use.
func (sw *SeriesListWriter) WriteTo(w io.Writer) (n int64, err error) {
	var t SeriesListTrailer

	terms := NewTermList(sw.terms)

	// Write term dictionary.
	t.TermList.Offset = n
	nn, err := sw.writeTermListTo(w, terms)
	n += nn
	if err != nil {
		return n, err
	}
	t.TermList.Size = n - t.TermList.Offset

	// Series offsets are 32-bit. Refuse to continue if the dictionary alone
	// already exceeds that range; converting n below would otherwise
	// truncate silently and yield wrong series offsets.
	if n > math.MaxUint32 {
		return n, errors.New("series dictionary exceeded max size")
	}

	// Write dictionary-encoded series list.
	t.SeriesData.Offset = n
	nn, err = sw.writeSeriesTo(w, terms, uint32(n))
	n += nn
	if err != nil {
		return n, err
	}
	t.SeriesData.Size = n - t.SeriesData.Offset

	// Write trailer.
	if err := sw.writeTrailerTo(w, t, &n); err != nil {
		return n, err
	}

	// Save term list for future encoding.
	sw.termList = terms

	return n, nil
}
// writeTermListTo writes the terms to w.
//
// The output is a big-endian uint32 term count followed by one record per
// term, each a uvarint length and then the term bytes. The starting offset of
// every record is stored on its term entry. It returns the number of bytes
// written and fails if a record would start beyond the uint32 range.
func (sw *SeriesListWriter) writeTermListTo(w io.Writer, terms *TermList) (n int64, err error) {
	// hdr is fixed-size scratch space for the count and for each length
	// prefix. It is kept apart from the record buffer: re-slicing a single
	// buffer to the previous record's length can leave too little room for
	// the next varint, and binary.PutUvarint panics on a short buffer.
	var hdr [binary.MaxVarintLen64]byte
	var buf []byte

	// Write term count.
	binary.BigEndian.PutUint32(hdr[:4], uint32(terms.Len()))
	nn, err := w.Write(hdr[:4])
	n += int64(nn)
	if err != nil {
		return n, err
	}

	// Write terms.
	for i := range terms.a {
		e := &terms.a[i]

		// Ensure that we can reference the offset using a uint32.
		if n > math.MaxUint32 {
			return n, errors.New("series dictionary exceeded max size")
		}

		// Track starting offset of the term.
		e.offset = uint32(n)

		// Join varint(length) & term in buffer, reusing its capacity.
		sz := binary.PutUvarint(hdr[:], uint64(len(e.term)))
		buf = append(buf[:0], hdr[:sz]...)
		buf = append(buf, e.term...)

		// Write buffer to writer.
		nn, err := w.Write(buf)
		n += int64(nn)
		if err != nil {
			return n, err
		}
	}

	return n, nil
}
// writeSeriesTo writes dictionary-encoded series to w in sorted order.
//
// The output is a big-endian uint32 series count followed by one record per
// series: a flag byte, a uvarint length and the dictionary-encoded key. The
// flag byte carries the tombstone bit for series appended as deleted.
//
// offset is the position at which this section starts; the absolute offset of
// every record is stored on its series entry. It returns the number of bytes
// written and fails if a record would start beyond the uint32 range.
func (sw *SeriesListWriter) writeSeriesTo(w io.Writer, terms *TermList, offset uint32) (n int64, err error) {
	// tombstoneFlag is the flag-byte bit for a deleted series. It must stay
	// equal to SeriesTombstoneFlag, which readers test against.
	const tombstoneFlag = 0x01

	// hdr is fixed-size scratch space for the count and for each record's
	// flag and length prefix. It is kept apart from the record buffer so the
	// space available to binary.PutUvarint never depends on the length of
	// the previously written record.
	var hdr [1 + binary.MaxVarintLen64]byte
	var buf, seriesBuf []byte

	// Ensure series are sorted.
	sort.Sort(series(sw.series))

	// Write series count.
	binary.BigEndian.PutUint32(hdr[:4], uint32(len(sw.series)))
	nn, err := w.Write(hdr[:4])
	n += int64(nn)
	if err != nil {
		return n, err
	}

	// Write series.
	for i := range sw.series {
		s := &sw.series[i]

		// Ensure that we can reference the series using a uint32.
		if int64(offset)+n > math.MaxUint32 {
			return n, errors.New("series list exceeded max size")
		}

		// Track offset of the series.
		s.offset = offset + uint32(n)

		// Write encoded series to a separate buffer.
		seriesBuf = terms.AppendEncodedSeries(seriesBuf[:0], s.name, s.tags)

		// Build the flag byte; deleted series carry the tombstone bit.
		hdr[0] = 0
		if s.deleted {
			hdr[0] = tombstoneFlag
		}

		// Join flag, varint(length), & dictionary-encoded series in buffer.
		sz := binary.PutUvarint(hdr[1:], uint64(len(seriesBuf)))
		buf = append(buf[:0], hdr[:1+sz]...)
		buf = append(buf, seriesBuf...)

		// Write buffer to writer.
		nn, err := w.Write(buf)
		n += int64(nn)
		if err != nil {
			return n, err
		}
	}

	return n, nil
}
// writeTrailerTo writes the trailer to w: the term list offset and size
// followed by the series data offset and size, each as a uint64. n is passed
// through to writeUint64To for every field. Writing stops at the first error.
func (sw *SeriesListWriter) writeTrailerTo(w io.Writer, t SeriesListTrailer, n *int64) error {
	fields := [...]int64{
		t.TermList.Offset,
		t.TermList.Size,
		t.SeriesData.Offset,
		t.SeriesData.Size,
	}
	for _, field := range fields {
		if err := writeUint64To(w, uint64(field), n); err != nil {
			return err
		}
	}
	return nil
}
// Offset returns the offset recorded for the given series, or 0 when the
// series is not held by the writer.
// Only valid after the series list has been written to a writer, which is
// when the series are sorted and their offsets assigned.
func (sw *SeriesListWriter) Offset(name string, tags models.Tags) uint32 {
	// Binary search for the first series that does not sort before the
	// requested one.
	idx := sort.Search(len(sw.series), func(i int) bool {
		candidate := &sw.series[i]
		if candidate.name == name {
			return models.CompareTags(candidate.tags, tags) != -1
		}
		return candidate.name > name
	})

	// Past the end: nothing sorts at or after the requested series.
	if idx >= len(sw.series) {
		return 0
	}

	// Only an exact match counts.
	found := &sw.series[idx]
	if found.name != name || !found.tags.Equal(tags) {
		return 0
	}
	return found.offset
}
// ReadSeriesListTrailer decodes the trailer stored in the final
// SeriesListTrailerSize bytes of data. Each field is a big-endian uint64.
// It panics if data is shorter than the trailer.
func ReadSeriesListTrailer(data []byte) SeriesListTrailer {
	var t SeriesListTrailer

	// Isolate the trailer bytes and walk them with a cursor.
	buf := data[len(data)-SeriesListTrailerSize:]
	pos := 0

	// Term list offset and size.
	t.TermList.Offset = int64(binary.BigEndian.Uint64(buf[pos : pos+TermListOffsetSize]))
	pos += TermListOffsetSize
	t.TermList.Size = int64(binary.BigEndian.Uint64(buf[pos : pos+TermListSizeSize]))
	pos += TermListSizeSize

	// Series data offset and size.
	t.SeriesData.Offset = int64(binary.BigEndian.Uint64(buf[pos : pos+SeriesDataOffsetSize]))
	pos += SeriesDataOffsetSize
	t.SeriesData.Size = int64(binary.BigEndian.Uint64(buf[pos : pos+SeriesDataSizeSize]))

	return t
}
// SeriesListTrailer represents meta data written to the end of the series list.
// Each field is stored in the trailer as an 8-byte big-endian integer.
type SeriesListTrailer struct {
// TermList locates the term dictionary within the series list data.
TermList struct {
Offset int64
Size int64
}
// SeriesData locates the encoded series entries within the series list data.
SeriesData struct {
Offset int64
Size int64
}
}
// serie is a single series collected by SeriesListWriter.
type serie struct {
name string
tags models.Tags
// deleted is true when the series was appended through Delete.
deleted bool
// offset is assigned when the series list is written.
offset uint32
}
// series implements sort.Interface, ordering entries by name and then by
// tags as compared by models.CompareTags.
type series []serie

// Len returns the number of entries.
func (a series) Len() int { return len(a) }

// Swap exchanges the entries at i and j.
func (a series) Swap(i, j int) { a[j], a[i] = a[i], a[j] }

// Less reports whether entry i sorts before entry j.
func (a series) Less(i, j int) bool {
	x, y := &a[i], &a[j]
	if x.name == y.name {
		return models.CompareTags(x.tags, y.tags) == -1
	}
	return x.name < y.name
}