influxdb/tsdb/series_segment.go

459 lines
12 KiB
Go

package tsdb
import (
"bufio"
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"regexp"
"strconv"
"github.com/influxdata/influxdb/v2/pkg/mmap"
)
const (
SeriesSegmentVersion = 1
SeriesSegmentMagic = "SSEG"
SeriesSegmentHeaderSize = 4 + 1 // magic + version
)
// Series entry constants.
const (
SeriesEntryFlagSize = 1
SeriesEntryHeaderSize = 1 + 8 // flag + id
SeriesEntryInsertFlag = 0x01
SeriesEntryTombstoneFlag = 0x02
)
var (
ErrInvalidSeriesSegment = errors.New("invalid series segment")
ErrInvalidSeriesSegmentVersion = errors.New("invalid series segment version")
ErrSeriesSegmentNotWritable = errors.New("series segment not writable")
)
// SeriesSegment represents a log of series entries.
type SeriesSegment struct {
id uint16
path string
data []byte // mmap file
file *os.File // write file handle
w *bufio.Writer // bufferred file handle
size uint32 // current file size
}
// NewSeriesSegment returns a new instance of SeriesSegment.
func NewSeriesSegment(id uint16, path string) *SeriesSegment {
return &SeriesSegment{
id: id,
path: path,
}
}
// CreateSeriesSegment generates an empty segment at path.
func CreateSeriesSegment(id uint16, path string) (*SeriesSegment, error) {
// Generate segment in temp location.
f, err := os.Create(path + ".initializing")
if err != nil {
return nil, err
}
defer f.Close()
// Write header to file and close.
hdr := NewSeriesSegmentHeader()
if _, err := hdr.WriteTo(f); err != nil {
return nil, err
} else if err := f.Truncate(int64(SeriesSegmentSize(id))); err != nil {
return nil, err
} else if err := f.Sync(); err != nil {
return nil, err
} else if err := f.Close(); err != nil {
return nil, err
}
// Swap with target path.
if err := os.Rename(f.Name(), path); err != nil {
return nil, err
}
// Open segment at new location.
segment := NewSeriesSegment(id, path)
if err := segment.Open(); err != nil {
return nil, err
}
return segment, nil
}
// Open memory maps the data file at the file's path.
func (s *SeriesSegment) Open() error {
if err := func() (err error) {
// Memory map file data.
if s.data, err = mmap.Map(s.path, int64(SeriesSegmentSize(s.id))); err != nil {
return err
}
// Read header.
hdr, err := ReadSeriesSegmentHeader(s.data)
if err != nil {
return err
} else if hdr.Version != SeriesSegmentVersion {
return ErrInvalidSeriesSegmentVersion
}
return nil
}(); err != nil {
s.Close()
return err
}
return nil
}
// Path returns the file path to the segment.
func (s *SeriesSegment) Path() string { return s.path }
// InitForWrite initializes a write handle for the segment.
// This is only used for the last segment in the series file.
func (s *SeriesSegment) InitForWrite() (err error) {
// Only calculate segment data size if writing.
for s.size = uint32(SeriesSegmentHeaderSize); s.size < uint32(len(s.data)); {
flag, _, _, sz := ReadSeriesEntry(s.data[s.size:])
if !IsValidSeriesEntryFlag(flag) {
break
}
s.size += uint32(sz)
}
// Open file handler for writing & seek to end of data.
if s.file, err = os.OpenFile(s.path, os.O_WRONLY|os.O_CREATE, 0666); err != nil {
return err
} else if _, err := s.file.Seek(int64(s.size), io.SeekStart); err != nil {
return err
}
s.w = bufio.NewWriterSize(s.file, 32*1024)
return nil
}
// Close unmaps the segment.
func (s *SeriesSegment) Close() (err error) {
if e := s.CloseForWrite(); e != nil && err == nil {
err = e
}
if s.data != nil {
if e := mmap.Unmap(s.data); e != nil && err == nil {
err = e
}
s.data = nil
}
return err
}
func (s *SeriesSegment) CloseForWrite() (err error) {
if s.w != nil {
if e := s.w.Flush(); e != nil && err == nil {
err = e
}
s.w = nil
}
if s.file != nil {
if e := s.file.Close(); e != nil && err == nil {
err = e
}
s.file = nil
}
return err
}
// Data returns the raw data.
func (s *SeriesSegment) Data() []byte { return s.data }
// ID returns the id the segment was initialized with.
func (s *SeriesSegment) ID() uint16 { return s.id }
// Size returns the size of the data in the segment.
// This is only populated once InitForWrite() is called.
func (s *SeriesSegment) Size() int64 { return int64(s.size) }
// Slice returns a byte slice starting at pos.
func (s *SeriesSegment) Slice(pos uint32) []byte { return s.data[pos:] }
// WriteLogEntry writes entry data into the segment.
// Returns the offset of the beginning of the entry.
func (s *SeriesSegment) WriteLogEntry(data []byte) (offset int64, err error) {
if !s.CanWrite(data) {
return 0, ErrSeriesSegmentNotWritable
}
offset = JoinSeriesOffset(s.id, s.size)
if _, err := s.w.Write(data); err != nil {
return 0, err
}
s.size += uint32(len(data))
return offset, nil
}
// CanWrite returns true if segment has space to write entry data.
func (s *SeriesSegment) CanWrite(data []byte) bool {
return s.w != nil && s.size+uint32(len(data)) <= SeriesSegmentSize(s.id)
}
// Flush flushes the buffer to disk.
func (s *SeriesSegment) Flush() error {
if s.w == nil {
return nil
}
if err := s.w.Flush(); err != nil {
return err
}
return s.file.Sync()
}
// AppendSeriesIDs appends all the segments ids to a slice. Returns the new slice.
func (s *SeriesSegment) AppendSeriesIDs(a []uint64) []uint64 {
s.ForEachEntry(func(flag uint8, id uint64, _ int64, _ []byte) error {
if flag == SeriesEntryInsertFlag {
a = append(a, id)
}
return nil
})
return a
}
// MaxSeriesID returns the highest series id in the segment.
func (s *SeriesSegment) MaxSeriesID() uint64 {
var max uint64
s.ForEachEntry(func(flag uint8, id uint64, _ int64, _ []byte) error {
if flag == SeriesEntryInsertFlag && id > max {
max = id
}
return nil
})
return max
}
// ForEachEntry executes fn for every entry in the segment.
func (s *SeriesSegment) ForEachEntry(fn func(flag uint8, id uint64, offset int64, key []byte) error) error {
for pos := uint32(SeriesSegmentHeaderSize); pos < uint32(len(s.data)); {
flag, id, key, sz := ReadSeriesEntry(s.data[pos:])
if !IsValidSeriesEntryFlag(flag) {
break
}
offset := JoinSeriesOffset(s.id, pos)
if err := fn(flag, id, offset, key); err != nil {
return err
}
pos += uint32(sz)
}
return nil
}
// Clone returns a copy of the segment. Excludes the write handler, if set.
func (s *SeriesSegment) Clone() *SeriesSegment {
return &SeriesSegment{
id: s.id,
path: s.path,
data: s.data,
size: s.size,
}
}
// CompactToPath rewrites the segment to a new file and removes tombstoned entries.
func (s *SeriesSegment) CompactToPath(path string, index *SeriesIndex) error {
dst, err := CreateSeriesSegment(s.id, path)
if err != nil {
return err
}
defer dst.Close()
if err = dst.InitForWrite(); err != nil {
return err
}
// Iterate through the segment and write any entries to a new segment
// that exist in the index.
var buf []byte
if err = s.ForEachEntry(func(flag uint8, id uint64, _ int64, key []byte) error {
if index.IsDeleted(id) {
return nil // series id has been deleted from index
} else if flag == SeriesEntryTombstoneFlag {
return fmt.Errorf("[series id %d]: tombstone entry but exists in index", id)
}
// copy entry over to new segment
buf = AppendSeriesEntry(buf[:0], flag, id, key)
if _, err := dst.WriteLogEntry(buf); err != nil {
return err
}
return err
}); err != nil {
return err
}
// Close the segment and truncate it to its maximum size.
size := dst.size
if err := dst.Close(); err != nil {
return err
} else if err := os.Truncate(dst.path, int64(size)); err != nil {
return err
}
return nil
}
// CloneSeriesSegments returns a copy of a slice of segments.
func CloneSeriesSegments(a []*SeriesSegment) []*SeriesSegment {
other := make([]*SeriesSegment, len(a))
for i := range a {
other[i] = a[i].Clone()
}
return other
}
// FindSegment returns a segment by id.
func FindSegment(a []*SeriesSegment, id uint16) *SeriesSegment {
for _, segment := range a {
if segment.id == id {
return segment
}
}
return nil
}
// ReadSeriesKeyFromSegments returns a series key from an offset within a set of segments.
func ReadSeriesKeyFromSegments(a []*SeriesSegment, offset int64) []byte {
segmentID, pos := SplitSeriesOffset(offset)
segment := FindSegment(a, segmentID)
if segment == nil {
return nil
}
buf := segment.Slice(pos)
key, _ := ReadSeriesKey(buf)
return key
}
// JoinSeriesOffset returns an offset that combines the 2-byte segmentID and 4-byte pos.
func JoinSeriesOffset(segmentID uint16, pos uint32) int64 {
return (int64(segmentID) << 32) | int64(pos)
}
// SplitSeriesOffset splits a offset into its 2-byte segmentID and 4-byte pos parts.
func SplitSeriesOffset(offset int64) (segmentID uint16, pos uint32) {
return uint16((offset >> 32) & 0xFFFF), uint32(offset & 0xFFFFFFFF)
}
// IsValidSeriesSegmentFilename returns true if filename is a 4-character lowercase hexadecimal number.
func IsValidSeriesSegmentFilename(filename string) bool {
return seriesSegmentFilenameRegex.MatchString(filename)
}
// ParseSeriesSegmentFilename returns the id represented by the hexadecimal filename.
func ParseSeriesSegmentFilename(filename string) (uint16, error) {
i, err := strconv.ParseUint(filename, 16, 32)
return uint16(i), err
}
var seriesSegmentFilenameRegex = regexp.MustCompile(`^[0-9a-f]{4}$`)
// SeriesSegmentSize returns the maximum size of the segment.
// The size goes up by powers of 2 starting from 4MB and reaching 256MB.
func SeriesSegmentSize(id uint16) uint32 {
const min = 22 // 4MB
const max = 28 // 256MB
shift := id + min
if shift >= max {
shift = max
}
return 1 << shift
}
// SeriesSegmentHeader represents the header of a series segment.
type SeriesSegmentHeader struct {
Version uint8
}
// NewSeriesSegmentHeader returns a new instance of SeriesSegmentHeader.
func NewSeriesSegmentHeader() SeriesSegmentHeader {
return SeriesSegmentHeader{Version: SeriesSegmentVersion}
}
// ReadSeriesSegmentHeader returns the header from data.
func ReadSeriesSegmentHeader(data []byte) (hdr SeriesSegmentHeader, err error) {
r := bytes.NewReader(data)
// Read magic number.
magic := make([]byte, len(SeriesSegmentMagic))
if _, err := io.ReadFull(r, magic); err != nil {
return hdr, err
} else if !bytes.Equal([]byte(SeriesSegmentMagic), magic) {
return hdr, ErrInvalidSeriesSegment
}
// Read version.
if err := binary.Read(r, binary.BigEndian, &hdr.Version); err != nil {
return hdr, err
}
return hdr, nil
}
// WriteTo writes the header to w.
func (hdr *SeriesSegmentHeader) WriteTo(w io.Writer) (n int64, err error) {
var buf bytes.Buffer
buf.WriteString(SeriesSegmentMagic)
binary.Write(&buf, binary.BigEndian, hdr.Version)
return buf.WriteTo(w)
}
func ReadSeriesEntry(data []byte) (flag uint8, id uint64, key []byte, sz int64) {
// If flag byte is zero then no more entries exist.
flag, data = uint8(data[0]), data[1:]
if !IsValidSeriesEntryFlag(flag) {
return 0, 0, nil, 1
}
id, data = binary.BigEndian.Uint64(data), data[8:]
switch flag {
case SeriesEntryInsertFlag:
key, _ = ReadSeriesKey(data)
}
return flag, id, key, int64(SeriesEntryHeaderSize + len(key))
}
func AppendSeriesEntry(dst []byte, flag uint8, id uint64, key []byte) []byte {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, id)
dst = append(dst, flag)
dst = append(dst, buf...)
switch flag {
case SeriesEntryInsertFlag:
dst = append(dst, key...)
case SeriesEntryTombstoneFlag:
default:
panic(fmt.Sprintf("unreachable: invalid flag: %d", flag))
}
return dst
}
// IsValidSeriesEntryFlag returns true if flag is valid.
func IsValidSeriesEntryFlag(flag byte) bool {
switch flag {
case SeriesEntryInsertFlag, SeriesEntryTombstoneFlag:
return true
default:
return false
}
}