Segment series file

pull/9265/head
Ben Johnson 2017-12-29 11:57:30 -07:00
parent 4ab1542cfc
commit 56980b0d24
GPG Key ID: 81741CD251883081
14 changed files with 1350 additions and 692 deletions

View File

@@ -1684,19 +1684,12 @@ func NewEngine(index string) (*Engine, error) {
 }

 	// Setup series file.
-	f, err := ioutil.TempFile(dbPath, "series")
+	seriesPath, err := ioutil.TempDir(dbPath, "series")
 	if err != nil {
 		return nil, err
 	}
-	f.Close()
-	sfile := tsdb.NewSeriesFile(f.Name())
-
-	// If we're running on a 32-bit system then reduce the SeriesFile size, so we
-	// can address is in memory.
-	if runtime.GOARCH == "386" {
-		sfile.MaxSize = 1 << 27 // 128MB
-	}
-
+	sfile := tsdb.NewSeriesFile(seriesPath)
 	if err = sfile.Open(); err != nil {
 		return nil, err
 	}
@@ -1729,19 +1722,11 @@ type SeriesFile struct {
 // NewSeriesFile returns a new instance of SeriesFile with a temporary file path.
 func NewSeriesFile() *SeriesFile {
-	file, err := ioutil.TempFile("", "tsdb-series-file-")
+	dir, err := ioutil.TempDir("", "tsdb-series-file-")
 	if err != nil {
 		panic(err)
 	}
-	file.Close()
-	s := &SeriesFile{SeriesFile: tsdb.NewSeriesFile(file.Name())}
-
-	// If we're running on a 32-bit system then reduce the SeriesFile size, so we
-	// can address is in memory.
-	if runtime.GOARCH == "386" {
-		s.SeriesFile.MaxSize = 1 << 27 // 128MB
-	}
-	return s
+	return &SeriesFile{SeriesFile: tsdb.NewSeriesFile(dir)}
 }

 // MustOpenSeriesFile returns a new, open instance of SeriesFile. Panic on error.
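For orientation, the same three-step lifecycle recurs in every helper this commit touches; a minimal consolidated sketch (only NewSeriesFile, Open, and Close come from the diff, the rest is illustrative):

package main

import (
	"io/ioutil"
	"log"
	"os"

	"github.com/influxdata/influxdb/tsdb"
)

func main() {
	// The series "file" is now a directory of segment files, so callers
	// create it with TempDir and must clean it up with os.RemoveAll.
	dir, err := ioutil.TempDir("", "series")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(dir)

	sfile := tsdb.NewSeriesFile(dir) // now takes a directory path
	if err := sfile.Open(); err != nil {
		log.Fatal(err)
	}
	defer sfile.Close()
}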

View File

@@ -85,10 +85,10 @@ func TestFileSet_SeriesIDIterator(t *testing.T) {
 		tagset string
 	}{
 		{`cpu`, `[{region east}]`},
-		{`cpu`, `[{region north}]`},
 		{`cpu`, `[{region west}]`},
-		{`disk`, `[]`},
 		{`mem`, `[{region east}]`},
+		{`disk`, `[]`},
+		{`cpu`, `[{region north}]`},
 	}

 	for _, expected := range allexpected {

View File

@@ -6,7 +6,6 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
-	"runtime"
 	"testing"

 	"github.com/influxdata/influxdb/models"
@@ -290,19 +289,11 @@ type SeriesFile struct {
 // NewSeriesFile returns a new instance of SeriesFile with a temporary file path.
 func NewSeriesFile() *SeriesFile {
-	file, err := ioutil.TempFile("", "tsdb-series-file-")
+	dir, err := ioutil.TempDir("", "tsdb-series-file-")
 	if err != nil {
 		panic(err)
 	}
-	file.Close()
-	s := &SeriesFile{SeriesFile: tsdb.NewSeriesFile(file.Name())}
-
-	// If we're running on a 32-bit system then reduce the SeriesFile size, so we
-	// can address is in memory.
-	if runtime.GOARCH == "386" {
-		s.SeriesFile.MaxSize = 1 << 27 // 128MB
-	}
-	return s
+	return &SeriesFile{SeriesFile: tsdb.NewSeriesFile(dir)}
 }

 // MustOpenSeriesFile returns a new, open instance of SeriesFile. Panic on error.
@@ -316,6 +307,6 @@ func MustOpenSeriesFile() *SeriesFile {
 // Close closes the log file and removes it from disk.
 func (f *SeriesFile) Close() error {
-	defer os.Remove(f.Path())
+	defer os.RemoveAll(f.Path())
 	return f.SeriesFile.Close()
 }

View File

@@ -6,7 +6,6 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
-	"runtime"
 	"testing"

 	"github.com/influxdata/influxdb/internal"
@@ -147,19 +146,12 @@ func MustNewIndex(index string) *Index {
 		panic(err)
 	}

-	file, err := ioutil.TempFile(rootPath, "series")
+	seriesPath, err := ioutil.TempDir(rootPath, "series")
 	if err != nil {
 		panic(err)
 	}
-	file.Close()
-	sfile := tsdb.NewSeriesFile(file.Name())
-
-	// If we're running on a 32-bit system then reduce the SeriesFile size, so we
-	// can address is in memory.
-	if runtime.GOARCH == "386" {
-		sfile.MaxSize = 1 << 27 // 128MB
-	}
-
+	sfile := tsdb.NewSeriesFile(seriesPath)
 	if err := sfile.Open(); err != nil {
 		panic(err)
 	}

File diff suppressed because it is too large.

View File

@@ -1,5 +0,0 @@
-package tsdb
-
-// DefaultMaxSeriesFileSize is the maximum series file size. Assuming that each
-// series key takes, for example, 150 bytes, the limit would support ~900K series.
-const DefaultMaxSeriesFileSize = 128 * (1 << 20) // 128MB

View File

@@ -1,5 +0,0 @@
-package tsdb
-
-// DefaultMaxSeriesFileSize is the maximum series file size. Assuming that each
-// series key takes, for example, 150 bytes, the limit would support ~229M series.
-const DefaultMaxSeriesFileSize = 32 * (1 << 30) // 32GB
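Neither constant gets a replacement: under the segmented layout introduced below, each segment is mmap'd separately with its own bounded size, which appears to remove the 32-bit address-space concern the old MaxSize caps worked around. A small sketch against SeriesSegmentSize from the new tsdb/series_segment.go (illustrative only):

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/tsdb"
)

func main() {
	// Per-segment caps: 4MB for segment 0, doubling per id, ceiling at 256MB.
	for id := uint16(0); id <= 7; id++ {
		fmt.Printf("segment %04x: %d MB\n", id, tsdb.SeriesSegmentSize(id)/(1<<20))
	}
}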

View File

@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
-	"runtime"
 	"testing"

 	"github.com/influxdata/influxdb/models"
@@ -65,25 +64,15 @@ func TestSeriesFileCompactor(t *testing.T) {
 		t.Fatalf("unexpected series count: %d", n)
 	}

-	// Compact to new file.
-	compactionPath := sfile.Path() + ".compacting"
-	defer os.Remove(compactionPath)
-
-	compactor := tsdb.NewSeriesFileCompactor(sfile.SeriesFile)
-	if err := compactor.CompactTo(compactionPath); err != nil {
+	// Compact in-place.
+	compactor := tsdb.NewSeriesFileCompactor()
+	if err := compactor.Compact(sfile.SeriesFile); err != nil {
 		t.Fatal(err)
 	}

-	// Open new series file.
-	other := tsdb.NewSeriesFile(compactionPath)
-	if err := other.Open(); err != nil {
-		t.Fatal(err)
-	}
-	defer other.Close()
-
 	// Verify all series exist.
 	for i := range names {
-		if seriesID := other.SeriesID(names[i], tagsSlice[i], nil); seriesID == 0 {
+		if seriesID := sfile.SeriesID(names[i], tagsSlice[i], nil); seriesID == 0 {
 			t.Fatalf("series does not exist: %s,%s", names[i], tagsSlice[i].String())
 		}
 	}
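The change above also simplifies the compaction call site: there is no ".compacting" side file to create, reopen, or delete. A hedged sketch of the new pattern (assumes an open *tsdb.SeriesFile):

// compactInPlace rewrites the index for sfile in place; the open handle
// stays valid and lookups such as sfile.SeriesID keep working afterwards.
func compactInPlace(sfile *tsdb.SeriesFile) error {
	compactor := tsdb.NewSeriesFileCompactor()
	return compactor.Compact(sfile)
}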
@@ -103,19 +92,11 @@ type SeriesFile struct {
 // NewSeriesFile returns a new instance of SeriesFile with a temporary file path.
 func NewSeriesFile() *SeriesFile {
-	file, err := ioutil.TempFile("", "tsdb-series-file-")
+	dir, err := ioutil.TempDir("", "tsdb-series-file-")
 	if err != nil {
 		panic(err)
 	}
-	file.Close()
-	s := &SeriesFile{SeriesFile: tsdb.NewSeriesFile(file.Name())}
-
-	// If we're running on a 32-bit system then reduce the SeriesFile size, so we
-	// can address is in memory.
-	if runtime.GOARCH == "386" {
-		s.SeriesFile.MaxSize = 1 << 27 // 128MB
-	}
-	return s
+	return &SeriesFile{SeriesFile: tsdb.NewSeriesFile(dir)}
 }
// MustOpenSeriesFile returns a new, open instance of SeriesFile. Panic on error.
@@ -129,6 +110,6 @@ func MustOpenSeriesFile() *SeriesFile {
 // Close closes the log file and removes it from disk.
 func (f *SeriesFile) Close() error {
-	defer os.Remove(f.Path())
+	defer os.RemoveAll(f.Path())
 	return f.SeriesFile.Close()
 }

tsdb/series_index.go (new file, 346 lines)
View File

@@ -0,0 +1,346 @@
package tsdb
import (
"bytes"
"encoding/binary"
"errors"
"io"
"os"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/mmap"
"github.com/influxdata/influxdb/pkg/rhh"
)
const (
SeriesIndexVersion = 1
SeriesIndexMagic = "SIDX"
)
const (
SeriesIndexElemSize = 16 // offset + id
SeriesIndexLoadFactor = 90 // rhh load factor
SeriesIndexHeaderSize = 0 +
4 + 1 + // magic + version
8 + 8 + // max series + max offset
8 + 8 + // count + capacity
8 + 8 + // key/id map offset & size
8 + 8 + // id/offset map offset & size
0
)
var ErrInvalidSeriesIndex = errors.New("invalid series index")
// SeriesIndex represents an index of key-to-id & id-to-offset mappings.
type SeriesIndex struct {
path string
count uint64
capacity int64
mask int64
maxSeriesID uint64
maxOffset int64
data []byte // mmap data
keyIDData []byte // key/id mmap data
idOffsetData []byte // id/offset mmap data
// In-memory data since rebuild.
keyIDMap *rhh.HashMap
idOffsetMap map[uint64]int64
tombstones map[uint64]struct{}
}
func NewSeriesIndex(path string) *SeriesIndex {
return &SeriesIndex{
path: path,
}
}
// Open memory-maps the index file.
func (idx *SeriesIndex) Open() (err error) {
// Map data file, if it exists.
if err := func() error {
if _, err := os.Stat(idx.path); err != nil && !os.IsNotExist(err) {
return err
} else if err == nil {
if idx.data, err = mmap.Map(idx.path, 0); err != nil {
return err
}
hdr, err := ReadSeriesIndexHeader(idx.data)
if err != nil {
return err
}
idx.count, idx.capacity, idx.mask = hdr.Count, hdr.Capacity, hdr.Capacity-1
idx.maxSeriesID, idx.maxOffset = hdr.MaxSeriesID, hdr.MaxOffset
idx.keyIDData = idx.data[hdr.KeyIDMap.Offset : hdr.KeyIDMap.Offset+hdr.KeyIDMap.Size]
idx.idOffsetData = idx.data[hdr.IDOffsetMap.Offset : hdr.IDOffsetMap.Offset+hdr.IDOffsetMap.Size]
}
return nil
}(); err != nil {
idx.Close()
return err
}
idx.keyIDMap = rhh.NewHashMap(rhh.DefaultOptions)
idx.idOffsetMap = make(map[uint64]int64)
idx.tombstones = make(map[uint64]struct{})
return nil
}
// Close unmaps the index file.
func (idx *SeriesIndex) Close() (err error) {
if idx.data != nil {
err = mmap.Unmap(idx.data)
}
idx.keyIDData = nil
idx.idOffsetData = nil
idx.keyIDMap = nil
idx.idOffsetMap = nil
idx.tombstones = nil
return err
}
// Recover rebuilds the in-memory index for all new entries.
func (idx *SeriesIndex) Recover(segments []*SeriesSegment) error {
// Allocate new in-memory maps.
idx.keyIDMap = rhh.NewHashMap(rhh.DefaultOptions)
idx.idOffsetMap = make(map[uint64]int64)
idx.tombstones = make(map[uint64]struct{})
// Process all entries since the maximum offset in the on-disk index.
minSegmentID, _ := SplitSeriesOffset(idx.maxOffset)
for _, segment := range segments {
if segment.ID() < minSegmentID {
continue
}
if err := segment.ForEachEntry(func(flag uint8, id uint64, offset int64, key []byte) error {
if offset <= idx.maxOffset {
return nil
}
idx.execEntry(flag, id, offset, key)
return nil
}); err != nil {
return err
}
}
return nil
}
// Count returns the number of series in the index.
func (idx *SeriesIndex) Count() uint64 {
return idx.count + uint64(len(idx.idOffsetMap))
}
func (idx *SeriesIndex) Insert(key []byte, id uint64, offset int64) {
idx.execEntry(SeriesEntryInsertFlag, id, offset, key)
}
// Delete marks the series id as deleted.
func (idx *SeriesIndex) Delete(id uint64) {
idx.execEntry(SeriesEntryTombstoneFlag, id, 0, nil)
}
// IsDeleted returns true if series id has been deleted.
func (idx *SeriesIndex) IsDeleted(id uint64) bool {
_, ok := idx.tombstones[id]
return ok
}
func (idx *SeriesIndex) execEntry(flag uint8, id uint64, offset int64, key []byte) {
switch flag {
case SeriesEntryInsertFlag:
idx.keyIDMap.Put(key, id)
idx.idOffsetMap[id] = offset
case SeriesEntryTombstoneFlag:
idx.tombstones[id] = struct{}{}
default:
panic("unreachable")
}
}
func (idx *SeriesIndex) FindIDBySeriesKey(segments []*SeriesSegment, key []byte) uint64 {
if v := idx.keyIDMap.Get(key); v != nil {
if id, _ := v.(uint64); id != 0 {
return id
}
}
if len(idx.data) == 0 {
return 0
}
hash := rhh.HashKey(key)
for d, pos := int64(0), hash&idx.mask; ; d, pos = d+1, (pos+1)&idx.mask {
elem := idx.keyIDData[(pos * SeriesIndexElemSize):]
elemOffset := int64(binary.BigEndian.Uint64(elem[:8]))
if elemOffset == 0 {
return 0
}
elemKey := ReadSeriesKeyFromSegments(segments, elemOffset+SeriesEntryHeaderSize)
elemHash := rhh.HashKey(elemKey)
if d > rhh.Dist(elemHash, pos, idx.capacity) {
return 0
} else if elemHash == hash && bytes.Equal(elemKey, key) {
return binary.BigEndian.Uint64(elem[8:])
}
}
}
func (idx *SeriesIndex) FindIDByNameTags(segments []*SeriesSegment, name []byte, tags models.Tags, buf []byte) uint64 {
id := idx.FindIDBySeriesKey(segments, AppendSeriesKey(buf[:0], name, tags))
if _, ok := idx.tombstones[id]; ok {
return 0
}
return id
}
func (idx *SeriesIndex) FindIDListByNameTags(segments []*SeriesSegment, names [][]byte, tagsSlice []models.Tags, buf []byte) (ids []uint64, ok bool) {
ids, ok = make([]uint64, len(names)), true
for i := range names {
id := idx.FindIDByNameTags(segments, names[i], tagsSlice[i], buf)
if id == 0 {
ok = false
continue
}
ids[i] = id
}
return ids, ok
}
func (idx *SeriesIndex) FindOffsetByID(id uint64) int64 {
if offset := idx.idOffsetMap[id]; offset != 0 {
return offset
} else if len(idx.data) == 0 {
return 0
}
hash := rhh.HashUint64(id)
for d, pos := int64(0), hash&idx.mask; ; d, pos = d+1, (pos+1)&idx.mask {
elem := idx.idOffsetData[(pos * SeriesIndexElemSize):]
elemID := binary.BigEndian.Uint64(elem[:8])
if elemID == id {
return int64(binary.BigEndian.Uint64(elem[8:]))
} else if elemID == 0 || d > rhh.Dist(rhh.HashUint64(elemID), pos, idx.capacity) {
return 0
}
}
}
// Clone returns a copy of idx for use during compaction. In-memory maps are not cloned.
func (idx *SeriesIndex) Clone() *SeriesIndex {
tombstones := make(map[uint64]struct{}, len(idx.tombstones))
for id := range idx.tombstones {
tombstones[id] = struct{}{}
}
return &SeriesIndex{
path: idx.path,
count: idx.count,
capacity: idx.capacity,
mask: idx.mask,
data: idx.data,
keyIDData: idx.keyIDData,
idOffsetData: idx.idOffsetData,
tombstones: tombstones,
}
}
// SeriesIndexHeader represents the header of a series index.
type SeriesIndexHeader struct {
Version uint8
MaxSeriesID uint64
MaxOffset int64
Count uint64
Capacity int64
KeyIDMap struct {
Offset int64
Size int64
}
IDOffsetMap struct {
Offset int64
Size int64
}
}
// NewSeriesIndexHeader returns a new instance of SeriesIndexHeader.
func NewSeriesIndexHeader() SeriesIndexHeader {
return SeriesIndexHeader{Version: SeriesIndexVersion}
}
// ReadSeriesIndexHeader returns the header from data.
func ReadSeriesIndexHeader(data []byte) (hdr SeriesIndexHeader, err error) {
r := bytes.NewReader(data)
// Read magic number.
magic := make([]byte, len(SeriesIndexMagic))
if _, err := io.ReadFull(r, magic); err != nil {
return hdr, err
} else if !bytes.Equal([]byte(SeriesIndexMagic), magic) {
return hdr, ErrInvalidSeriesIndex
}
// Read version.
if err := binary.Read(r, binary.BigEndian, &hdr.Version); err != nil {
return hdr, err
}
// Read max series id & max offset.
if err := binary.Read(r, binary.BigEndian, &hdr.MaxSeriesID); err != nil {
return hdr, err
} else if err := binary.Read(r, binary.BigEndian, &hdr.MaxOffset); err != nil {
return hdr, err
}
// Read count & capacity.
if err := binary.Read(r, binary.BigEndian, &hdr.Count); err != nil {
return hdr, err
} else if err := binary.Read(r, binary.BigEndian, &hdr.Capacity); err != nil {
return hdr, err
}
// Read key/id map position.
if err := binary.Read(r, binary.BigEndian, &hdr.KeyIDMap.Offset); err != nil {
return hdr, err
} else if err := binary.Read(r, binary.BigEndian, &hdr.KeyIDMap.Size); err != nil {
return hdr, err
}
// Read offset/id map position.
if err := binary.Read(r, binary.BigEndian, &hdr.IDOffsetMap.Offset); err != nil {
return hdr, err
} else if err := binary.Read(r, binary.BigEndian, &hdr.IDOffsetMap.Size); err != nil {
return hdr, err
}
return hdr, nil
}
// WriteTo writes the header to w.
func (hdr *SeriesIndexHeader) WriteTo(w io.Writer) (n int64, err error) {
var buf bytes.Buffer
buf.WriteString(SeriesIndexMagic)
binary.Write(&buf, binary.BigEndian, hdr.Version)
binary.Write(&buf, binary.BigEndian, hdr.MaxSeriesID)
binary.Write(&buf, binary.BigEndian, hdr.MaxOffset)
binary.Write(&buf, binary.BigEndian, hdr.Count)
binary.Write(&buf, binary.BigEndian, hdr.Capacity)
binary.Write(&buf, binary.BigEndian, hdr.KeyIDMap.Offset)
binary.Write(&buf, binary.BigEndian, hdr.KeyIDMap.Size)
binary.Write(&buf, binary.BigEndian, hdr.IDOffsetMap.Offset)
binary.Write(&buf, binary.BigEndian, hdr.IDOffsetMap.Size)
return buf.WriteTo(w)
}
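As a quick cross-check of the header layout defined at the top of this file, the fixed-width fields sum to 69 bytes; a throwaway sketch:

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/tsdb"
)

func main() {
	// magic(4) + version(1) + max series id(8) + max offset(8) + count(8) +
	// capacity(8) + key/id map offset+size(8+8) + id/offset map offset+size(8+8)
	fmt.Println(tsdb.SeriesIndexHeaderSize) // 69
}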

tsdb/series_index_test.go (new file, 132 lines)
View File

@@ -0,0 +1,132 @@
package tsdb_test
import (
"bytes"
"path/filepath"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/tsdb"
)
func TestSeriesIndex_Count(t *testing.T) {
dir, cleanup := MustTempDir()
defer cleanup()
idx := tsdb.NewSeriesIndex(filepath.Join(dir, "index"))
if err := idx.Open(); err != nil {
t.Fatal(err)
}
defer idx.Close()
key0 := tsdb.AppendSeriesKey(nil, []byte("m0"), nil)
idx.Insert(key0, 1, 10)
key1 := tsdb.AppendSeriesKey(nil, []byte("m1"), nil)
idx.Insert(key1, 2, 20)
if n := idx.Count(); n != 2 {
t.Fatalf("unexpected count: %d", n)
}
}
func TestSeriesIndex_Delete(t *testing.T) {
dir, cleanup := MustTempDir()
defer cleanup()
idx := tsdb.NewSeriesIndex(filepath.Join(dir, "index"))
if err := idx.Open(); err != nil {
t.Fatal(err)
}
defer idx.Close()
key0 := tsdb.AppendSeriesKey(nil, []byte("m0"), nil)
idx.Insert(key0, 1, 10)
key1 := tsdb.AppendSeriesKey(nil, []byte("m1"), nil)
idx.Insert(key1, 2, 20)
idx.Delete(1)
if !idx.IsDeleted(1) {
t.Fatal("expected deletion")
} else if idx.IsDeleted(2) {
t.Fatal("expected series to exist")
}
}
func TestSeriesIndex_FindIDBySeriesKey(t *testing.T) {
dir, cleanup := MustTempDir()
defer cleanup()
idx := tsdb.NewSeriesIndex(filepath.Join(dir, "index"))
if err := idx.Open(); err != nil {
t.Fatal(err)
}
defer idx.Close()
key0 := tsdb.AppendSeriesKey(nil, []byte("m0"), nil)
idx.Insert(key0, 1, 10)
key1 := tsdb.AppendSeriesKey(nil, []byte("m1"), nil)
idx.Insert(key1, 2, 20)
badKey := tsdb.AppendSeriesKey(nil, []byte("not_found"), nil)
if id := idx.FindIDBySeriesKey(nil, key0); id != 1 {
t.Fatalf("unexpected id(0): %d", id)
} else if id := idx.FindIDBySeriesKey(nil, key1); id != 2 {
t.Fatalf("unexpected id(1): %d", id)
} else if id := idx.FindIDBySeriesKey(nil, badKey); id != 0 {
t.Fatalf("unexpected id(2): %d", id)
}
if id := idx.FindIDByNameTags(nil, []byte("m0"), nil, nil); id != 1 {
t.Fatalf("unexpected id(0): %d", id)
} else if id := idx.FindIDByNameTags(nil, []byte("m1"), nil, nil); id != 2 {
t.Fatalf("unexpected id(1): %d", id)
} else if id := idx.FindIDByNameTags(nil, []byte("not_found"), nil, nil); id != 0 {
t.Fatalf("unexpected id(2): %d", id)
}
}
func TestSeriesIndex_FindOffsetByID(t *testing.T) {
dir, cleanup := MustTempDir()
defer cleanup()
idx := tsdb.NewSeriesIndex(filepath.Join(dir, "index"))
if err := idx.Open(); err != nil {
t.Fatal(err)
}
defer idx.Close()
idx.Insert(tsdb.AppendSeriesKey(nil, []byte("m0"), nil), 1, 10)
idx.Insert(tsdb.AppendSeriesKey(nil, []byte("m1"), nil), 2, 20)
if offset := idx.FindOffsetByID(1); offset != 10 {
t.Fatalf("unexpected offset(0): %d", offset)
} else if offset := idx.FindOffsetByID(2); offset != 20 {
t.Fatalf("unexpected offset(1): %d", offset)
} else if offset := idx.FindOffsetByID(3); offset != 0 {
t.Fatalf("unexpected offset(2): %d", offset)
}
}
func TestSeriesIndexHeader(t *testing.T) {
// Verify header initializes correctly.
hdr := tsdb.NewSeriesIndexHeader()
if hdr.Version != tsdb.SeriesIndexVersion {
t.Fatalf("unexpected version: %d", hdr.Version)
}
hdr.MaxSeriesID = 10
hdr.MaxOffset = 20
hdr.Count = 30
hdr.Capacity = 40
hdr.KeyIDMap.Offset, hdr.KeyIDMap.Size = 50, 60
hdr.IDOffsetMap.Offset, hdr.IDOffsetMap.Size = 70, 80
// Marshal/unmarshal.
var buf bytes.Buffer
if _, err := hdr.WriteTo(&buf); err != nil {
t.Fatal(err)
} else if other, err := tsdb.ReadSeriesIndexHeader(buf.Bytes()); err != nil {
t.Fatal(err)
} else if diff := cmp.Diff(hdr, other); diff != "" {
t.Fatal(diff)
}
}

tsdb/series_segment.go (new file, 396 lines)
View File

@@ -0,0 +1,396 @@
package tsdb
import (
"bufio"
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"regexp"
"strconv"
"github.com/influxdata/influxdb/pkg/mmap"
)
const (
SeriesSegmentVersion = 1
SeriesSegmentMagic = "SSEG"
SeriesSegmentHeaderSize = 4 + 1 // magic + version
)
// Series entry constants.
const (
SeriesEntryFlagSize = 1
SeriesEntryHeaderSize = 1 + 8 // flag + id
SeriesEntryInsertFlag = 0x01
SeriesEntryTombstoneFlag = 0x02
)
var (
ErrInvalidSeriesSegment = errors.New("invalid series segment")
ErrInvalidSeriesSegmentVersion = errors.New("invalid series segment version")
ErrSeriesSegmentNotWritable = errors.New("series segment not writable")
)
// SeriesSegment represents a log of series entries.
type SeriesSegment struct {
id uint16
path string
data []byte // mmap file
file *os.File // write file handle
w *bufio.Writer // buffered file handle
size uint32 // current file size
}
// NewSeriesSegment returns a new instance of SeriesSegment.
func NewSeriesSegment(id uint16, path string) *SeriesSegment {
return &SeriesSegment{
id: id,
path: path,
}
}
// CreateSeriesSegment generates an empty segment at path.
func CreateSeriesSegment(id uint16, path string) (*SeriesSegment, error) {
// Generate segment in temp location.
f, err := ioutil.TempFile("", "series-segment-")
if err != nil {
return nil, err
}
defer f.Close()
// Write header to file and close.
hdr := NewSeriesSegmentHeader()
if _, err := hdr.WriteTo(f); err != nil {
return nil, err
} else if err := f.Truncate(int64(SeriesSegmentSize(id))); err != nil {
return nil, err
} else if err := f.Close(); err != nil {
return nil, err
}
// Swap with target path.
if err := os.Rename(f.Name(), path); err != nil {
return nil, err
}
// Open segment at new location.
segment := NewSeriesSegment(id, path)
if err := segment.Open(); err != nil {
return nil, err
}
return segment, nil
}
// Open memory maps the data file at the file's path.
func (s *SeriesSegment) Open() error {
if err := func() (err error) {
// Memory map file data.
if s.data, err = mmap.Map(s.path, int64(SeriesSegmentSize(s.id))); err != nil {
return err
}
// Read header.
hdr, err := ReadSeriesSegmentHeader(s.data)
if err != nil {
return err
} else if hdr.Version != SeriesSegmentVersion {
return ErrInvalidSeriesSegmentVersion
}
return nil
}(); err != nil {
s.Close()
return err
}
return nil
}
// InitForWrite initializes a write handle for the segment.
// This is only used for the last segment in the series file.
func (s *SeriesSegment) InitForWrite() (err error) {
// Only calculate segment data size if writing.
for s.size = uint32(SeriesSegmentHeaderSize); s.size < uint32(len(s.data)); {
flag, _, _, sz := ReadSeriesEntry(s.data[s.size:])
if flag == 0 {
break
}
s.size += uint32(sz)
}
// Open file handler for writing & seek to end of data.
if s.file, err = os.OpenFile(s.path, os.O_WRONLY|os.O_CREATE, 0666); err != nil {
return err
} else if _, err := s.file.Seek(int64(s.size), os.SEEK_SET); err != nil {
return err
}
s.w = bufio.NewWriter(s.file)
return nil
}
// Close unmaps the segment.
func (s *SeriesSegment) Close() (err error) {
if e := s.CloseForWrite(); e != nil && err == nil {
err = e
}
if s.data != nil {
if e := mmap.Unmap(s.data); e != nil && err == nil {
err = e
}
s.data = nil
}
return err
}
func (s *SeriesSegment) CloseForWrite() (err error) {
if s.w != nil {
if e := s.w.Flush(); e != nil && err == nil {
err = e
}
s.w = nil
}
if s.file != nil {
if e := s.file.Close(); e != nil && err == nil {
err = e
}
s.file = nil
}
return err
}
// ID returns the id the segment was initialized with.
func (s *SeriesSegment) ID() uint16 { return s.id }
// Size returns the size of the data in the segment.
// This is only populated once InitForWrite() is called.
func (s *SeriesSegment) Size() int64 { return int64(s.size) }
// Slice returns a byte slice starting at pos.
func (s *SeriesSegment) Slice(pos uint32) []byte { return s.data[pos:] }
// WriteLogEntry writes entry data into the segment.
// Returns the offset of the beginning of the entry.
func (s *SeriesSegment) WriteLogEntry(data []byte) (offset int64, err error) {
if !s.CanWrite(data) {
return 0, ErrSeriesSegmentNotWritable
}
offset = JoinSeriesOffset(s.id, s.size)
if _, err := s.w.Write(data); err != nil {
return 0, err
}
s.size += uint32(len(data))
return offset, nil
}
// CanWrite returns true if segment has space to write entry data.
func (s *SeriesSegment) CanWrite(data []byte) bool {
return s.w != nil && s.size+uint32(len(data)) <= SeriesSegmentSize(s.id)
}
// Flush flushes the buffer to disk.
func (s *SeriesSegment) Flush() error {
if s.w == nil {
return nil
}
return s.w.Flush()
}
// AppendSeriesIDs appends all of the segment's series ids to a slice. Returns the new slice.
func (s *SeriesSegment) AppendSeriesIDs(a []uint64) []uint64 {
s.ForEachEntry(func(flag uint8, id uint64, _ int64, _ []byte) error {
if flag == SeriesEntryInsertFlag {
a = append(a, id)
}
return nil
})
return a
}
// MaxSeriesID returns the highest series id in the segment.
func (s *SeriesSegment) MaxSeriesID() uint64 {
var max uint64
s.ForEachEntry(func(flag uint8, id uint64, _ int64, _ []byte) error {
if flag == SeriesEntryInsertFlag && id > max {
max = id
}
return nil
})
return max
}
// ForEachEntry executes fn for every entry in the segment.
func (s *SeriesSegment) ForEachEntry(fn func(flag uint8, id uint64, offset int64, key []byte) error) error {
for pos := uint32(SeriesSegmentHeaderSize); pos < s.size; {
flag, id, key, sz := ReadSeriesEntry(s.data[pos:])
if flag == 0 {
break
}
offset := JoinSeriesOffset(s.id, pos)
if err := fn(flag, id, offset, key); err != nil {
return err
}
pos += uint32(sz)
}
return nil
}
// Clone returns a copy of the segment. Excludes the write handler, if set.
func (s *SeriesSegment) Clone() *SeriesSegment {
return &SeriesSegment{
id: s.id,
path: s.path,
data: s.data,
size: s.size,
}
}
// CloneSeriesSegments returns a copy of a slice of segments.
func CloneSeriesSegments(a []*SeriesSegment) []*SeriesSegment {
other := make([]*SeriesSegment, len(a))
for i := range a {
other[i] = a[i].Clone()
}
return other
}
// FindSegment returns a segment by id.
func FindSegment(a []*SeriesSegment, id uint16) *SeriesSegment {
for _, segment := range a {
if segment.id == id {
return segment
}
}
return nil
}
// ReadSeriesKeyFromSegments returns a series key from an offset within a set of segments.
func ReadSeriesKeyFromSegments(a []*SeriesSegment, offset int64) []byte {
segmentID, pos := SplitSeriesOffset(offset)
segment := FindSegment(a, segmentID)
if segment == nil {
return nil
}
buf := segment.Slice(pos)
key, _ := ReadSeriesKey(buf)
return key
}
// JoinSeriesOffset returns an offset that combines the 2-byte segmentID and 4-byte pos.
func JoinSeriesOffset(segmentID uint16, pos uint32) int64 {
return (int64(segmentID) << 32) | int64(pos)
}
// SplitSeriesOffset splits an offset into its 2-byte segmentID and 4-byte pos parts.
func SplitSeriesOffset(offset int64) (segmentID uint16, pos uint32) {
return uint16((offset >> 32) & 0xFFFF), uint32(offset & 0xFFFFFFFF)
}
// IsValidSeriesSegmentFilename returns true if filename is a 4-character lowercase hexadecimal number.
func IsValidSeriesSegmentFilename(filename string) bool {
return seriesSegmentFilenameRegex.MatchString(filename)
}
// ParseSeriesSegmentFilename returns the id represented by the hexadecimal filename.
func ParseSeriesSegmentFilename(filename string) uint16 {
i, _ := strconv.ParseUint(filename, 16, 32)
return uint16(i)
}
var seriesSegmentFilenameRegex = regexp.MustCompile(`^[0-9a-f]{4}$`)
// SeriesSegmentSize returns the maximum size of the segment.
// The size goes up by powers of 2 starting from 4MB and reaching 256MB.
func SeriesSegmentSize(id uint16) uint32 {
const min = 22 // 4MB
const max = 28 // 256MB
shift := id + min
if shift >= max {
shift = max
}
return 1 << shift
}
// SeriesSegmentHeader represents the header of a series segment.
type SeriesSegmentHeader struct {
Version uint8
}
// NewSeriesSegmentHeader returns a new instance of SeriesSegmentHeader.
func NewSeriesSegmentHeader() SeriesSegmentHeader {
return SeriesSegmentHeader{Version: SeriesSegmentVersion}
}
// ReadSeriesSegmentHeader returns the header from data.
func ReadSeriesSegmentHeader(data []byte) (hdr SeriesSegmentHeader, err error) {
r := bytes.NewReader(data)
// Read magic number.
magic := make([]byte, len(SeriesSegmentMagic))
if _, err := io.ReadFull(r, magic); err != nil {
return hdr, err
} else if !bytes.Equal([]byte(SeriesSegmentMagic), magic) {
return hdr, ErrInvalidSeriesSegment
}
// Read version.
if err := binary.Read(r, binary.BigEndian, &hdr.Version); err != nil {
return hdr, err
}
return hdr, nil
}
// WriteTo writes the header to w.
func (hdr *SeriesSegmentHeader) WriteTo(w io.Writer) (n int64, err error) {
var buf bytes.Buffer
buf.WriteString(SeriesSegmentMagic)
binary.Write(&buf, binary.BigEndian, hdr.Version)
return buf.WriteTo(w)
}
func ReadSeriesEntry(data []byte) (flag uint8, id uint64, key []byte, sz int64) {
// If flag byte is zero then no more entries exist.
flag, data = uint8(data[0]), data[1:]
if flag == 0 {
return 0, 0, nil, 1
}
id, data = binary.BigEndian.Uint64(data), data[8:]
switch flag {
case SeriesEntryInsertFlag:
key, _ = ReadSeriesKey(data)
}
return flag, id, key, int64(SeriesEntryHeaderSize + len(key))
}
func AppendSeriesEntry(dst []byte, flag uint8, id uint64, key []byte) []byte {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, id)
dst = append(dst, flag)
dst = append(dst, buf...)
switch flag {
case SeriesEntryInsertFlag:
dst = append(dst, key...)
case SeriesEntryTombstoneFlag:
default:
panic(fmt.Sprintf("unreachable: invalid flag: %d", flag))
}
return dst
}
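The 2-byte/4-byte packing in JoinSeriesOffset and SplitSeriesOffset is what lets a single int64 address any entry in any segment; a worked example with arbitrary values:

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/tsdb"
)

func main() {
	// (2 << 32) | 29 == 8589934621: segment id stored above bit 32,
	// byte position within that segment in the low 32 bits.
	offset := tsdb.JoinSeriesOffset(2, 29)
	segID, pos := tsdb.SplitSeriesOffset(offset)
	fmt.Println(offset, segID, pos) // 8589934621 2 29
}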

tsdb/series_segment_test.go (new file, 208 lines)
View File

@@ -0,0 +1,208 @@
package tsdb_test
import (
"bytes"
"path/filepath"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/tsdb"
)
func TestSeriesSegment(t *testing.T) {
dir, cleanup := MustTempDir()
defer cleanup()
// Create a new initial segment (4mb) and initialize for writing.
segment, err := tsdb.CreateSeriesSegment(0, filepath.Join(dir, "0000"))
if err != nil {
t.Fatal(err)
} else if err := segment.InitForWrite(); err != nil {
t.Fatal(err)
}
defer segment.Close()
// Write initial entry.
key1 := tsdb.AppendSeriesKey(nil, []byte("m0"), nil)
offset, err := segment.WriteLogEntry(tsdb.AppendSeriesEntry(nil, tsdb.SeriesEntryInsertFlag, 1, key1))
if err != nil {
t.Fatal(err)
} else if offset != tsdb.SeriesSegmentHeaderSize {
t.Fatalf("unexpected offset: %d", offset)
}
// Write a large entry (3mb).
key2 := tsdb.AppendSeriesKey(nil, bytes.Repeat([]byte("m"), 3*(1<<20)), nil)
if _, err := segment.WriteLogEntry(tsdb.AppendSeriesEntry(nil, tsdb.SeriesEntryInsertFlag, 2, key2)); err != nil {
t.Fatal(err)
} else if offset != tsdb.SeriesSegmentHeaderSize {
t.Fatalf("unexpected offset: %d", offset)
}
// Write another entry that is too large for the remaining segment space.
if _, err := segment.WriteLogEntry(tsdb.AppendSeriesEntry(nil, tsdb.SeriesEntryInsertFlag, 3, tsdb.AppendSeriesKey(nil, bytes.Repeat([]byte("n"), 3*(1<<20)), nil))); err != tsdb.ErrSeriesSegmentNotWritable {
t.Fatalf("unexpected error: %s", err)
}
// Verify two entries exist.
var n int
segment.ForEachEntry(func(flag uint8, id uint64, offset int64, key []byte) error {
switch n {
case 0:
if flag != tsdb.SeriesEntryInsertFlag || id != 1 || !bytes.Equal(key1, key) {
t.Fatalf("unexpected entry(0): %d, %d, %q", flag, id, key)
}
case 1:
if flag != tsdb.SeriesEntryInsertFlag || id != 2 || !bytes.Equal(key2, key) {
t.Fatalf("unexpected entry(1): %d, %d, %q", flag, id, key)
}
default:
t.Fatalf("too many entries")
}
n++
return nil
})
if n != 2 {
t.Fatalf("unexpected entry count: %d", n)
}
}
func TestSeriesSegment_AppendSeriesIDs(t *testing.T) {
dir, cleanup := MustTempDir()
defer cleanup()
segment, err := tsdb.CreateSeriesSegment(0, filepath.Join(dir, "0000"))
if err != nil {
t.Fatal(err)
} else if err := segment.InitForWrite(); err != nil {
t.Fatal(err)
}
defer segment.Close()
// Write entries.
if _, err := segment.WriteLogEntry(tsdb.AppendSeriesEntry(nil, tsdb.SeriesEntryInsertFlag, 10, tsdb.AppendSeriesKey(nil, []byte("m0"), nil))); err != nil {
t.Fatal(err)
} else if _, err := segment.WriteLogEntry(tsdb.AppendSeriesEntry(nil, tsdb.SeriesEntryInsertFlag, 11, tsdb.AppendSeriesKey(nil, []byte("m1"), nil))); err != nil {
t.Fatal(err)
} else if err := segment.Flush(); err != nil {
t.Fatal(err)
}
// Collect series ids with existing set.
a := segment.AppendSeriesIDs([]uint64{1, 2})
if diff := cmp.Diff(a, []uint64{1, 2, 10, 11}); diff != "" {
t.Fatal(diff)
}
}
func TestSeriesSegment_MaxSeriesID(t *testing.T) {
dir, cleanup := MustTempDir()
defer cleanup()
segment, err := tsdb.CreateSeriesSegment(0, filepath.Join(dir, "0000"))
if err != nil {
t.Fatal(err)
} else if err := segment.InitForWrite(); err != nil {
t.Fatal(err)
}
defer segment.Close()
// Write entries.
if _, err := segment.WriteLogEntry(tsdb.AppendSeriesEntry(nil, tsdb.SeriesEntryInsertFlag, 10, tsdb.AppendSeriesKey(nil, []byte("m0"), nil))); err != nil {
t.Fatal(err)
} else if _, err := segment.WriteLogEntry(tsdb.AppendSeriesEntry(nil, tsdb.SeriesEntryInsertFlag, 11, tsdb.AppendSeriesKey(nil, []byte("m1"), nil))); err != nil {
t.Fatal(err)
} else if err := segment.Flush(); err != nil {
t.Fatal(err)
}
// Verify maximum.
if max := segment.MaxSeriesID(); max != 11 {
t.Fatalf("unexpected max: %d", max)
}
}
func TestSeriesSegmentHeader(t *testing.T) {
// Verify header initializes correctly.
hdr := tsdb.NewSeriesSegmentHeader()
if hdr.Version != tsdb.SeriesSegmentVersion {
t.Fatalf("unexpected version: %d", hdr.Version)
}
// Marshal/unmarshal.
var buf bytes.Buffer
if _, err := hdr.WriteTo(&buf); err != nil {
t.Fatal(err)
} else if other, err := tsdb.ReadSeriesSegmentHeader(buf.Bytes()); err != nil {
t.Fatal(err)
} else if diff := cmp.Diff(hdr, other); diff != "" {
t.Fatal(diff)
}
}
func TestJoinSeriesOffset(t *testing.T) {
if offset := tsdb.JoinSeriesOffset(0x1234, 0x56789ABC); offset != 0x123456789ABC {
t.Fatalf("unexpected offset: %x", offset)
}
}
func TestSplitSeriesOffset(t *testing.T) {
if segmentID, pos := tsdb.SplitSeriesOffset(0x123456789ABC); segmentID != 0x1234 || pos != 0x56789ABC {
t.Fatalf("unexpected segmentID/pos: %x/%x", segmentID, pos)
}
}
func TestIsValidSeriesSegmentFilename(t *testing.T) {
if tsdb.IsValidSeriesSegmentFilename("") {
t.Fatal("expected invalid")
} else if tsdb.IsValidSeriesSegmentFilename("0ab") {
t.Fatal("expected invalid")
} else if !tsdb.IsValidSeriesSegmentFilename("192a") {
t.Fatal("expected valid")
}
}
func TestParseSeriesSegmentFilename(t *testing.T) {
if v := tsdb.ParseSeriesSegmentFilename("a90b"); v != 0xA90B {
t.Fatalf("unexpected value: %x", v)
} else if v := tsdb.ParseSeriesSegmentFilename("0001"); v != 1 {
t.Fatalf("unexpected value: %x", v)
} else if v := tsdb.ParseSeriesSegmentFilename("invalid"); v != 0 {
t.Fatalf("unexpected value: %x", v)
}
}
func TestSeriesSegmentSize(t *testing.T) {
const mb = (1 << 20)
if sz := tsdb.SeriesSegmentSize(0); sz != 4*mb {
t.Fatalf("unexpected size: %d", sz)
} else if sz := tsdb.SeriesSegmentSize(1); sz != 8*mb {
t.Fatalf("unexpected size: %d", sz)
} else if sz := tsdb.SeriesSegmentSize(2); sz != 16*mb {
t.Fatalf("unexpected size: %d", sz)
} else if sz := tsdb.SeriesSegmentSize(3); sz != 32*mb {
t.Fatalf("unexpected size: %d", sz)
} else if sz := tsdb.SeriesSegmentSize(4); sz != 64*mb {
t.Fatalf("unexpected size: %d", sz)
} else if sz := tsdb.SeriesSegmentSize(5); sz != 128*mb {
t.Fatalf("unexpected size: %d", sz)
} else if sz := tsdb.SeriesSegmentSize(6); sz != 256*mb {
t.Fatalf("unexpected size: %d", sz)
} else if sz := tsdb.SeriesSegmentSize(7); sz != 256*mb {
t.Fatalf("unexpected size: %d", sz)
}
}
func TestSeriesEntry(t *testing.T) {
seriesKey := tsdb.AppendSeriesKey(nil, []byte("m0"), nil)
buf := tsdb.AppendSeriesEntry(nil, 1, 2, seriesKey)
if flag, id, key, sz := tsdb.ReadSeriesEntry(buf); flag != 1 {
t.Fatalf("unexpected flag: %d", flag)
} else if id != 2 {
t.Fatalf("unexpected id: %d", id)
} else if !bytes.Equal(seriesKey, key) {
t.Fatalf("unexpected key: %q", key)
} else if sz != int64(tsdb.SeriesEntryHeaderSize+len(key)) {
t.Fatalf("unexpected size: %d", sz)
}
}

View File

@@ -7,7 +7,6 @@ import (
 	"path"
 	"path/filepath"
 	"regexp"
-	"runtime"
 	"sort"
 	"strings"
 	"testing"
@@ -222,12 +221,6 @@ func NewTempShard(index string) *TempShard {
 	// Create series file.
 	sfile := NewSeriesFile(filepath.Join(dir, "db0", SeriesFileName))
-
-	// If we're running on a 32-bit system then reduce the SeriesFile size, so we
-	// can address is in memory.
-	if runtime.GOARCH == "386" {
-		sfile.MaxSize = 1 << 27 // 128MB
-	}
 	if err := sfile.Open(); err != nil {
 		panic(err)
 	}

View File

@@ -354,12 +354,6 @@ func (s *Store) openSeriesFile(database string) (*SeriesFile, error) {
 	}

 	sfile := NewSeriesFile(filepath.Join(s.path, database, SeriesFileName))
-
-	// Set a custom mmap size if one has been specified, otherwise the default
-	// will be used.
-	if s.SeriesFileMaxSize > 0 {
-		sfile.MaxSize = s.SeriesFileMaxSize
-	}
 	if err := sfile.Open(); err != nil {
 		return nil, err
 	}