influxdb/tsdb/engine/tsm1/compact.go

package tsm1
// Compactions are the process of creating read-optimized TSM files.
// The files are created by converting write-optimized WAL entries
// to read-optimized TSM format. They can also be created from existing
// TSM files when there are tombstone records that need to be removed, points
// that were overwritten by later writes and need to be updated, or when multiple
// smaller TSM files need to be merged to reduce file counts and improve
// compression ratios.
//
// The compaction process is stream-oriented, using multiple readers and
// iterators. The resulting stream is written sorted and chunked to allow for
// one-pass writing of a new TSM file.
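//
// At a glance, the stream flows: WAL segment readers -> WALKeyIterator
// (per-key sort and dedup) -> MergeIterator (fixed-size chunking) ->
// TSMWriter, with a new file started whenever MaxFileSize is exceeded.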
import (
"fmt"
"os"
"path/filepath"
"sort"
)
var errMaxFileExceeded = fmt.Errorf("max file exceeded")
// Compactor merges multiple WAL segments and TSM files into one or more
// new TSM files.
type Compactor struct {
Dir string
MaxFileSize int
currentID int
merge *MergeIterator
}
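
// A minimal usage sketch (hypothetical caller and values; not part of this
// package): point the Compactor at the shard directory and hand it the paths
// of the closed WAL segments. The returned paths still carry the .tmp
// extension and, per the comment in Compact, are renamed once fully completed.
//
//	c := &Compactor{Dir: "/path/to/shard", MaxFileSize: 250 * 1024 * 1024}
//	files, err := c.Compact([]string{"_00001.wal", "_00002.wal"})
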
// Compact converts WAL segments and TSM files into new TSM files.
func (c *Compactor) Compact(walSegments []string) ([]string, error) {
var walReaders []*WALSegmentReader
// For each segment, create a reader to iterate over each WAL entry
for _, path := range walSegments {
f, err := os.Open(path)
if err != nil {
return nil, err
}
r := NewWALSegmentReader(f)
defer r.Close()
walReaders = append(walReaders, r)
}
// The WALKeyIterator allows all the segments to be iterated over in key
// order, with sorted values for each key, during compaction.
walKeyIterator, err := NewWALKeyIterator(walReaders...)
if err != nil {
return nil, err
}
// The merge iterator combines the WAL and TSM iterators (note: TSM iteration
// is not in place yet). It also chunks the values into 1000-element blocks.
c.merge = NewMergeIterator(walKeyIterator, 1000)
defer c.merge.Close()
// These are the new TSM files written
var files []string
for {
// TODO: this needs to be initialized based on the existing files on disk
c.currentID++
// New TSM files are written to a temp file and renamed when fully completed.
fileName := filepath.Join(c.Dir, fmt.Sprintf("%07d.%s.tmp", c.currentID, Format))
// Write as much as possible to this file
err := c.write(fileName)
// We've hit the max file limit and there is more to write. Create a new file
// and continue.
if err == errMaxFileExceeded {
files = append(files, fileName)
continue
}
// We hit an error but didn't finish the compaction. Remove the temp file and abort.
if err != nil {
os.RemoveAll(fileName)
return nil, err
}
files = append(files, fileName)
break
}
c.merge = nil
return files, nil
}
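
// For illustration (hypothetical sizes): with MaxFileSize set to 1 MiB and
// ~2.5 MiB of compacted data, the loop above ends its first two files early
// via errMaxFileExceeded and completes the third on the final pass, returning
// three temp file names.
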
func (c *Compactor) write(path string) error {
fd, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0666)
if err != nil {
return err
}
// Create the writer for the new TSM file.
w, err := NewTSMWriter(fd)
if err != nil {
// Don't leak the file handle if the writer can't be created.
fd.Close()
return err
}
for c.merge.Next() {
// Each call to Read returns the next sorted key (or the prior one if there are
// more values to write). The size of values will be less than or equal to our
// chunk size (1000).
key, values, err := c.merge.Read()
if err != nil {
return err
}
// Write the key and values
if err := w.Write(key, values); err != nil {
return err
}
// If we have a max file size configured and we're over it, close out the file
// and return the error.
if c.MaxFileSize != 0 && w.Size() > c.MaxFileSize {
if err := w.WriteIndex(); err != nil {
return err
}
if err := w.Close(); err != nil {
return err
}
return errMaxFileExceeded
}
}
// We're all done. Close out the file.
if err := w.WriteIndex(); err != nil {
return err
}
if err := w.Close(); err != nil {
return err
}
return nil
}
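
// The writer call sequence above is: Write once per key chunk, WriteIndex once
// to persist the index, then Close. Note that errMaxFileExceeded is returned
// only after WriteIndex and Close succeed, so the "exceeded" file is still a
// complete, valid TSM file.
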
// MergeIterator merges multiple KeyIterators while chunking each read call
// into a fixed size. Each iteration, the lowest lexicographically ordered
// key is returned with the next set of values for that key, ordered by time.
// Values with identical times are overwritten by the WAL KeyIterator.
//
// Moving through the full iteration cycle will result in sorted, unique chunks
// of values up to a max size. Each key returned will be greater than or equal
// to the prior key returned.
type MergeIterator struct {
// wal is the iterator for multiple WAL segments combined
wal KeyIterator
// size is the maximum number of values per chunk returned by Read
size int
// key is the current iteration series key
key string
// walBuf is the remaining values from the last wal Read call
walBuf []Value
// chunk is the current set of values that will be returned by Read
chunk []Value
// err is any error returned by an underlying iterator to be returned by Read
err error
}
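
// A sketch of the Next/Read consumption pattern, mirroring how Compact and
// write drive it:
//
//	m := NewMergeIterator(walKeyIterator, 1000)
//	defer m.Close()
//	for m.Next() {
//	    key, values, err := m.Read()
//	    // values holds at most 1000 entries; key repeats while a larger
//	    // series drains
//	}
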
func (m *MergeIterator) Next() bool {
// Prime the wal buffer if possible
if len(m.walBuf) == 0 && m.wal.Next() {
k, v, err := m.wal.Read()
m.key = k
m.err = err
m.walBuf = v
}
// Move size elements into the current chunk and slice the same
// amount off of the wal buffer.
if m.size < len(m.walBuf) {
m.chunk = m.walBuf[:m.size]
m.walBuf = m.walBuf[m.size:]
} else {
m.chunk = m.walBuf
m.walBuf = m.walBuf[:0]
}
return len(m.chunk) > 0
}
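
// Worked example (illustrative numbers): with size = 1000 and 2,500 buffered
// values for one key, three successive Next/Read cycles yield chunks of 1000,
// 1000, and 500 values under the same key; the next call to Next primes the
// buffer from the following key in the WAL iterator.
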
func (m *MergeIterator) Read() (string, []Value, error) {
return m.key, m.chunk, m.err
}
func (m *MergeIterator) Close() error {
m.walBuf = nil
m.chunk = nil
return m.wal.Close()
}
func NewMergeIterator(WAL KeyIterator, size int) *MergeIterator {
m := &MergeIterator{
wal: WAL,
size: size,
}
return m
}
// KeyIterator allows iteration over a set of keys and values in sorted order.
type KeyIterator interface {
Next() bool
Read() (string, []Value, error)
Close() error
}
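
// Compile-time assertions (illustrative) that both iterators in this file
// satisfy KeyIterator.
var _ KeyIterator = (*MergeIterator)(nil)
var _ KeyIterator = (*walKeyIterator)(nil)
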
// walKeyIterator allows WAL segments to be iterated over in sorted order.
type walKeyIterator struct {
k string
Order []string
Series map[string]Values
}
func (k *walKeyIterator) Next() bool {
if len(k.Order) == 0 {
return false
}
k.k = k.Order[0]
k.Order = k.Order[1:]
return true
}
func (k *walKeyIterator) Read() (string, []Value, error) {
return k.k, k.Series[k.k], nil
}
func (k *walKeyIterator) Close() error {
k.Order = nil
k.Series = nil
return nil
}
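
// At this layer there is no chunking: Next pops the smallest remaining key off
// Order, and Read serves that key's full, deduplicated value set in one call.
// MergeIterator is responsible for slicing it into fixed-size chunks.
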
func NewWALKeyIterator(readers ...*WALSegmentReader) (KeyIterator, error) {
series := map[string]Values{}
order := []string{}
// Iterate over each reader in order. Later readers will overwrite earlier ones if values
// overlap.
for _, r := range readers {
for r.Next() {
entry, err := r.Read()
if err != nil {
return nil, err
}
switch t := entry.(type) {
case *WriteWALEntry:
// Each point needs to be decomposed from a time with multiple fields to (time, value) tuples
for k, v := range t.Values {
// Just append each point as we see it. Dedup and sorting happens later.
series[k] = append(series[k], v...)
}
case *DeleteWALEntry:
// Each key is a series: measurement + tagset string
for _, k := range t.Keys {
// seriesKey is specific to a field: measurement + tagset string + sep + field name
for seriesKey := range series {
// If the delete key matches the series key, drop the values we've buffered for it
if k == seriesKey {
delete(series, seriesKey)
}
}
}
}
}
}
// Build the sorted key order that we'll iterate over, and sort and dedup
// all the points for each key.
for k, v := range series {
order = append(order, k)
series[k] = v.Deduplicate(true)
}
sort.Strings(order)
return &walKeyIterator{
Series: series,
Order: order,
}, nil
}
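
// End-to-end sketch (hypothetical readers r1, r2): combine one reader per
// closed segment; later segments win when timestamps collide, and iteration
// comes back in sorted key order with deduplicated, time-ordered values.
//
//	iter, err := NewWALKeyIterator(r1, r2)
//	if err != nil {
//	    // handle error
//	}
//	for iter.Next() {
//	    key, values, _ := iter.Read()
//	    // ...
//	}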