// influxdb/tsdb/tsm1/reader_index_test.go
package tsm1
import (
"bytes"
"fmt"
"math"
"math/rand"
"reflect"
"sync"
"sync/atomic"
"testing"
)
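// loadIndex marshals w and unmarshals the result into a fresh indirectIndex,
// failing the test immediately if either step returns an error.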
func loadIndex(tb testing.TB, w IndexWriter) *indirectIndex {
tb.Helper()
b, err := w.MarshalBinary()
fatalIfErr(tb, "marshaling index", err)
indir := NewIndirectIndex()
fatalIfErr(tb, "unmarshaling index", indir.UnmarshalBinary(b))
return indir
}
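// TestIndirectIndex_Entries_NonExistent verifies that looking up a key that
// was never added to the index yields no entries.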
func TestIndirectIndex_Entries_NonExistent(t *testing.T) {
index := NewIndexWriter()
index.Add([]byte("cpu"), BlockFloat64, 0, 1, 10, 100)
index.Add([]byte("cpu"), BlockFloat64, 2, 3, 20, 200)
ind := loadIndex(t, index)
// mem was never added to the index, so neither the original writer nor the
// loaded index should return any entries for it
exp := index.Entries([]byte("mem"))
entries, err := ind.ReadEntries([]byte("mem"), nil)
if err != nil {
t.Fatal(err)
}
if got, exp := len(entries), len(exp); got != 0 || exp != 0 {
t.Fatalf("expected no entries for non-existent key: got %v, exp %v", got, exp)
}
}
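// TestIndirectIndex_Type verifies that the block type recorded for a key
// survives the marshal/unmarshal round trip.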
func TestIndirectIndex_Type(t *testing.T) {
index := NewIndexWriter()
index.Add([]byte("cpu"), BlockInteger, 0, 1, 10, 20)
ind := loadIndex(t, index)
typ, err := ind.Type([]byte("cpu"))
if err != nil {
fatal(t, "reading type", err)
}
if got, exp := typ, BlockInteger; got != exp {
t.Fatalf("type mismatch: got %v, exp %v", got, exp)
}
}
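// TestIndirectIndex_Delete verifies that Delete removes whole keys from the
// index while leaving unrelated keys intact.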
func TestIndirectIndex_Delete(t *testing.T) {
check := func(t *testing.T, got, exp bool) {
t.Helper()
if exp != got {
t.Fatalf("expected: %v but got: %v", exp, got)
}
}
index := NewIndexWriter()
index.Add([]byte("cpu1"), BlockInteger, 0, 10, 10, 20)
index.Add([]byte("cpu1"), BlockInteger, 10, 20, 10, 20)
index.Add([]byte("cpu2"), BlockInteger, 0, 10, 10, 20)
index.Add([]byte("cpu2"), BlockInteger, 10, 20, 10, 20)
index.Add([]byte("mem"), BlockInteger, 0, 10, 10, 20)
ind := loadIndex(t, index)
ind.Delete([][]byte{[]byte("cpu1")})
check(t, ind.Contains([]byte("mem")), true)
check(t, ind.Contains([]byte("cpu1")), false)
check(t, ind.Contains([]byte("cpu2")), true)
ind.Delete([][]byte{[]byte("cpu1"), []byte("cpu2")})
check(t, ind.Contains([]byte("mem")), true)
check(t, ind.Contains([]byte("cpu1")), false)
check(t, ind.Contains([]byte("cpu2")), false)
ind.Delete([][]byte{[]byte("mem")})
check(t, ind.Contains([]byte("mem")), false)
check(t, ind.Contains([]byte("cpu1")), false)
check(t, ind.Contains([]byte("cpu2")), false)
}
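// TestIndirectIndex_DeleteRange verifies that DeleteRange tombstones only the
// requested time range for the named keys, and that a key disappears from the
// index entirely once its whole time range has been deleted.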
func TestIndirectIndex_DeleteRange(t *testing.T) {
check := func(t *testing.T, got, exp bool) {
t.Helper()
if exp != got {
t.Fatalf("expected: %v but got: %v", exp, got)
}
}
index := NewIndexWriter()
index.Add([]byte("cpu1"), BlockInteger, 0, 10, 10, 20)
index.Add([]byte("cpu1"), BlockInteger, 10, 20, 10, 20)
index.Add([]byte("cpu2"), BlockInteger, 0, 10, 10, 20)
index.Add([]byte("cpu2"), BlockInteger, 10, 20, 10, 20)
index.Add([]byte("mem"), BlockInteger, 0, 10, 10, 20)
ind := loadIndex(t, index)
ind.DeleteRange([][]byte{[]byte("cpu1")}, 5, 15)
check(t, ind.Contains([]byte("mem")), true)
check(t, ind.Contains([]byte("cpu1")), true)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), true)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), true)
check(t, ind.Contains([]byte("cpu2")), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), true)
ind.DeleteRange([][]byte{[]byte("cpu1"), []byte("cpu2")}, 0, 5)
check(t, ind.Contains([]byte("mem")), true)
check(t, ind.Contains([]byte("cpu1")), true)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), true)
check(t, ind.Contains([]byte("cpu2")), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), true)
ind.DeleteRange([][]byte{[]byte("cpu1"), []byte("cpu2")}, 15, 20)
check(t, ind.Contains([]byte("mem")), true)
check(t, ind.Contains([]byte("cpu1")), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), false)
check(t, ind.Contains([]byte("cpu2")), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), false)
}
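// TestIndirectIndex_DeletePrefix exercises the same behavior as DeleteRange,
// but selects keys by prefix instead of by exact name.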
func TestIndirectIndex_DeletePrefix(t *testing.T) {
check := func(t *testing.T, got, exp bool) {
t.Helper()
if exp != got {
t.Fatalf("expected: %v but got: %v", exp, got)
}
}
index := NewIndexWriter()
index.Add([]byte("cpu1"), BlockInteger, 0, 10, 10, 20)
index.Add([]byte("cpu1"), BlockInteger, 10, 20, 10, 20)
index.Add([]byte("cpu2"), BlockInteger, 0, 10, 10, 20)
index.Add([]byte("cpu2"), BlockInteger, 10, 20, 10, 20)
index.Add([]byte("mem"), BlockInteger, 0, 10, 10, 20)
ind := loadIndex(t, index)
ind.DeletePrefix([]byte("c"), 5, 15, nil, nil)
check(t, ind.Contains([]byte("mem")), true)
check(t, ind.Contains([]byte("cpu1")), true)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), true)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), true)
check(t, ind.Contains([]byte("cpu2")), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), true)
ind.DeletePrefix([]byte("cp"), 0, 5, nil, nil)
check(t, ind.Contains([]byte("mem")), true)
check(t, ind.Contains([]byte("cpu1")), true)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), true)
check(t, ind.Contains([]byte("cpu2")), true)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), true)
ind.DeletePrefix([]byte("cpu"), 15, 20, nil, nil)
check(t, ind.Contains([]byte("mem")), true)
check(t, ind.Contains([]byte("cpu1")), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), false)
check(t, ind.Contains([]byte("cpu2")), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), false)
check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), false)
}
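// TestIndirectIndex_DeletePrefix_NoMatch verifies that deleting prefixes which
// match no keys leaves the index untouched.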
func TestIndirectIndex_DeletePrefix_NoMatch(t *testing.T) {
check := func(t *testing.T, got, exp bool) {
t.Helper()
if exp != got {
t.Fatalf("expected: %v but got: %v", exp, got)
}
}
index := NewIndexWriter()
index.Add([]byte("cpu"), BlockInteger, 0, 10, 10, 20)
ind := loadIndex(t, index)
ind.DeletePrefix([]byte("b"), 5, 5, nil, nil)
ind.DeletePrefix([]byte("d"), 5, 5, nil, nil)
check(t, ind.Contains([]byte("cpu")), true)
check(t, ind.MaybeContainsValue([]byte("cpu"), 5), true)
}
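// TestIndirectIndex_DeletePrefix_Dead verifies that the dead callback is
// invoked only when a delete removes the last remaining data for a key.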
func TestIndirectIndex_DeletePrefix_Dead(t *testing.T) {
check := func(t *testing.T, got, exp interface{}) {
t.Helper()
if !reflect.DeepEqual(exp, got) {
t.Fatalf("expected: %q but got: %q", exp, got)
}
}
var keys [][]byte
dead := func(key []byte) { keys = append(keys, append([]byte(nil), key...)) }
b := func(keys ...string) (out [][]byte) {
for _, key := range keys {
out = append(out, []byte(key))
}
return out
}
index := NewIndexWriter()
index.Add([]byte("cpu"), BlockInteger, 0, 10, 10, 20)
index.Add([]byte("dpu"), BlockInteger, 0, 10, 10, 20)
ind := loadIndex(t, index)
ind.DeletePrefix([]byte("b"), 5, 5, nil, dead)
check(t, keys, b())
ind.DeletePrefix([]byte("c"), 0, 9, nil, dead)
check(t, keys, b())
ind.DeletePrefix([]byte("c"), 9, 10, nil, dead)
check(t, keys, b("cpu"))
ind.DeletePrefix([]byte("d"), -50, 50, nil, dead)
check(t, keys, b("cpu", "dpu"))
}
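// TestIndirectIndex_DeletePrefix_Dead_Fuzz runs partial DeletePrefix and
// DeleteRange calls concurrently and checks that the key is never reported
// dead until a final delete covers its last remaining time range.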
func TestIndirectIndex_DeletePrefix_Dead_Fuzz(t *testing.T) {
key := bytes.Repeat([]byte("X"), 32)
check := func(t *testing.T, got, exp interface{}) {
t.Helper()
if !reflect.DeepEqual(exp, got) {
t.Fatalf("expected: %v but got: %v", exp, got)
}
}
for i := 0; i < 5000; i++ {
// Create an index with the key in it
writer := NewIndexWriter()
writer.Add(key, BlockInteger, 0, 10, 10, 20)
ind := loadIndex(t, writer)
// Keep track if dead is ever called.
happened := uint64(0)
dead := func([]byte) { atomic.AddUint64(&happened, 1) }
// Build up a random set of operations to delete the key.
ops := make([]func(), 9)
for j := range ops {
n := int64(j)
if rand.Intn(2) == 0 {
kn := key[:rand.Intn(len(key))]
ops[j] = func() { ind.DeletePrefix(kn, n, n+1, nil, dead) }
} else {
ops[j] = func() { ind.DeleteRange([][]byte{key}, n, n+1) }
}
}
// Since we will run the ops concurrently, this shuffle is unnecessary
// but it might provide more coverage of random orderings than the
// scheduler randomness alone.
rand.Shuffle(len(ops), func(i, j int) { ops[i], ops[j] = ops[j], ops[i] })
// Run the operations concurrently. The key should never be dead.
var wg sync.WaitGroup
for _, op := range ops {
op := op
wg.Add(1)
go func() { op(); wg.Done() }()
}
wg.Wait()
check(t, happened, uint64(0))
// Run the last delete operation. It should kill the key.
ind.DeletePrefix(key, 9, 10, nil, dead)
check(t, happened, uint64(1))
}
}
//
// indirectIndex benchmarks
//
const (
indexKeyCount = 500000
indexBlockCount = 100
)
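// indexCacheInfo caches a fully built indirectIndex along with pristine copies
// of its offsets and prefix entries so benchmarks can cheaply reset the index
// between iterations instead of unmarshaling it again.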
type indexCacheInfo struct {
index *indirectIndex
offsets []uint32
prefixes []prefixEntry
allKeys [][]byte
bytes []byte
}
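// reset restores the index to its freshly unmarshaled state: original offsets
// and prefixes, no tombstones, and a cleared fault counter.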
func (i *indexCacheInfo) reset() {
i.index.ro.offsets = append([]uint32(nil), i.offsets...)
i.index.ro.prefixes = append([]prefixEntry(nil), i.prefixes...)
i.index.tombstones = make(map[uint32][]TimeRange)
i.index.prefixTombstones = newPrefixTree()
resetFaults(i.index)
}
var (
indexCache = map[string]*indexCacheInfo{}
indexSizes = map[string][2]int{
"large": {500000, 100},
"med": {1000, 1000},
"small": {5000, 2},
}
)
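// getFaults returns the number of faults recorded by the index's fault buffer.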
func getFaults(indirect *indirectIndex) int64 {
return int64(atomic.LoadUint64(&indirect.b.faults))
}
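// resetFaults replaces the fault buffer with a fresh one over the same bytes,
// zeroing the recorded fault count.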
func resetFaults(indirect *indirectIndex) {
if indirect != nil {
indirect.b = faultBuffer{b: indirect.b.b}
}
}
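// getIndex returns an indirectIndex sized according to name ("small", "med",
// "large", or the package defaults for anything else). The index is built
// once, cached, and reset on subsequent calls.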
func getIndex(tb testing.TB, name string) (*indirectIndex, *indexCacheInfo) {
info, ok := indexCache[name]
if ok {
info.reset()
return info.index, info
}
info = new(indexCacheInfo)
sizes, ok := indexSizes[name]
if !ok {
sizes = [2]int{indexKeyCount, indexBlockCount}
}
keys, blocks := sizes[0], sizes[1]
writer := NewIndexWriter()
// add a ballast key that starts at -1 so that we don't trigger optimizations
// when deleting [0, math.MaxInt64]
writer.Add([]byte("ballast"), BlockFloat64, -1, 1, 0, 100)
for i := 0; i < keys; i++ {
key := []byte(fmt.Sprintf("cpu-%08d", i))
info.allKeys = append(info.allKeys, key)
for j := 0; j < blocks; j++ {
writer.Add(key, BlockFloat64, 0, 100, 10, 100)
}
}
var err error
info.bytes, err = writer.MarshalBinary()
if err != nil {
tb.Fatalf("unexpected error marshaling index: %v", err)
}
info.index = NewIndirectIndex()
if err = info.index.UnmarshalBinary(info.bytes); err != nil {
tb.Fatalf("unexpected error unmarshaling index: %v", err)
}
info.offsets = append([]uint32(nil), info.index.ro.offsets...)
info.prefixes = append([]prefixEntry(nil), info.index.ro.prefixes...)
indexCache[name] = info
return info.index, info
}
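// BenchmarkIndirectIndex_UnmarshalBinary measures repeatedly unmarshaling the
// marshaled bytes of the large index.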
func BenchmarkIndirectIndex_UnmarshalBinary(b *testing.B) {
indirect, info := getIndex(b, "large")
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
if err := indirect.UnmarshalBinary(info.bytes); err != nil {
b.Fatalf("unexpected error unmarshaling index: %v", err)
}
}
}
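// BenchmarkIndirectIndex_Entries measures a ReadEntries lookup that passes a
// nil destination slice on every iteration.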
func BenchmarkIndirectIndex_Entries(b *testing.B) {
indirect, _ := getIndex(b, "med")
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
resetFaults(indirect)
indirect.ReadEntries([]byte("cpu-00000001"), nil)
}
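// If fault tracking is enabled, report the memory faulted in during the final
// iteration as the benchmark's bytes per operation, assuming 4 KiB per fault.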
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
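// BenchmarkIndirectIndex_ReadEntries performs the same lookup but reuses the
// destination slice across iterations.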
func BenchmarkIndirectIndex_ReadEntries(b *testing.B) {
var entries []IndexEntry
indirect, _ := getIndex(b, "med")
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
resetFaults(indirect)
entries, _ = indirect.ReadEntries([]byte("cpu-00000001"), entries)
}
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
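// BenchmarkBlockIterator_Next measures walking every block in the index
// through a TSMReader's BlockIterator.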
func BenchmarkBlockIterator_Next(b *testing.B) {
indirect, _ := getIndex(b, "med")
r := TSMReader{index: indirect}
b.ResetTimer()
for i := 0; i < b.N; i++ {
resetFaults(indirect)
bi := r.BlockIterator()
for bi.Next() {
}
}
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
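// BenchmarkIndirectIndex_DeleteRangeLast measures DeleteRange for a single key
// that sorts at or past the end of the generated key space.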
func BenchmarkIndirectIndex_DeleteRangeLast(b *testing.B) {
indirect, _ := getIndex(b, "large")
keys := [][]byte{[]byte("cpu-00999999")}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
resetFaults(indirect)
indirect.DeleteRange(keys, 10, 50)
}
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
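// BenchmarkIndirectIndex_DeleteRangeFull deletes the range [10, 50] for every
// key in batches of 4096, fetching a freshly reset index for each iteration.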
func BenchmarkIndirectIndex_DeleteRangeFull(b *testing.B) {
run := func(b *testing.B, name string) {
indirect, _ := getIndex(b, name)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
var info *indexCacheInfo
indirect, info = getIndex(b, name)
b.StartTimer()
for i := 0; i < len(info.allKeys); i += 4096 {
n := i + 4096
if n > len(info.allKeys) {
n = len(info.allKeys)
}
indirect.DeleteRange(info.allKeys[i:n], 10, 50)
}
}
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
b.Run("Large", func(b *testing.B) { run(b, "large") })
b.Run("Small", func(b *testing.B) { run(b, "small") })
}
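// BenchmarkIndirectIndex_DeleteRangeFull_Covered is the same workload, but the
// range [0, math.MaxInt64] covers every block, so each key is removed outright.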
func BenchmarkIndirectIndex_DeleteRangeFull_Covered(b *testing.B) {
run := func(b *testing.B, name string) {
indirect, _ := getIndex(b, name)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
var info *indexCacheInfo
indirect, info = getIndex(b, name)
b.StartTimer()
for i := 0; i < len(info.allKeys); i += 4096 {
n := i + 4096
if n > len(info.allKeys) {
n = len(info.allKeys)
}
indirect.DeleteRange(info.allKeys[i:n], 0, math.MaxInt64)
}
}
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
b.Run("Large", func(b *testing.B) { run(b, "large") })
b.Run("Small", func(b *testing.B) { run(b, "small") })
}
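// BenchmarkIndirectIndex_Delete removes every key outright in batches of 4096.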
func BenchmarkIndirectIndex_Delete(b *testing.B) {
run := func(b *testing.B, name string) {
indirect, _ := getIndex(b, name)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
var info *indexCacheInfo
indirect, info = getIndex(b, name)
b.StartTimer()
for i := 0; i < len(info.allKeys); i += 4096 {
n := i + 4096
if n > len(info.allKeys) {
n = len(info.allKeys)
}
indirect.Delete(info.allKeys[i:n])
}
}
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
b.Run("Large", func(b *testing.B) { run(b, "large") })
b.Run("Small", func(b *testing.B) { run(b, "small") })
}
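// BenchmarkIndirectIndex_DeletePrefixFull deletes the range [10, 50] for every
// key sharing the "cpu-" prefix with a single DeletePrefix call per iteration.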
func BenchmarkIndirectIndex_DeletePrefixFull(b *testing.B) {
prefix := []byte("cpu-")
run := func(b *testing.B, name string) {
indirect, _ := getIndex(b, name)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
indirect, _ = getIndex(b, name)
b.StartTimer()
indirect.DeletePrefix(prefix, 10, 50, nil, nil)
}
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
b.Run("Large", func(b *testing.B) { run(b, "large") })
b.Run("Small", func(b *testing.B) { run(b, "small") })
}
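// BenchmarkIndirectIndex_DeletePrefixFull_Covered is the same workload with a
// range that covers every block, so the matching keys are removed outright.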
func BenchmarkIndirectIndex_DeletePrefixFull_Covered(b *testing.B) {
prefix := []byte("cpu-")
run := func(b *testing.B, name string) {
indirect, _ := getIndex(b, name)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
indirect, _ = getIndex(b, name)
b.StartTimer()
indirect.DeletePrefix(prefix, 0, math.MaxInt64, nil, nil)
}
if faultBufferEnabled {
b.SetBytes(getFaults(indirect) * 4096)
b.Log("recorded faults:", getFaults(indirect))
}
}
b.Run("Large", func(b *testing.B) { run(b, "large") })
b.Run("Small", func(b *testing.B) { run(b, "small") })
}