fix: series file index compaction (#23916)

Series file indices grew monotonically, even when series were deleted.
Also, stop ignoring an error in series index recovery.

Partially closes https://github.com/influxdata/EAR/issues/3643
davidby-influx 2023-06-01 10:49:23 -07:00 committed by GitHub
parent ab85be01a2
commit 53856cdaae
1 changed file with 76 additions and 60 deletions


@@ -4,12 +4,14 @@ import (
 	"encoding/binary"
 	"errors"
 	"fmt"
+	"math"
 	"os"
 	"path/filepath"
 	"sync"
 
 	"github.com/influxdata/influxdb/logger"
 	"github.com/influxdata/influxdb/models"
+	errors2 "github.com/influxdata/influxdb/pkg/errors"
 	"github.com/influxdata/influxdb/pkg/limiter"
 	"github.com/influxdata/influxdb/pkg/rhh"
 	"go.uber.org/zap"
@@ -86,7 +88,7 @@ func (p *SeriesPartition) Open() error {
 	p.index = NewSeriesIndex(p.IndexPath())
 	if err := p.index.Open(); err != nil {
 		return err
-	} else if p.index.Recover(p.segments); err != nil {
+	} else if err = p.index.Recover(p.segments); err != nil {
 		return err
 	}
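
The one-character fix above is easy to miss: in Go, `if stmt; cond` evaluates `stmt` only for its side effects. The old `} else if p.index.Recover(p.segments); err != nil` discarded Recover's return value and re-tested the `err` left over from Open, which is necessarily nil in the else branch, so a failed recovery was silently ignored. A minimal standalone sketch of the pattern (hypothetical open/recoverIndex stand-ins, not the InfluxDB code):

package main

import (
	"errors"
	"fmt"
)

// Hypothetical stand-ins for p.index.Open and p.index.Recover.
func open() error         { return nil }
func recoverIndex() error { return errors.New("recovery failed") }

func main() {
	if err := open(); err != nil {
		fmt.Println("open:", err)
	} else if recoverIndex(); err != nil {
		// BUG: recoverIndex() is only the init statement here, so its
		// error is discarded; `err` still holds the nil result of open()
		// and this branch can never execute.
		fmt.Println("recover:", err)
	} else {
		fmt.Println("ok") // prints "ok" even though recovery failed
	}
}

Writing `err = recoverIndex()` in the init statement, as the commit does, makes the condition test the value that was just returned.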
@@ -573,21 +575,35 @@ func (c *SeriesPartitionCompactor) Compact(p *SeriesPartition) error {
 	return nil
 }
 
-func (c *SeriesPartitionCompactor) compactIndexTo(index *SeriesIndex, seriesN uint64, segments []*SeriesSegment, path string) error {
-	hdr := NewSeriesIndexHeader()
-	hdr.Count = seriesN
-	hdr.Capacity = pow2((int64(hdr.Count) * 100) / SeriesIndexLoadFactor)
-
-	// Allocate space for maps.
-	keyIDMap := make([]byte, (hdr.Capacity * SeriesIndexElemSize))
-	idOffsetMap := make([]byte, (hdr.Capacity * SeriesIndexElemSize))
+var errDone error = errors.New("done")
+
+func (c *SeriesPartitionCompactor) compactIndexTo(index *SeriesIndex, seriesN uint64, segments []*SeriesSegment, path string) (err error) {
+	hdr := NewSeriesIndexHeader()
+	var keyIDMap []byte
+	var idOffsetMap []byte
+	hdr.Count = math.MaxUint64
+
+	// seriesN is the current size of the index. Because it may contain tombstones
+	// for deleted series, we recalculate that number (as seriesCount) without the
+	// deleted series as we rebuild the index. If the count of existing series does
+	// not equal the seriesN passed in (meaning there were tombstones), we rebuild
+	// the index a second time with the correct size.
+	seriesCount := seriesN
+	for {
+		seriesN = seriesCount
+		seriesCount = uint64(0)
+		// This only loops if there are deleted entries, which shrinks the size
+		hdr.Capacity = pow2((int64(seriesN) * 100) / SeriesIndexLoadFactor)
+
+		// Allocate space for maps, guaranteeing slices are initialized to zero
+		keyIDMap = make([]byte, hdr.Capacity*SeriesIndexElemSize)
+		idOffsetMap = make([]byte, hdr.Capacity*SeriesIndexElemSize)
 
 		// Reindex all partitions.
 		var entryN int
 		for _, segment := range segments {
-			errDone := errors.New("done")
-			if err := segment.ForEachEntry(func(flag uint8, id uint64, offset int64, key []byte) error {
+			if err = segment.ForEachEntry(func(flag uint8, id uint64, offset int64, key []byte) error {
 				// Make sure we don't go past the offset where the compaction began.
 				if offset > index.maxOffset {
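
Two details in this hunk are worth spelling out. Hoisting errDone to a package-level var lets the ForEachEntry closure signal early termination without allocating a fresh sentinel error for every segment. And the Capacity line sizes the hash maps from the live-series count and a load factor: as a worked example (assuming SeriesIndexLoadFactor is 90, its value elsewhere in this file, and that pow2 rounds up to the next power of two), seriesN = 1000 gives 1000*100/90 = 1111, and pow2(1111) = 2048 slots, so the rebuilt map starts at most about half full. A sketch of what a rounding helper like pow2 plausibly looks like (the real one is defined elsewhere in this package):

// pow2 rounds v up to the next power of two (sketch; assumes v > 0 and
// that the result fits in an int64).
func pow2(v int64) int64 {
	for i := int64(2); i < 1<<62; i *= 2 {
		if i >= v {
			return i
		}
	}
	panic("unreachable")
}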
@@ -605,7 +621,8 @@ func (c *SeriesPartitionCompactor) compactIndexTo(index *SeriesIndex, seriesN ui
 				// Only process insert entries.
 				switch flag {
-				case SeriesEntryInsertFlag: // fallthrough
+				case SeriesEntryInsertFlag:
+					// does not fallthrough
 				case SeriesEntryTombstoneFlag:
 					return nil
 				default:
@@ -619,7 +636,7 @@ func (c *SeriesPartitionCompactor) compactIndexTo(index *SeriesIndex, seriesN ui
 				if index.IsDeleted(id) {
 					return nil
 				}
+				seriesCount++
 				// Insert into maps.
 				c.insertIDOffsetMap(idOffsetMap, hdr.Capacity, id, offset)
 				return c.insertKeyIDMap(keyIDMap, hdr.Capacity, segments, key, offset, id)
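
The comment change above ("// fallthrough" becoming "// does not fallthrough") corrects a misleading note rather than behavior: unlike C, a Go switch case never falls into the next case unless the fallthrough keyword appears, so an insert entry simply exits the switch and continues to the counting and map-insert code below, while a tombstone returns early. A tiny standalone illustration of that semantics (not the InfluxDB code):

package main

import "fmt"

func main() {
	const insertFlag, tombstoneFlag = 1, 2
	flag := insertFlag
	switch flag {
	case insertFlag:
		// Empty case body: execution stops here; it does NOT fall
		// through into the tombstone case below.
	case tombstoneFlag:
		fmt.Println("tombstone: skip")
		return
	}
	fmt.Println("insert: index this entry") // reached for insertFlag
}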
@@ -629,38 +646,37 @@ func (c *SeriesPartitionCompactor) compactIndexTo(index *SeriesIndex, seriesN ui
 				return err
 			}
 		}
+		hdr.Count = seriesCount
+		if seriesN != seriesCount {
+			continue
+		} else {
+			break
+		}
+	}
 
 	// Open file handler.
 	f, err := os.Create(path)
 	if err != nil {
 		return err
 	}
-	defer f.Close()
+	defer errors2.Capture(&err, f.Close)()
 
 	// Calculate map positions.
 	hdr.KeyIDMap.Offset, hdr.KeyIDMap.Size = SeriesIndexHeaderSize, int64(len(keyIDMap))
 	hdr.IDOffsetMap.Offset, hdr.IDOffsetMap.Size = hdr.KeyIDMap.Offset+hdr.KeyIDMap.Size, int64(len(idOffsetMap))
 
 	// Write header.
-	if _, err := hdr.WriteTo(f); err != nil {
+	if _, err = hdr.WriteTo(f); err != nil {
 		return err
 	}
 
 	// Write maps.
-	if _, err := f.Write(keyIDMap); err != nil {
+	if _, err = f.Write(keyIDMap); err != nil {
 		return err
 	} else if _, err := f.Write(idOffsetMap); err != nil {
 		return err
 	}
 
-	// Sync & close.
-	if err := f.Sync(); err != nil {
-		return err
-	} else if err := f.Close(); err != nil {
-		return err
-	}
-	return nil
+	// Sync, then deferred close
+	return f.Sync()
 }
 
 func (c *SeriesPartitionCompactor) insertKeyIDMap(dst []byte, capacity int64, segments []*SeriesSegment, key []byte, offset int64, id uint64) error {
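
The defer errors2.Capture(&err, f.Close)() change pairs with the new named return value (err error): the file is now closed on every return path, and a Close failure is surfaced instead of being lost, without clobbering an earlier error. It also removes a latent double close; in the old code the explicit f.Close() after Sync ran in addition to the deferred one. A sketch of how a helper like Capture typically works (an assumption about github.com/influxdata/influxdb/pkg/errors; check that package for the real implementation):

// Capture returns a function that runs fn and, if *rErr is still nil,
// records fn's error in *rErr, so the first error wins. Typical use
// with a named return value:
//
//	func write(path string) (err error) {
//		f, err := os.Create(path)
//		if err != nil {
//			return err
//		}
//		defer Capture(&err, f.Close)()
//		// ... writes that may set err ...
//		return f.Sync()
//	}
func Capture(rErr *error, fn func() error) func() {
	return func() {
		if err := fn(); *rErr == nil {
			*rErr = err
		}
	}
}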