fix: rename TSM files on errBlockRead
parent
f04105bede
commit
ee2cfaea7a
|
@ -85,6 +85,15 @@ type errBlockRead struct {
|
|||
err error
|
||||
}
|
||||
|
||||
func (e errBlockRead) Unwrap() error {
|
||||
return e.err
|
||||
}
|
||||
|
||||
func (e errBlockRead) Is(target error) bool {
|
||||
_, ok := target.(errBlockRead)
|
||||
return ok
|
||||
}
|
||||
|
||||
func (e errBlockRead) Error() string {
|
||||
if e.err != nil {
|
||||
return fmt.Sprintf("block read error on %s: %s", e.file, e.err)
|
||||
|
@ -1684,7 +1693,7 @@ func (k *tsmBatchKeyIterator) Err() error {
|
|||
errs = make([]error, 0, len(k.errs)+1)
|
||||
errs = append(errs, k.errs...)
|
||||
errs = append(errs, fmt.Errorf("additional errors dropped: %d", k.overflowErrors))
|
||||
return errs
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
type cacheKeyIterator struct {
|
||||
|
|
|
@ -9,7 +9,6 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -292,19 +291,19 @@ func TestCompactor_DecodeError(t *testing.T) {
|
|||
compactor.FileStore = ffs
|
||||
|
||||
files, err := compactor.CompactFull([]string{f1, f2, f3}, zap.NewNop())
|
||||
if err == nil {
|
||||
t.Fatalf("expected error writing snapshot: %v", err)
|
||||
}
|
||||
if len(files) > 0 {
|
||||
t.Fatalf("no files should be compacted: got %v", len(files))
|
||||
|
||||
}
|
||||
require.Error(t, err, "expected error writing snapshot")
|
||||
require.Zero(t, len(files), "no files should be compacted")
|
||||
|
||||
compactor.Open()
|
||||
|
||||
if _, err = compactor.CompactFull([]string{f1, f2, f3}, zap.NewNop()); err == nil || !strings.Contains(err.Error(), "decode error: unable to decompress block type float for key 'cpu,host=A#!~#value': unpackBlock: not enough data for timestamp") {
|
||||
t.Fatalf("expected error writing snapshot: %v", err)
|
||||
}
|
||||
_, err = compactor.CompactFull([]string{f1, f2, f3}, zap.NewNop())
|
||||
|
||||
require.ErrorContains(t, err, "decode error: unable to decompress block type float for key 'cpu,host=A#!~#value': unpackBlock: not enough data for timestamp")
|
||||
tsm1.MoveTsmOnReadErr(err, zap.NewNop(), func(strings []string, strings2 []string, f func([]tsm1.TSMFile)) error {
|
||||
require.Equal(t, 1, len(strings))
|
||||
require.Equal(t, strings[0], f3)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// Ensures that a compaction will properly merge multiple TSM files
|
||||
|
|
|
@ -2332,16 +2332,7 @@ func (s *compactionStrategy) compactGroup() {
|
|||
|
||||
log.Warn("Error compacting TSM files", zap.Error(err))
|
||||
|
||||
// We hit a bad TSM file - rename so the next compaction can proceed.
|
||||
if _, ok := err.(errBlockRead); ok {
|
||||
path := err.(errBlockRead).file
|
||||
log.Info("Renaming a corrupt TSM file due to compaction error", zap.Error(err))
|
||||
if err := s.fileStore.ReplaceWithCallback([]string{path}, nil, nil); err != nil {
|
||||
log.Info("Error removing bad TSM file", zap.Error(err))
|
||||
} else if e := os.Rename(path, path+"."+BadTSMFileExtension); e != nil {
|
||||
log.Info("Error renaming corrupt TSM file", zap.Error((err)))
|
||||
}
|
||||
}
|
||||
MoveTsmOnReadErr(err, log, s.fileStore.ReplaceWithCallback)
|
||||
|
||||
atomic.AddInt64(s.errorStat, 1)
|
||||
time.Sleep(time.Second)
|
||||
|
@ -2370,6 +2361,20 @@ func (s *compactionStrategy) compactGroup() {
|
|||
atomic.AddInt64(s.successStat, 1)
|
||||
}
|
||||
|
||||
func MoveTsmOnReadErr(err error, log *zap.Logger, ReplaceWithCallback func([]string, []string, func([]TSMFile)) error) {
|
||||
var blockReadErr errBlockRead
|
||||
// We hit a bad TSM file - rename so the next compaction can proceed.
|
||||
if ok := errors.As(err, &blockReadErr); ok {
|
||||
path := blockReadErr.file
|
||||
log.Error("Renaming a corrupt TSM file due to compaction error", zap.String("file", path), zap.Error(err))
|
||||
if err := ReplaceWithCallback([]string{path}, nil, nil); err != nil {
|
||||
log.Info("Error removing bad TSM file", zap.String("file", path), zap.Error(err))
|
||||
} else if e := os.Rename(path, path+"."+BadTSMFileExtension); e != nil {
|
||||
log.Info("Error renaming corrupt TSM file", zap.String("file", path), zap.Error(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// levelCompactionStrategy returns a compactionStrategy for the given level.
|
||||
// It returns nil if there are no TSM files to compact.
|
||||
func (e *Engine) levelCompactionStrategy(group CompactionGroup, fast bool, level int) *compactionStrategy {
|
||||
|
|
|
@ -3,12 +3,12 @@ package tsm1
|
|||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
@ -1867,8 +1867,16 @@ func TestTSMReader_References(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestBatchKeyIterator_Errors(t *testing.T) {
|
||||
const MaxErrors = 10
|
||||
|
||||
const testFile = "testFile.tsm"
|
||||
errorCases := []error{
|
||||
fmt.Errorf("test error 0"),
|
||||
errBlockRead{
|
||||
file: testFile,
|
||||
err: fmt.Errorf("decode error: unable to decompress block type %s for key '%s': %v",
|
||||
"string", "summary#!~#mfu_estimated_percent", fmt.Errorf("test invalid error 1"))},
|
||||
fmt.Errorf("test error 2"),
|
||||
fmt.Errorf("test error 3"),
|
||||
}
|
||||
dir, name := createTestTSM(t)
|
||||
defer os.RemoveAll(dir)
|
||||
fr, err := os.Open(name)
|
||||
|
@ -1893,27 +1901,30 @@ func TestBatchKeyIterator_Errors(t *testing.T) {
|
|||
}()
|
||||
interrupts := make(chan struct{})
|
||||
var iter KeyIterator
|
||||
if iter, err = NewTSMBatchKeyIterator(3, false, MaxErrors, interrupts, []string{name}, r); err != nil {
|
||||
if iter, err = NewTSMBatchKeyIterator(3, false, len(errorCases), interrupts, []string{name}, r); err != nil {
|
||||
t.Fatalf("unexpected error creating tsmBatchKeyIterator: %v", err)
|
||||
}
|
||||
var i int
|
||||
for i = 0; i < MaxErrors*2; i++ {
|
||||
saved := iter.(*tsmBatchKeyIterator).AppendError(fmt.Errorf("fake error: %d", i))
|
||||
if i < MaxErrors && !saved {
|
||||
t.Fatalf("error unexpectedly not saved: %d", i)
|
||||
}
|
||||
if i >= MaxErrors && saved {
|
||||
t.Fatalf("error unexpectedly saved: %d", i)
|
||||
for i = 0; i < 2; i++ {
|
||||
for j, e := range errorCases {
|
||||
saved := iter.(*tsmBatchKeyIterator).AppendError(e)
|
||||
index := (1 + j) + (len(errorCases) * i)
|
||||
if index < len(errorCases) && !saved {
|
||||
t.Fatalf("error unexpectedly not saved: %d", index)
|
||||
} else if index > len(errorCases) && saved {
|
||||
t.Fatalf("error unexpectedly saved: %d", index)
|
||||
}
|
||||
}
|
||||
errs := iter.Err()
|
||||
if errCnt := len(errs.(TSMErrors)); errCnt != (MaxErrors + 1) {
|
||||
t.Fatalf("saved wrong number of errors: expected %d, got %d", MaxErrors, errCnt)
|
||||
}
|
||||
expected := fmt.Sprintf("additional errors dropped: %d", i-MaxErrors)
|
||||
if strings.Compare(errs.(TSMErrors)[MaxErrors].Error(), expected) != 0 {
|
||||
t.Fatalf("expected: '%s', got: '%s", expected, errs.(TSMErrors)[MaxErrors].Error())
|
||||
}
|
||||
var blockReadError errBlockRead
|
||||
iterErr := iter.Err()
|
||||
joinErr, ok := iterErr.(interface{ Unwrap() []error })
|
||||
require.True(t, ok, "errs does not implement Unwrap() as a joinError should: %T", iterErr)
|
||||
require.Equal(t, 1+len(errorCases), len(joinErr.Unwrap()), "saved wrong number of errors")
|
||||
require.True(t, errors.As(iterErr, &blockReadError), "expected errBlockRead error, got %T", err)
|
||||
require.Equal(t, testFile, blockReadError.file, "unexpected file name in error")
|
||||
expected := fmt.Sprintf("additional errors dropped: %d", len(errorCases))
|
||||
require.Equal(t, expected, joinErr.Unwrap()[len(errorCases)].Error(), "unexpected error message for dropped errors")
|
||||
}
|
||||
|
||||
func createTestTSM(t *testing.T) (dir string, name string) {
|
||||
|
|
Loading…
Reference in New Issue