From e9d777b3363fa17142e085a383f81201fcd25be5 Mon Sep 17 00:00:00 2001 From: congqixia Date: Tue, 26 Jul 2022 19:32:30 +0800 Subject: [PATCH] Fix ParseSegmentIDBinlog panicks with bad input (#18413) Signed-off-by: Congqi Xia --- internal/datacoord/garbage_collector_test.go | 9 ++- internal/storage/binlog_util.go | 8 ++- internal/storage/binlog_util_test.go | 59 ++++++++++++++++++++ 3 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 internal/storage/binlog_util_test.go diff --git a/internal/datacoord/garbage_collector_test.go b/internal/datacoord/garbage_collector_test.go index 4af181e10d..433b1bd59c 100644 --- a/internal/datacoord/garbage_collector_test.go +++ b/internal/datacoord/garbage_collector_test.go @@ -239,9 +239,10 @@ func Test_garbageCollector_scan(t *testing.T) { gc.start() gc.scan() gc.clearEtcd() - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), []string{}) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), []string{}) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), []string{}) + // bad path shall remains since datacoord cannot determine file is garbage or not if path is not valid + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:2]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:2]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:2]) validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others) gc.close() @@ -278,6 +279,8 @@ func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, i content := []byte("test") for i := 0; i < n; i++ { reader := bytes.NewReader(content) + // collID/partID/segID/fieldID/fileName + // [str]/id/id/string/string token := path.Join(funcutil.RandomString(8), strconv.Itoa(i), strconv.Itoa(i), funcutil.RandomString(8), funcutil.RandomString(8)) if i == 1 { token = path.Join(funcutil.RandomString(8), strconv.Itoa(i), strconv.Itoa(i), funcutil.RandomString(8)) diff --git a/internal/storage/binlog_util.go b/internal/storage/binlog_util.go index 04a24ed063..78bc2ca0dd 100644 --- a/internal/storage/binlog_util.go +++ b/internal/storage/binlog_util.go @@ -1,12 +1,18 @@ package storage import ( + "fmt" "strconv" "strings" ) +// ParseSegmentIDByBinlog parse segment id from binlog paths +// if path format is not expected, returns error func ParseSegmentIDByBinlog(path string) (UniqueID, error) { - // binlog path should consist of "files/insertLog/collID/partID/segID/fieldID/fileName" + // binlog path should consist of "[prefix]/insertLog/collID/partID/segID/fieldID/fileName" keyStr := strings.Split(path, "/") + if len(keyStr) != 7 { + return 0, fmt.Errorf("%s is not a valid binlog path", path) + } return strconv.ParseInt(keyStr[len(keyStr)-3], 10, 64) } diff --git a/internal/storage/binlog_util_test.go b/internal/storage/binlog_util_test.go new file mode 100644 index 0000000000..7c8e16e050 --- /dev/null +++ b/internal/storage/binlog_util_test.go @@ -0,0 +1,59 @@ +package storage + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseSegmentIDByBinlog(t *testing.T) { + + type testCase struct { + name string + input string + expectError bool + expectID UniqueID + } + + cases := []testCase{ + { + name: "normal case", + input: "files/insertLog/123/456/1/101/10000001", + expectError: false, + expectID: 1, + }, + { + name: "normal case long id", + input: "files/insertLog/123/456/434828745294479362/101/10000001", + expectError: false, + expectID: 434828745294479362, + }, + { + name: "bad format", + input: "files/123", + expectError: true, + }, + { + name: "empty input", + input: "", + expectError: true, + }, + { + name: "non-number segmentid", + input: "files/insertLog/123/456/segment_id/101/10000001", + expectError: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + id, err := ParseSegmentIDByBinlog(tc.input) + if tc.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tc.expectID, id) + } + }) + } +}