diff --git a/internal/datacoord/garbage_collector.go b/internal/datacoord/garbage_collector.go index 843057b511..e4e26caf1f 100644 --- a/internal/datacoord/garbage_collector.go +++ b/internal/datacoord/garbage_collector.go @@ -21,6 +21,8 @@ import ( "sync" "time" + "github.com/milvus-io/milvus/internal/common" + "github.com/milvus-io/milvus/internal/log" "github.com/milvus-io/milvus/internal/proto/commonpb" "github.com/milvus-io/milvus/internal/proto/datapb" @@ -138,8 +140,8 @@ func (gc *garbageCollector) scan() { continue } - segmentID, err := storage.ParseSegmentIDByBinlog(infoKey) - if err != nil { + segmentID, err := storage.ParseSegmentIDByBinlog(gc.option.rootPath, infoKey) + if err != nil && !common.IsIgnorableError(err) { log.Error("parse segment id error", zap.String("infoKey", infoKey), zap.Error(err)) continue } diff --git a/internal/storage/binlog_util.go b/internal/storage/binlog_util.go index 78bc2ca0dd..a2bad70e0a 100644 --- a/internal/storage/binlog_util.go +++ b/internal/storage/binlog_util.go @@ -4,15 +4,32 @@ import ( "fmt" "strconv" "strings" + + "github.com/milvus-io/milvus/internal/common" ) // ParseSegmentIDByBinlog parse segment id from binlog paths // if path format is not expected, returns error -func ParseSegmentIDByBinlog(path string) (UniqueID, error) { - // binlog path should consist of "[prefix]/insertLog/collID/partID/segID/fieldID/fileName" - keyStr := strings.Split(path, "/") - if len(keyStr) != 7 { - return 0, fmt.Errorf("%s is not a valid binlog path", path) +func ParseSegmentIDByBinlog(rootPath, path string) (UniqueID, error) { + // check path contains rootPath as prefix + if !strings.HasPrefix(path, rootPath) { + return 0, fmt.Errorf("path \"%s\" does not contains rootPath \"%s\"", path, rootPath) } - return strconv.ParseInt(keyStr[len(keyStr)-3], 10, 64) + p := path[len(rootPath):] + + // remove leading "/" + for strings.HasPrefix(p, "/") { + p = p[1:] + } + + // binlog path should consist of "[log_type]/collID/partID/segID/fieldID/fileName" + keyStr := strings.Split(p, "/") + if len(keyStr) == 5 { + return 0, common.NewIgnorableError(fmt.Errorf("%s does not contains a file name", path)) + } + + if len(keyStr) == 6 { + return strconv.ParseInt(keyStr[len(keyStr)-3], 10, 64) + } + return 0, fmt.Errorf("%s is not a valid binlog path", path) } diff --git a/internal/storage/binlog_util_test.go b/internal/storage/binlog_util_test.go index 7c8e16e050..988a2e9d83 100644 --- a/internal/storage/binlog_util_test.go +++ b/internal/storage/binlog_util_test.go @@ -3,53 +3,71 @@ package storage import ( "testing" + "github.com/milvus-io/milvus/internal/common" + "github.com/stretchr/testify/assert" ) func TestParseSegmentIDByBinlog(t *testing.T) { type testCase struct { - name string - input string - expectError bool - expectID UniqueID + name string + input string + rootPath string + expectError bool + expectID UniqueID + isIgnorableError bool } cases := []testCase{ { name: "normal case", input: "files/insertLog/123/456/1/101/10000001", + rootPath: "files", expectError: false, expectID: 1, }, { name: "normal case long id", input: "files/insertLog/123/456/434828745294479362/101/10000001", + rootPath: "files", expectError: false, expectID: 434828745294479362, }, { name: "bad format", input: "files/123", + rootPath: "files", expectError: true, }, { name: "empty input", input: "", + rootPath: "files", expectError: true, }, { name: "non-number segmentid", input: "files/insertLog/123/456/segment_id/101/10000001", + rootPath: "files", + expectError: true, + }, + { + name: "file name doesn't exists", + input: "tenant1/files/delta_log/609/610/457/793", + rootPath: "tenant1/files", expectError: true, }, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - id, err := ParseSegmentIDByBinlog(tc.input) + id, err := ParseSegmentIDByBinlog(tc.rootPath, tc.input) if tc.expectError { assert.Error(t, err) + if tc.isIgnorableError { + assert.True(t, common.IsIgnorableError(err)) + } } else { assert.NoError(t, err) assert.Equal(t, tc.expectID, id)