Fix ParseSegmentIDByBinlog panics with bad input (#18413)

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
pull/18420/head
congqixia 2022-07-26 19:32:30 +08:00 committed by GitHub
parent 9621b6615d
commit e9d777b336
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 72 additions and 4 deletions

View File

@ -239,9 +239,10 @@ func Test_garbageCollector_scan(t *testing.T) {
gc.start()
gc.scan()
gc.clearEtcd()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), []string{})
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), []string{})
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), []string{})
// bad paths shall remain, since datacoord cannot determine whether a file is garbage when its path is not valid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
@ -278,6 +279,8 @@ func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, i
content := []byte("test")
for i := 0; i < n; i++ {
reader := bytes.NewReader(content)
// collID/partID/segID/fieldID/fileName
// [str]/id/id/string/string
token := path.Join(funcutil.RandomString(8), strconv.Itoa(i), strconv.Itoa(i), funcutil.RandomString(8), funcutil.RandomString(8))
if i == 1 {
token = path.Join(funcutil.RandomString(8), strconv.Itoa(i), strconv.Itoa(i), funcutil.RandomString(8))

View File

@ -1,12 +1,18 @@
package storage
import (
"fmt"
"strconv"
"strings"
)
// ParseSegmentIDByBinlog parses the segment ID out of a binlog path.
//
// The path is split on "/" and must yield exactly seven components, laid out
// as "[prefix]/insertLog/collID/partID/segID/fieldID/fileName". Only the
// component count is checked; the segment ID is taken from the third
// component from the end and parsed as a base-10 signed 64-bit integer.
//
// On failure the returned ID is always 0. An error is returned when the path
// does not have seven components, or when the segment ID component is not a
// valid 64-bit integer; in the latter case the error wraps the underlying
// strconv error so callers can still inspect it with errors.As / errors.Is.
func ParseSegmentIDByBinlog(path string) (UniqueID, error) {
	keyStr := strings.Split(path, "/")
	if len(keyStr) != 7 {
		return 0, fmt.Errorf("%s is not a valid binlog path", path)
	}

	segID, err := strconv.ParseInt(keyStr[len(keyStr)-3], 10, 64)
	if err != nil {
		// ParseInt returns a saturated (non-zero) value on range errors;
		// do not leak it to callers together with a non-nil error.
		return 0, fmt.Errorf("parsing segment id of binlog path %q: %w", path, err)
	}
	return segID, nil
}

View File

@ -0,0 +1,59 @@
package storage
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestParseSegmentIDByBinlog runs ParseSegmentIDByBinlog against a table of
// well-formed and malformed binlog paths, checking that an error is reported
// exactly when expected and that the parsed ID matches on success.
func TestParseSegmentIDByBinlog(t *testing.T) {
	tests := []struct {
		name        string
		input       string
		expectError bool
		expectID    UniqueID
	}{
		{name: "normal case", input: "files/insertLog/123/456/1/101/10000001", expectError: false, expectID: 1},
		{name: "normal case long id", input: "files/insertLog/123/456/434828745294479362/101/10000001", expectError: false, expectID: 434828745294479362},
		{name: "bad format", input: "files/123", expectError: true},
		{name: "empty input", input: "", expectError: true},
		{name: "non-number segmentid", input: "files/insertLog/123/456/segment_id/101/10000001", expectError: true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := ParseSegmentIDByBinlog(tt.input)
			if tt.expectError {
				// The returned ID is not inspected on the failure path.
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, tt.expectID, got)
		})
	}
}