fix: `influxd-ctl backup` will create a working backup when only `-shard` is given (#22998)

`influxd-ctl backup` will now create a working backup when only the `-shard`
option is given. Previously this would create a backup that could not be
restored.

fixes #16751
pull/23091/head
Geoffrey Wossum 2022-01-28 14:59:20 -06:00 committed by GitHub
parent eb3bc7069f
commit 1ab50d7557
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 100 additions and 29 deletions

View File

@ -89,7 +89,8 @@ func (cmd *Command) Run(args ...string) error {
if err := cmd.backupMetastore(); err != nil {
return err
}
err = cmd.backupShard(cmd.database, cmd.retentionPolicy, cmd.shardID)
// Pass true for verifyLocation so we verify that db and rp are correct if given
err = cmd.backupShard(cmd.database, cmd.retentionPolicy, cmd.shardID, true)
} else if cmd.retentionPolicy != "" {
// always backup the metastore
@ -210,7 +211,7 @@ func (cmd *Command) parseFlags(args []string) (err error) {
// Ensure that only one arg is specified.
if fs.NArg() != 1 {
return errors.New("Exactly one backup path is required.")
return errors.New("exactly one backup path is required")
}
cmd.path = fs.Arg(0)
@ -219,18 +220,65 @@ func (cmd *Command) parseFlags(args []string) (err error) {
return err
}
func (cmd *Command) backupShard(db, rp, sid string) (err error) {
// backupShard backs up a single shard. sid is the shard ID as a decimal string and
// must be given. db and rp are the database and retention policy the shard belongs to,
// respectively. If either db or rp is not given, the snapshotter service is queried
// to find its correct value. If either is given and verifyLocation is true, the
// snapshotter service is queried and the given value is checked against the value
// the service reports. If there is a mismatch, an error is returned.
func (cmd *Command) backupShard(db, rp, sid string, verifyLocation bool) (err error) {
reqType := snapshotter.RequestShardBackup
if !cmd.isBackup {
reqType = snapshotter.RequestShardExport
}
id, err := strconv.ParseUint(sid, 10, 64)
shardId, err := strconv.ParseUint(sid, 10, 64)
if err != nil {
return err
}
shardArchivePath, err := cmd.nextPath(filepath.Join(cmd.path, fmt.Sprintf(backup_util.BackupFilePattern, db, rp, id)))
// Get info about shard retention policy and database to fill-in missing db / rp
if db == "" || rp == "" || verifyLocation {
infoReq := &snapshotter.Request{
Type: snapshotter.RequestDatabaseInfo,
BackupDatabase: db, // use db if we did happen to get it to limit result set
}
infoResponse, err := cmd.requestInfo(infoReq)
if err != nil {
return err
}
var shardFound bool
for _, path := range infoResponse.Paths {
checkDb, checkRp, checkSid, err := backup_util.DBRetentionAndShardFromPath(path)
if err != nil {
return fmt.Errorf("error while finding shard's db/rp: %w", err)
}
if sid == checkSid {
// Found the shard, now fill-in / check db and rp
if db == "" {
db = checkDb
} else if verifyLocation && db != checkDb {
return fmt.Errorf("expected shard %d in database '%s', but found '%s'", shardId, db, checkDb)
}
if rp == "" {
rp = checkRp
} else if verifyLocation && rp != checkRp {
return fmt.Errorf("expected shard %d with retention policy '%s', but found '%s'", shardId, rp, checkRp)
}
shardFound = true
break
}
}
if !shardFound {
return fmt.Errorf("did not find shard %d", shardId)
}
}
shardArchivePath, err := cmd.nextPath(filepath.Join(cmd.path, fmt.Sprintf(backup_util.BackupFilePattern, db, rp, shardId)))
if err != nil {
return err
}
@ -246,7 +294,7 @@ func (cmd *Command) backupShard(db, rp, sid string) (err error) {
Type: reqType,
BackupDatabase: db,
BackupRetentionPolicy: rp,
ShardID: id,
ShardID: shardId,
Since: cmd.since,
ExportStart: cmd.start,
ExportEnd: cmd.end,
@ -306,15 +354,11 @@ func (cmd *Command) backupShard(db, rp, sid string) (err error) {
if err != nil {
return err
}
var shardID uint64
shardID, err = strconv.ParseUint(sid, 10, 64)
if err != nil {
return err
}
cmd.manifest.Files = append(cmd.manifest.Files, backup_util.Entry{
Database: db,
Policy: rp,
ShardID: shardID,
ShardID: shardId,
FileName: filename,
Size: cw.Total,
LastModified: 0,
@ -378,7 +422,8 @@ func (cmd *Command) backupResponsePaths(response *snapshotter.Response) error {
return err
}
err = cmd.backupShard(db, rp, id)
// Don't need to verify db and rp, we know they're correct here
err = cmd.backupShard(db, rp, id, false)
if err != nil && !cmd.continueOnError {
cmd.StderrLogger.Printf("error (%s) when backing up db: %s, rp %s, shard %s. continuing backup on remaining shards", err, db, rp, id)
@ -417,7 +462,7 @@ func (cmd *Command) backupMetastore() (retErr error) {
}
if n < 8 {
return errors.New("Not enough bytes data to verify")
return errors.New("not enough bytes data to verify")
}
magic := binary.BigEndian.Uint64(magicByte[:])

View File

@ -31,6 +31,9 @@ func TestServer_BackupAndRestore(t *testing.T) {
portableBackupDir, _ := ioutil.TempDir("", "backup")
defer os.RemoveAll(portableBackupDir)
shardBackupDir, _ := ioutil.TempDir("", "backup")
defer os.RemoveAll(shardBackupDir)
db := "mydb"
rp := "forever"
@ -119,23 +122,26 @@ func TestServer_BackupAndRestore(t *testing.T) {
}
// now backup
cmd := backup.NewCommand()
_, port, err := net.SplitHostPort(config.BindAddress)
if err != nil {
t.Fatal(err)
}
hostAddress := net.JoinHostPort("localhost", port)
if err := cmd.Run("-host", hostAddress, "-database", "mydb", fullBackupDir); err != nil {
if err := backup.NewCommand().Run("-host", hostAddress, "-database", "mydb", fullBackupDir); err != nil {
t.Fatalf("error backing up: %s, hostAddress: %s", err.Error(), hostAddress)
}
time.Sleep(time.Second)
if err := cmd.Run("-host", hostAddress, "-database", "mydb", "-start", "1970-01-01T00:00:00.001Z", "-end", "1970-01-01T00:00:00.009Z", partialBackupDir); err != nil {
if err := backup.NewCommand().Run("-host", hostAddress, "-database", "mydb", "-start", "1970-01-01T00:00:00.001Z", "-end", "1970-01-01T00:00:00.009Z", partialBackupDir); err != nil {
t.Fatalf("error backing up: %s, hostAddress: %s", err.Error(), hostAddress)
}
// also testing short-form flag here
if err := cmd.Run("-portable", "-host", hostAddress, "-db", "mydb", "-start", "1970-01-01T00:00:00.001Z", "-end", "1970-01-01T00:00:00.009Z", portableBackupDir); err != nil {
if err := backup.NewCommand().Run("-portable", "-host", hostAddress, "-db", "mydb", "-start", "1970-01-01T00:00:00.001Z", "-end", "1970-01-01T00:00:00.009Z", portableBackupDir); err != nil {
t.Fatalf("error backing up: %s, hostAddress: %s", err.Error(), hostAddress)
}
if err := backup.NewCommand().Run("-portable", "-host", hostAddress, "-shard", "1", shardBackupDir); err != nil {
t.Fatalf("error backing up: %s, hostAddress: %s", err.Error(), hostAddress)
}
@ -158,9 +164,7 @@ func TestServer_BackupAndRestore(t *testing.T) {
}
// restore
cmd := restore.NewCommand()
if err := cmd.Run("-metadir", config.Meta.Dir, "-datadir", config.Data.Dir, "-database", "mydb", fullBackupDir); err != nil {
if err := restore.NewCommand().Run("-metadir", config.Meta.Dir, "-datadir", config.Data.Dir, "-database", "mydb", fullBackupDir); err != nil {
t.Fatalf("error restoring: %s", err.Error())
}
@ -199,7 +203,7 @@ func TestServer_BackupAndRestore(t *testing.T) {
}
defer proxy.Close()
go proxy.Serve()
cmd.Run("-host", proxy.Addr().String(), "-online", "-newdb", "mydbbak", "-db", "mydb", partialBackupDir)
restore.NewCommand().Run("-host", proxy.Addr().String(), "-online", "-newdb", "mydbbak", "-db", "mydb", partialBackupDir)
// wait for the import to finish, and unlock the shard engine.
time.Sleep(time.Second)
@ -214,7 +218,7 @@ func TestServer_BackupAndRestore(t *testing.T) {
}
// 3. portable should be the same as the non-portable live restore
cmd.Run("-host", hostAddress, "-portable", "-newdb", "mydbbak2", "-db", "mydb", portableBackupDir)
restore.NewCommand().Run("-host", hostAddress, "-portable", "-newdb", "mydbbak2", "-db", "mydb", portableBackupDir)
// wait for the import to finish, and unlock the shard engine.
time.Sleep(time.Second)
@ -228,21 +232,34 @@ func TestServer_BackupAndRestore(t *testing.T) {
t.Fatalf("query results wrong:\n\texp: %s\n\tgot: %s", partialExpected, res)
}
// 4. backup all DB's, then drop them, then restore them and all 3 above tests should pass again.
// now backup
bCmd := backup.NewCommand()
// 4. The shard backup should be same as the portable live restore
restore.NewCommand().Run("-host", hostAddress, "-portable", "-newdb", "mydbbak3", "-db", "mydb", shardBackupDir)
if err := bCmd.Run("-portable", "-host", hostAddress, portableBackupDir); err != nil {
// wait for the import to finish, and unlock the shard engine.
time.Sleep(time.Second)
res, err = s.Query(`select * from "mydbbak3"."forever"."myseries"`)
if err != nil {
t.Fatalf("error querying: %s", err.Error())
}
if res != expected {
t.Fatalf("query results wrong:\n\texp: %s\n\tgot: %s", expected, res)
}
// 5. back up all DBs, then drop them, then restore them; all 4 tests above should pass again.
// now backup
if err := backup.NewCommand().Run("-portable", "-host", hostAddress, portableBackupDir); err != nil {
t.Fatalf("error backing up: %s, hostAddress: %s", err.Error(), hostAddress)
}
_, err = s.Query(`drop database mydb; drop database mydbbak; drop database mydbbak2;`)
_, err = s.Query(`drop database mydb; drop database mydbbak; drop database mydbbak2; drop database mydbbak3`)
if err != nil {
t.Fatalf("Error dropping databases %s", err.Error())
}
// 3. portable should be the same as the non-portable live restore
cmd.Run("-host", hostAddress, "-portable", portableBackupDir)
restore.NewCommand().Run("-host", hostAddress, "-portable", portableBackupDir)
// wait for the import to finish, and unlock the shard engine.
time.Sleep(3 * time.Second)
@ -271,6 +288,15 @@ func TestServer_BackupAndRestore(t *testing.T) {
t.Fatalf("query results wrong:\n\texp: %s\n\tgot: %s", partialExpected, res)
}
res, err = s.Query(`select * from "mydbbak3"."forever"."myseries"`)
if err != nil {
t.Fatalf("error querying: %s", err.Error())
}
if res != expected {
t.Fatalf("query results wrong:\n\texp: %s\n\tgot: %s", expected, res)
}
res, err = s.Query(`select * from "mydb"."forever"."myseries"`)
if err != nil {
t.Fatalf("error querying: %s", err.Error())