pkg/archive: Backup extractor avoids max path limits

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
pull/8449/head
Tiger Kaovilai 2024-11-24 02:04:49 -05:00
parent 9f0026d7dc
commit a2699e765e
8 changed files with 206 additions and 32 deletions
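
For context: when an extracted target path would exceed the 250-character budget, the extractor writes the file under the 64-character hex SHA-256 of its tar entry name and records a hash -> original-name map, which the parser later consults to restore the real names. A minimal standalone sketch of that scheme, assuming only the Go standard library (the variable names and paths below are illustrative, not part of the Velero API):

package main

import (
	"crypto/sha256"
	"fmt"
	"path/filepath"
	"strings"
)

// Mirrors the constant introduced in pkg/archive: 255 is a common file name
// limit, minus 5 for the ".json" extension.
const maxPathLength = 250

func main() {
	dir := "/tmp/velero-extract"                // stand-in for the temp dir
	entry := strings.Repeat("a", 300) + ".json" // tar entry whose joined path is too long
	longNames := map[string]string{}            // sha256 hex -> original entry name

	// Extraction side: shorten the on-disk name when the joined path is too long.
	target := filepath.Join(dir, entry)
	if len(target) > maxPathLength {
		short := fmt.Sprintf("%x", sha256.Sum256([]byte(entry))) // 64 hex chars
		longNames[short] = entry
		target = filepath.Join(dir, short)
	}
	fmt.Println("written to:", target)

	// Parsing side: map the short on-disk name back to the original.
	base := strings.TrimSuffix(filepath.Base(target), ".json")
	if orig, ok := longNames[base]; ok {
		fmt.Println("original name:", orig)
	}
}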

View File

@ -0,0 +1 @@
pkg/archive: Backup extractor avoids max path limits

View File

@ -59,7 +59,7 @@ func InvokeDeleteActions(ctx *Context) error {
}
// get items out of backup tarball into a temp directory
dir, err := archive.NewExtractor(ctx.Log, ctx.Filesystem).UnzipAndExtractBackup(ctx.BackupReader)
dir, longNames, err := archive.NewExtractor(ctx.Log, ctx.Filesystem).UnzipAndExtractBackup(ctx.BackupReader)
if err != nil {
return errors.Wrapf(err, "error extracting backup")
}
@ -71,7 +71,7 @@ func InvokeDeleteActions(ctx *Context) error {
ctx.Log.Debugf("Downloaded and extracted the backup file to: %s", dir)
backupResources, err := archive.NewParser(ctx.Log, ctx.Filesystem).Parse(dir)
backupResources, err := archive.NewParser(ctx.Log, ctx.Filesystem).Parse(dir, longNames)
if existErr := errors.Is(err, archive.ErrNotExist); existErr {
ctx.Log.Debug("ignore invoking delete item actions: ", err)
return nil

View File

@ -19,6 +19,8 @@ package archive
import (
"archive/tar"
"compress/gzip"
"crypto/sha256"
"fmt"
"io"
"path/filepath"
@ -42,11 +44,11 @@ func NewExtractor(log logrus.FieldLogger, fs filesystem.Interface) *Extractor {
}
// UnzipAndExtractBackup extracts a reader on a gzipped tarball to a local temp directory
func (e *Extractor) UnzipAndExtractBackup(src io.Reader) (string, error) {
func (e *Extractor) UnzipAndExtractBackup(src io.Reader) (string, map[string]string, error) {
gzr, err := gzip.NewReader(src)
if err != nil {
e.log.Infof("error creating gzip reader: %v", err)
return "", err
return "", nil, err
}
defer gzr.Close()
@ -66,13 +68,18 @@ func (e *Extractor) writeFile(target string, tarRdr *tar.Reader) error {
return nil
}
func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) {
// 255 is a common limit for file name length.
// Subtract 5 to account for the ".json" extension.
const maxPathLength = 250
// readBackup returns the temp dir containing the backup contents, a map of
// sha256(entry name) -> original long name, and an error.
func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, map[string]string, error) {
dir, err := e.fs.TempDir("", "")
if err != nil {
e.log.Infof("error creating temp dir: %v", err)
return "", err
return "", nil, err
}
longNames := make(map[string]string)
for {
header, err := tarRdr.Next()
@ -81,17 +88,23 @@ func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) {
}
if err != nil {
e.log.Infof("error reading tar: %v", err)
return "", err
return "", nil, err
}
target := filepath.Join(dir, header.Name) //nolint:gosec // Internal usage. No need to check.
// If the target is longer than maxPathLength, we'll use the sha256 of the header name as the filename.
// https://github.com/vmware-tanzu/velero/issues/8434
if len(target) > maxPathLength {
shortSha256Name := fmt.Sprintf("%x", sha256.Sum256([]byte(header.Name))) // sha256 name is 64 characters
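// Record the mapping so the parser can later translate the hashed
// on-disk name back to the original tar entry name.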
longNames[shortSha256Name] = header.Name
target = filepath.Join(dir, shortSha256Name)
}
switch header.Typeflag {
case tar.TypeDir:
err := e.fs.MkdirAll(target, header.FileInfo().Mode())
if err != nil {
e.log.Infof("mkdirall error: %v", err)
return "", err
return "", nil, err
}
case tar.TypeReg:
@ -99,16 +112,15 @@ func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) {
err := e.fs.MkdirAll(filepath.Dir(target), header.FileInfo().Mode())
if err != nil {
e.log.Infof("mkdirall error: %v", err)
return "", err
return "", nil, err
}
// create the file
if err := e.writeFile(target, tarRdr); err != nil {
e.log.Infof("error copying: %v", err)
return "", err
return "", nil, err
}
}
}
return dir, nil
return dir, longNames, nil
}

View File

@ -18,11 +18,15 @@ package archive
import (
"archive/tar"
"bytes"
"compress/gzip"
"io"
"os"
"path/filepath"
"strings"
"testing"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
"github.com/vmware-tanzu/velero/pkg/test"
@ -75,7 +79,7 @@ func TestUnzipAndExtractBackup(t *testing.T) {
file, err := ext.fs.OpenFile(fileName, os.O_RDWR|os.O_CREATE, 0644)
require.NoError(t, err)
_, err = ext.UnzipAndExtractBackup(file.(io.Reader))
_, _, err = ext.UnzipAndExtractBackup(file.(io.Reader))
if tc.wantErr && (err == nil) {
t.Errorf("%s: wanted error but got nil", tc.name)
}
@ -149,3 +153,51 @@ func createRegular(fs filesystem.Interface) (string, error) {
return outName, nil
}
func TestReadBackupWithLongFilenames(t *testing.T) {
log := logrus.New()
fs := test.NewFakeFileSystem()
e := NewExtractor(log, fs)
// Build an in-memory tarball containing a file with a very long name
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
// Create a filename longer than maxPathLength
longFilename := strings.Repeat("a", maxPathLength+10) + ".txt"
content := []byte("test content")
hdr := &tar.Header{
Name: longFilename,
Mode: 0600,
Size: int64(len(content)),
Typeflag: tar.TypeReg,
}
if err := tw.WriteHeader(hdr); err != nil {
t.Fatal(err)
}
if _, err := tw.Write(content); err != nil {
t.Fatal(err)
}
require.NoError(t, tw.Close())
// Read the backup
dir, longNames, err := e.readBackup(tar.NewReader(&buf))
// Verify results
require.NoError(t, err)
require.NotEmpty(t, dir)
require.NotEmpty(t, longNames)
// Verify that a shortened SHA256 name was created and mapped correctly
found := false
for shortName, originalName := range longNames {
if originalName == longFilename {
found = true
// Verify the short name length is within limits
require.LessOrEqual(t, len(filepath.Join(dir, shortName)), maxPathLength)
}
}
require.True(t, found, "Long filename mapping not found")
}

View File

@ -64,7 +64,7 @@ func NewParser(log logrus.FieldLogger, fs filesystem.Interface) *Parser {
// Parse reads an extracted backup on the file system and returns
// a structured catalog of the resources and items contained within it.
func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
func (p *Parser) Parse(dir string, longNames map[string]string) (map[string]*ResourceItems, error) {
// ensure top-level "resources" directory exists, and read subdirectories
// of it, where each one is expected to correspond to a resource.
resourcesDir := filepath.Join(dir, velerov1api.ResourcesDir)
@ -81,9 +81,8 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
p.log.Warnf("Ignoring unexpected file %q in directory %q", resourceDir.Name(), strings.TrimPrefix(resourcesDir, dir+"/"))
continue
}
resourceItems := &ResourceItems{
GroupResource: resourceDir.Name(),
GroupResource: itemNameIfHasLongName(resourceDir.Name(), longNames),
ItemsByNamespace: map[string][]string{},
}
@ -95,7 +94,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
return nil, errors.Wrapf(err, "error checking for existence of directory %q", strings.TrimPrefix(clusterScopedDir, dir+"/"))
}
if exists {
items, err := p.getResourceItemsForScope(clusterScopedDir, dir)
items, err := p.getResourceItemsForScope(clusterScopedDir, dir, longNames)
if err != nil {
return nil, err
}
@ -124,7 +123,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
continue
}
items, err := p.getResourceItemsForScope(filepath.Join(namespaceScopedDir, namespaceDir.Name()), dir)
items, err := p.getResourceItemsForScope(filepath.Join(namespaceScopedDir, namespaceDir.Name()), dir, longNames)
if err != nil {
return nil, err
}
@ -135,7 +134,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
}
}
resources[resourceDir.Name()] = resourceItems
resources[itemNameIfHasLongName(resourceDir.Name(), longNames)] = resourceItems
}
return resources, nil
@ -143,7 +142,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
// getResourceItemsForScope returns the list of items with a namespace or
// cluster-scoped subdirectory for a specific resource.
func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string) ([]string, error) {
func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string, longNames map[string]string) ([]string, error) {
files, err := p.fs.ReadDir(dir)
if err != nil {
return nil, errors.Wrapf(err, "error reading contents of directory %q", strings.TrimPrefix(dir, archiveRootDir+"/"))
@ -156,12 +155,22 @@ func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string) ([]string,
continue
}
items = append(items, strings.TrimSuffix(file.Name(), ".json"))
items = append(items, itemNameIfHasLongName(strings.TrimSuffix(file.Name(), ".json"), longNames))
}
return items, nil
}
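// itemNameIfHasLongName translates a shortened on-disk name back to the
// original tar entry name recorded at extraction time: a name present in
// longNames maps to its long form; anything else (or a nil map) passes
// through unchanged.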
func itemNameIfHasLongName(itemName string, longNames map[string]string) string {
if longNames == nil {
return itemName
}
if longName, ok := longNames[itemName]; ok {
return longName
}
return itemName
}
// checkAndReadDir is a wrapper around fs.DirExists and fs.ReadDir that does checks
// and returns errors if directory cannot be read.
func (p *Parser) checkAndReadDir(dir string) ([]os.FileInfo, error) {
@ -183,7 +192,7 @@ func (p *Parser) checkAndReadDir(dir string) ([]os.FileInfo, error) {
// ParseGroupVersions extracts the versions for each API Group from the backup
// directory names and stores them in a metav1 APIGroup object.
func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, error) {
func (p *Parser) ParseGroupVersions(dir string, longNames map[string]string) (map[string]metav1.APIGroup, error) {
resourcesDir := filepath.Join(dir, velerov1api.ResourcesDir)
// Get the subdirectories inside the "resources" directory. The subdirectories
@ -197,8 +206,9 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err
// Loop through the resource.group directory names.
for _, rgd := range rgDirs {
rgdName := itemNameIfHasLongName(rgd.Name(), longNames)
group := metav1.APIGroup{
Name: extractGroupName(rgd.Name()),
Name: extractGroupName(rgdName),
}
rgdPath := filepath.Join(resourcesDir, rgd.Name())
@ -213,7 +223,7 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err
var supportedVersions []metav1.GroupVersionForDiscovery
for _, gvd := range gvDirs {
gvdName := gvd.Name()
gvdName := itemNameIfHasLongName(gvd.Name(), longNames)
// Don't save the namespaces or clusters directories in list of
// supported API Group Versions.
@ -241,7 +251,7 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err
group.Versions = supportedVersions
resourceAGs[rgd.Name()] = group
resourceAGs[rgdName] = group
}
return resourceAGs, nil

View File

@ -32,6 +32,7 @@ func TestParse(t *testing.T) {
name string
files []string
dir string
longNames map[string]string
wantErrMsg error
want map[string]*ResourceItems
}{
@ -89,6 +90,59 @@ func TestParse(t *testing.T) {
},
},
},
{
name: "a mix of long api name, long file name cluster-scoped and namespaced items across multiple resources are correctly returned",
dir: "root-dir",
longNames: map[string]string{
"somesha256": "item-1",
"longapinamesha256": "thelongestapinameyouhaveeverseenever",
},
files: []string{
"root-dir/resources/widgets.foo/cluster/somesha256.json",
"root-dir/resources/widgets.foo/cluster/item-2.json",
"root-dir/resources/widgets.foo/namespaces/ns-1/item-1.json",
"root-dir/resources/widgets.foo/namespaces/ns-1/item-2.json",
"root-dir/resources/widgets.foo/namespaces/ns-2/item-1.json",
"root-dir/resources/longapinamesha256/namespaces/ns-2/item-2.json",
"root-dir/resources/dongles.foo/cluster/item-3.json",
"root-dir/resources/dongles.foo/cluster/item-4.json",
"root-dir/resources/dongles.bar/namespaces/ns-3/item-3.json",
"root-dir/resources/dongles.bar/namespaces/ns-3/item-4.json",
"root-dir/resources/dongles.bar/namespaces/ns-4/item-5.json",
"root-dir/resources/dongles.bar/namespaces/ns-4/item-6.json",
},
want: map[string]*ResourceItems{
"widgets.foo": {
GroupResource: "widgets.foo",
ItemsByNamespace: map[string][]string{
"": {"item-2", "item-1"},
"ns-1": {"item-1", "item-2"},
"ns-2": {"item-1"},
},
},
"thelongestapinameyouhaveeverseenever": {
GroupResource: "thelongestapinameyouhaveeverseenever",
ItemsByNamespace: map[string][]string{
"ns-2": {"item-2"},
},
},
"dongles.foo": {
GroupResource: "dongles.foo",
ItemsByNamespace: map[string][]string{
"": {"item-3", "item-4"},
},
},
"dongles.bar": {
GroupResource: "dongles.bar",
ItemsByNamespace: map[string][]string{
"ns-3": {"item-3", "item-4"},
"ns-4": {"item-5", "item-6"},
},
},
},
},
}
for _, tc := range tests {
@ -108,7 +162,7 @@ func TestParse(t *testing.T) {
}
}
res, err := p.Parse(tc.dir)
res, err := p.Parse(tc.dir, tc.longNames)
if tc.wantErrMsg != nil {
assert.ErrorIs(t, err, tc.wantErrMsg, "Error should be: %v, got: %v", tc.wantErrMsg, err)
} else {
@ -125,6 +179,7 @@ func TestParseGroupVersions(t *testing.T) {
files []string
backupDir string
wantErrMsg error
longNames map[string]string
want map[string]metav1.APIGroup
}{
{
@ -203,6 +258,48 @@ func TestParseGroupVersions(t *testing.T) {
},
},
},
{
name: "when resource directories and files have long names, they are correctly mapped and parsed",
backupDir: "/var/folders",
longNames: map[string]string{
"longhash123": "pods",
"longhash456": "deployments.apps",
"filehash789": "very-long-pod-name-that-exceeds-normal-length-limits-123456789",
"filehashABC": "another-long-deployment-name-with-many-characters-123456789",
},
files: []string{
"/var/folders/resources/longhash123/v1-preferredversion/namespaces/default/filehash789.json",
"/var/folders/resources/longhash456/v1-preferredversion/namespaces/default/filehashABC.json",
},
want: map[string]metav1.APIGroup{
"pods": {
Name: "",
Versions: []metav1.GroupVersionForDiscovery{
{
GroupVersion: "v1",
Version: "v1",
},
},
PreferredVersion: metav1.GroupVersionForDiscovery{
GroupVersion: "v1",
Version: "v1",
},
},
"deployments.apps": {
Name: "apps",
Versions: []metav1.GroupVersionForDiscovery{
{
GroupVersion: "apps/v1",
Version: "v1",
},
},
PreferredVersion: metav1.GroupVersionForDiscovery{
GroupVersion: "apps/v1",
Version: "v1",
},
},
},
},
}
for _, tc := range tests {
@ -222,7 +319,7 @@ func TestParseGroupVersions(t *testing.T) {
}
}
res, err := p.ParseGroupVersions(tc.backupDir)
res, err := p.ParseGroupVersions(tc.backupDir, tc.longNames)
if tc.wantErrMsg != nil {
assert.ErrorIs(t, err, tc.wantErrMsg, "Error should be: %v, got: %v", tc.wantErrMsg, err)
} else {

View File

@ -155,7 +155,7 @@ func (ctx *restoreContext) gatherSourceTargetUserGroupVersions() (
map[string]metav1.APIGroup,
error,
) {
sourceRGVersions, err := archive.NewParser(ctx.log, ctx.fileSystem).ParseGroupVersions(ctx.restoreDir)
sourceRGVersions, err := archive.NewParser(ctx.log, ctx.fileSystem).ParseGroupVersions(ctx.restoreDir, ctx.restoreDirLongNames)
if err != nil {
return nil, nil, nil, errors.Wrap(err, "parsing versions from directory names")
}

View File

@ -335,6 +335,7 @@ type restoreContext struct {
backupReader io.Reader
restore *velerov1api.Restore
restoreDir string
restoreDirLongNames map[string]string
resourceIncludesExcludes *collections.IncludesExcludes
resourceStatusIncludesExcludes *collections.IncludesExcludes
namespaceIncludesExcludes *collections.IncludesExcludes
@ -425,7 +426,7 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) {
ctx.log.Infof("Starting restore of backup %s", kube.NamespaceAndName(ctx.backup))
dir, err := archive.NewExtractor(ctx.log, ctx.fileSystem).UnzipAndExtractBackup(ctx.backupReader)
dir, longNames, err := archive.NewExtractor(ctx.log, ctx.fileSystem).UnzipAndExtractBackup(ctx.backupReader)
if err != nil {
ctx.log.Infof("error unzipping and extracting: %v", err)
errs.AddVeleroError(err)
@ -455,8 +456,9 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) {
// Need to set this for additionalItems to be restored.
ctx.restoreDir = dir
ctx.restoreDirLongNames = longNames
backupResources, err := archive.NewParser(ctx.log, ctx.fileSystem).Parse(ctx.restoreDir)
backupResources, err := archive.NewParser(ctx.log, ctx.fileSystem).Parse(ctx.restoreDir, ctx.restoreDirLongNames)
// If ErrNotExist occurs, it implies that the backup to be restored includes zero items.
// Need to add a warning about it and jump out of the function.
if errors.Cause(err) == archive.ErrNotExist {