Merge branch 'main' into pvnullfix

Signed-off-by: Anshul Ahuja <anshul.ahu@gmail.com>
pull/8174/head
Anshul Ahuja 2024-09-02 04:53:53 +00:00
commit 79156bedad
294 changed files with 12335 additions and 11822 deletions

View File

@ -11,11 +11,12 @@ jobs:
build-cli:
runs-on: ubuntu-latest
steps:
- name: Check out the code
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.22'
id: go
go-version-file: 'go.mod'
# Look for a CLI that's made for this PR
- name: Fetch built CLI
id: cache
@ -29,26 +30,11 @@ jobs:
# This key controls the prefixes that we'll look at in the cache to restore from
restore-keys: |
velero-${{ github.event.pull_request.number }}-
- name: Fetch cached go modules
uses: actions/cache@v4
if: steps.cache.outputs.cache-hit != 'true'
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- name: Check out the code
uses: actions/checkout@v4
if: steps.cache.outputs.cache-hit != 'true'
# If no binaries were built for this PR, build it now.
- name: Build Velero CLI
if: steps.cache.outputs.cache-hit != 'true'
run: |
make local
# Check the common CLI against all Kubernetes versions
crd-check:
needs: build-cli

View File

@ -11,11 +11,12 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Check out the code
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.22'
id: go
go-version-file: 'go.mod'
# Look for a CLI that's made for this PR
- name: Fetch built CLI
id: cli-cache
@ -31,17 +32,6 @@ jobs:
path: ./velero.tar
# The cache key a combination of the current PR number and the commit SHA
key: velero-image-${{ github.event.pull_request.number }}-${{ github.sha }}
- name: Fetch cached go modules
uses: actions/cache@v4
if: steps.cli-cache.outputs.cache-hit != 'true'
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- name: Check out the code
uses: actions/checkout@v4
if: steps.cli-cache.outputs.cache-hit != 'true' || steps.image-cache.outputs.cache-hit != 'true'
# If no binaries were built for this PR, build it now.
- name: Build Velero CLI
if: steps.cli-cache.outputs.cache-hit != 'true'
@ -67,25 +57,20 @@ jobs:
- 1.27.10
- 1.28.6
- 1.29.1
focus:
# tests to focus on, use `|` to concatenate multiple regexes to run on the same job
# ordered according to e2e_suite_test.go order
- Basic\]\[ClusterResource
- ResourceFiltering
- ResourceModifier|Backups|PrivilegesMgmt\]\[SSR
- Schedule\]\[OrderedResources
- NamespaceMapping\]\[Single\]\[Restic|NamespaceMapping\]\[Multiple\]\[Restic
- Basic\]\[Nodeport
- Basic\]\[StorageClass
labels:
# labels are used to filter running E2E cases
- Basic && (ClusterResource || NodePort || StorageClass)
- ResourceFiltering && !Restic
- ResourceModifier || (Backups && BackupsSync) || PrivilegesMgmt || OrderedResources
- (NamespaceMapping && Single && Restic) || (NamespaceMapping && Multiple && Restic)
fail-fast: false
steps:
- name: Check out the code
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.22'
id: go
- name: Check out the code
uses: actions/checkout@v4
go-version-file: 'go.mod'
- name: Install MinIO
run:
docker run -d --rm -p 9000:9000 -e "MINIO_ACCESS_KEY=minio" -e "MINIO_SECRET_KEY=minio123" -e "MINIO_DEFAULT_BUCKETS=bucket,additional-bucket" bitnami/minio:2021.6.17-debian-10-r7
@ -108,14 +93,6 @@ jobs:
- name: Load Velero Image
run:
kind load image-archive velero.tar
# always try to fetch the cached go modules as the e2e test needs it either
- name: Fetch cached go modules
uses: actions/cache@v4
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- name: Run E2E test
run: |
cat << EOF > /tmp/credential
@ -128,13 +105,18 @@ jobs:
curl -LO https://dl.k8s.io/release/v${{ matrix.k8s }}/bin/linux/amd64/kubectl
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
GOPATH=~/go CLOUD_PROVIDER=kind \
OBJECT_STORE_PROVIDER=aws BSL_CONFIG=region=minio,s3ForcePathStyle="true",s3Url=http://$(hostname -i):9000 \
CREDS_FILE=/tmp/credential BSL_BUCKET=bucket \
ADDITIONAL_OBJECT_STORE_PROVIDER=aws ADDITIONAL_BSL_CONFIG=region=minio,s3ForcePathStyle="true",s3Url=http://$(hostname -i):9000 \
ADDITIONAL_CREDS_FILE=/tmp/credential ADDITIONAL_BSL_BUCKET=additional-bucket \
GINKGO_FOCUS='${{ matrix.focus }}' VELERO_IMAGE=velero:pr-test \
GINKGO_SKIP='SKIP_KIND|pv-backup|Restic|Snapshot|LongTime' \
GOPATH=~/go \
CLOUD_PROVIDER=kind \
OBJECT_STORE_PROVIDER=aws \
BSL_CONFIG=region=minio,s3ForcePathStyle="true",s3Url=http://$(hostname -i):9000 \
CREDS_FILE=/tmp/credential \
BSL_BUCKET=bucket \
ADDITIONAL_OBJECT_STORE_PROVIDER=aws \
ADDITIONAL_BSL_CONFIG=region=minio,s3ForcePathStyle="true",s3Url=http://$(hostname -i):9000 \
ADDITIONAL_CREDS_FILE=/tmp/credential \
ADDITIONAL_BSL_BUCKET=additional-bucket \
VELERO_IMAGE=velero:pr-test \
GINKGO_LABELS="${{ matrix.labels }}" \
make -C test/ run-e2e
timeout-minutes: 30
- name: Upload debug bundle
@ -142,4 +124,4 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: DebugBundle
path: /home/runner/work/velero/velero/test/e2e/debug-bundle*
path: /home/runner/work/velero/velero/test/e2e/debug-bundle*

View File

@ -7,20 +7,12 @@ jobs:
strategy:
fail-fast: false
steps:
- name: Check out the code
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.22'
id: go
- name: Check out the code
uses: actions/checkout@v4
- name: Fetch cached go modules
uses: actions/cache@v4
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
go-version-file: 'go.mod'
- name: Make ci
run: make ci
- name: Upload test coverage

View File

@ -15,7 +15,7 @@ jobs:
with:
# ignore the config/.../crd.go file as it's generated binary data that is edited elswhere.
skip: .git,*.png,*.jpg,*.woff,*.ttf,*.gif,*.ico,./config/crd/v1beta1/crds/crds.go,./config/crd/v1/crds/crds.go,./config/crd/v2alpha1/crds/crds.go,./go.sum,./LICENSE
ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme
ignore_words_list: iam,aks,ist,bridget,ue,shouldnot,atleast,notin,sme,optin
check_filenames: true
check_hidden: true

View File

@ -12,7 +12,6 @@ jobs:
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
id: go
- name: Linter check
uses: golangci/golangci-lint-action@v6
with:

View File

@ -14,95 +14,82 @@ jobs:
name: Build
runs-on: ubuntu-latest
steps:
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.22'
id: go
- uses: actions/checkout@v4
# Fix issue of setup-gcloud
- run: |
sudo apt-get install python2.7
export CLOUDSDK_PYTHON="/usr/bin/python2"
- id: 'auth'
uses: google-github-actions/auth@v2
with:
credentials_json: '${{ secrets.GCS_SA_KEY }}'
- name: 'set up GCloud SDK'
uses: google-github-actions/setup-gcloud@v2
- name: 'use gcloud CLI'
run: |
gcloud info
- name: Set up QEMU
id: qemu
uses: docker/setup-qemu-action@v3
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
with:
version: latest
- name: Build
run: |
make local
# Clean go cache to ease the build environment storage pressure.
go clean -modcache -cache
- name: Test
run: make test
- name: Upload test coverage
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: coverage.out
verbose: true
# Use the JSON key in secret to login gcr.io
- uses: 'docker/login-action@v3'
with:
registry: 'gcr.io' # or REGION.docker.pkg.dev
username: '_json_key'
password: '${{ secrets.GCR_SA_KEY }}'
# Only try to publish the container image from the root repo; forks don't have permission to do so and will always get failures.
- name: Publish container image
if: github.repository == 'vmware-tanzu/velero'
run: |
sudo swapoff -a
sudo rm -f /mnt/swapfile
docker system prune -a --force
- name: Check out the code
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
# Fix issue of setup-gcloud
- run: |
sudo apt-get install python2.7
export CLOUDSDK_PYTHON="/usr/bin/python2"
- id: 'auth'
uses: google-github-actions/auth@v2
with:
credentials_json: '${{ secrets.GCS_SA_KEY }}'
- name: 'set up GCloud SDK'
uses: google-github-actions/setup-gcloud@v2
- name: 'use gcloud CLI'
run: |
gcloud info
- name: Set up QEMU
id: qemu
uses: docker/setup-qemu-action@v3
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
with:
version: latest
- name: Build
run: |
make local
# Clean go cache to ease the build environment storage pressure.
go clean -modcache -cache
- name: Test
run: make test
- name: Upload test coverage
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: coverage.out
verbose: true
# Use the JSON key in secret to login gcr.io
- uses: 'docker/login-action@v3'
with:
registry: 'gcr.io' # or REGION.docker.pkg.dev
username: '_json_key'
password: '${{ secrets.GCR_SA_KEY }}'
# Only try to publish the container image from the root repo; forks don't have permission to do so and will always get failures.
- name: Publish container image
if: github.repository == 'vmware-tanzu/velero'
run: |
sudo swapoff -a
sudo rm -f /mnt/swapfile
docker system prune -a --force
# Build and push Velero image to docker registry
docker login -u ${{ secrets.DOCKER_USER }} -p ${{ secrets.DOCKER_PASSWORD }}
VERSION=$(./hack/docker-push.sh | grep 'VERSION:' | awk -F: '{print $2}' | xargs)
# Build and push Velero image to docker registry
docker login -u ${{ secrets.DOCKER_USER }} -p ${{ secrets.DOCKER_PASSWORD }}
VERSION=$(./hack/docker-push.sh | grep 'VERSION:' | awk -F: '{print $2}' | xargs)
# Upload Velero image package to GCS
source hack/ci/build_util.sh
BIN=velero
RESTORE_HELPER_BIN=velero-restore-helper
GCS_BUCKET=velero-builds
VELERO_IMAGE=${BIN}-${VERSION}
VELERO_RESTORE_HELPER_IMAGE=${RESTORE_HELPER_BIN}-${VERSION}
VELERO_IMAGE_FILE=${VELERO_IMAGE}.tar.gz
VELERO_RESTORE_HELPER_IMAGE_FILE=${VELERO_RESTORE_HELPER_IMAGE}.tar.gz
VELERO_IMAGE_BACKUP_FILE=${VELERO_IMAGE}-'build.'${GITHUB_RUN_NUMBER}.tar.gz
VELERO_RESTORE_HELPER_IMAGE_BACKUP_FILE=${VELERO_RESTORE_HELPER_IMAGE}-'build.'${GITHUB_RUN_NUMBER}.tar.gz
# Upload Velero image package to GCS
source hack/ci/build_util.sh
BIN=velero
RESTORE_HELPER_BIN=velero-restore-helper
GCS_BUCKET=velero-builds
VELERO_IMAGE=${BIN}-${VERSION}
VELERO_RESTORE_HELPER_IMAGE=${RESTORE_HELPER_BIN}-${VERSION}
VELERO_IMAGE_FILE=${VELERO_IMAGE}.tar.gz
VELERO_RESTORE_HELPER_IMAGE_FILE=${VELERO_RESTORE_HELPER_IMAGE}.tar.gz
VELERO_IMAGE_BACKUP_FILE=${VELERO_IMAGE}-'build.'${GITHUB_RUN_NUMBER}.tar.gz
VELERO_RESTORE_HELPER_IMAGE_BACKUP_FILE=${VELERO_RESTORE_HELPER_IMAGE}-'build.'${GITHUB_RUN_NUMBER}.tar.gz
cp ${VELERO_IMAGE_FILE} ${VELERO_IMAGE_BACKUP_FILE}
cp ${VELERO_RESTORE_HELPER_IMAGE_FILE} ${VELERO_RESTORE_HELPER_IMAGE_BACKUP_FILE}
cp ${VELERO_IMAGE_FILE} ${VELERO_IMAGE_BACKUP_FILE}
cp ${VELERO_RESTORE_HELPER_IMAGE_FILE} ${VELERO_RESTORE_HELPER_IMAGE_BACKUP_FILE}
uploader ${VELERO_IMAGE_FILE} ${GCS_BUCKET}
uploader ${VELERO_RESTORE_HELPER_IMAGE_FILE} ${GCS_BUCKET}
uploader ${VELERO_IMAGE_BACKUP_FILE} ${GCS_BUCKET}
uploader ${VELERO_RESTORE_HELPER_IMAGE_BACKUP_FILE} ${GCS_BUCKET}
uploader ${VELERO_IMAGE_FILE} ${GCS_BUCKET}
uploader ${VELERO_RESTORE_HELPER_IMAGE_FILE} ${GCS_BUCKET}
uploader ${VELERO_IMAGE_BACKUP_FILE} ${GCS_BUCKET}
uploader ${VELERO_RESTORE_HELPER_IMAGE_BACKUP_FILE} ${GCS_BUCKET}

View File

@ -20,4 +20,4 @@ jobs:
days-before-pr-close: -1
# Only issues made after Feb 09 2021.
start-date: "2021-09-02T00:00:00"
exempt-issue-labels: "Epic,Area/CLI,Area/Cloud/AWS,Area/Cloud/Azure,Area/Cloud/GCP,Area/Cloud/vSphere,Area/CSI,Area/Design,Area/Documentation,Area/Plugins,Bug,Enhancement/User,kind/requirement,kind/refactor,kind/tech-debt,limitation,Needs investigation,Needs triage,Needs Product,P0 - Hair on fire,P1 - Important,P2 - Long-term important,P3 - Wouldn't it be nice if...,Product Requirements,Restic - GA,Restic,release-blocker,Security"
exempt-issue-labels: "Epic,Area/CLI,Area/Cloud/AWS,Area/Cloud/Azure,Area/Cloud/GCP,Area/Cloud/vSphere,Area/CSI,Area/Design,Area/Documentation,Area/Plugins,Bug,Enhancement/User,kind/requirement,kind/refactor,kind/tech-debt,limitation,Needs investigation,Needs triage,Needs Product,P0 - Hair on fire,P1 - Important,P2 - Long-term important,P3 - Wouldn't it be nice if...,Product Requirements,Restic - GA,Restic,release-blocker,Security,backlog"

View File

@ -238,14 +238,9 @@ linters-settings:
testifylint:
# TODO: enable them all
disable:
- error-is-as
- expected-actual
- go-require
- float-compare
- require-error
- suite-dont-use-pkg
- suite-extra-assert-call
- suite-thelper
enable-all: true
testpackage:
# regexp pattern to skip files

View File

@ -13,7 +13,7 @@
# limitations under the License.
# Velero binary build section
FROM --platform=$BUILDPLATFORM golang:1.22-bookworm as velero-builder
FROM --platform=$BUILDPLATFORM golang:1.22-bookworm AS velero-builder
ARG GOPROXY
ARG BIN
@ -47,7 +47,7 @@ RUN mkdir -p /output/usr/bin && \
go clean -modcache -cache
# Restic binary build section
FROM --platform=$BUILDPLATFORM golang:1.22-bookworm as restic-builder
FROM --platform=$BUILDPLATFORM golang:1.22-bookworm AS restic-builder
ARG BIN
ARG TARGETOS

View File

@ -0,0 +1 @@
Descriptive restore error when restoring into a terminating namespace.

View File

@ -1 +1 @@
New data path for data mover ms according to design #7574
New data path for data mover ms according to design #7576

View File

@ -0,0 +1 @@
Add design for backup repository configurations for issue #7620, #7301

View File

@ -0,0 +1 @@
For issue #7700 and #7747, add the design for backup PVC configurations

View File

@ -0,0 +1 @@
New data path for data mover ms according to design #7576

View File

@ -0,0 +1 @@
Data mover ms watcher according to design #7576

View File

@ -0,0 +1 @@
Make PVPatchMaximumDuration timeout configurable

View File

@ -0,0 +1 @@
Created new ItemBlockAction (IBA) plugin type

View File

@ -0,0 +1 @@
Avoid wrapping failed PVB status with empty message.

View File

@ -0,0 +1 @@
Data mover micro service backup according to design #7576

View File

@ -0,0 +1 @@
Internal ItemBlockAction plugins

View File

@ -0,0 +1 @@
Data mover micro service restore according to design #7576

View File

@ -0,0 +1 @@
Data mover micro service DUCR/DDCR controller refactor according to design #7576

View File

@ -0,0 +1 @@
Updates to IBM COS documentation to match current version

View File

@ -0,0 +1 @@
According to design #7576, after node-agent restarts, if a DU/DD is in InProgress status, re-capture the data mover ms pod and continue the execution

View File

@ -0,0 +1 @@
Patch dbr's status when error happens

View File

@ -0,0 +1 @@
Fix issue #7620, add backup repository configuration implementation and support cacheLimit configuration for Kopia repo

View File

@ -0,0 +1 @@
Fix issue #8072, add the warning messages for restic deprecation

View File

@ -0,0 +1 @@
Fix issue #8032, make node-agent configMap name configurable

View File

@ -0,0 +1 @@
Add support for backup PVC configuration

View File

@ -0,0 +1 @@
Delete generated k8s client and informer.

View File

@ -0,0 +1 @@
Add docs for backup pvc config support

View File

@ -0,0 +1 @@
Modify E2E and perf test report generated directory

View File

@ -0,0 +1 @@
Fix issue #7620, add doc for backup repo config

View File

@ -0,0 +1 @@
Add resource modifier for velero restore describe CLI

View File

@ -0,0 +1 @@
Apply backupPVCConfig to backupPod volume spec

View File

@ -0,0 +1 @@
Fix issue #8134, allow to config resource request/limit for data mover micro service pods

View File

@ -0,0 +1 @@
Fix issue #8155, Merge Kopia upstream commits for critical issue fixes and performance improvements

View File

@ -54,6 +54,13 @@ spec:
description: MaintenanceFrequency is how often maintenance should
be run.
type: string
repositoryConfig:
additionalProperties:
type: string
description: RepositoryConfig is for repository-specific configuration
fields.
nullable: true
type: object
repositoryType:
description: RepositoryType indicates the type of the backend repository
enum:

File diff suppressed because one or more lines are too long

View File

@ -26,8 +26,8 @@ Therefore, in order to improve the compatibility, it is worthy to configure the
## Non-Goals
- It is also beneficial to support VGDP instances affinity for PodVolume backup/restore, however, it is not possible since VGDP instances for PodVolume backup/restore should always run in the node where the source/target pods are created.
- It is also beneficial to support VGDP instances affinity for data movement restores, however, it is not possible in some cases. For example, when the `volumeBindingMode` in the storageclass is `WaitForFirstConsumer`, the restore volume must be mounted in the node where the target pod is scheduled, so the VGDP instance must run in the same node. On the other hand, considering the fact that restores may not frequently and centrally run, we will not support data movement restores.
- As elaberated in the [Volume Snapshot Data Movement Design][2], the Exposer may take different ways to expose snapshots, i.e., through backup pods (this is the only way supported at present). The implementation section below only considers this approach currently, if a new expose method is introduced in future, the definition of the affinity configurations and behaviors should still work, but we may need a new implementation.
- It is also beneficial to support VGDP instances affinity for data movement restores, however, it is not possible in some cases. For example, when the `volumeBindingMode` in the StorageClass is `WaitForFirstConsumer`, the restore volume must be mounted in the node where the target pod is scheduled, so the VGDP instance must run in the same node. On the other hand, considering the fact that restores may not frequently and centrally run, we will not support data movement restores.
- As elaborated in the [Volume Snapshot Data Movement Design][2], the Exposer may take different ways to expose snapshots, i.e., through backup pods (this is the only way supported at present). The implementation section below only considers this approach currently, if a new expose method is introduced in future, the definition of the affinity configurations and behaviors should still work, but we may need a new implementation.
## Solution

View File

@ -0,0 +1,358 @@
# Velero Backup performance Improvements and VolumeGroupSnapshot enablement
There are two different goals here, linked by a single primary missing feature in the Velero backup workflow.
The first goal is to enhance backup performance by allowing the primary backup controller to run in multiple threads, enabling Velero to back up multiple items at the same time for a given backup.
The second goal is to enable Velero to eventually support VolumeGroupSnapshots.
For both of these goals, Velero needs a way to determine which items should be backed up together.
This design proposal will include two development phases:
- Phase 1 will refactor the backup workflow to identify blocks of related items that should be backed up together, and then coordinate backup hooks among items in the block.
- Phase 2 will add multiple worker threads for backing up item blocks, so instead of backing up each block as it is identified, the Velero backup workflow will add the block to a channel and one of the workers will pick it up.
- Actual support for VolumeGroupSnapshots is out-of-scope here and will be handled in a future design proposal, but the item block refactor introduced in Phase 1 is a primary building block for this future proposal.
## Background
Currently, during backup processing, the main Velero backup controller runs in a single thread, completely finishing the primary backup processing for one resource before moving on to the next one.
We can improve the overall backup performance by backing up multiple items for a backup at the same time, but before we can do this we must first identify resources that need to be backed up together.
Generally speaking, resources that need to be backed up together are resources with interdependencies -- pods with their PVCs, PVCs with their PVs, groups of pods that form a single application, CRs, pods, and other resources that belong to the same operator, etc.
As part of this initial refactoring, once these "Item Blocks" are identified, an additional change will be to move pod hook processing up to the ItemBlock level.
If there are multiple pods in the ItemBlock, pre-hooks for all pods will be run before backing up the items, followed by post-hooks for all pods.
This change to hook processing is another prerequisite for future VolumeGroupSnapshot support, since supporting this will require backing up the pods and volumes together for any volumes which belong to the same group.
Once we are backing up items by block, the next step will be to create multiple worker threads to process and back up ItemBlocks, so that we can back up multiple ItemBlocks at the same time.
In looking at the different kinds of large backups that Velero must deal with, two obvious scenarios come to mind:
1. Backups with a relatively small number of large volumes
2. Backups with a large number of relatively small volumes.
In case 1, the majority of the time spent on the backup is in the asynchronous phases -- CSI snapshot creation actions after the snapshot handle exists, and DataUpload processing. In that case, parallel item processing will likely have a minimal impact on overall backup completion time.
In case 2, the majority of time spent on the backup will likely be during the synchronous actions. Especially for CSI snapshot creation, waiting for the VSC snapshot handle to exist will take a significant amount of time with thousands of volumes. This is the sort of use case which will benefit the most from parallel item processing.
## Goals
- Identify groups of related items to back up together (ItemBlocks).
- Manage backup hooks at the ItemBlock level rather than per-item.
- Using worker threads, back up ItemBlocks at the same time.
## Non Goals
- Support VolumeGroupSnapshots: this is a future feature, although certain prerequisites for this enhancement are included in this proposal.
- Process multiple backups in parallel: this is a future feature, although certain prerequisites for this enhancement are included in this proposal.
- Refactoring plugin infrastructure to avoid RPC calls for internal plugins.
- Restore performance improvements: this is potentially a future feature
## High-Level Design
### ItemBlock concept
The updated design is based on a new struct/type called `ItemBlock`.
Essentially, an `ItemBlock` is a group of items that must be backed up together in order to guarantee backup integrity.
When we eventually split item backup across multiple worker threads, `ItemBlocks` will be kept together as the basic unit of backup.
To facilitate this, a new plugin type, `ItemBlockAction` will allow relationships between items to be identified by velero -- any resources that must be backed up with other resources will need IBA plugins defined for them.
Examples of `ItemBlocks` include:
1. A pod, its mounted PVCs, and the bound PVs for those PVCs.
2. A VolumeGroup (related PVCs and PVs) along with any pods mounting these volumes.
3. For a ReadWriteMany PVC, the PVC, its bound PV, and all pods mounting this PVC.
### Phase 1: ItemBlock processing
- A new plugin type, `ItemBlockAction`, will be created
- `ItemBlockAction` will contain the API method `GetRelatedItems`, which will be needed for determining which items to group together into `ItemBlocks`.
- When processing the list of items returned from the item collector, instead of simply calling `BackupItem` on each in turn, we will use the `GetRelatedItems` API call to determine other items to include with the current item in an ItemBlock. Repeat recursively on each item returned.
- Don't include an item in more than one ItemBlock -- if the next item from the item collector is already in a block, skip it.
- Once ItemBlock is determined, call new func `BackupItemBlock` instead of `BackupItem`.
- New func `BackupItemBlock` will call pre hooks for any pods in the block, then back up the items in the block (`BackupItem` will no longer run hooks directly), then call post hooks for any pods in the block.
- The finalize phase will not be affected by the ItemBlock design, since this is just updating resources after async operations are completed on the items and there is no need to run these updates in parallel.
### Phase 2: Process ItemBlocks for a single backup in multiple threads
- Concurrent `BackupItemBlock` operations will be executed by worker threads invoked by the backup controller, which will communicate with the backup controller operation via a shared channel.
- The ItemBlock processing loop implemented in Phase 1 will be modified to send each newly-created ItemBlock to the shared channel rather than calling `BackupItemBlock` inline.
- Users will be able to configure the number of workers available for concurrent `BackupItemBlock` operations.
- Access to the BackedUpItems map must be synchronized
## Detailed Design
### Phase 1: ItemBlock processing
#### New ItemBlockAction plugin type
In order for Velero to identify groups of items to back up together in an ItemBlock, we need a way to identify items which need to be backed up along with the current item. While the current `Execute` BackupItemAction method does return a list of additional items which are required by the current item, we need to know this *before* we start the item backup. To support this, we need a new plugin type, `ItemBlockAction` (IBA) with an API method, `GetRelatedItems` which Velero will call on each item as it processes. The expectation is that the registered IBA plugins will return the same items as returned as additional items by the BIA `Execute` method, with the exception that items which are not created until calling `Execute` should not be returned here, as they don't exist yet.
#### Proto definition (compiled into golang by protoc)
The ItemBlockAction plugin type is defined as follows:
```
service ItemBlockAction {
    rpc AppliesTo(ItemBlockActionAppliesToRequest) returns (ItemBlockActionAppliesToResponse);
    rpc GetRelatedItems(ItemBlockActionGetRelatedItemsRequest) returns (ItemBlockActionGetRelatedItemsResponse);
}

message ItemBlockActionAppliesToRequest {
    string plugin = 1;
}

message ItemBlockActionAppliesToResponse {
    ResourceSelector ResourceSelector = 1;
}

message ItemBlockActionGetRelatedItemsRequest {
    string plugin = 1;
    bytes item = 2;
    bytes backup = 3;
}

message ItemBlockActionGetRelatedItemsResponse {
    repeated generated.ResourceIdentifier relatedItems = 1;
}
```
A new PluginKind, `ItemBlockActionV1`, will be created, and the backup process will be modified to use this plugin kind.
Any BIA plugin which returns additional items from `Execute()` that need to be backed up at the same time as (or sequentially in the same worker thread as) the current item should add a new IBA plugin that returns these same items (minus any which won't exist before BIA `Execute()` is called).
This mainly applies to plugins that operate on pods which reference resources that must be backed up along with the pod (and are potentially affected by pod hooks), or to plugins which connect multiple pods whose volumes should be backed up at the same time.
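For reference, the sketch below shows one possible shape for the Go-side `ItemBlockAction` interface corresponding to the proto service above. The package name, import paths, and method signatures are assumptions that mirror the example plugin later in this document, not the final API.

```go
// Sketch only: an illustrative Go interface for ItemBlockAction v1.
// Package name and import paths are assumptions, not the final API.
package itemblockaction

import (
	velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
	"github.com/vmware-tanzu/velero/pkg/plugin/velero"
	"k8s.io/apimachinery/pkg/runtime"
)

type ItemBlockAction interface {
	// AppliesTo returns a selector describing which resources this action
	// should be invoked for.
	AppliesTo() (velero.ResourceSelector, error)

	// GetRelatedItems returns the other items that must be grouped into the
	// same ItemBlock as the given item.
	GetRelatedItems(item runtime.Unstructured, backup *velerov1api.Backup) ([]velero.ResourceIdentifier, error)
}
```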
### Changes to processing item list from the Item Collector
#### New structs ItemBlock and ItemBlockItem
```go
type ItemBlock struct {
	log logrus.FieldLogger
	// This is a reference to the shared itemBackupper for the backup
	itemBackupper *itemBackupper
	Items         []ItemBlockItem
}

type ItemBlockItem struct {
	gr           schema.GroupResource
	item         *unstructured.Unstructured
	preferredGVR schema.GroupVersionResource
}
```
#### Current workflow
In the `BackupWithResolvers` func, the current Velero implementation iterates over the list of items for backup returned by the Item Collector. For each item, Velero loads the item from the file created by the Item Collector, calls `backupItem`, updates the GR map if successful, removes the (temporary) file containing item metadata, and updates progress for the backup.
#### Modifications to the loop over ItemCollector results
The `kubernetesResource` struct used by the item collector will be modified to add an `orderedResource` bool which will be set true for all of the resources moved to the beginning of the list as a result of being ordered resources.
While the item collector already puts ordered resources first, there is no indication in the list which of these initial items are from the ordered resources list and which are the remaining (unordered) items.
Velero needs to know which resources are ordered because when we process them later, these initial resources must be processed sequentially, one at a time, before processing the remaining resources in a parallel manner.
The current workflow within each iteration of the ItemCollector.items loop will be replaced with the following (a simplified sketch follows the list):
- (note that some of the below should be pulled out into a helper func to facilitate a recursive call for items returned from `GetRelatedItems`.)
- Before loop iteration, create a new `itemsInBlock` map of type map[velero.ResourceIdentifier]bool which represents the set of items already included in a block.
- If `item` is already in `itemsInBlock`, continue. This one has already been processed.
- Add `item` to `itemsInBlock`.
- Load item from ItemCollector file. Close/remove file after loading (on error return or not, possibly with similar anonymous func to current impl)
- Get matching IBA plugins for item, call `GetRelatedItems` for each. For each item returned, get full item content from ItemCollector (if present in item list, pulling from file, removing file when done) or from cluster (if not present in item list), add item to the current block, add item to `itemsInBlock` map, and then recursively apply current step to each (i.e. call IBA method, add to block, etc.)
- Once the full ItemBlock list is generated, call the new func `backupItemBlock(block ItemBlock)`.
- Add `backupItemBlock` return values to `backedUpGroupResources` map
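The sketch below illustrates this grouping loop; the helper names (`identifierFor`, `addItemAndRelatedToBlock`) are hypothetical placeholders for logic the real implementation would factor out, and error handling and file cleanup are omitted.

```go
// Sketch only: grouping ItemCollector results into ItemBlocks.
// Helper names are hypothetical; error handling and file cleanup are omitted.
itemsInBlock := make(map[velero.ResourceIdentifier]bool)

for _, item := range itemCollector.items {
	key := identifierFor(item) // hypothetical: build the ResourceIdentifier for this item
	if itemsInBlock[key] {
		continue // already grouped into an earlier ItemBlock
	}

	block := ItemBlock{log: log, itemBackupper: itemBackupper}
	// Hypothetical helper: loads the item's content, records it in itemsInBlock,
	// calls the matching IBA plugins' GetRelatedItems, and recurses over the results.
	addItemAndRelatedToBlock(&block, item, itemsInBlock)

	for _, gr := range backupItemBlock(block) {
		backedUpGroupResources[gr] = true
	}
}
```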
#### New func `backupItemBlock`
Method signature for new func `backupItemBlock` is as follows:
```go
func backupItemBlock(block ItemBlock) []schema.GroupResource
```
The return value is a slice of GRs for resources which were backed up. Velero tracks these to determine which CRDs need to be included in the backup. Note that we need to make sure we include in this not only those resources that were backed up directly, but also those backed up indirectly via the additional items that BIA `Execute` returns.
In order to handle backup hooks, this func will first take the input item list (`block.items`) and get a list of included pods, filtered to include only those not yet backed up (using `block.itemBackupper.backupRequest.BackedUpItems`). Iterate over this list and execute pre hooks (pulled out of `itemBackupper.backupItemInternal`) for each item.
Now iterate over the full list (`block.items`) and call `backupItem` for each. After the first, the later items should already have been backed up, but calling a second time is harmless (since the first thing Velero does is check the `BackedUpItems` map, exiting if the item is already backed up). We still need this call in case there's a plugin which returns something in `GetAdditionalItems` but forgets to return it in the `Execute` additional items return value. If we don't do this, we could end up missing items.
After backing up the items in the block, we now execute post hooks using the same filtered item list we used for pre hooks, again taking the logic from `itemBackupper.backupItemInternal`.
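A rough sketch of how `backupItemBlock` could be structured, assuming the hook helpers (`podsNotYetBackedUp`, `runPreHooks`, `runPostHooks`) are pulled out of `itemBackupper.backupItemInternal`; these names are illustrative only.

```go
// Sketch only: run pre hooks once per block, back up every item, then run post hooks.
func backupItemBlock(block ItemBlock) []schema.GroupResource {
	// Hypothetical filter: pods in the block that are not yet in BackedUpItems.
	hookPods := podsNotYetBackedUp(block)
	runPreHooks(block.itemBackupper, hookPods)

	var backedUp []schema.GroupResource
	for _, item := range block.Items {
		// backupItem exits early for items already recorded in BackedUpItems.
		if ok := backupItem(block.itemBackupper, item.gr, item.item, item.preferredGVR); ok {
			backedUp = append(backedUp, item.gr)
		}
	}

	runPostHooks(block.itemBackupper, hookPods)
	return backedUp
}
```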
#### `itemBackupper.backupItemInternal` cleanup
After implementing backup hooks in `backupItemBlock`, hook processing should be removed from `itemBackupper.backupItemInternal`.
### Phase 2: Process ItemBlocks for a single backup in multiple threads
#### New input field for number of ItemBlock workers
The velero installer and server CLIs will get a new input field `itemBlockWorkerCount`, which will be passed along to the `backupReconciler`.
The `backupReconciler` struct will also have this new field added.
#### Worker pool for item block processing
A new type, `ItemBlockWorkerPool`, will be added which will manage a pool of worker goroutines that process item blocks, a shared input channel for passing blocks to workers, and a WaitGroup to shut down cleanly when the reconciler exits.
```go
type ItemBlockWorkerPool struct {
	itemBlockChannel chan ItemBlockInput
	wg               *sync.WaitGroup
	logger           logrus.FieldLogger
}

type ItemBlockInput struct {
	itemBlock  ItemBlock
	returnChan chan ItemBlockReturn
}

type ItemBlockReturn struct {
	itemBlock ItemBlock
	resources []schema.GroupResource
	err       error
}

func (p *ItemBlockWorkerPool) getInputChannel() chan ItemBlockInput
func RunItemBlockWorkers(context context.Context, workers int)
func processItemBlocksWorker(context context.Context, itemBlockChannel chan ItemBlockInput, logger logrus.FieldLogger, wg *sync.WaitGroup)
```
The worker pool will be started by calling `RunItemBlockWorkers` in `backupReconciler.SetupWithManager`, passing in the worker count and reconciler context.
`SetupWithManager` will also add the input channel to the `itemBackupper` so that it will be available during backup processing.
The func `RunItemBlockWorkers` will create the `ItemBlockWorkerPool` with a shared buffered input channel (fixed buffer size) and start `workers` goroutines which will each call `processItemBlocksWorker`.
The `processItemBlocksWorker` func (run by the worker goroutines) will read from `itemBlockChannel`, call `BackupItemBlock` on the retrieved `ItemBlock`, and then send the return value to the retrieved `returnChan`, and then process the next block.
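A minimal sketch of what each worker goroutine might do, assuming the pool types declared above; error handling is elided.

```go
// Sketch only: each worker drains the shared channel until the context is cancelled.
func processItemBlocksWorker(ctx context.Context, itemBlockChannel chan ItemBlockInput, logger logrus.FieldLogger, wg *sync.WaitGroup) {
	defer wg.Done()
	for {
		select {
		case input := <-itemBlockChannel:
			// backupItemBlock is the Phase 1 func; a real implementation would also set err.
			resources := backupItemBlock(input.itemBlock)
			input.returnChan <- ItemBlockReturn{itemBlock: input.itemBlock, resources: resources}
		case <-ctx.Done():
			logger.Info("stopping ItemBlock worker")
			return
		}
	}
}
```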
#### Modify ItemBlock processing loop to send ItemBlocks to the worker pool rather than backing them up directly
The ItemBlock processing loop implemented in Phase 1 will be modified to send each newly-created ItemBlock to the shared channel rather than calling `BackupItemBlock` inline, using a WaitGroup to manage in-process items. A separate goroutine will be created to process returns for this backup. After completion of the ItemBlock processing loop, velero will use the WaitGroup to wait for all ItemBlock processing to complete before moving forward.
A simplified example of what this response goroutine might look like:
```go
// omitting cancel handling, context, etc.
ret := make(chan ItemBlockReturn)
wg := &sync.WaitGroup{}

// Handle returns
go func() {
	for {
		select {
		case response := <-ret: // process each BackupItemBlock response
			func() {
				defer wg.Done()
				responses = append(responses, response)
			}()
		case <-ctx.Done():
			return
		}
	}
}()

// Simplified illustration, looping over an assumed already-determined ItemBlock list
for _, itemBlock := range itemBlocks {
	wg.Add(1)
	inputChan <- ItemBlockInput{itemBlock: itemBlock, returnChan: ret}
}

done := make(chan struct{})
go func() {
	defer close(done)
	wg.Wait()
}()

// Wait for all the ItemBlocks to be processed
select {
case <-done:
	logger.Info("done processing ItemBlocks")
}

// responses from BackupItemBlock calls are in responses
```
When processing the responses, the main thing is to set `backedUpGroupResources[item.groupResource]=true` for each GR returned, which will give the same result as the current implementation calling items one-by-one and setting that field as needed.
The ItemBlock processing loop described above will be split into two separate iterations. For the first iteration, velero will only process those items at the beginning of the loop identified as `orderedResources` -- when the groups generated from these resources are passed to the worker channel, velero will wait for the response before moving on to the next ItemBlock.
This is to ensure that the ordered resources are processed in the required order. Once the last ordered resource is processed, the remaining ItemBlocks will be processed and sent to the worker channel without waiting for a response, in order to allow these ItemBlocks to be processed in parallel.
The reason we must execute `ItemBlocks` with ordered resources first (and one at a time) is that this is a list of resources identified by the user as resources which must be backed up first, and in a particular order.
#### Synchronize access to the BackedUpItems map
Velero uses a map of BackedUpItems to track which items have already been backed up. This prevents velero from attempting to back up an item more than once, as well as guarding against creating infinite loops due to circular dependencies in the additional items returns. Since velero will now be accessing this map from the parallel goroutines, access to the map must be synchronized with mutexes.
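One possible shape for that synchronization is sketched below; `itemKey` is a stand-in for whatever key type the existing `BackedUpItems` map already uses, and the real change could equally just guard the existing map accesses with a mutex on the backup request.

```go
// Sketch only: a mutex-guarded wrapper around the BackedUpItems map.
type itemKey string // placeholder for the real key type

type backedUpItemsMap struct {
	mu    sync.Mutex
	items map[itemKey]struct{}
}

// CheckAndMark reports whether the item was already backed up; if not,
// it records the item, all under a single lock.
func (m *backedUpItemsMap) CheckAndMark(key itemKey) bool {
	m.mu.Lock()
	defer m.mu.Unlock()
	if _, exists := m.items[key]; exists {
		return true
	}
	m.items[key] = struct{}{}
	return false
}
```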
### Backup Finalize phase
The finalize phase will not be affected by the ItemBlock design, since this is just updating resources after async operations are completed on the items and there is no need to run these updates in parallel.
## Alternatives considered
### BackupItemAction v3 API
Instead of adding a new `ItemBlockAction` plugin type, we could add a `GetAdditionalItems` method to BackupItemAction.
This was rejected because the new plugin type provides a cleaner interface, and keeps the function of grouping related items separate from the function of modifying item content for the backup.
### Per-backup worker pool
The current design makes use of a permanent worker pool, started at backup controller startup time. With this design, when we follow on with running multiple backups in parallel, the same set of workers will take ItemBlock inputs from more than one backup. Another approach that was initially considered was a temporary worker pool, created while processing a backup, and deleted upon backup completion.
#### User-visible API differences between the two approaches
The main user-visible difference here is in the configuration API. For the permanent worker approach, the worker count represents the total worker count for all backups. The concurrent backup count represents the number of backups running at the same time. At any given time, though, the maximum number of worker threads backing up items concurrently is equal to the worker count. If worker count is 15 and the concurrent backup count is 3, then there will be, at most, 15 items being processed at the same time, split among up to three running backups.
For the per-backup worker approach, the worker count represents the worker count for each backup. The concurrent backup count, as before, represents the number of backups running at the same time. If worker count is 15 and the concurrent backup count is 3, then there will be, at most, 45 items being processed at the same time, up to 15 for each of up to three running backups.
#### Comparison of the two approaches
- Permanent worker pool advantages:
- This is the more commonly-followed Kubernetes pattern. It's generally better to follow standard practices, unless there are genuine reasons for the use case to go in a different way.
- It's easier for users to understand the maximum number of concurrent items processed, which will have performance impact and impact on the resource requirements for the Velero pod. Users will not have to multiply the config numbers in their heads when working out how many total workers are present.
- It will give us more flexibility for future enhancements around concurrent backups. One possible use case: backup priority. Maybe a user wants scheduled backups to have a lower priority than user-generated backups, since a user is sitting there waiting for completion -- a shared worker pool could react to the priority by taking ItemBlocks for the higher priority backup first, which would allow a large lower-priority backup's items to be preempted by a higher-priority backup's items without needing to explicitly stop the main controller flow for that backup.
- Per-backup worker pool advantages:
- Lower memory consumption than permanent worker pool, but the total memory used by a worker blocked on input will be pretty low, so if we're talking only 10-20 workers, the impact will be minimal.
## Compatibility
### Example IBA implementation for BIA plugins which return additional items
Included below is an example of what might be required for a BIA plugin which returns additional items.
The code is taken from the internal velero `pod_action.go` which identifies the items required for a given pod.
In this particular case, the only function of pod_action is to return additional items, so we can really just convert this plugin to an IBA plugin. If there were other actions, such as modifying the pod content on backup, then we would still need the pod action, and the related items vs. content manipulation functions would need to be separated.
```go
// PodAction implements ItemBlockAction.
type PodAction struct {
	log logrus.FieldLogger
}

// NewPodAction creates a new ItemAction for pods.
func NewPodAction(logger logrus.FieldLogger) *PodAction {
	return &PodAction{log: logger}
}

// AppliesTo returns a ResourceSelector that applies only to pods.
func (a *PodAction) AppliesTo() (velero.ResourceSelector, error) {
	return velero.ResourceSelector{
		IncludedResources: []string{"pods"},
	}, nil
}

// GetRelatedItems scans the pod's spec.volumes for persistentVolumeClaim volumes and returns a
// ResourceIdentifier list containing references to all of the persistentVolumeClaim volumes used by
// the pod. This ensures that when a pod is backed up, all referenced PVCs are backed up too.
func (a *PodAction) GetRelatedItems(item runtime.Unstructured, backup *v1.Backup) ([]velero.ResourceIdentifier, error) {
	pod := new(corev1api.Pod)
	if err := runtime.DefaultUnstructuredConverter.FromUnstructured(item.UnstructuredContent(), pod); err != nil {
		return nil, errors.WithStack(err)
	}

	var relatedItems []velero.ResourceIdentifier
	if pod.Spec.PriorityClassName != "" {
		a.log.Infof("Adding priorityclass %s to relatedItems", pod.Spec.PriorityClassName)
		relatedItems = append(relatedItems, velero.ResourceIdentifier{
			GroupResource: kuberesource.PriorityClasses,
			Name:          pod.Spec.PriorityClassName,
		})
	}

	if len(pod.Spec.Volumes) == 0 {
		a.log.Info("pod has no volumes")
		return relatedItems, nil
	}

	for _, volume := range pod.Spec.Volumes {
		if volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.ClaimName != "" {
			a.log.Infof("Adding pvc %s to relatedItems", volume.PersistentVolumeClaim.ClaimName)
			relatedItems = append(relatedItems, velero.ResourceIdentifier{
				GroupResource: kuberesource.PersistentVolumeClaims,
				Namespace:     pod.Namespace,
				Name:          volume.PersistentVolumeClaim.ClaimName,
			})
		}
	}

	return relatedItems, nil
}

// API call
func (a *PodAction) Name() string {
	return "PodAction"
}
```
## Implementation
Phase 1 and Phase 2 could be implemented within the same Velero release cycle, but they need not be.
Phase 1 is expected to be implemented in Velero 1.15.
Phase 2 could either be in 1.15 as well, or in a later release, depending on the release timing and resource availability.

View File

@ -0,0 +1,94 @@
# Backup PVC Configuration Design
## Glossary & Abbreviation
**Velero Generic Data Path (VGDP)**: VGDP is the collective set of modules introduced in the [Unified Repository design][1]. Velero uses these modules to finish data transfer for various purposes (i.e., PodVolume backup/restore, Volume Snapshot Data Movement). VGDP modules include uploaders and the backup repository.
**Exposer**: Exposer is a module that is introduced in [Volume Snapshot Data Movement Design][2]. Velero uses this module to expose the volume snapshots to Velero node-agent pods or node-agent associated pods so as to complete the data movement from the snapshots.
**backupPVC**: The intermediate PVC created by the exposer for VGDP to access data from, see [Volume Snapshot Data Movement Design][2] for more details.
**backupPod**: The pod consumes the backupPVC so that VGDP could access data from the backupPVC, see [Volume Snapshot Data Movement Design][2] for more details.
**sourcePVC**: The PVC to be backed up, see [Volume Snapshot Data Movement Design][2] for more details.
## Background
As elaborated in [Volume Snapshot Data Movement Design][2], a backupPVC may be created by the Exposer and the VGDP reads data from the backupPVC.
In some scenarios, users may need to configure some advanced settings of the backupPVC so that the data movement could work in best performance in their environments. Specifically:
- For some storage providers, creating a read-only volume from a snapshot is very fast; whereas, if a writable volume is created from the snapshot, the provider needs to clone the entire disk data, which is time consuming. If the backupPVC's `accessModes` is set as `ReadOnlyMany`, the volume driver is able to tell the storage to create a read-only volume, which may dramatically shorten the snapshot expose time. On the other hand, `ReadOnlyMany` is not supported by all volumes. Therefore, users should be allowed to configure the `accessModes` for the backupPVC.
- Some storage providers create one or more replicas when creating a volume, and the number of replicas is defined in the storage class. However, it doesn't make sense to keep replicas for an intermediate volume used only by the backup. Therefore, users should be allowed to configure another storage class specifically used by the backupPVC.
## Goals
- Create a mechanism for users to specify various configurations for backupPVC
## Non-Goals
## Solution
We will use the ```node-agent-config``` configMap to host the backupPVC configurations.
This configMap is not created by Velero, users should create it manually on demand. The configMap should be in the same namespace where Velero is installed. If multiple Velero instances are installed in different namespaces, there should be one configMap in each namespace which applies to node-agent in that namespace only.
Node-agent server checks these configurations at startup time and uses them to initialize the related Exposer modules. Therefore, users could edit this configMap any time, but in order to make the changes effective, node-agent server needs to be restarted.
Inside the ```node-agent-config``` configMap we will add one new kind of configuration as data in the configMap, named ```backupPVC```.
Users may want to set different backupPVC configurations for different volumes; therefore, we define the configurations as a map and allow users to specify configurations per storage class. Specifically, the key of each map element is the storage class name used by the sourcePVC and the value is the set of configurations for the backupPVC created for the sourcePVC.
The data structure for ```node-agent-config``` is as below:
```go
type Configs struct {
	// LoadConcurrency is the config for data path load concurrency per node.
	LoadConcurrency *LoadConcurrency `json:"loadConcurrency,omitempty"`

	// LoadAffinity is the config for data path load affinity.
	LoadAffinity []*LoadAffinity `json:"loadAffinity,omitempty"`

	// BackupPVC is the config for backupPVC of snapshot data movement.
	BackupPVC map[string]BackupPVC `json:"backupPVC,omitempty"`
}

type BackupPVC struct {
	// StorageClass is the name of storage class to be used by the backupPVC.
	StorageClass string `json:"storageClass,omitempty"`

	// ReadOnly sets the backupPVC's access mode as read only.
	ReadOnly bool `json:"readOnly,omitempty"`
}
```
### Sample
A sample of the ```node-agent-config``` configMap is as below:
```json
{
  "backupPVC": {
    "storage-class-1": {
      "storageClass": "snapshot-storage-class",
      "readOnly": true
    },
    "storage-class-2": {
      "storageClass": "snapshot-storage-class"
    },
    "storage-class-3": {
      "readOnly": true
    }
  }
}
```
To create the configMap, users need to save something like the above sample to a json file and then run below command:
```
kubectl create cm node-agent-config -n velero --from-file=<json file name>
```
### Implementation
The `backupPVC` configuration is passed to the exposer, and the exposer sets the related specification and creates the backupPVC.
If `backupPVC.storageClass` doesn't exist or set as empty, the sourcePVC's storage class will be used.
If `backupPVC.readOnly` is set to true, `ReadOnlyMany` will be the only value set to the backupPVC's `accessModes`, otherwise, `ReadWriteOnce` is used.
Once `backupPVC.storageClass` is set, users must make sure that the specified storage class exists in the cluster and can be used by the backupPVC, otherwise, the corresponding DataUpload CR will stay in `Accepted` phase until the prepare timeout (by default 30min).
Once `backupPVC.readOnly` is set to true, users must make sure that the storage supports creating a `ReadOnlyMany` PVC from a snapshot, otherwise, the corresponding DataUpload CR will stay in `Accepted` phase until the prepare timeout (by default 30min).
If either of the above problems happens, the DataUpload CR is cancelled after the prepare timeout and the backupPVC and backupPod are deleted, so there is no way to tell whether the cause was one of the above problems or something else.
To help troubleshooting, we can add a diagnostic mechanism that records the status of the backupPod before deleting it as a result of the prepare timeout.
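As an illustration of the behavior described above, the hedged sketch below shows how the Exposer might apply a matching `backupPVC` entry when building the intermediate PVC; the helper name is hypothetical, and `corev1` refers to `k8s.io/api/core/v1`.

```go
// Sketch only: apply the backupPVC configuration matching the sourcePVC's storage class.
func applyBackupPVCConfig(backupPVC *corev1.PersistentVolumeClaim, sourceStorageClass string, configs map[string]BackupPVC) {
	cfg, found := configs[sourceStorageClass]

	// Fall back to the sourcePVC's storage class when none is configured.
	storageClass := sourceStorageClass
	if found && cfg.StorageClass != "" {
		storageClass = cfg.StorageClass
	}
	backupPVC.Spec.StorageClassName = &storageClass

	// ReadOnlyMany only when explicitly requested; otherwise ReadWriteOnce.
	accessMode := corev1.ReadWriteOnce
	if found && cfg.ReadOnly {
		accessMode = corev1.ReadOnlyMany
	}
	backupPVC.Spec.AccessModes = []corev1.PersistentVolumeClaimAccessMode{accessMode}
}
```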
[1]: Implemented/unified-repo-and-kopia-integration/unified-repo-and-kopia-integration.md
[2]: volume-snapshot-data-movement/volume-snapshot-data-movement.md

View File

@ -0,0 +1,123 @@
# Backup Repository Configuration Design
## Glossary & Abbreviation
**Backup Storage**: The storage to store the backup data. Check [Unified Repository design][1] for details.
**Backup Repository**: The backup repository is layered between BR data movers and Backup Storage to provide BR related features; it is introduced in [Unified Repository design][1].
## Background
According to the [Unified Repository design][1], Velero uses selectable backup repositories for various backup/restore methods, i.e., fs-backup, volume snapshot data movement, etc. To achieve the best performance, backup repositories may need to be configured according to the running environments.
For example, if there are sufficient CPU and memory resources in the environment, users may enable the compression feature provided by the backup repository, so as to achieve the best backup throughput.
As another example, if the local disk space is not sufficient, users may want to constrain the backup repository's cache size, so as to prevent the repository from running out of disk space.
Therefore, it is worthwhile to allow users to configure some essential parameters of the backup repositories, and the configuration may vary across backup repositories.
## Goals
- Create a mechanism for users to specify configurations for backup repositories
## Non-Goals
## Solution
### BackupRepository CRD
After a backup repository is initialized, a BackupRepository CR is created to represent the instance of the backup repository. The BackupRepository's spec is a core parameter used by Unified Repo modules when interacting with the backup repository. Therefore, we can add the configurations to the BackupRepository CR in a field called ```repositoryConfig```.
The configurations may vary across backup repositories; therefore, we will not define each of the configurations explicitly. Instead, we add a map in the BackupRepository's spec to take any configuration to be set to the backup repository.
During various operations on the backup repository, the Unified Repo modules will retrieve the specific configuration required at that time from the map. So even though it is specified, a configuration may not be visited/honored if the operations don't require it for the specific backup repository; this won't cause any issue. When and how a configuration is honored is decided by the configuration itself and should be clarified in the configuration's specification.
Below is the new BackupRepository's spec after adding the configuration map:
```yaml
spec:
  description: BackupRepositorySpec is the specification for a BackupRepository.
  properties:
    backupStorageLocation:
      description: |-
        BackupStorageLocation is the name of the BackupStorageLocation
        that should contain this repository.
      type: string
    maintenanceFrequency:
      description: MaintenanceFrequency is how often maintenance should
        be run.
      type: string
    repositoryConfig:
      additionalProperties:
        type: string
      description: RepositoryConfig contains configurations for the specific
        repository.
      type: object
    repositoryType:
      description: RepositoryType indicates the type of the backend repository
      enum:
        - kopia
        - restic
        - ""
      type: string
    resticIdentifier:
      description: |-
        ResticIdentifier is the full restic-compatible string for identifying
        this repository.
      type: string
    volumeNamespace:
      description: |-
        VolumeNamespace is the namespace this backup repository contains
        pod volume backups for.
      type: string
  required:
    - backupStorageLocation
    - maintenanceFrequency
    - resticIdentifier
    - volumeNamespace
  type: object
```
### BackupRepository configMap
The BackupRepository CR is not created explicitly by a Velero CLI, but created as part of the backup/restore/maintenance operation if the CR doesn't exist. As a result, users don't have any way to specify the configurations before the BackupRepository CR is created.
Therefore, a BackupRepository configMap is introduced as a template of the configurations to be applied to the backup repository CR.
When the backup repository CR is created by the BackupRepository controller, the configurations in the configMap are copied to the ```repositoryConfig``` field.
For an existing BackupRepository CR, the configMap is never visited; if users want to modify a configuration value, they should directly edit the BackupRepository CR.
The BackupRepository configMap is created by users in the Velero installation namespace. The configMap name must be specified in the velero server parameter ```--backup-repository-config```, otherwise, it won't take effect.
If the configMap name is specified but the configMap doesn't exist by the time a backup repository is created, the configMap name is ignored.
If, for any reason, the configMap doesn't take effect, nothing is specified to the backup repository CR, so the Unified Repo modules use the hard-coded values to configure the backup repository.
The BackupRepository configMap supports backup repository type specific configurations, even though users can only specify one configMap.
So in the configMap struct, multiple entries are supported, indexed by the backup repository type. During the backup repository creation, the configMap is searched by the repository type.
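A hedged sketch of how the BackupRepository controller might resolve the entry for a repository type from this configMap and flatten it into the CR's ```repositoryConfig``` string map; the function name and conversion are illustrative only, `corev1` refers to `k8s.io/api/core/v1`, and `encoding/json`/`fmt` are used for parsing.

```go
// Sketch only: pick the configMap entry matching the repository type and turn it
// into the map[string]string expected by the CR's repositoryConfig field.
func repoConfigFromTemplate(cm *corev1.ConfigMap, repositoryType string) (map[string]string, error) {
	raw, ok := cm.Data[repositoryType]
	if !ok {
		return nil, nil // no template for this repository type; leave repositoryConfig empty
	}

	var values map[string]interface{}
	if err := json.Unmarshal([]byte(raw), &values); err != nil {
		return nil, err
	}

	config := make(map[string]string, len(values))
	for key, value := range values {
		config[key] = fmt.Sprintf("%v", value) // repositoryConfig is a string map
	}
	return config, nil
}
```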
### Configurations
With the above mechanisms, any kind of configuration could be added. Here list the configurations defined at present:
```cacheLimitMB```: specifies the size limit (in MB) for the local data cache. The more data is cached locally, the less data may be downloaded from the backup storage, so the better performance may be achieved. Practically, users can specify any size that is smaller than the free space so that the disk space won't run out. This parameter is per repository connection, that is, users could change it before connecting to the repository. If a backup repository doesn't use a local cache, this parameter will be ignored. For the Kopia repository, this parameter is supported.
```enableCompression```: specifies whether to enable/disable compression for a backup repository. Most backup repositories support the data compression feature; if it is not supported by a backup repository, this parameter is ignored. Most backup repositories support dynamically enabling/disabling compression, so this parameter is defined to be used whenever creating a write connection to the backup repository; if dynamic changes are not supported, this parameter will be honored only when initializing the backup repository. For the Kopia repository, this parameter is supported and can be dynamically modified.
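For illustration, a minimal sketch of how a Unified Repo module might read ```cacheLimitMB``` from the CR's string map, falling back to the hard-coded default when it is absent or invalid; the function name is assumed and `strconv` is used for parsing.

```go
// Sketch only: parse cacheLimitMB from repositoryConfig with a default fallback.
func cacheLimitMB(repositoryConfig map[string]string, defaultMB int64) int64 {
	value, ok := repositoryConfig["cacheLimitMB"]
	if !ok {
		return defaultMB
	}
	limit, err := strconv.ParseInt(value, 10, 64)
	if err != nil || limit <= 0 {
		return defaultMB
	}
	return limit
}
```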
### Sample
Below is an example of the BackupRepository configMap with the configurations:
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: <config-name>
  namespace: velero
data:
  <repository-type-1>: |
    {
      "cacheLimitMB": 2048,
      "enableCompression": true
    }
  <repository-type-2>: |
    {
      "cacheLimitMB": 1,
      "enableCompression": false
    }
```
To create the configMap, users need to save something like the above sample to a file and then run the command below:
```
kubectl apply -f <yaml file name>
```
[1]: Implemented/unified-repo-and-kopia-integration/unified-repo-and-kopia-integration.md

View File

@ -0,0 +1,261 @@
# Repository maintenance job configuration design
## Abstract
Add this design so that the repository maintenance job can read its configuration from a dedicated ConfigMap, and make the Job's necessary parts configurable, e.g. `PodSpec.Affinity` and `PodSpec.resources`.
## Background
Repository maintenance was split from the Velero server into a k8s Job in v1.14 by the design [repository maintenance job](Implemented/repository-maintenance.md).
The repository maintenance Job configuration was read from Velero server CLI parameters, and it inherited most of the Velero server Deployment's PodSpec to fill unconfigured fields.
This design introduces a new way to let the user customize the repository maintenance behavior instead of inheriting from the Velero server Deployment or reading from `velero server` CLI parameters.
The configurations added in this design include the resource limits and node selection.
It's possible that new configurations will be introduced in future releases based on this design.
For node selection, the repository maintenance Job also used to inherit from the Velero server deployment, but the Job may last for a while and consume non-negligible resources, especially memory.
Users need a way to choose which k8s node runs the maintenance Job.
This design reuses the data structure introduced by the design [node-agent affinity configuration](Implemented/node-agent-affinity.md) so that the repository maintenance job can choose which node to run on.
## Goals
- Unify the repository maintenance Job configuration in one place.
- Let users choose which nodes the repository maintenance Job runs on.
- Replace the existing `velero server` parameters `--maintenance-job-cpu-request`, `--maintenance-job-mem-request`, `--maintenance-job-cpu-limit` and `--maintenance-job-mem-limit` with the ConfigMap proposed in this design.
## Non Goals
- There was an [issue](https://github.com/vmware-tanzu/velero/issues/7911) requesting that the Job's whole PodSpec be configurable. That's not in the scope of this design.
- Please note that this new configuration is dedicated to repository maintenance. The configuration of the repository itself is not covered.
## Compatibility
v1.14 uses `velero server` CLI parameters to pass the repository maintenance job configuration.
In v1.15, those parameters are removed, including `--maintenance-job-cpu-request`, `--maintenance-job-mem-request`, `--maintenance-job-cpu-limit` and `--maintenance-job-mem-limit`.
Instead, the parameters are read from the ConfigMap specified by `velero server` CLI parameter `--repo-maintenance-job-config` introduced by this design.
## Design
This design introduces a new ConfigMap specified by `velero server` CLI parameter `--repo-maintenance-job-config` as the source of the repository maintenance job configuration. The specified ConfigMap is read from the namespace where Velero is installed.
If the ConfigMap doesn't exist, the internal default values are used.
Example of using the parameter `--repo-maintenance-job-config`:
```
velero server \
...
--repo-maintenance-job-config repo-job-config
...
```
**Notice**
* Velero doesn't own this ConfigMap. If the user wants to customize the repository maintenance job, the user needs to create this ConfigMap.
* Velero reads this ConfigMap content when starting a new repository maintenance job, so a ConfigMap change will not take effect until the next job is created.
### Structure
The data structure for ```repo-maintenance-job-config``` is as below:
```go
type MaintenanceConfigMap map[string]Configs
type Configs struct {
// LoadAffinity is the config for data path load affinity.
LoadAffinity []*LoadAffinity `json:"loadAffinity,omitempty"`
// Resources is the config for the CPU and memory resources setting.
Resources Resources `json:"resources,omitempty"`
}
type LoadAffinity struct {
// NodeSelector specifies the label selector to match nodes
NodeSelector metav1.LabelSelector `json:"nodeSelector"`
}
type Resources struct {
// The repository maintenance job CPU request setting
CPURequest string `json:"cpuRequest,omitempty"`
// The repository maintenance job memory request setting
MemRequest string `json:"memRequest,omitempty"`
// The repository maintenance job CPU limit setting
CPULimit string `json:"cpuLimit,omitempty"`
// The repository maintenance job memory limit setting
MemLimit string `json:"memLimit,omitempty"`
}
```
The ConfigMap content is a map.
If there is a `global` key in the map, its value is applied to the maintenance jobs of all BackupRepositories that don't have their own specific configuration in the ConfigMap.
The other keys in the map are combinations of three elements of a BackupRepository, joined by `-`:
* The namespace in which BackupRepository backs up volume data
* The BackupRepository referenced BackupStorageLocation's name
* The BackupRepository's type. Possible values are `kopia` and `restic`
If a key matches a BackupRepository, that key's value is applied to the BackupRepository's maintenance jobs.
This way, users can provide the configuration before the BackupRepository is created.
This is especially convenient for administrators configuring it during the Velero installation.
For example, the key for the following BackupRepository should be `test-default-kopia`:
``` yaml
- apiVersion: velero.io/v1
kind: BackupRepository
metadata:
generateName: test-default-kopia-
labels:
velero.io/repository-type: kopia
velero.io/storage-location: default
velero.io/volume-namespace: test
name: test-default-kopia-kgt6n
namespace: velero
spec:
backupStorageLocation: default
maintenanceFrequency: 1h0m0s
repositoryType: kopia
resticIdentifier: gs:jxun:/restic/test
volumeNamespace: test
```
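A minimal sketch of how the key could be composed and looked up is shown below; the function names are illustrative and not the actual implementation.
```go
package sketch

// Configs stands in for the structure defined in the Structure section above.
type Configs struct{ /* LoadAffinity, Resources, ... */ }

// repoConfigKey composes the per-repository key:
// <volume namespace>-<BSL name>-<repository type>, e.g. "test-default-kopia".
func repoConfigKey(volumeNamespace, bslName, repoType string) string {
	return volumeNamespace + "-" + bslName + "-" + repoType
}

// configsFor returns the "global" entry and, if present, the entry that matches
// the given BackupRepository; values defined in the matched entry take precedence.
func configsFor(all map[string]Configs, volumeNamespace, bslName, repoType string) (global Configs, matched Configs, hasMatch bool) {
	global = all["global"]
	matched, hasMatch = all[repoConfigKey(volumeNamespace, bslName, repoType)]
	return global, matched, hasMatch
}
```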
The `LoadAffinity` structure is reused from design [node-agent affinity configuration](Implemented/node-agent-affinity.md).
It's possible that users want the job to run on nodes that match condition A or condition B.
For example, the user may want the job to run on nodes of a specific machine type or on nodes located in the us-central1-x zones.
This can be done by adding multiple entries to the `LoadAffinity` array.
### Affinity Example
A sample of the ```repo-maintenance-job-config``` ConfigMap is as below:
``` bash
cat <<EOF > repo-maintenance-job-config.json
{
"global": {
"resources": {
"cpuRequest": "100m",
"cpuLimit": "200m",
"memRequest": "100Mi",
"memLimit": "200Mi"
},
"loadAffinity": [
{
"nodeSelector": {
"matchExpressions": [
{
"key": "cloud.google.com/machine-family",
"operator": "In",
"values": [
"e2"
]
}
]
}
},
{
"nodeSelector": {
"matchExpressions": [
{
"key": "topology.kubernetes.io/zone",
"operator": "In",
"values": [
"us-central1-a",
"us-central1-b",
"us-central1-c"
]
}
]
}
}
]
}
}
EOF
```
This sample showcases two affinity configurations:
- matchExpressions: the maintenance job runs on nodes with the label key `cloud.google.com/machine-family` and value `e2`.
- matchExpressions: the maintenance job runs on nodes located in `us-central1-a`, `us-central1-b` and `us-central1-c`.
Nodes matching either of the two conditions are selected, as sketched below.
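The sketch below shows the expected semantics only; it is not the actual `toSystemAffinity` implementation. Each `LoadAffinity` entry could translate into one `nodeSelectorTerm`, and Kubernetes ORs the terms under `requiredDuringSchedulingIgnoredDuringExecution`, so a node only needs to satisfy one of them.
```go
package sketch

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// toNodeAffinity converts label selectors (one per LoadAffinity entry) into a
// node affinity where each selector becomes one nodeSelectorTerm; the terms are ORed.
func toNodeAffinity(selectors []metav1.LabelSelector) *corev1.Affinity {
	terms := make([]corev1.NodeSelectorTerm, 0, len(selectors))
	for _, s := range selectors {
		term := corev1.NodeSelectorTerm{}
		// matchLabels entries are expressed as "In" requirements with a single value.
		for k, v := range s.MatchLabels {
			term.MatchExpressions = append(term.MatchExpressions, corev1.NodeSelectorRequirement{
				Key: k, Operator: corev1.NodeSelectorOpIn, Values: []string{v},
			})
		}
		// matchExpressions map directly onto node selector requirements.
		for _, e := range s.MatchExpressions {
			term.MatchExpressions = append(term.MatchExpressions, corev1.NodeSelectorRequirement{
				Key: e.Key, Operator: corev1.NodeSelectorOperator(e.Operator), Values: e.Values,
			})
		}
		terms = append(terms, term)
	}
	return &corev1.Affinity{
		NodeAffinity: &corev1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{NodeSelectorTerms: terms},
		},
	}
}
```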
To create the configMap, users need to save something like the above sample to a json file and then run the below command:
```
kubectl create cm repo-maintenance-job-config -n velero --from-file=repo-maintenance-job-config.json
```
### Value assigning rules
If the Velero BackupRepositoryController cannot find the introduced ConfigMap, the following default values are used for the repository maintenance job:
``` go
config := Configs {
// LoadAffinity is the config for data path load affinity.
LoadAffinity: nil,
// Resources is the config for the CPU and memory resources setting.
Resources: Resources{
// The repository maintenance job CPU request setting
CPURequest: "0m",
// The repository maintenance job memory request setting
MemRequest: "0Mi",
// The repository maintenance job CPU limit setting
CPULimit: "0m",
// The repository maintenance job memory limit setting
MemLimit: "0Mi",
},
}
```
If the Velero BackupRepositoryController finds the introduced ConfigMap with only the `global` element, the `global` value is used.
If the Velero BackupRepositoryController finds the introduced ConfigMap with only an element that matches the BackupRepository, the matched element's value is used.
If the Velero BackupRepositoryController finds the introduced ConfigMap with both a `global` element and an element that matches the BackupRepository, the values defined in the matched element overwrite the `global` values, and the `global` values are still used for the fields the matched element doesn't define.
For example, the ConfigMap content has two elements.
``` json
{
"global": {
"resources": {
"cpuRequest": "100m",
"cpuLimit": "200m",
"memRequest": "100Mi",
"memLimit": "200Mi"
}
},
"ns1-default-kopia": {
"resources": {
"memRequest": "400Mi",
"memLimit": "800Mi"
}
}
}
```
The config value used for the BackupRepository backing up volume data in namespace `ns1`, referencing BSL `default`, with type `kopia`, is:
``` go
config := Configs {
    // LoadAffinity is the config for data path load affinity.
    LoadAffinity: nil,
    // Resources is the config for the CPU and memory resources setting.
    Resources: Resources{
        // The repository maintenance job CPU request setting
        CPURequest: "100m",
        // The repository maintenance job memory request setting
        MemRequest: "400Mi",
        // The repository maintenance job CPU limit setting
        CPULimit: "200m",
        // The repository maintenance job memory limit setting
        MemLimit: "800Mi",
    },
}
```
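The overriding rule can be sketched as the following field-by-field merge (illustrative only; the real controller may implement it differently):
```go
package sketch

// Resources mirrors the structure defined in the Structure section (json tags omitted).
type Resources struct {
	CPURequest, MemRequest, CPULimit, MemLimit string
}

// mergeResources applies the overriding rule: a field defined in the matched
// entry wins, and a field left empty falls back to the global entry.
func mergeResources(global, matched Resources) Resources {
	pick := func(specific, fallback string) string {
		if specific != "" {
			return specific
		}
		return fallback
	}
	return Resources{
		CPURequest: pick(matched.CPURequest, global.CPURequest),
		MemRequest: pick(matched.MemRequest, global.MemRequest),
		CPULimit:   pick(matched.CPULimit, global.CPULimit),
		MemLimit:   pick(matched.MemLimit, global.MemLimit),
	}
}
```
With the example above, the merge yields cpuRequest `100m` and cpuLimit `200m` from `global`, and memRequest `400Mi` and memLimit `800Mi` from `ns1-default-kopia`.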
### Implementation
When the Velero repository controller starts to maintain a repository, it calls the repository manager's `PruneRepo` function to build the maintenance Job.
The ConfigMap specified by the `velero server` CLI parameter `--repo-maintenance-job-config` is fetched to reinitialize the repository `MaintenanceConfig` setting.
``` go
config, err := GetConfigs(context.Background(), namespace, crClient)
if err == nil {
if len(config.LoadAffinity) > 0 {
mgr.maintenanceCfg.Affinity = toSystemAffinity((*nodeagent.LoadAffinity)(config.LoadAffinity[0]))
}
......
} else {
log.Info("Cannot find the repo-maintenance-job-config ConfigMap: %s", err.Error())
}
```
## Alternatives Considered
Another option is creating a separate ConfigMap for each BackupRepository.
This is not ideal for scenarios with many BackupRepositories in the cluster.

View File

@ -176,6 +176,27 @@ Below diagram shows how VGDP logs are redirected:
This log redirecting mechanism is thread safe since the hook acquires the write lock before writing the log buffer, so it guarantees that there is no corruption in the node-agent log after redirecting, and the redirected logs and the original node-agent logs are not mixed into each other.
### Resource Control
The CPU/memory resource of backupPod/restorePod is configurable, which means users are allowed to configure resources per volume backup/restore.
By default, the [Best Effort policy][5] is used, and users are allowed to change it through the ```node-agent-config``` configMap. Specifically, we add the below structures to the configMap:
```
type Configs struct {
// PodResources is the resource config for various types of pods launched by node-agent, i.e., data mover pods.
PodResources *PodResources `json:"podResources,omitempty"`
}
type PodResources struct {
CPURequest string `json:"cpuRequest,omitempty"`
MemoryRequest string `json:"memoryRequest,omitempty"`
CPULimit string `json:"cpuLimit,omitempty"`
MemoryLimit string `json:"memoryLimit,omitempty"`
}
```
The string values must match Kubernetes Quantity expressions; for each resource, the "request" value must not be larger than the "limit" value. Otherwise, if any one of the values fails validation, all the resource configurations will be ignored, as sketched below.
The configurations are loaded by node-agent at start time, so users can change the values in the configMap at any time, but the changes won't take effect until node-agent restarts.
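For example, a validation along the following lines (illustrative only, not the exact node-agent code) would accept the values only when they parse as Kubernetes Quantities and each request does not exceed its limit:
```go
package sketch

import "k8s.io/apimachinery/pkg/api/resource"

// PodResources mirrors the structure above (json tags omitted).
type PodResources struct {
	CPURequest, MemoryRequest, CPULimit, MemoryLimit string
}

// podResourcesValid returns false if any value is not a valid Quantity expression
// or if a request exceeds its limit; in that case the whole resource config is ignored.
func podResourcesValid(r PodResources) bool {
	parse := func(s string) (*resource.Quantity, bool) {
		if s == "" {
			return nil, true // unset values are allowed and simply not compared
		}
		q, err := resource.ParseQuantity(s)
		if err != nil {
			return nil, false // not a valid Quantity expression
		}
		return &q, true
	}
	pair := func(reqStr, limStr string) bool {
		req, okReq := parse(reqStr)
		lim, okLim := parse(limStr)
		if !okReq || !okLim {
			return false
		}
		// Compare only when both the request and the limit are set.
		return req == nil || lim == nil || req.Cmp(*lim) <= 0
	}
	return pair(r.CPURequest, r.CPULimit) && pair(r.MemoryRequest, r.MemoryLimit)
}
```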
## node-agent
node-agent is still required. Even though VGDP is now not running inside node-agent, node-agent still hosts the data mover controller which reconciles DUCR/DDCR and operates DUCR/DDCR in other steps before the VGDP instance is started, i.e., Accept, Expose, etc.
Privileged mode and root user are not required for node-agent anymore by Volume Snapshot Data Movement; however, they are still required by PVB (PodVolumeBackup) and PVR (PodVolumeRestore). Therefore, we will keep the node-agent daemonset as is; any users who don't use PVB/PVR and have concerns about the privileged mode/root user need to manually modify the daemonset spec to remove the dependencies.
@ -198,4 +219,5 @@ CLI is not changed.
[2]: ../volume-snapshot-data-movement/volume-snapshot-data-movement.md
[3]: https://kubernetes.io/blog/2022/09/02/cosi-kubernetes-object-storage-management/
[4]: ../Implemented/node-agent-concurrency.md
[5]: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/

2
go.mod
View File

@ -177,4 +177,4 @@ require (
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)
replace github.com/kopia/kopia => github.com/project-velero/kopia v0.0.0-20240417031915-e07d5b7de567
replace github.com/kopia/kopia => github.com/project-velero/kopia v0.0.0-20240829032136-7fca59662a06

4
go.sum
View File

@ -613,8 +613,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA=
github.com/project-velero/kopia v0.0.0-20240417031915-e07d5b7de567 h1:Gb5eZktsqgnhOfQmKWIlVA9yKvschdr8n8d6y1RLFA0=
github.com/project-velero/kopia v0.0.0-20240417031915-e07d5b7de567/go.mod h1:2HlqZb/N6SNsWUCZzyeh9Lw29PeDRHDkMUiuQCEWt4Y=
github.com/project-velero/kopia v0.0.0-20240829032136-7fca59662a06 h1:QLtEHOokfqpsW99nDFyU2IB47LsGhDqMICGPA+ZgjpM=
github.com/project-velero/kopia v0.0.0-20240829032136-7fca59662a06/go.mod h1:2HlqZb/N6SNsWUCZzyeh9Lw29PeDRHDkMUiuQCEWt4Y=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=

View File

@ -30,23 +30,6 @@ if ! command -v controller-gen > /dev/null; then
exit 1
fi
# get code-generation tools (for now keep in GOPATH since they're not fully modules-compatible yet)
mkdir -p ${GOPATH}/src/k8s.io
pushd ${GOPATH}/src/k8s.io
git clone -b v0.22.2 https://github.com/kubernetes/code-generator
popd
${GOPATH}/src/k8s.io/code-generator/generate-groups.sh \
all \
github.com/vmware-tanzu/velero/pkg/generated \
github.com/vmware-tanzu/velero/pkg/apis \
"velero:v1,v2alpha1" \
--go-header-file ./hack/boilerplate.go.txt \
--output-base ../../.. \
$@
# Generate apiextensions.k8s.io/v1
# Generate CRD for v1.
controller-gen \
crd:crdVersions=v1 \

View File

@ -83,7 +83,7 @@ func TestNamespacedFileStore(t *testing.T) {
require.NoError(t, err)
}
require.Equal(t, path, tc.expectedPath)
require.Equal(t, tc.expectedPath, path)
contents, err := fs.ReadFile(path)
require.NoError(t, err)

View File

@ -52,8 +52,8 @@ func TestParseCapacity(t *testing.T) {
t.Run(test.input, func(t *testing.T) {
actual, actualErr := parseCapacity(test.input)
if test.expected != emptyCapacity {
assert.Equal(t, test.expected.lower.Cmp(actual.lower), 0)
assert.Equal(t, test.expected.upper.Cmp(actual.upper), 0)
assert.Equal(t, 0, test.expected.lower.Cmp(actual.lower))
assert.Equal(t, 0, test.expected.upper.Cmp(actual.upper))
}
assert.Equal(t, test.expectedErr, actualErr)
})

View File

@ -1023,28 +1023,28 @@ func TestRestoreVolumeInfoTrackNativeSnapshot(t *testing.T) {
restore := builder.ForRestore("velero", "testRestore").Result()
tracker := NewRestoreVolInfoTracker(restore, logrus.New(), fakeCilent)
tracker.TrackNativeSnapshot("testPV", "snap-001", "ebs", "us-west-1", 10000)
assert.Equal(t, *tracker.pvNativeSnapshotMap["testPV"], NativeSnapshotInfo{
assert.Equal(t, NativeSnapshotInfo{
SnapshotHandle: "snap-001",
VolumeType: "ebs",
VolumeAZ: "us-west-1",
IOPS: "10000",
})
}, *tracker.pvNativeSnapshotMap["testPV"])
tracker.TrackNativeSnapshot("testPV", "snap-002", "ebs", "us-west-2", 15000)
assert.Equal(t, *tracker.pvNativeSnapshotMap["testPV"], NativeSnapshotInfo{
assert.Equal(t, NativeSnapshotInfo{
SnapshotHandle: "snap-002",
VolumeType: "ebs",
VolumeAZ: "us-west-2",
IOPS: "15000",
})
}, *tracker.pvNativeSnapshotMap["testPV"])
tracker.RenamePVForNativeSnapshot("testPV", "newPV")
_, ok := tracker.pvNativeSnapshotMap["testPV"]
assert.False(t, ok)
assert.Equal(t, *tracker.pvNativeSnapshotMap["newPV"], NativeSnapshotInfo{
assert.Equal(t, NativeSnapshotInfo{
SnapshotHandle: "snap-002",
VolumeType: "ebs",
VolumeAZ: "us-west-2",
IOPS: "15000",
})
}, *tracker.pvNativeSnapshotMap["newPV"])
}
func TestRestoreVolumeInfoResult(t *testing.T) {

View File

@ -41,6 +41,11 @@ type BackupRepositorySpec struct {
// MaintenanceFrequency is how often maintenance should be run.
MaintenanceFrequency metav1.Duration `json:"maintenanceFrequency"`
// RepositoryConfig is for repository-specific configuration fields.
// +optional
// +nullable
RepositoryConfig map[string]string `json:"repositoryConfig,omitempty"`
}
// BackupRepositoryPhase represents the lifecycle phase of a BackupRepository.

View File

@ -111,7 +111,7 @@ func (in *BackupRepository) DeepCopyInto(out *BackupRepository) {
*out = *in
out.TypeMeta = in.TypeMeta
in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
out.Spec = in.Spec
in.Spec.DeepCopyInto(&out.Spec)
in.Status.DeepCopyInto(&out.Status)
}
@ -169,6 +169,13 @@ func (in *BackupRepositoryList) DeepCopyObject() runtime.Object {
func (in *BackupRepositorySpec) DeepCopyInto(out *BackupRepositorySpec) {
*out = *in
out.MaintenanceFrequency = in.MaintenanceFrequency
if in.RepositoryConfig != nil {
in, out := &in.RepositoryConfig, &out.RepositoryConfig
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackupRepositorySpec.

View File

@ -26,8 +26,8 @@ import (
"k8s.io/apimachinery/pkg/runtime"
v1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/kuberesource"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
"github.com/vmware-tanzu/velero/pkg/util/actionhelpers"
)
// PVCAction inspects a PersistentVolumeClaim for the PersistentVolume
@ -51,7 +51,7 @@ func (a *PVCAction) AppliesTo() (velero.ResourceSelector, error) {
func (a *PVCAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runtime.Unstructured, []velero.ResourceIdentifier, error) {
a.log.Info("Executing PVCAction")
var pvc corev1api.PersistentVolumeClaim
pvc := new(corev1api.PersistentVolumeClaim)
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(item.UnstructuredContent(), &pvc); err != nil {
return nil, nil, errors.Wrap(err, "unable to convert unstructured item to persistent volume claim")
}
@ -60,10 +60,6 @@ func (a *PVCAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runti
return item, nil, nil
}
pv := velero.ResourceIdentifier{
GroupResource: kuberesource.PersistentVolumes,
Name: pvc.Spec.VolumeName,
}
// remove dataSource if exists from prior restored CSI volumes
if pvc.Spec.DataSource != nil {
pvc.Spec.DataSource = nil
@ -94,5 +90,5 @@ func (a *PVCAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runti
return nil, nil, errors.Wrap(err, "unable to convert pvc to unstructured item")
}
return &unstructured.Unstructured{Object: pvcMap}, []velero.ResourceIdentifier{pv}, nil
return &unstructured.Unstructured{Object: pvcMap}, actionhelpers.RelatedItemsForPVC(pvc, a.log), nil
}

View File

@ -203,7 +203,7 @@ func TestExecute(t *testing.T) {
resultUnstructed, _, _, _, err := pvcBIA.Execute(&unstructured.Unstructured{Object: pvcMap}, tc.backup)
if tc.expectedErr != nil {
require.Equal(t, err, tc.expectedErr)
require.EqualError(t, err, tc.expectedErr.Error())
} else {
require.NoError(t, err)
}
@ -367,7 +367,7 @@ func TestCancel(t *testing.T) {
err = pvcBIA.Cancel(tc.operationID, tc.backup)
if tc.expectedErr != nil {
require.Equal(t, err, tc.expectedErr)
require.EqualError(t, err, tc.expectedErr.Error())
}
du := new(velerov2alpha1.DataUpload)

View File

@ -23,8 +23,8 @@ import (
"k8s.io/apimachinery/pkg/runtime"
v1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/kuberesource"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
"github.com/vmware-tanzu/velero/pkg/util/actionhelpers"
)
// PodAction implements ItemAction.
@ -55,32 +55,5 @@ func (a *PodAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runti
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(item.UnstructuredContent(), pod); err != nil {
return nil, nil, errors.WithStack(err)
}
var additionalItems []velero.ResourceIdentifier
if pod.Spec.PriorityClassName != "" {
a.log.Infof("Adding priorityclass %s to additionalItems", pod.Spec.PriorityClassName)
additionalItems = append(additionalItems, velero.ResourceIdentifier{
GroupResource: kuberesource.PriorityClasses,
Name: pod.Spec.PriorityClassName,
})
}
if len(pod.Spec.Volumes) == 0 {
a.log.Info("pod has no volumes")
return item, additionalItems, nil
}
for _, volume := range pod.Spec.Volumes {
if volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.ClaimName != "" {
a.log.Infof("Adding pvc %s to additionalItems", volume.PersistentVolumeClaim.ClaimName)
additionalItems = append(additionalItems, velero.ResourceIdentifier{
GroupResource: kuberesource.PersistentVolumeClaims,
Namespace: pod.Namespace,
Name: volume.PersistentVolumeClaim.ClaimName,
})
}
}
return item, additionalItems, nil
return item, actionhelpers.RelatedItemsForPod(pod, a.log), nil
}

View File

@ -19,40 +19,24 @@ package actions
import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
rbac "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
v1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerodiscovery "github.com/vmware-tanzu/velero/pkg/discovery"
"github.com/vmware-tanzu/velero/pkg/kuberesource"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
"github.com/vmware-tanzu/velero/pkg/util/actionhelpers"
)
// ServiceAccountAction implements ItemAction.
type ServiceAccountAction struct {
log logrus.FieldLogger
clusterRoleBindings []ClusterRoleBinding
clusterRoleBindings []actionhelpers.ClusterRoleBinding
}
// NewServiceAccountAction creates a new ItemAction for service accounts.
func NewServiceAccountAction(logger logrus.FieldLogger, clusterRoleBindingListers map[string]ClusterRoleBindingLister, discoveryHelper velerodiscovery.Helper) (*ServiceAccountAction, error) {
// Look up the supported RBAC version
var supportedAPI metav1.GroupVersionForDiscovery
for _, ag := range discoveryHelper.APIGroups() {
if ag.Name == rbac.GroupName {
supportedAPI = ag.PreferredVersion
break
}
}
crbLister := clusterRoleBindingListers[supportedAPI.Version]
// This should be safe because the List call will return a 0-item slice
// if there's no matching API version.
crbs, err := crbLister.List()
func NewServiceAccountAction(logger logrus.FieldLogger, clusterRoleBindingListers map[string]actionhelpers.ClusterRoleBindingLister, discoveryHelper velerodiscovery.Helper) (*ServiceAccountAction, error) {
crbs, err := actionhelpers.ClusterRoleBindingsForAction(clusterRoleBindingListers, discoveryHelper)
if err != nil {
return nil, err
}
@ -82,40 +66,5 @@ func (a *ServiceAccountAction) Execute(item runtime.Unstructured, backup *v1.Bac
return nil, nil, errors.WithStack(err)
}
var (
namespace = objectMeta.GetNamespace()
name = objectMeta.GetName()
bindings = sets.NewString()
roles = sets.NewString()
)
for _, crb := range a.clusterRoleBindings {
for _, s := range crb.ServiceAccountSubjects(namespace) {
if s == name {
a.log.Infof("Adding clusterrole %s and clusterrolebinding %s to additionalItems since serviceaccount %s/%s is a subject",
crb.RoleRefName(), crb.Name(), namespace, name)
bindings.Insert(crb.Name())
roles.Insert(crb.RoleRefName())
break
}
}
}
var additionalItems []velero.ResourceIdentifier
for binding := range bindings {
additionalItems = append(additionalItems, velero.ResourceIdentifier{
GroupResource: kuberesource.ClusterRoleBindings,
Name: binding,
})
}
for role := range roles {
additionalItems = append(additionalItems, velero.ResourceIdentifier{
GroupResource: kuberesource.ClusterRoles,
Name: role,
})
}
return item, additionalItems, nil
return item, actionhelpers.RelatedItemsForServiceAccount(objectMeta, a.clusterRoleBindings, a.log), nil
}

View File

@ -31,21 +31,22 @@ import (
"github.com/vmware-tanzu/velero/pkg/kuberesource"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
"github.com/vmware-tanzu/velero/pkg/util/actionhelpers"
)
func newV1ClusterRoleBindingList(rbacCRBList []rbac.ClusterRoleBinding) []ClusterRoleBinding {
var crbs []ClusterRoleBinding
func newV1ClusterRoleBindingList(rbacCRBList []rbac.ClusterRoleBinding) []actionhelpers.ClusterRoleBinding {
var crbs []actionhelpers.ClusterRoleBinding
for _, c := range rbacCRBList {
crbs = append(crbs, v1ClusterRoleBinding{crb: c})
crbs = append(crbs, actionhelpers.V1ClusterRoleBinding{Crb: c})
}
return crbs
}
func newV1beta1ClusterRoleBindingList(rbacCRBList []rbacbeta.ClusterRoleBinding) []ClusterRoleBinding {
var crbs []ClusterRoleBinding
func newV1beta1ClusterRoleBindingList(rbacCRBList []rbacbeta.ClusterRoleBinding) []actionhelpers.ClusterRoleBinding {
var crbs []actionhelpers.ClusterRoleBinding
for _, c := range rbacCRBList {
crbs = append(crbs, v1beta1ClusterRoleBinding{crb: c})
crbs = append(crbs, actionhelpers.V1beta1ClusterRoleBinding{Crb: c})
}
return crbs
@ -55,10 +56,10 @@ type FakeV1ClusterRoleBindingLister struct {
v1crbs []rbac.ClusterRoleBinding
}
func (f FakeV1ClusterRoleBindingLister) List() ([]ClusterRoleBinding, error) {
var crbs []ClusterRoleBinding
func (f FakeV1ClusterRoleBindingLister) List() ([]actionhelpers.ClusterRoleBinding, error) {
var crbs []actionhelpers.ClusterRoleBinding
for _, c := range f.v1crbs {
crbs = append(crbs, v1ClusterRoleBinding{crb: c})
crbs = append(crbs, actionhelpers.V1ClusterRoleBinding{Crb: c})
}
return crbs, nil
}
@ -67,10 +68,10 @@ type FakeV1beta1ClusterRoleBindingLister struct {
v1beta1crbs []rbacbeta.ClusterRoleBinding
}
func (f FakeV1beta1ClusterRoleBindingLister) List() ([]ClusterRoleBinding, error) {
var crbs []ClusterRoleBinding
func (f FakeV1beta1ClusterRoleBindingLister) List() ([]actionhelpers.ClusterRoleBinding, error) {
var crbs []actionhelpers.ClusterRoleBinding
for _, c := range f.v1beta1crbs {
crbs = append(crbs, v1beta1ClusterRoleBinding{crb: c})
crbs = append(crbs, actionhelpers.V1beta1ClusterRoleBinding{Crb: c})
}
return crbs, nil
}
@ -93,21 +94,21 @@ func TestNewServiceAccountAction(t *testing.T) {
tests := []struct {
name string
version string
expectedCRBs []ClusterRoleBinding
expectedCRBs []actionhelpers.ClusterRoleBinding
}{
{
name: "rbac v1 API instantiates an saAction",
version: rbac.SchemeGroupVersion.Version,
expectedCRBs: []ClusterRoleBinding{
v1ClusterRoleBinding{
crb: rbac.ClusterRoleBinding{
expectedCRBs: []actionhelpers.ClusterRoleBinding{
actionhelpers.V1ClusterRoleBinding{
Crb: rbac.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "v1crb-1",
},
},
},
v1ClusterRoleBinding{
crb: rbac.ClusterRoleBinding{
actionhelpers.V1ClusterRoleBinding{
Crb: rbac.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "v1crb-2",
},
@ -118,16 +119,16 @@ func TestNewServiceAccountAction(t *testing.T) {
{
name: "rbac v1beta1 API instantiates an saAction",
version: rbacbeta.SchemeGroupVersion.Version,
expectedCRBs: []ClusterRoleBinding{
v1beta1ClusterRoleBinding{
crb: rbacbeta.ClusterRoleBinding{
expectedCRBs: []actionhelpers.ClusterRoleBinding{
actionhelpers.V1beta1ClusterRoleBinding{
Crb: rbacbeta.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "v1beta1crb-1",
},
},
},
v1beta1ClusterRoleBinding{
crb: rbacbeta.ClusterRoleBinding{
actionhelpers.V1beta1ClusterRoleBinding{
Crb: rbacbeta.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "v1beta1crb-2",
},
@ -138,7 +139,7 @@ func TestNewServiceAccountAction(t *testing.T) {
{
name: "no RBAC API instantiates an saAction with empty slice",
version: "",
expectedCRBs: []ClusterRoleBinding{},
expectedCRBs: []actionhelpers.ClusterRoleBinding{},
},
}
// Set up all of our fakes outside the test loop
@ -171,10 +172,10 @@ func TestNewServiceAccountAction(t *testing.T) {
},
}
clusterRoleBindingListers := map[string]ClusterRoleBindingLister{
clusterRoleBindingListers := map[string]actionhelpers.ClusterRoleBindingLister{
rbac.SchemeGroupVersion.Version: FakeV1ClusterRoleBindingLister{v1crbs: v1crbs},
rbacbeta.SchemeGroupVersion.Version: FakeV1beta1ClusterRoleBindingLister{v1beta1crbs: v1beta1crbs},
"": noopClusterRoleBindingLister{},
"": actionhelpers.NoopClusterRoleBindingLister{},
}
for _, test := range tests {

View File

@ -74,7 +74,7 @@ func TestSortOrderedResource(t *testing.T) {
{namespace: "ns1", name: "pod1"},
}
sortedResources := sortResourcesByOrder(log, podResources, order)
assert.Equal(t, sortedResources, expectedResources)
assert.Equal(t, expectedResources, sortedResources)
// Test cluster resources
pvResources := []*kubernetesResource{
@ -87,7 +87,7 @@ func TestSortOrderedResource(t *testing.T) {
{name: "pv1"},
}
sortedPvResources := sortResourcesByOrder(log, pvResources, pvOrder)
assert.Equal(t, sortedPvResources, expectedPvResources)
assert.Equal(t, expectedPvResources, sortedPvResources)
}
func TestFilterNamespaces(t *testing.T) {

View File

@ -19,6 +19,7 @@ package builder
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/vmware-tanzu/velero/pkg/apis/velero/shared"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
)
@ -111,8 +112,32 @@ func (d *DataDownloadBuilder) ObjectMeta(opts ...ObjectMetaOpt) *DataDownloadBui
return d
}
// Labels sets the DataDownload's Labels.
func (d *DataDownloadBuilder) Labels(labels map[string]string) *DataDownloadBuilder {
d.object.Labels = labels
return d
}
// StartTimestamp sets the DataDownload's StartTimestamp.
func (d *DataDownloadBuilder) StartTimestamp(startTime *metav1.Time) *DataDownloadBuilder {
d.object.Status.StartTimestamp = startTime
return d
}
// CompletionTimestamp sets the DataDownload's StartTimestamp.
func (d *DataDownloadBuilder) CompletionTimestamp(completionTimestamp *metav1.Time) *DataDownloadBuilder {
d.object.Status.CompletionTimestamp = completionTimestamp
return d
}
// Progress sets the DataDownload's Progress.
func (d *DataDownloadBuilder) Progress(progress shared.DataMoveOperationProgress) *DataDownloadBuilder {
d.object.Status.Progress = progress
return d
}
// Node sets the DataDownload's Node.
func (d *DataDownloadBuilder) Node(node string) *DataDownloadBuilder {
d.object.Status.Node = node
return d
}

View File

@ -133,7 +133,14 @@ func (d *DataUploadBuilder) Labels(labels map[string]string) *DataUploadBuilder
return d
}
// Progress sets the DataUpload's Progress.
func (d *DataUploadBuilder) Progress(progress shared.DataMoveOperationProgress) *DataUploadBuilder {
d.object.Status.Progress = progress
return d
}
// Node sets the DataUpload's Node.
func (d *DataUploadBuilder) Node(node string) *DataUploadBuilder {
d.object.Status.Node = node
return d
}

View File

@ -46,7 +46,7 @@ func TestFormattedGitSHA(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
GitSHA = test.sha
GitTreeState = test.state
assert.Equal(t, FormattedGitSHA(), test.expected)
assert.Equal(t, test.expected, FormattedGitSHA())
})
}
}

View File

@ -45,7 +45,7 @@ func TestBuildUserAgent(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
resp := buildUserAgent(test.command, test.version, test.gitSha, test.os, test.arch)
assert.Equal(t, resp, test.expected)
assert.Equal(t, test.expected, resp)
})
}
}

View File

@ -138,7 +138,7 @@ func TestCreateOptions_OrderedResources(t *testing.T) {
"pods": "ns1/p1,ns1/p2",
"persistentvolumeclaims": "ns2/pvc1,ns2/pvc2",
}
assert.Equal(t, orderedResources, expectedResources)
assert.Equal(t, expectedResources, orderedResources)
orderedResources, err = ParseOrderedResources("pods= ns1/p1,ns1/p2 ; persistentvolumes=pv1,pv2")
assert.NoError(t, err)
@ -147,7 +147,7 @@ func TestCreateOptions_OrderedResources(t *testing.T) {
"pods": "ns1/p1,ns1/p2",
"persistentvolumes": "pv1,pv2",
}
assert.Equal(t, orderedResources, expectedMixedResources)
assert.Equal(t, expectedMixedResources, orderedResources)
}
func TestCreateCommand(t *testing.T) {

View File

@ -79,7 +79,7 @@ func TestNewLogsCommand(t *testing.T) {
err = l.Run(c, f)
require.Error(t, err)
require.Contains(t, err.Error(), fmt.Sprintf("logs for backup \"%s\" are not available until it's finished processing", backupName))
require.ErrorContains(t, err, fmt.Sprintf("logs for backup \"%s\" are not available until it's finished processing", backupName))
})
t.Run("Backup not exist test", func(t *testing.T) {

View File

@ -0,0 +1,286 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"context"
"fmt"
"os"
"strings"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"github.com/vmware-tanzu/velero/internal/credentials"
"github.com/vmware-tanzu/velero/pkg/buildinfo"
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd/util/signals"
"github.com/vmware-tanzu/velero/pkg/datamover"
"github.com/vmware-tanzu/velero/pkg/datapath"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/logging"
ctrl "sigs.k8s.io/controller-runtime"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
ctlcache "sigs.k8s.io/controller-runtime/pkg/cache"
ctlclient "sigs.k8s.io/controller-runtime/pkg/client"
)
type dataMoverBackupConfig struct {
volumePath string
volumeMode string
duName string
resourceTimeout time.Duration
}
func NewBackupCommand(f client.Factory) *cobra.Command {
config := dataMoverBackupConfig{}
logLevelFlag := logging.LogLevelFlag(logrus.InfoLevel)
formatFlag := logging.NewFormatFlag()
command := &cobra.Command{
Use: "backup",
Short: "Run the velero data-mover backup",
Long: "Run the velero data-mover backup",
Hidden: true,
Run: func(c *cobra.Command, args []string) {
logLevel := logLevelFlag.Parse()
logrus.Infof("Setting log-level to %s", strings.ToUpper(logLevel.String()))
logger := logging.DefaultLogger(logLevel, formatFlag.Parse())
logger.Infof("Starting Velero data-mover backup %s (%s)", buildinfo.Version, buildinfo.FormattedGitSHA())
f.SetBasename(fmt.Sprintf("%s-%s", c.Parent().Name(), c.Name()))
s, err := newdataMoverBackup(logger, f, config)
if err != nil {
exitWithMessage(logger, false, "Failed to create data mover backup, %v", err)
}
s.run()
},
}
command.Flags().Var(logLevelFlag, "log-level", fmt.Sprintf("The level at which to log. Valid values are %s.", strings.Join(logLevelFlag.AllowedValues(), ", ")))
command.Flags().Var(formatFlag, "log-format", fmt.Sprintf("The format for log output. Valid values are %s.", strings.Join(formatFlag.AllowedValues(), ", ")))
command.Flags().StringVar(&config.volumePath, "volume-path", config.volumePath, "The full path of the volume to be backed up")
command.Flags().StringVar(&config.volumeMode, "volume-mode", config.volumeMode, "The mode of the volume to be backed up")
command.Flags().StringVar(&config.duName, "data-upload", config.duName, "The data upload name")
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters.")
_ = command.MarkFlagRequired("volume-path")
_ = command.MarkFlagRequired("volume-mode")
_ = command.MarkFlagRequired("data-upload")
_ = command.MarkFlagRequired("resource-timeout")
return command
}
const (
// defaultCredentialsDirectory is the path on disk where credential
// files will be written to
defaultCredentialsDirectory = "/tmp/credentials"
)
type dataMoverBackup struct {
logger logrus.FieldLogger
ctx context.Context
cancelFunc context.CancelFunc
client ctlclient.Client
cache ctlcache.Cache
namespace string
nodeName string
config dataMoverBackupConfig
kubeClient kubernetes.Interface
dataPathMgr *datapath.Manager
}
func newdataMoverBackup(logger logrus.FieldLogger, factory client.Factory, config dataMoverBackupConfig) (*dataMoverBackup, error) {
ctx, cancelFunc := context.WithCancel(context.Background())
clientConfig, err := factory.ClientConfig()
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create client config")
}
ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
scheme := runtime.NewScheme()
if err := velerov1api.AddToScheme(scheme); err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to add velero v1 scheme")
}
if err := velerov2alpha1api.AddToScheme(scheme); err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to add velero v2alpha1 scheme")
}
if err := v1.AddToScheme(scheme); err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to add core v1 scheme")
}
nodeName := os.Getenv("NODE_NAME")
// use a field selector to filter to only pods scheduled on this node.
cacheOption := ctlcache.Options{
Scheme: scheme,
ByObject: map[ctlclient.Object]ctlcache.ByObject{
&v1.Pod{}: {
Field: fields.Set{"spec.nodeName": nodeName}.AsSelector(),
},
&velerov2alpha1api.DataUpload{}: {
Field: fields.Set{"metadata.namespace": factory.Namespace()}.AsSelector(),
},
},
}
cli, err := ctlclient.New(clientConfig, ctlclient.Options{
Scheme: scheme,
})
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create client")
}
cache, err := ctlcache.New(clientConfig, cacheOption)
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create client cache")
}
s := &dataMoverBackup{
logger: logger,
ctx: ctx,
cancelFunc: cancelFunc,
client: cli,
cache: cache,
config: config,
namespace: factory.Namespace(),
nodeName: nodeName,
}
s.kubeClient, err = factory.KubeClient()
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create kube client")
}
s.dataPathMgr = datapath.NewManager(1)
return s, nil
}
var funcExitWithMessage = exitWithMessage
var funcCreateDataPathService = (*dataMoverBackup).createDataPathService
func (s *dataMoverBackup) run() {
signals.CancelOnShutdown(s.cancelFunc, s.logger)
go func() {
if err := s.cache.Start(s.ctx); err != nil {
s.logger.WithError(err).Warn("error starting cache")
}
}()
s.runDataPath()
}
func (s *dataMoverBackup) runDataPath() {
s.logger.Infof("Starting micro service in node %s for du %s", s.nodeName, s.config.duName)
dpService, err := funcCreateDataPathService(s)
if err != nil {
s.cancelFunc()
funcExitWithMessage(s.logger, false, "Failed to create data path service for DataUpload %s: %v", s.config.duName, err)
return
}
s.logger.Infof("Starting data path service %s", s.config.duName)
err = dpService.Init()
if err != nil {
dpService.Shutdown()
s.cancelFunc()
funcExitWithMessage(s.logger, false, "Failed to init data path service for DataUpload %s: %v", s.config.duName, err)
return
}
s.logger.Infof("Running data path service %s", s.config.duName)
result, err := dpService.RunCancelableDataPath(s.ctx)
if err != nil {
dpService.Shutdown()
s.cancelFunc()
funcExitWithMessage(s.logger, false, "Failed to run data path service for DataUpload %s: %v", s.config.duName, err)
return
}
s.logger.WithField("du", s.config.duName).Info("Data path service completed")
dpService.Shutdown()
s.logger.WithField("du", s.config.duName).Info("Data path service is shut down")
s.cancelFunc()
funcExitWithMessage(s.logger, true, result)
}
var funcNewCredentialFileStore = credentials.NewNamespacedFileStore
var funcNewCredentialSecretStore = credentials.NewNamespacedSecretStore
func (s *dataMoverBackup) createDataPathService() (dataPathService, error) {
credentialFileStore, err := funcNewCredentialFileStore(
s.client,
s.namespace,
defaultCredentialsDirectory,
filesystem.NewFileSystem(),
)
if err != nil {
return nil, errors.Wrapf(err, "error to create credential file store")
}
credSecretStore, err := funcNewCredentialSecretStore(s.client, s.namespace)
if err != nil {
return nil, errors.Wrapf(err, "error to create credential secret store")
}
credGetter := &credentials.CredentialGetter{FromFile: credentialFileStore, FromSecret: credSecretStore}
duInformer, err := s.cache.GetInformer(s.ctx, &velerov2alpha1api.DataUpload{})
if err != nil {
return nil, errors.Wrap(err, "error to get controller-runtime informer from manager")
}
repoEnsurer := repository.NewEnsurer(s.client, s.logger, s.config.resourceTimeout)
return datamover.NewBackupMicroService(s.ctx, s.client, s.kubeClient, s.config.duName, s.namespace, s.nodeName, datapath.AccessPoint{
ByPath: s.config.volumePath,
VolMode: uploader.PersistentVolumeMode(s.config.volumeMode),
}, s.dataPathMgr, repoEnsurer, credGetter, duInformer, s.logger), nil
}

View File

@ -0,0 +1,216 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"context"
"errors"
"fmt"
"testing"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
ctlclient "sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/internal/credentials"
cacheMock "github.com/vmware-tanzu/velero/pkg/cmd/cli/datamover/mocks"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
)
func fakeCreateDataPathServiceWithErr(_ *dataMoverBackup) (dataPathService, error) {
return nil, errors.New("fake-create-data-path-error")
}
var frHelper *fakeRunHelper
func fakeCreateDataPathService(_ *dataMoverBackup) (dataPathService, error) {
return frHelper, nil
}
type fakeRunHelper struct {
initErr error
runCancelableDataPathErr error
runCancelableDataPathResult string
exitMessage string
succeed bool
}
func (fr *fakeRunHelper) Init() error {
return fr.initErr
}
func (fr *fakeRunHelper) RunCancelableDataPath(_ context.Context) (string, error) {
if fr.runCancelableDataPathErr != nil {
return "", fr.runCancelableDataPathErr
} else {
return fr.runCancelableDataPathResult, nil
}
}
func (fr *fakeRunHelper) Shutdown() {
}
func (fr *fakeRunHelper) ExitWithMessage(logger logrus.FieldLogger, succeed bool, message string, a ...any) {
fr.succeed = succeed
fr.exitMessage = fmt.Sprintf(message, a...)
}
func TestRunDataPath(t *testing.T) {
tests := []struct {
name string
duName string
createDataPathFail bool
initDataPathErr error
runCancelableDataPathErr error
runCancelableDataPathResult string
expectedMessage string
expectedSucceed bool
}{
{
name: "create data path failed",
duName: "fake-name",
createDataPathFail: true,
expectedMessage: "Failed to create data path service for DataUpload fake-name: fake-create-data-path-error",
},
{
name: "init data path failed",
duName: "fake-name",
initDataPathErr: errors.New("fake-init-data-path-error"),
expectedMessage: "Failed to init data path service for DataUpload fake-name: fake-init-data-path-error",
},
{
name: "run data path failed",
duName: "fake-name",
runCancelableDataPathErr: errors.New("fake-run-data-path-error"),
expectedMessage: "Failed to run data path service for DataUpload fake-name: fake-run-data-path-error",
},
{
name: "succeed",
duName: "fake-name",
runCancelableDataPathResult: "fake-run-data-path-result",
expectedMessage: "fake-run-data-path-result",
expectedSucceed: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
frHelper = &fakeRunHelper{
initErr: test.initDataPathErr,
runCancelableDataPathErr: test.runCancelableDataPathErr,
runCancelableDataPathResult: test.runCancelableDataPathResult,
}
if test.createDataPathFail {
funcCreateDataPathService = fakeCreateDataPathServiceWithErr
} else {
funcCreateDataPathService = fakeCreateDataPathService
}
funcExitWithMessage = frHelper.ExitWithMessage
s := &dataMoverBackup{
logger: velerotest.NewLogger(),
cancelFunc: func() {},
config: dataMoverBackupConfig{
duName: test.duName,
},
}
s.runDataPath()
assert.Equal(t, test.expectedMessage, frHelper.exitMessage)
assert.Equal(t, test.expectedSucceed, frHelper.succeed)
})
}
}
type fakeCreateDataPathServiceHelper struct {
fileStoreErr error
secretStoreErr error
}
func (fc *fakeCreateDataPathServiceHelper) NewNamespacedFileStore(_ ctlclient.Client, _ string, _ string, _ filesystem.Interface) (credentials.FileStore, error) {
return nil, fc.fileStoreErr
}
func (fc *fakeCreateDataPathServiceHelper) NewNamespacedSecretStore(_ ctlclient.Client, _ string) (credentials.SecretStore, error) {
return nil, fc.secretStoreErr
}
func TestCreateDataPathService(t *testing.T) {
tests := []struct {
name string
fileStoreErr error
secretStoreErr error
mockGetInformer bool
getInformerErr error
expectedError string
}{
{
name: "create credential file store error",
fileStoreErr: errors.New("fake-file-store-error"),
expectedError: "error to create credential file store: fake-file-store-error",
},
{
name: "create credential secret store",
secretStoreErr: errors.New("fake-secret-store-error"),
expectedError: "error to create credential secret store: fake-secret-store-error",
},
{
name: "get informer error",
mockGetInformer: true,
getInformerErr: errors.New("fake-get-informer-error"),
expectedError: "error to get controller-runtime informer from manager: fake-get-informer-error",
},
{
name: "succeed",
mockGetInformer: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fcHelper := &fakeCreateDataPathServiceHelper{
fileStoreErr: test.fileStoreErr,
secretStoreErr: test.secretStoreErr,
}
funcNewCredentialFileStore = fcHelper.NewNamespacedFileStore
funcNewCredentialSecretStore = fcHelper.NewNamespacedSecretStore
cache := cacheMock.NewCache(t)
if test.mockGetInformer {
cache.On("GetInformer", mock.Anything, mock.Anything).Return(nil, test.getInformerErr)
}
funcExitWithMessage = frHelper.ExitWithMessage
s := &dataMoverBackup{
cache: cache,
}
_, err := s.createDataPathService()
if test.expectedError != "" {
assert.EqualError(t, err, test.expectedError)
} else {
assert.NoError(t, err)
}
})
}
}

View File

@ -0,0 +1,74 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"context"
"fmt"
"os"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/vmware-tanzu/velero/pkg/client"
)
func NewCommand(f client.Factory) *cobra.Command {
command := &cobra.Command{
Use: "data-mover",
Short: "Run the velero data-mover",
Long: "Run the velero data-mover",
Hidden: true,
}
command.AddCommand(
NewBackupCommand(f),
NewRestoreCommand(f),
)
return command
}
type dataPathService interface {
Init() error
RunCancelableDataPath(context.Context) (string, error)
Shutdown()
}
var funcExit = os.Exit
var funcCreateFile = os.Create
func exitWithMessage(logger logrus.FieldLogger, succeed bool, message string, a ...any) {
exitCode := 0
if !succeed {
exitCode = 1
}
toWrite := fmt.Sprintf(message, a...)
podFile, err := funcCreateFile("/dev/termination-log")
if err != nil {
logger.WithError(err).Error("Failed to create termination log file")
exitCode = 1
} else {
if _, err := podFile.WriteString(toWrite); err != nil {
logger.WithError(err).Error("Failed to write error to termination log file")
exitCode = 1
}
podFile.Close()
}
funcExit(exitCode)
}

View File

@ -0,0 +1,131 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"errors"
"fmt"
"io"
"os"
"path/filepath"
"testing"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
)
type exitWithMessageMock struct {
createErr error
writeFail bool
filePath string
exitCode int
}
func (em *exitWithMessageMock) Exit(code int) {
em.exitCode = code
}
func (em *exitWithMessageMock) CreateFile(name string) (*os.File, error) {
if em.createErr != nil {
return nil, em.createErr
}
if em.writeFail {
return os.OpenFile(em.filePath, os.O_CREATE|os.O_RDONLY, 0500)
} else {
return os.Create(em.filePath)
}
}
func TestExitWithMessage(t *testing.T) {
tests := []struct {
name string
message string
succeed bool
args []interface{}
createErr error
writeFail bool
expectedExitCode int
expectedMessage string
}{
{
name: "create pod file failed",
createErr: errors.New("fake-create-file-error"),
succeed: true,
expectedExitCode: 1,
},
{
name: "write pod file failed",
writeFail: true,
succeed: true,
expectedExitCode: 1,
},
{
name: "not succeed",
message: "fake-message-1, arg-1 %s, arg-2 %v, arg-3 %v",
args: []interface{}{
"arg-1-1",
10,
false,
},
expectedExitCode: 1,
expectedMessage: fmt.Sprintf("fake-message-1, arg-1 %s, arg-2 %v, arg-3 %v", "arg-1-1", 10, false),
},
{
name: "not succeed",
message: "fake-message-2, arg-1 %s, arg-2 %v, arg-3 %v",
args: []interface{}{
"arg-1-2",
20,
true,
},
succeed: true,
expectedMessage: fmt.Sprintf("fake-message-2, arg-1 %s, arg-2 %v, arg-3 %v", "arg-1-2", 20, true),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
podFile := filepath.Join(os.TempDir(), uuid.NewString())
em := exitWithMessageMock{
createErr: test.createErr,
writeFail: test.writeFail,
filePath: podFile,
}
funcExit = em.Exit
funcCreateFile = em.CreateFile
exitWithMessage(velerotest.NewLogger(), test.succeed, test.message, test.args...)
assert.Equal(t, test.expectedExitCode, em.exitCode)
if test.createErr == nil && !test.writeFail {
reader, err := os.Open(podFile)
require.NoError(t, err)
message, err := io.ReadAll(reader)
require.NoError(t, err)
reader.Close()
assert.Equal(t, test.expectedMessage, string(message))
}
})
}
}

View File

@ -0,0 +1,231 @@
// Code generated by mockery v2.39.1. DO NOT EDIT.
package mocks
import (
cache "sigs.k8s.io/controller-runtime/pkg/cache"
client "sigs.k8s.io/controller-runtime/pkg/client"
context "context"
mock "github.com/stretchr/testify/mock"
schema "k8s.io/apimachinery/pkg/runtime/schema"
types "k8s.io/apimachinery/pkg/types"
)
// Cache is an autogenerated mock type for the Cache type
type Cache struct {
mock.Mock
}
// Get provides a mock function with given fields: ctx, key, obj, opts
func (_m *Cache) Get(ctx context.Context, key types.NamespacedName, obj client.Object, opts ...client.GetOption) error {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
}
var _ca []interface{}
_ca = append(_ca, ctx, key, obj)
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
if len(ret) == 0 {
panic("no return value specified for Get")
}
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, types.NamespacedName, client.Object, ...client.GetOption) error); ok {
r0 = rf(ctx, key, obj, opts...)
} else {
r0 = ret.Error(0)
}
return r0
}
// GetInformer provides a mock function with given fields: ctx, obj, opts
func (_m *Cache) GetInformer(ctx context.Context, obj client.Object, opts ...cache.InformerGetOption) (cache.Informer, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
}
var _ca []interface{}
_ca = append(_ca, ctx, obj)
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
if len(ret) == 0 {
panic("no return value specified for GetInformer")
}
var r0 cache.Informer
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...cache.InformerGetOption) (cache.Informer, error)); ok {
return rf(ctx, obj, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...cache.InformerGetOption) cache.Informer); ok {
r0 = rf(ctx, obj, opts...)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(cache.Informer)
}
}
if rf, ok := ret.Get(1).(func(context.Context, client.Object, ...cache.InformerGetOption) error); ok {
r1 = rf(ctx, obj, opts...)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// GetInformerForKind provides a mock function with given fields: ctx, gvk, opts
func (_m *Cache) GetInformerForKind(ctx context.Context, gvk schema.GroupVersionKind, opts ...cache.InformerGetOption) (cache.Informer, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
}
var _ca []interface{}
_ca = append(_ca, ctx, gvk)
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
if len(ret) == 0 {
panic("no return value specified for GetInformerForKind")
}
var r0 cache.Informer
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, schema.GroupVersionKind, ...cache.InformerGetOption) (cache.Informer, error)); ok {
return rf(ctx, gvk, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, schema.GroupVersionKind, ...cache.InformerGetOption) cache.Informer); ok {
r0 = rf(ctx, gvk, opts...)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(cache.Informer)
}
}
if rf, ok := ret.Get(1).(func(context.Context, schema.GroupVersionKind, ...cache.InformerGetOption) error); ok {
r1 = rf(ctx, gvk, opts...)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// IndexField provides a mock function with given fields: ctx, obj, field, extractValue
func (_m *Cache) IndexField(ctx context.Context, obj client.Object, field string, extractValue client.IndexerFunc) error {
ret := _m.Called(ctx, obj, field, extractValue)
if len(ret) == 0 {
panic("no return value specified for IndexField")
}
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, client.Object, string, client.IndexerFunc) error); ok {
r0 = rf(ctx, obj, field, extractValue)
} else {
r0 = ret.Error(0)
}
return r0
}
// List provides a mock function with given fields: ctx, list, opts
func (_m *Cache) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
}
var _ca []interface{}
_ca = append(_ca, ctx, list)
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
if len(ret) == 0 {
panic("no return value specified for List")
}
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, client.ObjectList, ...client.ListOption) error); ok {
r0 = rf(ctx, list, opts...)
} else {
r0 = ret.Error(0)
}
return r0
}
// RemoveInformer provides a mock function with given fields: ctx, obj
func (_m *Cache) RemoveInformer(ctx context.Context, obj client.Object) error {
ret := _m.Called(ctx, obj)
if len(ret) == 0 {
panic("no return value specified for RemoveInformer")
}
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, client.Object) error); ok {
r0 = rf(ctx, obj)
} else {
r0 = ret.Error(0)
}
return r0
}
// Start provides a mock function with given fields: ctx
func (_m *Cache) Start(ctx context.Context) error {
ret := _m.Called(ctx)
if len(ret) == 0 {
panic("no return value specified for Start")
}
var r0 error
if rf, ok := ret.Get(0).(func(context.Context) error); ok {
r0 = rf(ctx)
} else {
r0 = ret.Error(0)
}
return r0
}
// WaitForCacheSync provides a mock function with given fields: ctx
func (_m *Cache) WaitForCacheSync(ctx context.Context) bool {
ret := _m.Called(ctx)
if len(ret) == 0 {
panic("no return value specified for WaitForCacheSync")
}
var r0 bool
if rf, ok := ret.Get(0).(func(context.Context) bool); ok {
r0 = rf(ctx)
} else {
r0 = ret.Get(0).(bool)
}
return r0
}
// NewCache creates a new instance of Cache. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
// The first argument is typically a *testing.T value.
func NewCache(t interface {
mock.TestingT
Cleanup(func())
}) *Cache {
mock := &Cache{}
mock.Mock.Test(t)
t.Cleanup(func() { mock.AssertExpectations(t) })
return mock
}
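For reference, a minimal sketch of how this generated mock is typically driven from a test. The package name, the fake error, and the expectation below are illustrative assumptions rather than code from the Velero test suite (the real usage appears in the datamover tests later in this diff):
package mocks
import (
	"context"
	"errors"
	"testing"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	corev1 "k8s.io/api/core/v1"
)
func TestCacheMockSketch(t *testing.T) {
	// NewCache wires the mock to t and asserts expectations on cleanup.
	c := NewCache(t)
	// GetInformer is variadic: a call without options passes exactly two
	// arguments (ctx, obj), so two mock.Anything matchers are sufficient.
	c.On("GetInformer", mock.Anything, mock.Anything).
		Return(nil, errors.New("fake-get-informer-error"))
	_, err := c.GetInformer(context.Background(), &corev1.Pod{})
	assert.EqualError(t, err, "fake-get-informer-error")
}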

View File

@ -0,0 +1,272 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"context"
"fmt"
"os"
"strings"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"github.com/vmware-tanzu/velero/internal/credentials"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/buildinfo"
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd/util/signals"
"github.com/vmware-tanzu/velero/pkg/datamover"
"github.com/vmware-tanzu/velero/pkg/datapath"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/logging"
ctlcache "sigs.k8s.io/controller-runtime/pkg/cache"
ctlclient "sigs.k8s.io/controller-runtime/pkg/client"
)
type dataMoverRestoreConfig struct {
volumePath string
volumeMode string
ddName string
resourceTimeout time.Duration
}
func NewRestoreCommand(f client.Factory) *cobra.Command {
logLevelFlag := logging.LogLevelFlag(logrus.InfoLevel)
formatFlag := logging.NewFormatFlag()
config := dataMoverRestoreConfig{}
command := &cobra.Command{
Use: "restore",
Short: "Run the velero data-mover restore",
Long: "Run the velero data-mover restore",
Hidden: true,
Run: func(c *cobra.Command, args []string) {
logLevel := logLevelFlag.Parse()
logrus.Infof("Setting log-level to %s", strings.ToUpper(logLevel.String()))
logger := logging.DefaultLogger(logLevel, formatFlag.Parse())
logger.Infof("Starting Velero data-mover restore %s (%s)", buildinfo.Version, buildinfo.FormattedGitSHA())
f.SetBasename(fmt.Sprintf("%s-%s", c.Parent().Name(), c.Name()))
s, err := newdataMoverRestore(logger, f, config)
if err != nil {
exitWithMessage(logger, false, "Failed to create data mover restore, %v", err)
}
s.run()
},
}
command.Flags().Var(logLevelFlag, "log-level", fmt.Sprintf("The level at which to log. Valid values are %s.", strings.Join(logLevelFlag.AllowedValues(), ", ")))
command.Flags().Var(formatFlag, "log-format", fmt.Sprintf("The format for log output. Valid values are %s.", strings.Join(formatFlag.AllowedValues(), ", ")))
command.Flags().StringVar(&config.volumePath, "volume-path", config.volumePath, "The full path of the volume to be restored")
command.Flags().StringVar(&config.volumeMode, "volume-mode", config.volumeMode, "The mode of the volume to be restored")
command.Flags().StringVar(&config.ddName, "data-download", config.ddName, "The data download name")
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters.")
_ = command.MarkFlagRequired("volume-path")
_ = command.MarkFlagRequired("volume-mode")
_ = command.MarkFlagRequired("data-download")
_ = command.MarkFlagRequired("resource-timeout")
return command
}
type dataMoverRestore struct {
logger logrus.FieldLogger
ctx context.Context
cancelFunc context.CancelFunc
client ctlclient.Client
cache ctlcache.Cache
namespace string
nodeName string
config dataMoverRestoreConfig
kubeClient kubernetes.Interface
dataPathMgr *datapath.Manager
}
func newdataMoverRestore(logger logrus.FieldLogger, factory client.Factory, config dataMoverRestoreConfig) (*dataMoverRestore, error) {
ctx, cancelFunc := context.WithCancel(context.Background())
clientConfig, err := factory.ClientConfig()
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create client config")
}
ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
scheme := runtime.NewScheme()
if err := velerov1api.AddToScheme(scheme); err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to add velero v1 scheme")
}
if err := velerov2alpha1api.AddToScheme(scheme); err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to add velero v2alpha1 scheme")
}
if err := v1.AddToScheme(scheme); err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to add core v1 scheme")
}
nodeName := os.Getenv("NODE_NAME")
// use a field selector to filter to only pods scheduled on this node.
cacheOption := ctlcache.Options{
Scheme: scheme,
ByObject: map[ctlclient.Object]ctlcache.ByObject{
&v1.Pod{}: {
Field: fields.Set{"spec.nodeName": nodeName}.AsSelector(),
},
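// only cache DataDownloads in the Velero install namespace.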
&velerov2alpha1api.DataDownload{}: {
Field: fields.Set{"metadata.namespace": factory.Namespace()}.AsSelector(),
},
},
}
cli, err := ctlclient.New(clientConfig, ctlclient.Options{
Scheme: scheme,
})
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create client")
}
cache, err := ctlcache.New(clientConfig, cacheOption)
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create client cache")
}
s := &dataMoverRestore{
logger: logger,
ctx: ctx,
cancelFunc: cancelFunc,
client: cli,
cache: cache,
config: config,
namespace: factory.Namespace(),
nodeName: nodeName,
}
s.kubeClient, err = factory.KubeClient()
if err != nil {
cancelFunc()
return nil, errors.Wrap(err, "error to create kube client")
}
s.dataPathMgr = datapath.NewManager(1)
return s, nil
}
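// funcCreateDataPathRestore is a package-level indirection so unit tests can substitute a fake data path service constructor (see the restore tests below).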
var funcCreateDataPathRestore = (*dataMoverRestore).createDataPathService
func (s *dataMoverRestore) run() {
signals.CancelOnShutdown(s.cancelFunc, s.logger)
go func() {
if err := s.cache.Start(s.ctx); err != nil {
s.logger.WithError(err).Warn("error starting cache")
}
}()
s.runDataPath()
}
func (s *dataMoverRestore) runDataPath() {
s.logger.Infof("Starting micro service in node %s for dd %s", s.nodeName, s.config.ddName)
dpService, err := funcCreateDataPathRestore(s)
if err != nil {
s.cancelFunc()
funcExitWithMessage(s.logger, false, "Failed to create data path service for DataDownload %s: %v", s.config.ddName, err)
return
}
s.logger.Infof("Starting data path service %s", s.config.ddName)
err = dpService.Init()
if err != nil {
dpService.Shutdown()
s.cancelFunc()
funcExitWithMessage(s.logger, false, "Failed to init data path service for DataDownload %s: %v", s.config.ddName, err)
return
}
result, err := dpService.RunCancelableDataPath(s.ctx)
if err != nil {
dpService.Shutdown()
s.cancelFunc()
funcExitWithMessage(s.logger, false, "Failed to run data path service for DataDownload %s: %v", s.config.ddName, err)
return
}
s.logger.WithField("dd", s.config.ddName).Info("Data path service completed")
dpService.Shutdown()
s.logger.WithField("dd", s.config.ddName).Info("Data path service is shut down")
s.cancelFunc()
funcExitWithMessage(s.logger, true, result)
}
func (s *dataMoverRestore) createDataPathService() (dataPathService, error) {
credentialFileStore, err := funcNewCredentialFileStore(
s.client,
s.namespace,
defaultCredentialsDirectory,
filesystem.NewFileSystem(),
)
if err != nil {
return nil, errors.Wrapf(err, "error to create credential file store")
}
credSecretStore, err := funcNewCredentialSecretStore(s.client, s.namespace)
if err != nil {
return nil, errors.Wrapf(err, "error to create credential secret store")
}
credGetter := &credentials.CredentialGetter{FromFile: credentialFileStore, FromSecret: credSecretStore}
duInformer, err := s.cache.GetInformer(s.ctx, &velerov2alpha1api.DataDownload{})
if err != nil {
return nil, errors.Wrap(err, "error to get controller-runtime informer from manager")
}
repoEnsurer := repository.NewEnsurer(s.client, s.logger, s.config.resourceTimeout)
return datamover.NewRestoreMicroService(s.ctx, s.client, s.kubeClient, s.config.ddName, s.namespace, s.nodeName, datapath.AccessPoint{
ByPath: s.config.volumePath,
VolMode: uploader.PersistentVolumeMode(s.config.volumeMode),
}, s.dataPathMgr, repoEnsurer, credGetter, duInformer, s.logger), nil
}

View File

@ -0,0 +1,166 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"errors"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
cacheMock "github.com/vmware-tanzu/velero/pkg/cmd/cli/datamover/mocks"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
)
func fakeCreateDataPathRestoreWithErr(_ *dataMoverRestore) (dataPathService, error) {
return nil, errors.New("fake-create-data-path-error")
}
func fakeCreateDataPathRestore(_ *dataMoverRestore) (dataPathService, error) {
return frHelper, nil
}
func TestRunDataPathRestore(t *testing.T) {
tests := []struct {
name string
ddName string
createDataPathFail bool
initDataPathErr error
runCancelableDataPathErr error
runCancelableDataPathResult string
expectedMessage string
expectedSucceed bool
}{
{
name: "create data path failed",
ddName: "fake-name",
createDataPathFail: true,
expectedMessage: "Failed to create data path service for DataDownload fake-name: fake-create-data-path-error",
},
{
name: "init data path failed",
ddName: "fake-name",
initDataPathErr: errors.New("fake-init-data-path-error"),
expectedMessage: "Failed to init data path service for DataDownload fake-name: fake-init-data-path-error",
},
{
name: "run data path failed",
ddName: "fake-name",
runCancelableDataPathErr: errors.New("fake-run-data-path-error"),
expectedMessage: "Failed to run data path service for DataDownload fake-name: fake-run-data-path-error",
},
{
name: "succeed",
ddName: "fake-name",
runCancelableDataPathResult: "fake-run-data-path-result",
expectedMessage: "fake-run-data-path-result",
expectedSucceed: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
frHelper = &fakeRunHelper{
initErr: test.initDataPathErr,
runCancelableDataPathErr: test.runCancelableDataPathErr,
runCancelableDataPathResult: test.runCancelableDataPathResult,
}
if test.createDataPathFail {
funcCreateDataPathRestore = fakeCreateDataPathRestoreWithErr
} else {
funcCreateDataPathRestore = fakeCreateDataPathRestore
}
funcExitWithMessage = frHelper.ExitWithMessage
s := &dataMoverRestore{
logger: velerotest.NewLogger(),
cancelFunc: func() {},
config: dataMoverRestoreConfig{
ddName: test.ddName,
},
}
s.runDataPath()
assert.Equal(t, test.expectedMessage, frHelper.exitMessage)
assert.Equal(t, test.expectedSucceed, frHelper.succeed)
})
}
}
func TestCreateDataPathRestore(t *testing.T) {
tests := []struct {
name string
fileStoreErr error
secretStoreErr error
mockGetInformer bool
getInformerErr error
expectedError string
}{
{
name: "create credential file store error",
fileStoreErr: errors.New("fake-file-store-error"),
expectedError: "error to create credential file store: fake-file-store-error",
},
{
name: "create credential secret store",
secretStoreErr: errors.New("fake-secret-store-error"),
expectedError: "error to create credential secret store: fake-secret-store-error",
},
{
name: "get informer error",
mockGetInformer: true,
getInformerErr: errors.New("fake-get-informer-error"),
expectedError: "error to get controller-runtime informer from manager: fake-get-informer-error",
},
{
name: "succeed",
mockGetInformer: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fcHelper := &fakeCreateDataPathServiceHelper{
fileStoreErr: test.fileStoreErr,
secretStoreErr: test.secretStoreErr,
}
funcNewCredentialFileStore = fcHelper.NewNamespacedFileStore
funcNewCredentialSecretStore = fcHelper.NewNamespacedSecretStore
cache := cacheMock.NewCache(t)
if test.mockGetInformer {
cache.On("GetInformer", mock.Anything, mock.Anything).Return(nil, test.getInformerErr)
}
funcExitWithMessage = frHelper.ExitWithMessage
s := &dataMoverRestore{
cache: cache,
}
_, err := s.createDataPathService()
if test.expectedError != "" {
assert.EqualError(t, err, test.expectedError)
} else {
assert.NoError(t, err)
}
})
}
}

View File

@ -364,8 +364,10 @@ func (o *Options) Validate(c *cobra.Command, args []string, f client.Factory) er
return err
}
if err := uploader.ValidateUploaderType(o.UploaderType); err != nil {
if msg, err := uploader.ValidateUploaderType(o.UploaderType); err != nil {
return err
} else if msg != "" {
fmt.Printf("⚠️ %s\n", msg)
}
// If we're only installing CRDs, we can skip the rest of the validation.

View File

@ -60,7 +60,10 @@ import (
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/kube"
"github.com/vmware-tanzu/velero/pkg/util/logging"
cacheutil "k8s.io/client-go/tools/cache"
)
var (
@ -84,6 +87,7 @@ type nodeAgentServerConfig struct {
metricsAddress string
resourceTimeout time.Duration
dataMoverPrepareTimeout time.Duration
nodeAgentConfig string
}
func NewServerCommand(f client.Factory) *cobra.Command {
@ -104,7 +108,7 @@ func NewServerCommand(f client.Factory) *cobra.Command {
logLevel := logLevelFlag.Parse()
logrus.Infof("Setting log-level to %s", strings.ToUpper(logLevel.String()))
logger := logging.DefaultLogger(logLevel, formatFlag.Parse())
logger := logging.DefaultMergeLogger(logLevel, formatFlag.Parse())
logger.Infof("Starting Velero node-agent server %s (%s)", buildinfo.Version, buildinfo.FormattedGitSHA())
f.SetBasename(fmt.Sprintf("%s-%s", c.Parent().Name(), c.Name()))
@ -120,6 +124,7 @@ func NewServerCommand(f client.Factory) *cobra.Command {
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
command.Flags().DurationVar(&config.dataMoverPrepareTimeout, "data-mover-prepare-timeout", config.dataMoverPrepareTimeout, "How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.")
command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics")
command.Flags().StringVar(&config.nodeAgentConfig, "node-agent-config", config.nodeAgentConfig, "The name of configMap containing node-agent configurations.")
return command
}
@ -189,6 +194,9 @@ func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, confi
&velerov2alpha1api.DataDownload{}: {
Field: fields.Set{"metadata.namespace": factory.Namespace()}.AsSelector(),
},
&v1.Event{}: {
Field: fields.Set{"metadata.namespace": factory.Namespace()}.AsSelector(),
},
},
}
mgr, err := ctrl.NewManager(clientConfig, ctrl.Options{
@ -288,19 +296,50 @@ func (s *nodeAgentServer) run() {
var loadAffinity *nodeagent.LoadAffinity
if s.dataPathConfigs != nil && len(s.dataPathConfigs.LoadAffinity) > 0 {
loadAffinity = s.dataPathConfigs.LoadAffinity[0]
s.logger.Infof("Using customized loadAffinity %v", loadAffinity)
}
dataUploadReconciler := controller.NewDataUploadReconciler(s.mgr.GetClient(), s.kubeClient, s.csiSnapshotClient.SnapshotV1(), s.dataPathMgr, loadAffinity, repoEnsurer, clock.RealClock{}, credentialGetter, s.nodeName, s.fileSystem, s.config.dataMoverPrepareTimeout, s.logger, s.metrics)
s.attemptDataUploadResume(dataUploadReconciler)
var backupPVCConfig map[string]nodeagent.BackupPVC
if s.dataPathConfigs != nil && s.dataPathConfigs.BackupPVCConfig != nil {
backupPVCConfig = s.dataPathConfigs.BackupPVCConfig
s.logger.Infof("Using customized backupPVC config %v", backupPVCConfig)
}
podResources := v1.ResourceRequirements{}
if s.dataPathConfigs != nil && s.dataPathConfigs.PodResources != nil {
if res, err := kube.ParseResourceRequirements(s.dataPathConfigs.PodResources.CPURequest, s.dataPathConfigs.PodResources.MemoryRequest, s.dataPathConfigs.PodResources.CPULimit, s.dataPathConfigs.PodResources.MemoryLimit); err != nil {
s.logger.WithError(err).Warn("Pod resource requirements are invalid, ignore")
} else {
podResources = res
s.logger.Infof("Using customized pod resource requirements %v", s.dataPathConfigs.PodResources)
}
}
dataUploadReconciler := controller.NewDataUploadReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.csiSnapshotClient.SnapshotV1(), s.dataPathMgr, loadAffinity, backupPVCConfig, podResources, clock.RealClock{}, s.nodeName, s.config.dataMoverPrepareTimeout, s.logger, s.metrics)
if err = dataUploadReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.WithError(err).Fatal("Unable to create the data upload controller")
}
dataDownloadReconciler := controller.NewDataDownloadReconciler(s.mgr.GetClient(), s.kubeClient, s.dataPathMgr, repoEnsurer, credentialGetter, s.nodeName, s.config.dataMoverPrepareTimeout, s.logger, s.metrics)
s.attemptDataDownloadResume(dataDownloadReconciler)
dataDownloadReconciler := controller.NewDataDownloadReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, podResources, s.nodeName, s.config.dataMoverPrepareTimeout, s.logger, s.metrics)
if err = dataDownloadReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.WithError(err).Fatal("Unable to create the data download controller")
}
go func() {
if err := s.waitCacheForResume(); err != nil {
s.logger.WithError(err).Error("Failed to wait cache for resume, will not resume DU/DD")
return
}
if err := dataUploadReconciler.AttemptDataUploadResume(s.ctx, s.logger.WithField("node", s.nodeName), s.namespace); err != nil {
s.logger.WithError(errors.WithStack(err)).Error("Failed to attempt data upload resume")
}
if err := dataDownloadReconciler.AttemptDataDownloadResume(s.ctx, s.logger.WithField("node", s.nodeName), s.namespace); err != nil {
s.logger.WithError(errors.WithStack(err)).Error("Failed to attempt data download resume")
}
}()
s.logger.Info("Controllers starting...")
if err := s.mgr.Start(ctrl.SetupSignalHandler()); err != nil {
@ -308,6 +347,29 @@ func (s *nodeAgentServer) run() {
}
}
func (s *nodeAgentServer) waitCacheForResume() error {
podInformer, err := s.mgr.GetCache().GetInformer(s.ctx, &v1.Pod{})
if err != nil {
return errors.Wrap(err, "error getting pod informer")
}
duInformer, err := s.mgr.GetCache().GetInformer(s.ctx, &velerov2alpha1api.DataUpload{})
if err != nil {
return errors.Wrap(err, "error getting du informer")
}
ddInformer, err := s.mgr.GetCache().GetInformer(s.ctx, &velerov2alpha1api.DataDownload{})
if err != nil {
return errors.Wrap(err, "error getting dd informer")
}
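// block until the pod, DataUpload and DataDownload informers have synced, or the context is done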
if !cacheutil.WaitForCacheSync(s.ctx.Done(), podInformer.HasSynced, duInformer.HasSynced, ddInformer.HasSynced) {
return errors.New("error waiting informer synced")
}
return nil
}
// validatePodVolumesHostPath validates that the pod volumes path contains a
// directory for each Pod running on this node
func (s *nodeAgentServer) validatePodVolumesHostPath(client kubernetes.Interface) error {
@ -370,31 +432,6 @@ func (s *nodeAgentServer) markInProgressCRsFailed() {
s.markInProgressPVRsFailed(client)
}
func (s *nodeAgentServer) attemptDataUploadResume(r *controller.DataUploadReconciler) {
// the function is called before starting the controller manager, the embedded client isn't ready to use, so create a new one here
client, err := ctrlclient.New(s.mgr.GetConfig(), ctrlclient.Options{Scheme: s.mgr.GetScheme()})
if err != nil {
s.logger.WithError(errors.WithStack(err)).Error("failed to create client")
return
}
if err := r.AttemptDataUploadResume(s.ctx, client, s.logger.WithField("node", s.nodeName), s.namespace); err != nil {
s.logger.WithError(errors.WithStack(err)).Error("failed to attempt data upload resume")
}
}
func (s *nodeAgentServer) attemptDataDownloadResume(r *controller.DataDownloadReconciler) {
// the function is called before starting the controller manager, the embedded client isn't ready to use, so create a new one here
client, err := ctrlclient.New(s.mgr.GetConfig(), ctrlclient.Options{Scheme: s.mgr.GetScheme()})
if err != nil {
s.logger.WithError(errors.WithStack(err)).Error("failed to create client")
return
}
if err := r.AttemptDataDownloadResume(s.ctx, client, s.logger.WithField("node", s.nodeName), s.namespace); err != nil {
s.logger.WithError(errors.WithStack(err)).Error("failed to attempt data download resume")
}
}
func (s *nodeAgentServer) markInProgressPVBsFailed(client ctrlclient.Client) {
pvbs := &velerov1api.PodVolumeBackupList{}
if err := client.List(s.ctx, pvbs, &ctrlclient.ListOptions{Namespace: s.namespace}); err != nil {
@ -412,7 +449,7 @@ func (s *nodeAgentServer) markInProgressPVBsFailed(client ctrlclient.Client) {
}
if err := controller.UpdatePVBStatusToFailed(s.ctx, client, &pvbs.Items[i],
fmt.Errorf("get a podvolumebackup with status %q during the server starting, mark it as %q", velerov1api.PodVolumeBackupPhaseInProgress, velerov1api.PodVolumeBackupPhaseFailed),
fmt.Errorf("found a podvolumebackup with status %q during the server starting, mark it as %q", velerov1api.PodVolumeBackupPhaseInProgress, velerov1api.PodVolumeBackupPhaseFailed),
"", time.Now(), s.logger); err != nil {
s.logger.WithError(errors.WithStack(err)).Errorf("failed to patch podvolumebackup %q", pvb.GetName())
continue
@ -460,14 +497,14 @@ func (s *nodeAgentServer) markInProgressPVRsFailed(client ctrlclient.Client) {
var getConfigsFunc = nodeagent.GetConfigs
func (s *nodeAgentServer) getDataPathConfigs() {
configs, err := getConfigsFunc(s.ctx, s.namespace, s.kubeClient)
if err != nil {
s.logger.WithError(err).Warn("Failed to get node agent configs")
if s.config.nodeAgentConfig == "" {
s.logger.Info("No node-agent configMap is specified")
return
}
if configs == nil {
s.logger.Infof("Node agent configs are not found")
configs, err := getConfigsFunc(s.ctx, s.namespace, s.kubeClient, s.config.nodeAgentConfig)
if err != nil {
s.logger.WithError(err).Warnf("Failed to get node agent configs from configMap %s, ignore it", s.config.nodeAgentConfig)
return
}

View File

@ -17,13 +17,12 @@ package nodeagent
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -123,28 +122,36 @@ func Test_getDataPathConfigs(t *testing.T) {
tests := []struct {
name string
getFunc func(context.Context, string, kubernetes.Interface) (*nodeagent.Configs, error)
getFunc func(context.Context, string, kubernetes.Interface, string) (*nodeagent.Configs, error)
configMapName string
expectConfigs *nodeagent.Configs
expectLog string
}{
{
name: "failed to get configs",
getFunc: func(context.Context, string, kubernetes.Interface) (*nodeagent.Configs, error) {
return nil, errors.New("fake-get-error")
},
expectLog: "Failed to get node agent configs",
name: "no config specified",
expectLog: "No node-agent configMap is specified",
},
{
name: "configs cm not found",
getFunc: func(context.Context, string, kubernetes.Interface) (*nodeagent.Configs, error) {
return nil, nil
name: "failed to get configs",
configMapName: "node-agent-config",
getFunc: func(context.Context, string, kubernetes.Interface, string) (*nodeagent.Configs, error) {
return nil, errors.New("fake-get-error")
},
expectLog: "Node agent configs are not found",
expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it",
},
{
name: "configs cm not found",
configMapName: "node-agent-config",
getFunc: func(context.Context, string, kubernetes.Interface, string) (*nodeagent.Configs, error) {
return nil, errors.New("fake-not-found-error")
},
expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it",
},
{
name: "succeed",
getFunc: func(context.Context, string, kubernetes.Interface) (*nodeagent.Configs, error) {
name: "succeed",
configMapName: "node-agent-config",
getFunc: func(context.Context, string, kubernetes.Interface, string) (*nodeagent.Configs, error) {
return configs, nil
},
expectConfigs: configs,
@ -156,6 +163,9 @@ func Test_getDataPathConfigs(t *testing.T) {
logBuffer := ""
s := &nodeAgentServer{
config: nodeAgentServerConfig{
nodeAgentConfig: test.configMapName,
},
logger: testutil.NewSingleLogger(&logBuffer),
}
@ -166,7 +176,7 @@ func Test_getDataPathConfigs(t *testing.T) {
if test.expectLog == "" {
assert.Equal(t, "", logBuffer)
} else {
assert.True(t, strings.Contains(logBuffer, test.expectLog))
assert.Contains(t, logBuffer, test.expectLog)
}
})
}
@ -384,7 +394,7 @@ func Test_getDataPathConcurrentNum(t *testing.T) {
if test.expectLog == "" {
assert.Equal(t, "", logBuffer)
} else {
assert.True(t, strings.Contains(logBuffer, test.expectLog))
assert.Contains(t, logBuffer, test.expectLog)
}
})
}

View File

@ -30,10 +30,12 @@ import (
"github.com/vmware-tanzu/velero/pkg/client"
velerodiscovery "github.com/vmware-tanzu/velero/pkg/discovery"
"github.com/vmware-tanzu/velero/pkg/features"
iba "github.com/vmware-tanzu/velero/pkg/itemblock/actions"
veleroplugin "github.com/vmware-tanzu/velero/pkg/plugin/framework"
plugincommon "github.com/vmware-tanzu/velero/pkg/plugin/framework/common"
ria "github.com/vmware-tanzu/velero/pkg/restore/actions"
csiria "github.com/vmware-tanzu/velero/pkg/restore/actions/csi"
"github.com/vmware-tanzu/velero/pkg/util/actionhelpers"
)
func NewCommand(f client.Factory) *cobra.Command {
@ -171,6 +173,18 @@ func NewCommand(f client.Factory) *cobra.Command {
RegisterRestoreItemActionV2(
"velero.io/csi-volumesnapshotclass-restorer",
newVolumeSnapshotClassRestoreItemAction,
).
RegisterItemBlockAction(
"velero.io/pvc",
newPVCItemBlockAction(f),
).
RegisterItemBlockAction(
"velero.io/pod",
newPodItemBlockAction,
).
RegisterItemBlockAction(
"velero.io/service-account",
newServiceAccountItemBlockAction(f),
)
if !features.IsEnabled(velerov1api.APIGroupVersionsFeatureFlag) {
@ -211,7 +225,7 @@ func newServiceAccountBackupItemAction(f client.Factory) plugincommon.HandlerIni
action, err := bia.NewServiceAccountAction(
logger,
bia.NewClusterRoleBindingListerMap(clientset),
actionhelpers.NewClusterRoleBindingListerMap(clientset),
discoveryHelper)
if err != nil {
return nil, err
@ -431,3 +445,38 @@ func newVolumeSnapshotContentRestoreItemAction(logger logrus.FieldLogger) (inter
func newVolumeSnapshotClassRestoreItemAction(logger logrus.FieldLogger) (interface{}, error) {
return csiria.NewVolumeSnapshotClassRestoreItemAction(logger)
}
// ItemBlockAction plugins
func newPVCItemBlockAction(f client.Factory) plugincommon.HandlerInitializer {
return iba.NewPVCAction(f)
}
func newPodItemBlockAction(logger logrus.FieldLogger) (interface{}, error) {
return iba.NewPodAction(logger), nil
}
func newServiceAccountItemBlockAction(f client.Factory) plugincommon.HandlerInitializer {
return func(logger logrus.FieldLogger) (interface{}, error) {
// TODO(ncdc): consider a k8s style WantsKubernetesClientSet initialization approach
clientset, err := f.KubeClient()
if err != nil {
return nil, err
}
discoveryHelper, err := velerodiscovery.NewHelper(clientset.Discovery(), logger)
if err != nil {
return nil, err
}
action, err := iba.NewServiceAccountAction(
logger,
actionhelpers.NewClusterRoleBindingListerMap(clientset),
discoveryHelper)
if err != nil {
return nil, err
}
return action, nil
}
}

View File

@ -140,6 +140,7 @@ type serverConfig struct {
disableInformerCache bool
scheduleSkipImmediately bool
maintenanceCfg repository.MaintenanceConfig
backupRepoConfig string
}
func NewCommand(f client.Factory) *cobra.Command {
@ -253,6 +254,8 @@ func NewCommand(f client.Factory) *cobra.Command {
command.Flags().StringVar(&config.maintenanceCfg.CPULimit, "maintenance-job-cpu-limit", config.maintenanceCfg.CPULimit, "CPU limit for maintenance job. Default is no limit.")
command.Flags().StringVar(&config.maintenanceCfg.MemLimit, "maintenance-job-mem-limit", config.maintenanceCfg.MemLimit, "Memory limit for maintenance job. Default is no limit.")
command.Flags().StringVar(&config.backupRepoConfig, "backup-repository-config", config.backupRepoConfig, "The name of configMap containing backup repository configurations.")
// maintenance job log setting inherited from velero server
config.maintenanceCfg.FormatFlag = config.formatFlag
config.maintenanceCfg.LogLevelFlag = logLevelFlag
@ -288,8 +291,10 @@ type server struct {
}
func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*server, error) {
if err := uploader.ValidateUploaderType(config.uploaderType); err != nil {
if msg, err := uploader.ValidateUploaderType(config.uploaderType); err != nil {
return nil, err
} else if msg != "" {
logger.Warn(msg)
}
if config.clientQPS < 0.0 {
@ -876,7 +881,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
}
if _, ok := enabledRuntimeControllers[controller.BackupRepo]; ok {
if err := controller.NewBackupRepoReconciler(s.namespace, s.logger, s.mgr.GetClient(), s.config.repoMaintenanceFrequency, s.repoManager).SetupWithManager(s.mgr); err != nil {
if err := controller.NewBackupRepoReconciler(s.namespace, s.logger, s.mgr.GetClient(), s.config.repoMaintenanceFrequency, s.config.backupRepoConfig, s.repoManager).SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", controller.BackupRepo)
}
}
@ -1022,6 +1027,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.metrics,
s.crClient,
multiHookTracker,
s.config.resourceTimeout,
).SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", controller.RestoreFinalizer)
}
@ -1147,9 +1153,15 @@ func markDataUploadsCancel(ctx context.Context, client ctrlclient.Client, backup
du.Status.Phase == velerov2alpha1api.DataUploadPhaseNew ||
du.Status.Phase == "" {
err := controller.UpdateDataUploadWithRetry(ctx, client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, log.WithField("dataupload", du.Name),
func(dataUpload *velerov2alpha1api.DataUpload) {
func(dataUpload *velerov2alpha1api.DataUpload) bool {
if dataUpload.Spec.Cancel {
return false
}
dataUpload.Spec.Cancel = true
dataUpload.Status.Message = fmt.Sprintf("found a dataupload with status %q during the velero server starting, mark it as cancel", du.Status.Phase)
dataUpload.Status.Message = fmt.Sprintf("Dataupload is in status %q during the velero server starting, mark it as cancel", du.Status.Phase)
return true
})
if err != nil {
@ -1182,9 +1194,15 @@ func markDataDownloadsCancel(ctx context.Context, client ctrlclient.Client, rest
dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseNew ||
dd.Status.Phase == "" {
err := controller.UpdateDataDownloadWithRetry(ctx, client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, log.WithField("datadownload", dd.Name),
func(dataDownload *velerov2alpha1api.DataDownload) {
func(dataDownload *velerov2alpha1api.DataDownload) bool {
if dataDownload.Spec.Cancel {
return false
}
dataDownload.Spec.Cancel = true
dataDownload.Status.Message = fmt.Sprintf("found a datadownload with status %q during the velero server starting, mark it as cancel", dd.Status.Phase)
dataDownload.Status.Message = fmt.Sprintf("Datadownload is in status %q during the velero server starting, mark it as cancel", dd.Status.Phase)
return true
})
if err != nil {

View File

@ -203,6 +203,13 @@ func Test_newServer(t *testing.T) {
}, logger)
assert.Error(t, err)
// invalid clientQPS Restic uploader
_, err = newServer(factory, serverConfig{
uploaderType: uploader.ResticType,
clientQPS: -1,
}, logger)
assert.Error(t, err)
// invalid clientBurst
factory.On("SetClientQPS", mock.Anything).Return()
_, err = newServer(factory, serverConfig{

View File

@ -27,6 +27,7 @@ import (
"github.com/vmware-tanzu/velero/internal/volume"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
@ -39,7 +40,15 @@ import (
"github.com/vmware-tanzu/velero/pkg/util/results"
)
func DescribeRestore(ctx context.Context, kbClient kbclient.Client, restore *velerov1api.Restore, podVolumeRestores []velerov1api.PodVolumeRestore, details bool, insecureSkipTLSVerify bool, caCertFile string) string {
func DescribeRestore(
ctx context.Context,
kbClient kbclient.Client,
restore *velerov1api.Restore,
podVolumeRestores []velerov1api.PodVolumeRestore,
details bool,
insecureSkipTLSVerify bool,
caCertFile string,
) string {
return Describe(func(d *Describer) {
d.DescribeMetadata(restore.ObjectMeta)
@ -196,6 +205,11 @@ func DescribeRestore(ctx context.Context, kbClient kbclient.Client, restore *vel
d.Println()
d.Printf("Preserve Service NodePorts:\t%s\n", BoolPointerString(restore.Spec.PreserveNodePorts, "false", "true", "auto"))
if restore.Spec.ResourceModifier != nil {
d.Println()
DescribeResourceModifier(d, restore.Spec.ResourceModifier)
}
describeUploaderConfigForRestore(d, restore.Spec)
d.Println()
@ -472,3 +486,10 @@ func describeRestoreResourceList(ctx context.Context, kbClient kbclient.Client,
d.Printf("\t%s:\n\t\t- %s\n", gvk, strings.Join(resourceList[gvk], "\n\t\t- "))
}
}
// DescribeResourceModifier describes resource policies in human-readable format
func DescribeResourceModifier(d *Describer, resModifier *v1.TypedLocalObjectReference) {
d.Printf("Resource modifier:\n")
d.Printf("\tType:\t%s\n", resModifier.Kind)
d.Printf("\tName:\t%s\n", resModifier.Name)
}

View File

@ -2,15 +2,16 @@ package output
import (
"bytes"
"fmt"
"testing"
"text/tabwriter"
"time"
"github.com/vmware-tanzu/velero/internal/volume"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
"github.com/vmware-tanzu/velero/internal/volume"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/builder"
"github.com/vmware-tanzu/velero/pkg/itemoperation"
@ -389,3 +390,28 @@ CSI Snapshot Restores:
})
}
}
func TestDescribeResourceModifier(t *testing.T) {
d := &Describer{
Prefix: "",
out: &tabwriter.Writer{},
buf: &bytes.Buffer{},
}
d.out.Init(d.buf, 0, 8, 2, ' ', 0)
DescribeResourceModifier(d, &v1.TypedLocalObjectReference{
APIGroup: &v1.SchemeGroupVersion.Group,
Kind: "ConfigMap",
Name: "resourceModifier",
})
d.out.Flush()
expectOutput := `Resource modifier:
Type: ConfigMap
Name: resourceModifier
`
fmt.Println(d.buf.String())
require.Equal(t, expectOutput, d.buf.String())
}

View File

@ -51,6 +51,7 @@ import (
veleroflag "github.com/vmware-tanzu/velero/pkg/cmd/util/flag"
"github.com/vmware-tanzu/velero/pkg/features"
"github.com/vmware-tanzu/velero/pkg/cmd/cli/datamover"
"github.com/vmware-tanzu/velero/pkg/cmd/cli/nodeagent"
)
@ -124,6 +125,7 @@ operations can also be performed as 'velero backup get' and 'velero schedule cre
snapshotlocation.NewCommand(f),
debug.NewCommand(f),
repomantenance.NewCommand(f),
datamover.NewCommand(f),
)
// init and add the klog flags

View File

@ -139,7 +139,7 @@ func TestProcessBackupNonProcessedItems(t *testing.T) {
require.NoError(t, c.kbClient.Create(context.Background(), test.backup))
}
actualResult, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: test.backup.Namespace, Name: test.backup.Name}})
assert.Equal(t, actualResult, ctrl.Result{})
assert.Equal(t, ctrl.Result{}, actualResult)
assert.NoError(t, err)
// Any backup that would actually proceed to validation will cause a segfault because this
@ -229,7 +229,7 @@ func TestProcessBackupValidationFailures(t *testing.T) {
require.NoError(t, c.kbClient.Create(context.Background(), test.backup))
actualResult, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: test.backup.Namespace, Name: test.backup.Name}})
assert.Equal(t, actualResult, ctrl.Result{})
assert.Equal(t, ctrl.Result{}, actualResult)
assert.NoError(t, err)
res := &velerov1api.Backup{}
err = c.kbClient.Get(context.Background(), kbclient.ObjectKey{Namespace: test.backup.Namespace, Name: test.backup.Name}, res)
@ -1377,7 +1377,7 @@ func TestProcessBackupCompletions(t *testing.T) {
}
actualResult, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: test.backup.Namespace, Name: test.backup.Name}})
assert.Equal(t, actualResult, ctrl.Result{})
assert.Equal(t, ctrl.Result{}, actualResult)
assert.NoError(t, err)
// Disable CSI feature to not impact other test cases.

View File

@ -146,10 +146,7 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
// Make sure we have the backup name
if dbr.Spec.BackupName == "" {
_, err := r.patchDeleteBackupRequest(ctx, dbr, func(res *velerov1api.DeleteBackupRequest) {
res.Status.Phase = velerov1api.DeleteBackupRequestPhaseProcessed
res.Status.Errors = []string{"spec.backupName is required"}
})
err := r.patchDeleteBackupRequestWithError(ctx, dbr, errors.New("spec.backupName is required"))
return ctrl.Result{}, err
}
@ -163,10 +160,7 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
// Don't allow deleting an in-progress backup
if r.backupTracker.Contains(dbr.Namespace, dbr.Spec.BackupName) {
_, err := r.patchDeleteBackupRequest(ctx, dbr, func(r *velerov1api.DeleteBackupRequest) {
r.Status.Phase = velerov1api.DeleteBackupRequestPhaseProcessed
r.Status.Errors = []string{"backup is still in progress"}
})
err := r.patchDeleteBackupRequestWithError(ctx, dbr, errors.New("backup is still in progress"))
return ctrl.Result{}, err
}
@ -177,10 +171,7 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
Name: dbr.Spec.BackupName,
}, backup); apierrors.IsNotFound(err) {
// Couldn't find backup - update status to Processed and record the not-found error
_, err = r.patchDeleteBackupRequest(ctx, dbr, func(r *velerov1api.DeleteBackupRequest) {
r.Status.Phase = velerov1api.DeleteBackupRequestPhaseProcessed
r.Status.Errors = []string{"backup not found"}
})
err = r.patchDeleteBackupRequestWithError(ctx, dbr, errors.New("backup not found"))
return ctrl.Result{}, err
} else if err != nil {
return ctrl.Result{}, errors.Wrap(err, "error getting backup")
@ -193,20 +184,14 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
Name: backup.Spec.StorageLocation,
}, location); err != nil {
if apierrors.IsNotFound(err) {
_, err := r.patchDeleteBackupRequest(ctx, dbr, func(r *velerov1api.DeleteBackupRequest) {
r.Status.Phase = velerov1api.DeleteBackupRequestPhaseProcessed
r.Status.Errors = append(r.Status.Errors, fmt.Sprintf("backup storage location %s not found", backup.Spec.StorageLocation))
})
err := r.patchDeleteBackupRequestWithError(ctx, dbr, fmt.Errorf("backup storage location %s not found", backup.Spec.StorageLocation))
return ctrl.Result{}, err
}
return ctrl.Result{}, errors.Wrap(err, "error getting backup storage location")
}
if location.Spec.AccessMode == velerov1api.BackupStorageLocationAccessModeReadOnly {
_, err := r.patchDeleteBackupRequest(ctx, dbr, func(r *velerov1api.DeleteBackupRequest) {
r.Status.Phase = velerov1api.DeleteBackupRequestPhaseProcessed
r.Status.Errors = append(r.Status.Errors, fmt.Sprintf("cannot delete backup because backup storage location %s is currently in read-only mode", location.Name))
})
err := r.patchDeleteBackupRequestWithError(ctx, dbr, fmt.Errorf("cannot delete backup because backup storage location %s is currently in read-only mode", location.Name))
return ctrl.Result{}, err
}
@ -236,8 +221,9 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
b.Status.Phase = velerov1api.BackupPhaseDeleting
})
if err != nil {
log.WithError(errors.WithStack(err)).Error("Error setting backup phase to deleting")
return ctrl.Result{}, err
log.WithError(err).Error("Error setting backup phase to deleting")
err2 := r.patchDeleteBackupRequestWithError(ctx, dbr, errors.Wrap(err, "error setting backup phase to deleting"))
return ctrl.Result{}, err2
}
backupScheduleName := backup.GetLabels()[velerov1api.ScheduleNameLabel]
@ -248,17 +234,17 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
backupStore, err := r.backupStoreGetter.Get(location, pluginManager, log)
if err != nil {
_, patchErr := r.patchDeleteBackupRequest(ctx, dbr, func(r *velerov1api.DeleteBackupRequest) {
r.Status.Phase = velerov1api.DeleteBackupRequestPhaseProcessed
r.Status.Errors = append(r.Status.Errors, fmt.Sprintf("cannot delete backup because backup storage location %s is currently unavailable, error: %s", location.Name, err.Error()))
})
return ctrl.Result{}, patchErr
log.WithError(err).Error("Error getting the backup store")
err2 := r.patchDeleteBackupRequestWithError(ctx, dbr, errors.Wrap(err, "error getting the backup store"))
return ctrl.Result{}, err2
}
actions, err := pluginManager.GetDeleteItemActions()
log.Debugf("%d actions before invoking actions", len(actions))
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "error getting delete item actions")
log.WithError(err).Error("Error getting delete item actions")
err2 := r.patchDeleteBackupRequestWithError(ctx, dbr, errors.New("error getting delete item actions"))
return ctrl.Result{}, err2
}
// don't defer CleanupClients here, since it was already called above.
@ -270,7 +256,7 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
log.WithError(err).Errorf("Unable to download tarball for backup %s, skipping associated DeleteItemAction plugins", backup.Name)
} else {
defer closeAndRemoveFile(backupFile, r.logger)
ctx := &delete.Context{
deleteCtx := &delete.Context{
Backup: backup,
BackupReader: backupFile,
Actions: actions,
@ -281,9 +267,11 @@ func (r *backupDeletionReconciler) Reconcile(ctx context.Context, req ctrl.Reque
// Optimization: wrap in a gofunc? Would be useful for large backups with lots of objects.
// but what do we do with the error returned? We can't just swallow it as that may lead to dangling resources.
err = delete.InvokeDeleteActions(ctx)
err = delete.InvokeDeleteActions(deleteCtx)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "error invoking delete item actions")
log.WithError(err).Error("Error invoking delete item actions")
err2 := r.patchDeleteBackupRequestWithError(ctx, dbr, errors.New("error invoking delete item actions"))
return ctrl.Result{}, err2
}
}
}
@ -593,6 +581,14 @@ func (r *backupDeletionReconciler) patchDeleteBackupRequest(ctx context.Context,
return req, nil
}
func (r *backupDeletionReconciler) patchDeleteBackupRequestWithError(ctx context.Context, req *velerov1api.DeleteBackupRequest, err error) error {
_, err = r.patchDeleteBackupRequest(ctx, req, func(r *velerov1api.DeleteBackupRequest) {
r.Status.Phase = velerov1api.DeleteBackupRequestPhaseProcessed
r.Status.Errors = []string{err.Error()}
})
return err
}
func (r *backupDeletionReconciler) patchBackup(ctx context.Context, backup *velerov1api.Backup, mutate func(*velerov1api.Backup)) (*velerov1api.Backup, error) {
//TODO: The patchHelper can't be used here because the `backup/xxx/status` does not exist, until the backup resource is refactored

View File

@ -122,16 +122,16 @@ func TestBackupDeletionControllerReconcile(t *testing.T) {
},
},
}
td := setupBackupDeletionControllerTest(t, defaultTestDbr(), location, backup)
dbr := defaultTestDbr()
td := setupBackupDeletionControllerTest(t, dbr, location, backup)
td.controller.backupStoreGetter = &fakeErrorBackupStoreGetter{}
_, err := td.controller.Reconcile(ctx, td.req)
require.NoError(t, err)
res := &velerov1api.DeleteBackupRequest{}
err = td.fakeClient.Get(ctx, td.req.NamespacedName, res)
require.NoError(t, err)
td.fakeClient.Get(ctx, td.req.NamespacedName, res)
assert.Equal(t, "Processed", string(res.Status.Phase))
assert.Len(t, res.Status.Errors, 1)
assert.True(t, strings.HasPrefix(res.Status.Errors[0], fmt.Sprintf("cannot delete backup because backup storage location %s is currently unavailable", location.Name)))
assert.True(t, strings.HasPrefix(res.Status.Errors[0], "error getting the backup store"))
})
t.Run("missing spec.backupName", func(t *testing.T) {

View File

@ -19,6 +19,8 @@ package controller
import (
"bytes"
"context"
"encoding/json"
"fmt"
"reflect"
"time"
@ -38,6 +40,8 @@ import (
"github.com/vmware-tanzu/velero/pkg/repository"
repoconfig "github.com/vmware-tanzu/velero/pkg/repository/config"
"github.com/vmware-tanzu/velero/pkg/util/kube"
corev1api "k8s.io/api/core/v1"
)
const (
@ -51,17 +55,19 @@ type BackupRepoReconciler struct {
logger logrus.FieldLogger
clock clocks.WithTickerAndDelayedExecution
maintenanceFrequency time.Duration
backupRepoConfig string
repositoryManager repository.Manager
}
func NewBackupRepoReconciler(namespace string, logger logrus.FieldLogger, client client.Client,
maintenanceFrequency time.Duration, repositoryManager repository.Manager) *BackupRepoReconciler {
maintenanceFrequency time.Duration, backupRepoConfig string, repositoryManager repository.Manager) *BackupRepoReconciler {
c := &BackupRepoReconciler{
client,
namespace,
logger,
clocks.RealClock{},
maintenanceFrequency,
backupRepoConfig,
repositoryManager,
}
@ -223,7 +229,7 @@ func (r *BackupRepoReconciler) getIdentiferByBSL(ctx context.Context, req *veler
}
func (r *BackupRepoReconciler) initializeRepo(ctx context.Context, req *velerov1api.BackupRepository, log logrus.FieldLogger) error {
log.Info("Initializing backup repository")
log.WithField("repoConfig", r.backupRepoConfig).Info("Initializing backup repository")
// confirm the repo's BackupStorageLocation is valid
repoIdentifier, err := r.getIdentiferByBSL(ctx, req)
@ -238,6 +244,13 @@ func (r *BackupRepoReconciler) initializeRepo(ctx context.Context, req *velerov1
})
}
config, err := getBackupRepositoryConfig(ctx, r, r.backupRepoConfig, r.namespace, req.Name, req.Spec.RepositoryType, log)
if err != nil {
log.WithError(err).Warn("Failed to get repo config, repo config is ignored")
} else if config != nil {
log.Infof("Init repo with config %v", config)
}
// defaulting - if the patch fails, return an error so the item is returned to the queue
if err := r.patchBackupRepository(ctx, req, func(rr *velerov1api.BackupRepository) {
rr.Spec.ResticIdentifier = repoIdentifier
@ -245,6 +258,8 @@ func (r *BackupRepoReconciler) initializeRepo(ctx context.Context, req *velerov1
if rr.Spec.MaintenanceFrequency.Duration <= 0 {
rr.Spec.MaintenanceFrequency = metav1.Duration{Duration: r.getRepositoryMaintenanceFrequency(req)}
}
rr.Spec.RepositoryConfig = config
}); err != nil {
return err
}
@ -366,3 +381,35 @@ func (r *BackupRepoReconciler) patchBackupRepository(ctx context.Context, req *v
}
return nil
}
func getBackupRepositoryConfig(ctx context.Context, ctrlClient client.Client, configName, namespace, repoName, repoType string, log logrus.FieldLogger) (map[string]string, error) {
if configName == "" {
return nil, nil
}
loc := &corev1api.ConfigMap{}
if err := ctrlClient.Get(ctx, client.ObjectKey{
Namespace: namespace,
Name: configName,
}, loc); err != nil {
return nil, errors.Wrapf(err, "error getting configMap %s", configName)
}
jsonData, found := loc.Data[repoType]
if !found {
log.Infof("No data for repo type %s in configMap %s", repoType, configName)
return nil, nil
}
var unmarshalled map[string]interface{}
if err := json.Unmarshal([]byte(jsonData), &unmarshalled); err != nil {
return nil, errors.Wrapf(err, "error unmarshalling config data from %s for repo %s, repo type %s", configName, repoName, repoType)
}
result := map[string]string{}
for k, v := range unmarshalled {
result[k] = fmt.Sprintf("%v", v)
}
return result, nil
}
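For context, a sketch of the configMap layout this function consumes: one data key per repository type, whose value is a JSON object that gets flattened into a map[string]string. The configMap name, namespace, and the cacheLimitMB/enableCompression fields below mirror the test fixtures in this diff and are illustrative only, not a documented schema; the snippet assumes the corev1api and metav1 imports already present in this file.
// Illustrative only; the data keys and fields mirror the test data in this diff.
func exampleBackupRepoConfigMap() *corev1api.ConfigMap {
	return &corev1api.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "backup-repo-config", // the name passed via --backup-repository-config
			Namespace: "velero",
		},
		Data: map[string]string{
			// one entry per repository type, each holding a JSON object
			"fake-repo-type": `{"cacheLimitMB": 1000, "enableCompression": true}`,
		},
	}
}
// With this configMap in the cluster, getBackupRepositoryConfig(ctx, cli, "backup-repo-config",
// "velero", "repo-1", "fake-repo-type", log) would return
// map[string]string{"cacheLimitMB": "1000", "enableCompression": "true"}.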

View File

@ -21,7 +21,9 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
@ -29,11 +31,13 @@ import (
"github.com/vmware-tanzu/velero/pkg/repository"
repomokes "github.com/vmware-tanzu/velero/pkg/repository/mocks"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake"
)
const testMaintenanceFrequency = 10 * time.Minute
func mockBackupRepoReconciler(t *testing.T, rr *velerov1api.BackupRepository, mockOn string, arg interface{}, ret interface{}) *BackupRepoReconciler {
func mockBackupRepoReconciler(t *testing.T, mockOn string, arg interface{}, ret interface{}) *BackupRepoReconciler {
mgr := &repomokes.Manager{}
if mockOn != "" {
mgr.On(mockOn, arg).Return(ret)
@ -43,6 +47,7 @@ func mockBackupRepoReconciler(t *testing.T, rr *velerov1api.BackupRepository, mo
velerotest.NewLogger(),
velerotest.NewFakeControllerRuntimeClient(t),
testMaintenanceFrequency,
"fake-repo-config",
mgr,
)
}
@ -61,15 +66,15 @@ func mockBackupRepositoryCR() *velerov1api.BackupRepository {
func TestPatchBackupRepository(t *testing.T) {
rr := mockBackupRepositoryCR()
reconciler := mockBackupRepoReconciler(t, rr, "", nil, nil)
reconciler := mockBackupRepoReconciler(t, "", nil, nil)
err := reconciler.Client.Create(context.TODO(), rr)
assert.NoError(t, err)
err = reconciler.patchBackupRepository(context.Background(), rr, repoReady())
assert.NoError(t, err)
assert.Equal(t, rr.Status.Phase, velerov1api.BackupRepositoryPhaseReady)
assert.Equal(t, velerov1api.BackupRepositoryPhaseReady, rr.Status.Phase)
err = reconciler.patchBackupRepository(context.Background(), rr, repoNotReady("not ready"))
assert.NoError(t, err)
assert.NotEqual(t, rr.Status.Phase, velerov1api.BackupRepositoryPhaseReady)
assert.NotEqual(t, velerov1api.BackupRepositoryPhaseReady, rr.Status.Phase)
}
func TestCheckNotReadyRepo(t *testing.T) {
@ -77,7 +82,7 @@ func TestCheckNotReadyRepo(t *testing.T) {
rr.Spec.BackupStorageLocation = "default"
rr.Spec.ResticIdentifier = "fake-identifier"
rr.Spec.VolumeNamespace = "volume-ns-1"
reconciler := mockBackupRepoReconciler(t, rr, "PrepareRepo", rr, nil)
reconciler := mockBackupRepoReconciler(t, "PrepareRepo", rr, nil)
err := reconciler.Client.Create(context.TODO(), rr)
assert.NoError(t, err)
locations := &velerov1api.BackupStorageLocation{
@ -94,13 +99,13 @@ func TestCheckNotReadyRepo(t *testing.T) {
assert.NoError(t, err)
_, err = reconciler.checkNotReadyRepo(context.TODO(), rr, reconciler.logger)
assert.NoError(t, err)
assert.Equal(t, rr.Status.Phase, velerov1api.BackupRepositoryPhaseReady)
assert.Equal(t, velerov1api.BackupRepositoryPhaseReady, rr.Status.Phase)
assert.Equal(t, "s3:test.amazonaws.com/bucket/restic/volume-ns-1", rr.Spec.ResticIdentifier)
}
func TestRunMaintenanceIfDue(t *testing.T) {
rr := mockBackupRepositoryCR()
reconciler := mockBackupRepoReconciler(t, rr, "PruneRepo", rr, nil)
reconciler := mockBackupRepoReconciler(t, "PruneRepo", rr, nil)
err := reconciler.Client.Create(context.TODO(), rr)
assert.NoError(t, err)
lastTm := rr.Status.LastMaintenanceTime
@ -118,7 +123,7 @@ func TestRunMaintenanceIfDue(t *testing.T) {
func TestInitializeRepo(t *testing.T) {
rr := mockBackupRepositoryCR()
rr.Spec.BackupStorageLocation = "default"
reconciler := mockBackupRepoReconciler(t, rr, "PrepareRepo", rr, nil)
reconciler := mockBackupRepoReconciler(t, "PrepareRepo", rr, nil)
err := reconciler.Client.Create(context.TODO(), rr)
assert.NoError(t, err)
locations := &velerov1api.BackupStorageLocation{
@ -135,7 +140,7 @@ func TestInitializeRepo(t *testing.T) {
assert.NoError(t, err)
err = reconciler.initializeRepo(context.TODO(), rr, reconciler.logger)
assert.NoError(t, err)
assert.Equal(t, rr.Status.Phase, velerov1api.BackupRepositoryPhaseReady)
assert.Equal(t, velerov1api.BackupRepositoryPhaseReady, rr.Status.Phase)
}
func TestBackupRepoReconcile(t *testing.T) {
@ -189,7 +194,7 @@ func TestBackupRepoReconcile(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
reconciler := mockBackupRepoReconciler(t, test.repo, "", test.repo, nil)
reconciler := mockBackupRepoReconciler(t, "", test.repo, nil)
err := reconciler.Client.Create(context.TODO(), test.repo)
assert.NoError(t, err)
_, err = reconciler.Reconcile(context.TODO(), ctrl.Request{NamespacedName: types.NamespacedName{Namespace: test.repo.Namespace, Name: test.repo.Name}})
@ -243,6 +248,7 @@ func TestGetRepositoryMaintenanceFrequency(t *testing.T) {
velerotest.NewLogger(),
velerotest.NewFakeControllerRuntimeClient(t),
test.userDefinedFreq,
"",
&mgr,
)
@ -370,10 +376,112 @@ func TestNeedInvalidBackupRepo(t *testing.T) {
velerov1api.DefaultNamespace,
velerotest.NewLogger(),
velerotest.NewFakeControllerRuntimeClient(t),
time.Duration(0), nil)
time.Duration(0), "", nil)
need := reconciler.needInvalidBackupRepo(test.oldBSL, test.newBSL)
assert.Equal(t, test.expect, need)
})
}
}
func TestGetBackupRepositoryConfig(t *testing.T) {
configWithNoData := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "config-1",
Namespace: velerov1api.DefaultNamespace,
},
}
configWithWrongData := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "config-1",
Namespace: velerov1api.DefaultNamespace,
},
Data: map[string]string{
"fake-repo-type": "",
},
}
configWithData := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "config-1",
Namespace: velerov1api.DefaultNamespace,
},
Data: map[string]string{
"fake-repo-type": "{\"cacheLimitMB\": 1000, \"enableCompression\": true}",
"fake-repo-type-1": "{\"cacheLimitMB\": 1, \"enableCompression\": false}",
},
}
tests := []struct {
name string
configName string
repoName string
repoType string
kubeClientObj []runtime.Object
expectedErr string
expectedResult map[string]string
}{
{
name: "empty configName",
},
{
name: "get error",
configName: "config-1",
expectedErr: "error getting configMap config-1: configmaps \"config-1\" not found",
},
{
name: "no config for repo",
configName: "config-1",
repoName: "fake-repo",
repoType: "fake-repo-type",
kubeClientObj: []runtime.Object{
configWithNoData,
},
},
{
name: "unmarshal error",
configName: "config-1",
repoName: "fake-repo",
repoType: "fake-repo-type",
kubeClientObj: []runtime.Object{
configWithWrongData,
},
expectedErr: "error unmarshalling config data from config-1 for repo fake-repo, repo type fake-repo-type: unexpected end of JSON input",
},
{
name: "succeed",
configName: "config-1",
repoName: "fake-repo",
repoType: "fake-repo-type",
kubeClientObj: []runtime.Object{
configWithData,
},
expectedResult: map[string]string{
"cacheLimitMB": "1000",
"enableCompression": "true",
},
},
}
scheme := runtime.NewScheme()
corev1.AddToScheme(scheme)
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeClientBuilder := clientFake.NewClientBuilder()
fakeClientBuilder = fakeClientBuilder.WithScheme(scheme)
fakeClient := fakeClientBuilder.WithRuntimeObjects(test.kubeClientObj...).Build()
result, err := getBackupRepositoryConfig(context.Background(), fakeClient, test.configName, velerov1api.DefaultNamespace, test.repoName, test.repoType, velerotest.NewLogger())
if test.expectedErr != "" {
assert.EqualError(t, err, test.expectedErr)
} else {
assert.NoError(t, err)
assert.Equal(t, test.expectedResult, result)
}
})
}
}

View File

@ -144,7 +144,7 @@ func defaultLocationWithLongerLocationName(namespace string) *velerov1api.Backup
}
}
func numBackups(c ctrlClient.WithWatch, ns string) (int, error) {
func numBackups(c ctrlClient.WithWatch) (int, error) {
var existingK8SBackups velerov1api.BackupList
err := c.List(context.TODO(), &existingK8SBackups, &ctrlClient.ListOptions{})
if err != nil {
@ -692,7 +692,7 @@ var _ = Describe("Backup Sync Reconciler", func() {
}
r.deleteOrphanedBackups(ctx, bslName, test.cloudBackups, velerotest.NewLogger())
numBackups, err := numBackups(client, r.namespace)
numBackups, err := numBackups(client)
Expect(err).ShouldNot(HaveOccurred())
fmt.Println("")

View File

@ -35,10 +35,10 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"github.com/vmware-tanzu/velero/internal/credentials"
"github.com/vmware-tanzu/velero/pkg/apis/velero/shared"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
@ -46,43 +46,39 @@ import (
"github.com/vmware-tanzu/velero/pkg/datapath"
"github.com/vmware-tanzu/velero/pkg/exposer"
"github.com/vmware-tanzu/velero/pkg/metrics"
repository "github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
// DataDownloadReconciler reconciles a DataDownload object
type DataDownloadReconciler struct {
client client.Client
kubeClient kubernetes.Interface
logger logrus.FieldLogger
credentialGetter *credentials.CredentialGetter
fileSystem filesystem.Interface
Clock clock.WithTickerAndDelayedExecution
restoreExposer exposer.GenericRestoreExposer
nodeName string
repositoryEnsurer *repository.Ensurer
dataPathMgr *datapath.Manager
preparingTimeout time.Duration
metrics *metrics.ServerMetrics
client client.Client
kubeClient kubernetes.Interface
mgr manager.Manager
logger logrus.FieldLogger
Clock clock.WithTickerAndDelayedExecution
restoreExposer exposer.GenericRestoreExposer
nodeName string
dataPathMgr *datapath.Manager
podResources v1.ResourceRequirements
preparingTimeout time.Duration
metrics *metrics.ServerMetrics
}
func NewDataDownloadReconciler(client client.Client, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager,
repoEnsurer *repository.Ensurer, credentialGetter *credentials.CredentialGetter, nodeName string, preparingTimeout time.Duration, logger logrus.FieldLogger, metrics *metrics.ServerMetrics) *DataDownloadReconciler {
func NewDataDownloadReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager,
podResources v1.ResourceRequirements, nodeName string, preparingTimeout time.Duration, logger logrus.FieldLogger, metrics *metrics.ServerMetrics) *DataDownloadReconciler {
return &DataDownloadReconciler{
client: client,
kubeClient: kubeClient,
logger: logger.WithField("controller", "DataDownload"),
credentialGetter: credentialGetter,
fileSystem: filesystem.NewFileSystem(),
Clock: &clock.RealClock{},
nodeName: nodeName,
repositoryEnsurer: repoEnsurer,
restoreExposer: exposer.NewGenericRestoreExposer(kubeClient, logger),
dataPathMgr: dataPathMgr,
preparingTimeout: preparingTimeout,
metrics: metrics,
client: client,
kubeClient: kubeClient,
mgr: mgr,
logger: logger.WithField("controller", "DataDownload"),
Clock: &clock.RealClock{},
nodeName: nodeName,
restoreExposer: exposer.NewGenericRestoreExposer(kubeClient, logger),
dataPathMgr: dataPathMgr,
podResources: podResources,
preparingTimeout: preparingTimeout,
metrics: metrics,
}
}
@ -137,9 +133,17 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
} else if controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) && !dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
// when the CR is deleted we need to clean up the internal resources created by Velero; here we use the cancel mechanism
// to clean up those resources instead of deleting them directly, in case of a conflict with the Expose action
if err := UpdateDataDownloadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataDownload *velerov2alpha1api.DataDownload) {
log.Warnf("Cancel dd under phase %s because it is being deleted", dd.Status.Phase)
if err := UpdateDataDownloadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataDownload *velerov2alpha1api.DataDownload) bool {
if dataDownload.Spec.Cancel {
return false
}
dataDownload.Spec.Cancel = true
dataDownload.Status.Message = fmt.Sprintf("found a datadownload %s/%s is being deleted, mark it as cancel", dd.Namespace, dd.Name)
dataDownload.Status.Message = "Cancel datadownload because it is being deleted"
return true
}); err != nil {
log.Errorf("failed to set cancel flag with error %s for %s/%s", err.Error(), dd.Namespace, dd.Name)
return ctrl.Result{}, err
@ -177,7 +181,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
// Expose() triggers the creation of a pod whose volume is restored from the given volume snapshot,
// but the pod may not be on the same node as the current controller, so we need to return here.
// Only the controller on the same node as the pod can then do the rest of the work.
err = r.restoreExposer.Expose(ctx, getDataDownloadOwnerObject(dd), dd.Spec.TargetVolume.PVC, dd.Spec.TargetVolume.Namespace, hostingPodLabels, dd.Spec.OperationTimeout.Duration)
err = r.restoreExposer.Expose(ctx, getDataDownloadOwnerObject(dd), dd.Spec.TargetVolume.PVC, dd.Spec.TargetVolume.Namespace, hostingPodLabels, r.podResources, dd.Spec.OperationTimeout.Duration)
if err != nil {
if err := r.client.Get(ctx, req.NamespacedName, dd); err != nil {
if !apierrors.IsNotFound(err) {
@ -214,9 +218,9 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseAccepted {
if dd.Spec.Cancel {
log.Debugf("Data download is been canceled %s in Phase %s", dd.GetName(), dd.Status.Phase)
r.TryCancelDataDownload(ctx, dd, "")
r.tryCancelAcceptedDataDownload(ctx, dd, "")
} else if peekErr := r.restoreExposer.PeekExposed(ctx, getDataDownloadOwnerObject(dd)); peekErr != nil {
r.TryCancelDataDownload(ctx, dd, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", dd.Namespace, dd.Name, peekErr))
r.tryCancelAcceptedDataDownload(ctx, dd, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", dd.Namespace, dd.Name, peekErr))
log.Errorf("Cancel dd %s/%s because of expose error %s", dd.Namespace, dd.Name, peekErr)
} else if dd.Status.StartTimestamp != nil {
if time.Since(dd.Status.StartTimestamp.Time) >= r.preparingTimeout {
@ -234,9 +238,9 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, nil
}
fsRestore := r.dataPathMgr.GetAsyncBR(dd.Name)
asyncBR := r.dataPathMgr.GetAsyncBR(dd.Name)
if fsRestore != nil {
if asyncBR != nil {
log.Info("Cancellable data path is already started")
return ctrl.Result{}, nil
}
@ -259,7 +263,8 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
OnProgress: r.OnDataDownloadProgress,
}
fsRestore, err = r.dataPathMgr.CreateFileSystemBR(dd.Name, dataUploadDownloadRequestor, ctx, r.client, dd.Namespace, callbacks, log)
asyncBR, err = r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, r.kubeClient, r.mgr, datapath.TaskTypeRestore,
dd.Name, dd.Namespace, result.ByPod.HostingPod.Name, result.ByPod.HostingContainer, dd.Name, callbacks, false, log)
if err != nil {
if err == datapath.ConcurrentLimitExceed {
log.Info("Data path instance is concurrent limited requeue later")
@ -268,29 +273,41 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return r.errorOut(ctx, dd, err, "error to create data path", log)
}
}
if err := r.initCancelableDataPath(ctx, asyncBR, result, log); err != nil {
log.WithError(err).Errorf("Failed to init cancelable data path for %s", dd.Name)
r.closeDataPath(ctx, dd.Name)
return r.errorOut(ctx, dd, err, "error initializing data path", log)
}
// Update status to InProgress
original := dd.DeepCopy()
dd.Status.Phase = velerov2alpha1api.DataDownloadPhaseInProgress
dd.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
if err := r.client.Patch(ctx, dd, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("Unable to update status to in progress")
return ctrl.Result{}, err
log.WithError(err).Warnf("Failed to update datadownload %s to InProgress, will close data path and retry", dd.Name)
r.closeDataPath(ctx, dd.Name)
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
}
log.Info("Data download is marked as in progress")
reconcileResult, err := r.runCancelableDataPath(ctx, fsRestore, dd, result, log)
if err != nil {
log.Errorf("Failed to run cancelable data path for %s with err %v", dd.Name, err)
if err := r.startCancelableDataPath(asyncBR, dd, result, log); err != nil {
log.WithError(err).Errorf("Failed to start cancelable data path for %s", dd.Name)
r.closeDataPath(ctx, dd.Name)
return r.errorOut(ctx, dd, err, "error starting data path", log)
}
return reconcileResult, err
return ctrl.Result{}, nil
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseInProgress {
log.Info("Data download is in progress")
if dd.Spec.Cancel {
log.Info("Data download is being canceled")
fsRestore := r.dataPathMgr.GetAsyncBR(dd.Name)
if fsRestore == nil {
asyncBR := r.dataPathMgr.GetAsyncBR(dd.Name)
if asyncBR == nil {
if r.nodeName == dd.Status.Node {
r.OnDataDownloadCancelled(ctx, dd.GetNamespace(), dd.GetName())
} else {
@ -306,7 +323,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
log.WithError(err).Error("error updating data download status")
return ctrl.Result{}, err
}
fsRestore.Cancel()
asyncBR.Cancel()
return ctrl.Result{}, nil
}
@ -327,38 +344,33 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
}
}
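// initCancelableDataPath initializes the asyncBR watcher before the restore starts; startCancelableDataPath then
// kicks off the async restore of the snapshot into the exposed pod volume.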
func (r *DataDownloadReconciler) runCancelableDataPath(ctx context.Context, fsRestore datapath.AsyncBR, dd *velerov2alpha1api.DataDownload, res *exposer.ExposeResult, log logrus.FieldLogger) (reconcile.Result, error) {
path, err := exposer.GetPodVolumeHostPath(ctx, res.ByPod.HostingPod, res.ByPod.VolumeName, r.client, r.fileSystem, log)
if err != nil {
return r.errorOut(ctx, dd, err, "error exposing host path for pod volume", log)
func (r *DataDownloadReconciler) initCancelableDataPath(ctx context.Context, asyncBR datapath.AsyncBR, res *exposer.ExposeResult, log logrus.FieldLogger) error {
log.Info("Init cancelable dataDownload")
if err := asyncBR.Init(ctx, nil); err != nil {
return errors.Wrap(err, "error initializing asyncBR")
}
log.WithField("path", path.ByPath).Debug("Found host path")
log.Infof("async restore init for pod %s, volume %s", res.ByPod.HostingPod.Name, res.ByPod.VolumeName)
if err := fsRestore.Init(ctx, &datapath.FSBRInitParam{
BSLName: dd.Spec.BackupStorageLocation,
SourceNamespace: dd.Spec.SourceNamespace,
UploaderType: datamover.GetUploaderType(dd.Spec.DataMover),
RepositoryType: velerov1api.BackupRepositoryTypeKopia,
RepoIdentifier: "",
RepositoryEnsurer: r.repositoryEnsurer,
CredentialGetter: r.credentialGetter,
}); err != nil {
return r.errorOut(ctx, dd, err, "error to initialize data path", log)
return nil
}
func (r *DataDownloadReconciler) startCancelableDataPath(asyncBR datapath.AsyncBR, dd *velerov2alpha1api.DataDownload, res *exposer.ExposeResult, log logrus.FieldLogger) error {
log.Info("Start cancelable dataDownload")
if err := asyncBR.StartRestore(dd.Spec.SnapshotID, datapath.AccessPoint{
ByPath: res.ByPod.VolumeName,
}, dd.Spec.DataMoverConfig); err != nil {
return errors.Wrapf(err, "error starting async restore for pod %s, volume %s", res.ByPod.HostingPod.Name, res.ByPod.VolumeName)
}
log.WithField("path", path.ByPath).Info("fs init")
if err := fsRestore.StartRestore(dd.Spec.SnapshotID, path, dd.Spec.DataMoverConfig); err != nil {
return r.errorOut(ctx, dd, err, fmt.Sprintf("error starting data path %s restore", path.ByPath), log)
}
log.WithField("path", path.ByPath).Info("Async fs restore data path started")
return ctrl.Result{}, nil
log.Infof("Async restore started for pod %s, volume %s", res.ByPod.HostingPod.Name, res.ByPod.VolumeName)
return nil
}
func (r *DataDownloadReconciler) OnDataDownloadCompleted(ctx context.Context, namespace string, ddName string, result datapath.Result) {
defer r.closeDataPath(ctx, ddName)
defer r.dataPathMgr.RemoveAsyncBR(ddName)
log := r.logger.WithField("datadownload", ddName)
log.Info("Async fs restore data path completed")
@ -391,7 +403,7 @@ func (r *DataDownloadReconciler) OnDataDownloadCompleted(ctx context.Context, na
}
func (r *DataDownloadReconciler) OnDataDownloadFailed(ctx context.Context, namespace string, ddName string, err error) {
defer r.closeDataPath(ctx, ddName)
defer r.dataPathMgr.RemoveAsyncBR(ddName)
log := r.logger.WithField("datadownload", ddName)
@ -401,14 +413,12 @@ func (r *DataDownloadReconciler) OnDataDownloadFailed(ctx context.Context, names
if getErr := r.client.Get(ctx, types.NamespacedName{Name: ddName, Namespace: namespace}, &dd); getErr != nil {
log.WithError(getErr).Warn("Failed to get data download on failure")
} else {
if _, errOut := r.errorOut(ctx, &dd, err, "data path restore failed", log); err != nil {
log.WithError(err).Warnf("Failed to patch data download with err %v", errOut)
}
_, _ = r.errorOut(ctx, &dd, err, "data path restore failed", log)
}
}
func (r *DataDownloadReconciler) OnDataDownloadCancelled(ctx context.Context, namespace string, ddName string) {
defer r.closeDataPath(ctx, ddName)
defer r.dataPathMgr.RemoveAsyncBR(ddName)
log := r.logger.WithField("datadownload", ddName)
@ -435,9 +445,9 @@ func (r *DataDownloadReconciler) OnDataDownloadCancelled(ctx context.Context, na
}
}
func (r *DataDownloadReconciler) TryCancelDataDownload(ctx context.Context, dd *velerov2alpha1api.DataDownload, message string) {
func (r *DataDownloadReconciler) tryCancelAcceptedDataDownload(ctx context.Context, dd *velerov2alpha1api.DataDownload, message string) {
log := r.logger.WithField("datadownload", dd.Name)
log.Warn("Async fs backup data path canceled")
log.Warn("Accepted data download is canceled")
succeeded, err := r.exclusiveUpdateDataDownload(ctx, dd, func(dataDownload *velerov2alpha1api.DataDownload) {
dataDownload.Status.Phase = velerov2alpha1api.DataDownloadPhaseCanceled
@ -445,7 +455,10 @@ func (r *DataDownloadReconciler) TryCancelDataDownload(ctx context.Context, dd *
dataDownload.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
}
dataDownload.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
dataDownload.Status.Message = message
if message != "" {
dataDownload.Status.Message = message
}
})
if err != nil {
@ -459,7 +472,6 @@ func (r *DataDownloadReconciler) TryCancelDataDownload(ctx context.Context, dd *
// success update
r.metrics.RegisterDataDownloadCancel(r.nodeName)
r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd))
r.closeDataPath(ctx, dd.Name)
}
func (r *DataDownloadReconciler) OnDataDownloadProgress(ctx context.Context, namespace string, ddName string, progress *uploader.Progress) {
@ -552,16 +564,22 @@ func (r *DataDownloadReconciler) findSnapshotRestoreForPod(ctx context.Context,
}
} else if unrecoverable, reason := kube.IsPodUnrecoverable(pod, log); unrecoverable {
err := UpdateDataDownloadWithRetry(context.Background(), r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, r.logger.WithField("datadownload", dd.Name),
func(dataDownload *velerov2alpha1api.DataDownload) {
func(dataDownload *velerov2alpha1api.DataDownload) bool {
if dataDownload.Spec.Cancel {
return false
}
dataDownload.Spec.Cancel = true
dataDownload.Status.Message = fmt.Sprintf("datadownload mark as cancel to failed early for exposing pod %s/%s is in abnormal status for %s", pod.Namespace, pod.Name, reason)
dataDownload.Status.Message = fmt.Sprintf("Cancel datadownload because the exposing pod %s/%s is in abnormal status for reason %s", pod.Namespace, pod.Name, reason)
return true
})
if err != nil {
log.WithError(err).Warn("failed to cancel datadownload, and it will wait for prepare timeout")
return []reconcile.Request{}
}
log.Info("Exposed pod is in abnormal status, and datadownload is marked as cancel")
log.Infof("Exposed pod is in abnormal status(reason %s) and datadownload is marked as cancel", reason)
} else {
return []reconcile.Request{}
}
@ -575,75 +593,6 @@ func (r *DataDownloadReconciler) findSnapshotRestoreForPod(ctx context.Context,
return []reconcile.Request{request}
}
func (r *DataDownloadReconciler) FindDataDownloads(ctx context.Context, cli client.Client, ns string) ([]*velerov2alpha1api.DataDownload, error) {
pods := &v1.PodList{}
var dataDownloads []*velerov2alpha1api.DataDownload
if err := cli.List(ctx, pods, &client.ListOptions{Namespace: ns}); err != nil {
r.logger.WithError(errors.WithStack(err)).Error("failed to list pods on current node")
return nil, errors.Wrapf(err, "failed to list pods on current node")
}
for _, pod := range pods.Items {
if pod.Spec.NodeName != r.nodeName {
r.logger.Debugf("Pod %s related data download will not handled by %s nodes", pod.GetName(), r.nodeName)
continue
}
dd, err := findDataDownloadByPod(cli, pod)
if err != nil {
r.logger.WithError(errors.WithStack(err)).Error("failed to get dataDownload by pod")
continue
} else if dd != nil {
dataDownloads = append(dataDownloads, dd)
}
}
return dataDownloads, nil
}
func (r *DataDownloadReconciler) findAcceptDataDownloadsByNodeLabel(ctx context.Context, cli client.Client, ns string) ([]velerov2alpha1api.DataDownload, error) {
dataDownloads := &velerov2alpha1api.DataDownloadList{}
if err := cli.List(ctx, dataDownloads, &client.ListOptions{Namespace: ns}); err != nil {
r.logger.WithError(errors.WithStack(err)).Error("failed to list datauploads")
return nil, errors.Wrapf(err, "failed to list datauploads")
}
var result []velerov2alpha1api.DataDownload
for _, dd := range dataDownloads.Items {
if dd.Status.Phase != velerov2alpha1api.DataDownloadPhaseAccepted {
continue
}
if dd.Labels[acceptNodeLabelKey] == r.nodeName {
result = append(result, dd)
}
}
return result, nil
}
// CancelAcceptedDataDownload will cancel the accepted data download
func (r *DataDownloadReconciler) CancelAcceptedDataDownload(ctx context.Context, cli client.Client, ns string) {
r.logger.Infof("Canceling accepted data for node %s", r.nodeName)
dataDownloads, err := r.findAcceptDataDownloadsByNodeLabel(ctx, cli, ns)
if err != nil {
r.logger.WithError(err).Error("failed to find data downloads")
return
}
for _, dd := range dataDownloads {
if dd.Spec.Cancel {
continue
}
err = UpdateDataDownloadWithRetry(ctx, cli, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name},
r.logger.WithField("dataupload", dd.Name), func(dataDownload *velerov2alpha1api.DataDownload) {
dataDownload.Spec.Cancel = true
dataDownload.Status.Message = fmt.Sprintf("found a datadownload with status %q during the node-agent starting, mark it as cancel", dd.Status.Phase)
})
r.logger.Warn(dd.Status.Message)
if err != nil {
r.logger.WithError(err).Errorf("failed to set cancel flag with error %s", err.Error())
}
}
}
func (r *DataDownloadReconciler) prepareDataDownload(ssb *velerov2alpha1api.DataDownload) {
ssb.Status.Phase = velerov2alpha1api.DataDownloadPhasePrepared
ssb.Status.Node = r.nodeName
@ -754,9 +703,9 @@ func (r *DataDownloadReconciler) getTargetPVC(ctx context.Context, dd *velerov2a
}
func (r *DataDownloadReconciler) closeDataPath(ctx context.Context, ddName string) {
fsBackup := r.dataPathMgr.GetAsyncBR(ddName)
if fsBackup != nil {
fsBackup.Close(ctx)
asyncBR := r.dataPathMgr.GetAsyncBR(ddName)
if asyncBR != nil {
asyncBR.Close(ctx)
}
r.dataPathMgr.RemoveAsyncBR(ddName)
@ -795,56 +744,140 @@ func isDataDownloadInFinalState(dd *velerov2alpha1api.DataDownload) bool {
dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseCompleted
}
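// UpdateDataDownloadWithRetry polls until the update succeeds or the context is canceled. updateFunc returns false
// when no change is needed, in which case the update is skipped; conflicts are retried, other update errors abort the poll.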
func UpdateDataDownloadWithRetry(ctx context.Context, client client.Client, namespacedName types.NamespacedName, log *logrus.Entry, updateFunc func(dataDownload *velerov2alpha1api.DataDownload)) error {
return wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (done bool, err error) {
func UpdateDataDownloadWithRetry(ctx context.Context, client client.Client, namespacedName types.NamespacedName, log *logrus.Entry, updateFunc func(*velerov2alpha1api.DataDownload) bool) error {
return wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) {
dd := &velerov2alpha1api.DataDownload{}
if err := client.Get(ctx, namespacedName, dd); err != nil {
return false, errors.Wrap(err, "getting DataDownload")
}
updateFunc(dd)
updateErr := client.Update(ctx, dd)
if updateErr != nil {
if apierrors.IsConflict(updateErr) {
log.Warnf("failed to update datadownload for %s/%s and will retry it", dd.Namespace, dd.Name)
return false, nil
if updateFunc(dd) {
err := client.Update(ctx, dd)
if err != nil {
if apierrors.IsConflict(err) {
log.Warnf("failed to update datadownload for %s/%s and will retry it", dd.Namespace, dd.Name)
return false, nil
} else {
return false, errors.Wrapf(err, "error updating datadownload %s/%s", dd.Namespace, dd.Name)
}
}
log.Errorf("failed to update datadownload with error %s for %s/%s", updateErr.Error(), dd.Namespace, dd.Name)
return false, err
}
return true, nil
})
}
func (r *DataDownloadReconciler) AttemptDataDownloadResume(ctx context.Context, cli client.Client, logger *logrus.Entry, ns string) error {
if dataDownloads, err := r.FindDataDownloads(ctx, cli, ns); err != nil {
return errors.Wrapf(err, "failed to find data downloads")
} else {
for i := range dataDownloads {
dd := dataDownloads[i]
if dd.Status.Phase == velerov2alpha1api.DataDownloadPhasePrepared {
// keep doing nothing let controller re-download the data
// the Prepared CR could be still handled by datadownload controller after node-agent restart
logger.WithField("datadownload", dd.GetName()).Debug("find a datadownload with status prepared")
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseInProgress {
err = UpdateDataDownloadWithRetry(ctx, cli, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, logger.WithField("datadownload", dd.Name),
func(dataDownload *velerov2alpha1api.DataDownload) {
dataDownload.Spec.Cancel = true
dataDownload.Status.Message = fmt.Sprintf("found a datadownload with status %q during the node-agent starting, mark it as cancel", dd.Status.Phase)
})
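// funcResumeCancellableDataRestore is a package-level function variable so tests can substitute the resume logic.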
var funcResumeCancellableDataRestore = (*DataDownloadReconciler).resumeCancellableDataPath
if err != nil {
logger.WithError(errors.WithStack(err)).Errorf("failed to mark datadownload %q into canceled", dd.GetName())
continue
}
logger.WithField("datadownload", dd.GetName()).Debug("mark datadownload into canceled")
func (r *DataDownloadReconciler) AttemptDataDownloadResume(ctx context.Context, logger *logrus.Entry, ns string) error {
dataDownloads := &velerov2alpha1api.DataDownloadList{}
if err := r.client.List(ctx, dataDownloads, &client.ListOptions{Namespace: ns}); err != nil {
r.logger.WithError(errors.WithStack(err)).Error("failed to list datadownloads")
return errors.Wrapf(err, "error to list datadownloads")
}
for i := range dataDownloads.Items {
dd := &dataDownloads.Items[i]
if dd.Status.Phase == velerov2alpha1api.DataDownloadPhasePrepared {
// keep doing nothing and let the controller re-download the data;
// the Prepared CR can still be handled by the datadownload controller after the node-agent restarts
logger.WithField("datadownload", dd.GetName()).Debug("Found a datadownload with status Prepared")
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseInProgress {
if dd.Status.Node != r.nodeName {
logger.WithField("dd", dd.Name).WithField("current node", r.nodeName).Infof("DD should be resumed by another node %s", dd.Status.Node)
continue
}
err := funcResumeCancellableDataRestore(r, ctx, dd, logger)
if err == nil {
logger.WithField("dd", dd.Name).WithField("current node", r.nodeName).Info("Completed to resume in progress DD")
continue
}
logger.WithField("datadownload", dd.GetName()).WithError(err).Warn("Failed to resume data path for dd, have to cancel it")
resumeErr := err
err = UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name}, logger.WithField("datadownload", dd.Name),
func(dataDownload *velerov2alpha1api.DataDownload) bool {
if dataDownload.Spec.Cancel {
return false
}
dataDownload.Spec.Cancel = true
dataDownload.Status.Message = fmt.Sprintf("Resume InProgress datadownload failed with error %v, mark it as cancel", resumeErr)
return true
})
if err != nil {
logger.WithError(errors.WithStack(err)).Error("Failed to trigger datadownload cancel")
}
} else if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseAccepted {
r.logger.WithField("datadownload", dd.GetName()).Warn("Cancel dd under Accepted phase")
err := UpdateDataDownloadWithRetry(ctx, r.client, types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name},
r.logger.WithField("datadownload", dd.Name), func(dataDownload *velerov2alpha1api.DataDownload) bool {
if dataDownload.Spec.Cancel {
return false
}
dataDownload.Spec.Cancel = true
dataDownload.Status.Message = "Datadownload is in Accepted status during the node-agent starting, mark it as cancel"
return true
})
if err != nil {
r.logger.WithField("datadownload", dd.GetName()).WithError(err).Errorf("Failed to trigger dataupload cancel")
}
}
}
// If the data download is in Accepted status, the exposed PVC may not have been created,
// so we need to mark the data download as canceled because it may not be recoverable
r.CancelAcceptedDataDownload(ctx, cli, ns)
return nil
}
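// resumeCancellableDataPath re-attaches to an in-progress restore after a node-agent restart: it looks up the exposed
// volume, re-creates the micro-service BR watcher in resume mode, then re-initializes and restarts the async restore;
// the data path is closed if any step fails.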
func (r *DataDownloadReconciler) resumeCancellableDataPath(ctx context.Context, dd *velerov2alpha1api.DataDownload, log logrus.FieldLogger) error {
log.Info("Resume cancelable dataDownload")
res, err := r.restoreExposer.GetExposed(ctx, getDataDownloadOwnerObject(dd), r.client, r.nodeName, dd.Spec.OperationTimeout.Duration)
if err != nil {
return errors.Wrapf(err, "error to get exposed volume for dd %s", dd.Name)
}
if res == nil {
return errors.Errorf("expose info missed for dd %s", dd.Name)
}
callbacks := datapath.Callbacks{
OnCompleted: r.OnDataDownloadCompleted,
OnFailed: r.OnDataDownloadFailed,
OnCancelled: r.OnDataDownloadCancelled,
OnProgress: r.OnDataDownloadProgress,
}
asyncBR, err := r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, r.kubeClient, r.mgr, datapath.TaskTypeBackup, dd.Name, dd.Namespace, res.ByPod.HostingPod.Name, res.ByPod.HostingContainer, dd.Name, callbacks, true, log)
if err != nil {
return errors.Wrapf(err, "error to create asyncBR watcher for dd %s", dd.Name)
}
resumeComplete := false
defer func() {
if !resumeComplete {
r.closeDataPath(ctx, dd.Name)
}
}()
if err := asyncBR.Init(ctx, nil); err != nil {
return errors.Wrapf(err, "error to init asyncBR watcher for dd %s", dd.Name)
}
if err := asyncBR.StartRestore(dd.Spec.SnapshotID, datapath.AccessPoint{
ByPath: res.ByPod.VolumeName,
}, nil); err != nil {
return errors.Wrapf(err, "error to resume asyncBR watcher for dd %s", dd.Name)
}
resumeComplete = true
log.Infof("asyncBR is resumed for dd %s", dd.Name)
return nil
}

View File

@ -33,15 +33,16 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
clientgofake "k8s.io/client-go/kubernetes/fake"
ctrl "sigs.k8s.io/controller-runtime"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"github.com/vmware-tanzu/velero/internal/credentials"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/builder"
@ -137,19 +138,9 @@ func initDataDownloadReconcilerWithError(objects []runtime.Object, needError ...
return nil, err
}
credentialFileStore, err := credentials.NewNamespacedFileStore(
fakeClient,
velerov1api.DefaultNamespace,
"/tmp/credentials",
fakeFS,
)
if err != nil {
return nil, err
}
dataPathMgr := datapath.NewManager(1)
return NewDataDownloadReconciler(fakeClient, fakeKubeClient, dataPathMgr, nil, &credentials.CredentialGetter{FromFile: credentialFileStore}, "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics()), nil
return NewDataDownloadReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, corev1.ResourceRequirements{}, "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics()), nil
}
func TestDataDownloadReconcile(t *testing.T) {
@ -162,7 +153,17 @@ func TestDataDownloadReconcile(t *testing.T) {
Kind: "DaemonSet",
APIVersion: appsv1.SchemeGroupVersion.String(),
},
Spec: appsv1.DaemonSetSpec{},
Spec: appsv1.DaemonSetSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Image: "fake-image",
},
},
},
},
},
}
tests := []struct {
@ -180,6 +181,10 @@ func TestDataDownloadReconcile(t *testing.T) {
isFSBRRestoreErr bool
notNilExpose bool
notMockCleanUp bool
mockInit bool
mockInitErr error
mockStart bool
mockStartErr error
mockCancel bool
mockClose bool
expected *velerov2alpha1api.DataDownload
@ -252,21 +257,36 @@ func TestDataDownloadReconcile(t *testing.T) {
expectedResult: &ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
},
{
name: "Error getting volume directory name for pvc in pod",
name: "data path init error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
notNilExpose: true,
mockInit: true,
mockInitErr: errors.New("fake-data-path-init-error"),
mockClose: true,
expectedStatusMsg: "error identifying unique volume path on host",
notNilExpose: true,
expectedStatusMsg: "error initializing asyncBR: fake-data-path-init-error",
},
{
name: "Unable to update status to in progress for data download",
name: "Unable to update status to in progress for data download",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
needErrs: []bool{false, false, false, true},
mockInit: true,
mockClose: true,
notNilExpose: true,
notMockCleanUp: true,
expectedResult: &ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
},
{
name: "data path start error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
targetPVC: builder.ForPersistentVolumeClaim("test-ns", "test-pvc").Result(),
needErrs: []bool{false, false, false, true},
mockInit: true,
mockStart: true,
mockStartErr: errors.New("fake-data-path-start-error"),
mockClose: true,
notNilExpose: true,
notMockCleanUp: true,
expectedStatusMsg: "Patch error",
expectedStatusMsg: "error starting async restore for pod test-name, volume test-pvc: fake-data-path-start-error",
},
{
name: "accept DataDownload error",
@ -392,17 +412,26 @@ func TestDataDownloadReconcile(t *testing.T) {
r.dataPathMgr = datapath.NewManager(1)
}
datapath.FSBRCreator = func(string, string, kbclient.Client, string, datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
fsBR := datapathmockes.NewAsyncBR(t)
datapath.MicroServiceBRWatcherCreator = func(kbclient.Client, kubernetes.Interface, manager.Manager, string, string,
string, string, string, string, datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
asyncBR := datapathmockes.NewAsyncBR(t)
if test.mockInit {
asyncBR.On("Init", mock.Anything, mock.Anything).Return(test.mockInitErr)
}
if test.mockStart {
asyncBR.On("StartRestore", mock.Anything, mock.Anything, mock.Anything).Return(test.mockStartErr)
}
if test.mockCancel {
fsBR.On("Cancel").Return()
asyncBR.On("Cancel").Return()
}
if test.mockClose {
fsBR.On("Close", mock.Anything).Return()
asyncBR.On("Close", mock.Anything).Return()
}
return fsBR
return asyncBR
}
if test.isExposeErr || test.isGetExposeErr || test.isPeekExposeErr || test.isNilExposer || test.notNilExpose {
@ -412,7 +441,7 @@ func TestDataDownloadReconcile(t *testing.T) {
r.restoreExposer = func() exposer.GenericRestoreExposer {
ep := exposermockes.NewGenericRestoreExposer(t)
if test.isExposeErr {
ep.On("Expose", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(errors.New("Error to expose restore exposer"))
ep.On("Expose", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(errors.New("Error to expose restore exposer"))
} else if test.notNilExpose {
hostingPod := builder.ForPod("test-ns", "test-name").Volumes(&corev1.Volume{Name: "test-pvc"}).Result()
hostingPod.ObjectMeta.SetUID("test-uid")
@ -433,7 +462,8 @@ func TestDataDownloadReconcile(t *testing.T) {
if test.needCreateFSBR {
if fsBR := r.dataPathMgr.GetAsyncBR(test.dd.Name); fsBR == nil {
_, err := r.dataPathMgr.CreateFileSystemBR(test.dd.Name, pVBRRequestor, ctx, r.client, velerov1api.DefaultNamespace, datapath.Callbacks{OnCancelled: r.OnDataDownloadCancelled}, velerotest.NewLogger())
_, err := r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, nil, nil, datapath.TaskTypeRestore, test.dd.Name, pVBRRequestor,
velerov1api.DefaultNamespace, "", "", datapath.Callbacks{OnCancelled: r.OnDataDownloadCancelled}, false, velerotest.NewLogger())
require.NoError(t, err)
}
}
@ -446,7 +476,7 @@ func TestDataDownloadReconcile(t *testing.T) {
})
if test.expectedStatusMsg != "" {
assert.Contains(t, err.Error(), test.expectedStatusMsg)
require.ErrorContains(t, err, test.expectedStatusMsg)
} else {
require.NoError(t, err)
}
@ -482,6 +512,10 @@ func TestDataDownloadReconcile(t *testing.T) {
assert.True(t, apierrors.IsNotFound(err))
}
if !test.needCreateFSBR {
assert.Nil(t, r.dataPathMgr.GetAsyncBR(test.dd.Name))
}
t.Logf("%s: \n %v \n", test.name, dd)
})
}
@ -839,7 +873,7 @@ func TestTryCancelDataDownload(t *testing.T) {
err = r.client.Create(ctx, test.dd)
require.NoError(t, err)
r.TryCancelDataDownload(ctx, test.dd, "")
r.tryCancelAcceptedDataDownload(ctx, test.dd, "")
if test.expectedErr == "" {
assert.NoError(t, err)
@ -859,12 +893,11 @@ func TestUpdateDataDownloadWithRetry(t *testing.T) {
testCases := []struct {
Name string
needErrs []bool
noChange bool
ExpectErr bool
}{
{
Name: "SuccessOnFirstAttempt",
needErrs: []bool{false, false, false, false},
ExpectErr: false,
Name: "SuccessOnFirstAttempt",
},
{
Name: "Error get",
@ -876,6 +909,11 @@ func TestUpdateDataDownloadWithRetry(t *testing.T) {
needErrs: []bool{false, false, true, false, false},
ExpectErr: true,
},
{
Name: "no change",
noChange: true,
needErrs: []bool{false, false, true, false, false},
},
{
Name: "Conflict with error timeout",
needErrs: []bool{false, false, false, false, true},
@ -891,8 +929,14 @@ func TestUpdateDataDownloadWithRetry(t *testing.T) {
require.NoError(t, err)
err = r.client.Create(ctx, dataDownloadBuilder().Result())
require.NoError(t, err)
updateFunc := func(dataDownload *velerov2alpha1api.DataDownload) {
updateFunc := func(dataDownload *velerov2alpha1api.DataDownload) bool {
if tc.noChange {
return false
}
dataDownload.Spec.Cancel = true
return true
}
err = UpdateDataDownloadWithRetry(ctx, r.client, namespacedName, velerotest.NewLogger().WithField("name", tc.Name), updateFunc)
if tc.ExpectErr {
@ -904,136 +948,115 @@ func TestUpdateDataDownloadWithRetry(t *testing.T) {
}
}
func TestFindDataDownloads(t *testing.T) {
tests := []struct {
name string
pod corev1.Pod
du *velerov2alpha1api.DataDownload
expectedUploads []velerov2alpha1api.DataDownload
expectedError bool
}{
// Test case 1: Pod with matching nodeName and DataDownload label
{
name: "MatchingPod",
pod: corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: "pod-1",
Labels: map[string]string{
velerov1api.DataDownloadLabel: dataDownloadName,
},
},
Spec: corev1.PodSpec{
NodeName: "node-1",
},
},
du: dataDownloadBuilder().Result(),
expectedUploads: []velerov2alpha1api.DataDownload{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: dataDownloadName,
},
},
},
expectedError: false,
},
// Test case 2: Pod with non-matching nodeName
{
name: "NonMatchingNodePod",
pod: corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: "pod-2",
Labels: map[string]string{
velerov1api.DataDownloadLabel: dataDownloadName,
},
},
Spec: corev1.PodSpec{
NodeName: "node-2",
},
},
du: dataDownloadBuilder().Result(),
expectedUploads: []velerov2alpha1api.DataDownload{},
expectedError: false,
},
}
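// ddResumeTestHelper stubs the generic restore exposer, the micro-service BR watcher creator and the resume logic
// so the resume paths can be tested in isolation.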
type ddResumeTestHelper struct {
resumeErr error
getExposeErr error
exposeResult *exposer.ExposeResult
asyncBR datapath.AsyncBR
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r, err := initDataDownloadReconcilerWithError(nil)
require.NoError(t, err)
r.nodeName = "node-1"
err = r.client.Create(ctx, test.du)
require.NoError(t, err)
err = r.client.Create(ctx, &test.pod)
require.NoError(t, err)
uploads, err := r.FindDataDownloads(context.Background(), r.client, "velero")
func (dt *ddResumeTestHelper) resumeCancellableDataPath(_ *DataDownloadReconciler, _ context.Context, _ *velerov2alpha1api.DataDownload, _ logrus.FieldLogger) error {
return dt.resumeErr
}
if test.expectedError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
assert.Equal(t, len(test.expectedUploads), len(uploads))
}
})
}
func (dt *ddResumeTestHelper) Expose(context.Context, corev1.ObjectReference, string, string, map[string]string, corev1.ResourceRequirements, time.Duration) error {
return nil
}
func (dt *ddResumeTestHelper) GetExposed(context.Context, corev1.ObjectReference, kbclient.Client, string, time.Duration) (*exposer.ExposeResult, error) {
return dt.exposeResult, dt.getExposeErr
}
func (dt *ddResumeTestHelper) PeekExposed(context.Context, corev1.ObjectReference) error {
return nil
}
func (dt *ddResumeTestHelper) RebindVolume(context.Context, corev1.ObjectReference, string, string, time.Duration) error {
return nil
}
func (dt *ddResumeTestHelper) CleanUp(context.Context, corev1.ObjectReference) {}
func (dt *ddResumeTestHelper) newMicroServiceBRWatcher(kbclient.Client, kubernetes.Interface, manager.Manager, string, string, string, string, string, string,
datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
return dt.asyncBR
}
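// TestAttemptDataDownloadResume covers node-agent restart handling of Accepted, Prepared and InProgress datadownloads,
// including cancel failures and resume errors.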
func TestAttemptDataDownloadResume(t *testing.T) {
tests := []struct {
name string
dataUploads []velerov2alpha1api.DataDownload
du *velerov2alpha1api.DataDownload
pod *corev1.Pod
needErrs []bool
acceptedDataDownloads []string
prepareddDataDownloads []string
cancelledDataDownloads []string
expectedError bool
name string
dataUploads []velerov2alpha1api.DataDownload
dd *velerov2alpha1api.DataDownload
needErrs []bool
resumeErr error
acceptedDataDownloads []string
prepareddDataDownloads []string
cancelledDataDownloads []string
inProgressDataDownloads []string
expectedError string
}{
// Test case 1: Process Accepted DataDownload
{
name: "AcceptedDataDownload",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataDownloadName).Volumes(&corev1.Volume{Name: dataDownloadName}).NodeName("node-1").Labels(map[string]string{
velerov1api.DataDownloadLabel: dataDownloadName,
name: "accepted DataDownload with no dd label",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
cancelledDataDownloads: []string{dataDownloadName},
acceptedDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload in the current node",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
cancelledDataDownloads: []string{dataDownloadName},
acceptedDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but is canceled",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Cancel(true).Labels(map[string]string{
acceptNodeLabelKey: "node-1",
}).Result(),
du: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
acceptedDataDownloads: []string{dataDownloadName},
cancelledDataDownloads: []string{dataDownloadName},
},
{
name: "accepted DataDownload with dd label but cancel fail",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Labels(map[string]string{
acceptNodeLabelKey: "node-1",
}).Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataDownloads: []string{dataDownloadName},
expectedError: false,
},
// Test case 2: Cancel an Accepted DataDownload
{
name: "CancelAcceptedDataDownload",
du: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result(),
},
// Test case 3: Process Accepted Prepared DataDownload
{
name: "PreparedDataDownload",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataDownloadName).Volumes(&corev1.Volume{Name: dataDownloadName}).NodeName("node-1").Labels(map[string]string{
velerov1api.DataDownloadLabel: dataDownloadName,
}).Result(),
du: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
name: "prepared DataDownload",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
prepareddDataDownloads: []string{dataDownloadName},
},
// Test case 4: Process Accepted InProgress DataDownload
{
name: "InProgressDataDownload",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataDownloadName).Volumes(&corev1.Volume{Name: dataDownloadName}).NodeName("node-1").Labels(map[string]string{
velerov1api.DataDownloadLabel: dataDownloadName,
}).Result(),
du: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
prepareddDataDownloads: []string{dataDownloadName},
name: "InProgress DataDownload, not the current node",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Result(),
inProgressDataDownloads: []string{dataDownloadName},
},
// Test case 5: get resume error
{
name: "ResumeError",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataDownloadName).Volumes(&corev1.Volume{Name: dataDownloadName}).NodeName("node-1").Labels(map[string]string{
velerov1api.DataDownloadLabel: dataDownloadName,
}).Result(),
name: "InProgress DataDownload, no resume error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Node("node-1").Result(),
inProgressDataDownloads: []string{dataDownloadName},
},
{
name: "InProgress DataDownload, resume error, cancel error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Node("node-1").Result(),
resumeErr: errors.New("fake-resume-error"),
needErrs: []bool{false, false, true, false, false, false},
inProgressDataDownloads: []string{dataDownloadName},
},
{
name: "InProgress DataDownload, resume error, cancel succeed",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Node("node-1").Result(),
resumeErr: errors.New("fake-resume-error"),
cancelledDataDownloads: []string{dataDownloadName},
inProgressDataDownloads: []string{dataDownloadName},
},
{
name: "Error",
needErrs: []bool{false, false, false, false, false, true},
du: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
expectedError: true,
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhasePrepared).Result(),
expectedError: "error to list datadownloads: List error",
},
}
@ -1044,30 +1067,31 @@ func TestAttemptDataDownloadResume(t *testing.T) {
r.nodeName = "node-1"
require.NoError(t, err)
defer func() {
r.client.Delete(ctx, test.du, &kbclient.DeleteOptions{})
if test.pod != nil {
r.client.Delete(ctx, test.pod, &kbclient.DeleteOptions{})
}
r.client.Delete(ctx, test.dd, &kbclient.DeleteOptions{})
}()
assert.NoError(t, r.client.Create(ctx, test.du))
if test.pod != nil {
assert.NoError(t, r.client.Create(ctx, test.pod))
}
// Run the test
err = r.AttemptDataDownloadResume(ctx, r.client, r.logger.WithField("name", test.name), test.du.Namespace)
assert.NoError(t, r.client.Create(ctx, test.dd))
if test.expectedError {
assert.Error(t, err)
dt := &duResumeTestHelper{
resumeErr: test.resumeErr,
}
funcResumeCancellableDataRestore = dt.resumeCancellableDataPath
// Run the test
err = r.AttemptDataDownloadResume(ctx, r.logger.WithField("name", test.name), test.dd.Namespace)
if test.expectedError != "" {
assert.EqualError(t, err, test.expectedError)
} else {
assert.NoError(t, err)
// Verify DataDownload marked as Canceled
for _, duName := range test.cancelledDataDownloads {
dataUpload := &velerov2alpha1api.DataDownload{}
err := r.client.Get(context.Background(), types.NamespacedName{Namespace: "velero", Name: duName}, dataUpload)
dataDownload := &velerov2alpha1api.DataDownload{}
err := r.client.Get(context.Background(), types.NamespacedName{Namespace: "velero", Name: duName}, dataDownload)
require.NoError(t, err)
assert.Equal(t, velerov2alpha1api.DataDownloadPhaseCanceled, dataUpload.Status.Phase)
assert.True(t, dataDownload.Spec.Cancel)
}
// Verify DataDownload marked as Accepted
for _, duName := range test.acceptedDataDownloads {
@ -1087,3 +1111,108 @@ func TestAttemptDataDownloadResume(t *testing.T) {
})
}
}
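// TestResumeCancellableRestore exercises resumeCancellableDataPath: expose lookup failure, missing expose result,
// watcher init/start errors and the success path.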
func TestResumeCancellableRestore(t *testing.T) {
tests := []struct {
name string
dataDownloads []velerov2alpha1api.DataDownload
dd *velerov2alpha1api.DataDownload
getExposeErr error
exposeResult *exposer.ExposeResult
createWatcherErr error
initWatcherErr error
startWatcherErr error
mockInit bool
mockStart bool
mockClose bool
expectedError string
}{
{
name: "get expose failed",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseInProgress).Result(),
getExposeErr: errors.New("fake-expose-error"),
expectedError: fmt.Sprintf("error to get exposed volume for dd %s: fake-expose-error", dataDownloadName),
},
{
name: "no expose",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Node("node-1").Result(),
expectedError: fmt.Sprintf("expose info missed for dd %s", dataDownloadName),
},
{
name: "watcher init error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Node("node-1").Result(),
exposeResult: &exposer.ExposeResult{
ByPod: exposer.ExposeByPod{
HostingPod: &corev1.Pod{},
},
},
mockInit: true,
mockClose: true,
initWatcherErr: errors.New("fake-init-watcher-error"),
expectedError: fmt.Sprintf("error to init asyncBR watcher for dd %s: fake-init-watcher-error", dataDownloadName),
},
{
name: "start watcher error",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Node("node-1").Result(),
exposeResult: &exposer.ExposeResult{
ByPod: exposer.ExposeByPod{
HostingPod: &corev1.Pod{},
},
},
mockInit: true,
mockStart: true,
mockClose: true,
startWatcherErr: errors.New("fake-start-watcher-error"),
expectedError: fmt.Sprintf("error to resume asyncBR watcher for dd %s: fake-start-watcher-error", dataDownloadName),
},
{
name: "succeed",
dd: dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Node("node-1").Result(),
exposeResult: &exposer.ExposeResult{
ByPod: exposer.ExposeByPod{
HostingPod: &corev1.Pod{},
},
},
mockInit: true,
mockStart: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
ctx := context.TODO()
r, err := initDataDownloadReconciler(nil, false)
r.nodeName = "node-1"
require.NoError(t, err)
mockAsyncBR := datapathmockes.NewAsyncBR(t)
if test.mockInit {
mockAsyncBR.On("Init", mock.Anything, mock.Anything).Return(test.initWatcherErr)
}
if test.mockStart {
mockAsyncBR.On("StartRestore", mock.Anything, mock.Anything, mock.Anything).Return(test.startWatcherErr)
}
if test.mockClose {
mockAsyncBR.On("Close", mock.Anything).Return()
}
dt := &ddResumeTestHelper{
getExposeErr: test.getExposeErr,
exposeResult: test.exposeResult,
asyncBR: mockAsyncBR,
}
r.restoreExposer = dt
datapath.MicroServiceBRWatcherCreator = dt.newMicroServiceBRWatcher
err = r.resumeCancellableDataPath(ctx, test.dd, velerotest.NewLogger())
if test.expectedError != "" {
assert.EqualError(t, err, test.expectedError)
}
})
}
}

View File

@ -35,12 +35,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
snapshotter "github.com/kubernetes-csi/external-snapshotter/client/v7/clientset/versioned/typed/volumesnapshot/v1"
"github.com/vmware-tanzu/velero/internal/credentials"
"github.com/vmware-tanzu/velero/pkg/apis/velero/shared"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
@ -49,9 +49,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/exposer"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
@ -67,35 +65,35 @@ type DataUploadReconciler struct {
client client.Client
kubeClient kubernetes.Interface
csiSnapshotClient snapshotter.SnapshotV1Interface
repoEnsurer *repository.Ensurer
mgr manager.Manager
Clock clocks.WithTickerAndDelayedExecution
credentialGetter *credentials.CredentialGetter
nodeName string
fileSystem filesystem.Interface
logger logrus.FieldLogger
snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer
dataPathMgr *datapath.Manager
loadAffinity *nodeagent.LoadAffinity
backupPVCConfig map[string]nodeagent.BackupPVC
podResources corev1.ResourceRequirements
preparingTimeout time.Duration
metrics *metrics.ServerMetrics
}
func NewDataUploadReconciler(client client.Client, kubeClient kubernetes.Interface, csiSnapshotClient snapshotter.SnapshotV1Interface,
dataPathMgr *datapath.Manager, loadAffinity *nodeagent.LoadAffinity, repoEnsurer *repository.Ensurer, clock clocks.WithTickerAndDelayedExecution,
cred *credentials.CredentialGetter, nodeName string, fs filesystem.Interface, preparingTimeout time.Duration, log logrus.FieldLogger, metrics *metrics.ServerMetrics) *DataUploadReconciler {
func NewDataUploadReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, csiSnapshotClient snapshotter.SnapshotV1Interface,
dataPathMgr *datapath.Manager, loadAffinity *nodeagent.LoadAffinity, backupPVCConfig map[string]nodeagent.BackupPVC, podResources corev1.ResourceRequirements,
clock clocks.WithTickerAndDelayedExecution, nodeName string, preparingTimeout time.Duration, log logrus.FieldLogger, metrics *metrics.ServerMetrics) *DataUploadReconciler {
return &DataUploadReconciler{
client: client,
mgr: mgr,
kubeClient: kubeClient,
csiSnapshotClient: csiSnapshotClient,
Clock: clock,
credentialGetter: cred,
nodeName: nodeName,
fileSystem: fs,
logger: log,
repoEnsurer: repoEnsurer,
snapshotExposerList: map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{velerov2alpha1api.SnapshotTypeCSI: exposer.NewCSISnapshotExposer(kubeClient, csiSnapshotClient, log)},
dataPathMgr: dataPathMgr,
loadAffinity: loadAffinity,
backupPVCConfig: backupPVCConfig,
podResources: podResources,
preparingTimeout: preparingTimeout,
metrics: metrics,
}
@ -150,9 +148,17 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
} else if controllerutil.ContainsFinalizer(du, DataUploadDownloadFinalizer) && !du.Spec.Cancel && !isDataUploadInFinalState(du) {
// when the CR is deleted we need to clean up the internal resources created by Velero; here we use the cancel mechanism
// to clean up those resources instead of deleting them directly, in case of a conflict with the Expose action
if err := UpdateDataUploadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataUpload *velerov2alpha1api.DataUpload) {
log.Warnf("Cancel du under phase %s because it is being deleted", du.Status.Phase)
if err := UpdateDataUploadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataUpload *velerov2alpha1api.DataUpload) bool {
if dataUpload.Spec.Cancel {
return false
}
dataUpload.Spec.Cancel = true
dataUpload.Status.Message = fmt.Sprintf("found a dataupload %s/%s is being deleted, mark it as cancel", du.Namespace, du.Name)
dataUpload.Status.Message = "Cancel dataupload because it is being deleted"
return true
}); err != nil {
log.Errorf("failed to set cancel flag with error %s for %s/%s", err.Error(), du.Namespace, du.Name)
return ctrl.Result{}, err
@ -227,9 +233,9 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// we don't want to forcibly update the CR into cancel status as it may conflict with the CR update in the Expose action
// we could retry when the CR is requeued periodically
log.Debugf("Data upload is been canceled %s in Phase %s", du.GetName(), du.Status.Phase)
r.TryCancelDataUpload(ctx, du, "")
r.tryCancelAcceptedDataUpload(ctx, du, "")
} else if peekErr := ep.PeekExposed(ctx, getOwnerObject(du)); peekErr != nil {
r.TryCancelDataUpload(ctx, du, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", du.Namespace, du.Name, peekErr))
r.tryCancelAcceptedDataUpload(ctx, du, fmt.Sprintf("found a dataupload %s/%s with expose error: %s. mark it as cancel", du.Namespace, du.Name, peekErr))
log.Errorf("Cancel du %s/%s because of expose error %s", du.Namespace, du.Name, peekErr)
} else if du.Status.StartTimestamp != nil {
if time.Since(du.Status.StartTimestamp.Time) >= r.preparingTimeout {
@ -246,8 +252,8 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
}
fsBackup := r.dataPathMgr.GetAsyncBR(du.Name)
if fsBackup != nil {
asyncBR := r.dataPathMgr.GetAsyncBR(du.Name)
if asyncBR != nil {
log.Info("Cancellable data path is already started")
return ctrl.Result{}, nil
}
@ -270,7 +276,8 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
OnProgress: r.OnDataUploadProgress,
}
fsBackup, err = r.dataPathMgr.CreateFileSystemBR(du.Name, dataUploadDownloadRequestor, ctx, r.client, du.Namespace, callbacks, log)
asyncBR, err = r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, r.kubeClient, r.mgr, datapath.TaskTypeBackup,
du.Name, du.Namespace, res.ByPod.HostingPod.Name, res.ByPod.HostingContainer, du.Name, callbacks, false, log)
if err != nil {
if err == datapath.ConcurrentLimitExceed {
log.Info("Data path instance is concurrent limited requeue later")
@ -279,28 +286,42 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return r.errorOut(ctx, du, err, "error to create data path", log)
}
}
if err := r.initCancelableDataPath(ctx, asyncBR, res, log); err != nil {
log.WithError(err).Errorf("Failed to init cancelable data path for %s", du.Name)
r.closeDataPath(ctx, du.Name)
return r.errorOut(ctx, du, err, "error initializing data path", log)
}
// Update status to InProgress
original := du.DeepCopy()
du.Status.Phase = velerov2alpha1api.DataUploadPhaseInProgress
du.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
if err := r.client.Patch(ctx, du, client.MergeFrom(original)); err != nil {
return r.errorOut(ctx, du, err, "error updating dataupload status", log)
log.WithError(err).Warnf("Failed to update dataupload %s to InProgress, will data path close and retry", du.Name)
r.closeDataPath(ctx, du.Name)
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
}
log.Info("Data upload is marked as in progress")
result, err := r.runCancelableDataUpload(ctx, fsBackup, du, res, log)
if err != nil {
log.Errorf("Failed to run cancelable data path for %s with err %v", du.Name, err)
if err := r.startCancelableDataPath(asyncBR, du, res, log); err != nil {
log.WithError(err).Errorf("Failed to start cancelable data path for %s", du.Name)
r.closeDataPath(ctx, du.Name)
return r.errorOut(ctx, du, err, "error starting data path", log)
}
return result, err
return ctrl.Result{}, nil
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress {
log.Info("Data upload is in progress")
if du.Spec.Cancel {
log.Info("Data upload is being canceled")
fsBackup := r.dataPathMgr.GetAsyncBR(du.Name)
if fsBackup == nil {
asyncBR := r.dataPathMgr.GetAsyncBR(du.Name)
if asyncBR == nil {
if du.Status.Node == r.nodeName {
r.OnDataUploadCancelled(ctx, du.GetNamespace(), du.GetName())
} else {
@ -316,7 +337,7 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
log.WithError(err).Error("error updating data upload into canceling status")
return ctrl.Result{}, err
}
fsBackup.Cancel()
asyncBR.Cancel()
return ctrl.Result{}, nil
}
return ctrl.Result{}, nil
@ -336,49 +357,33 @@ func (r *DataUploadReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}
}
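// initCancelableDataPath initializes the asyncBR watcher before the backup starts; startCancelableDataPath then
// kicks off the async backup of the exposed pod volume.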
func (r *DataUploadReconciler) runCancelableDataUpload(ctx context.Context, fsBackup datapath.AsyncBR, du *velerov2alpha1api.DataUpload, res *exposer.ExposeResult, log logrus.FieldLogger) (reconcile.Result, error) {
log.Info("Run cancelable dataUpload")
func (r *DataUploadReconciler) initCancelableDataPath(ctx context.Context, asyncBR datapath.AsyncBR, res *exposer.ExposeResult, log logrus.FieldLogger) error {
log.Info("Init cancelable dataUpload")
path, err := exposer.GetPodVolumeHostPath(ctx, res.ByPod.HostingPod, res.ByPod.VolumeName, r.client, r.fileSystem, log)
if err != nil {
return r.errorOut(ctx, du, err, "error exposing host path for pod volume", log)
if err := asyncBR.Init(ctx, nil); err != nil {
return errors.Wrap(err, "error initializing asyncBR")
}
log.WithField("path", path.ByPath).Debug("Found host path")
log.Infof("async backup init for pod %s, volume %s", res.ByPod.HostingPod.Name, res.ByPod.VolumeName)
if err := fsBackup.Init(ctx, &datapath.FSBRInitParam{
BSLName: du.Spec.BackupStorageLocation,
SourceNamespace: du.Spec.SourceNamespace,
UploaderType: datamover.GetUploaderType(du.Spec.DataMover),
RepositoryType: velerov1api.BackupRepositoryTypeKopia,
RepoIdentifier: "",
RepositoryEnsurer: r.repoEnsurer,
CredentialGetter: r.credentialGetter,
}); err != nil {
return r.errorOut(ctx, du, err, "error to initialize data path", log)
return nil
}
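// startCancelableDataPath starts the backup through the asyncBR watcher; completion, failure and cancellation are reported back via the registered callbacks.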
func (r *DataUploadReconciler) startCancelableDataPath(asyncBR datapath.AsyncBR, du *velerov2alpha1api.DataUpload, res *exposer.ExposeResult, log logrus.FieldLogger) error {
log.Info("Start cancelable dataUpload")
if err := asyncBR.StartBackup(datapath.AccessPoint{
ByPath: res.ByPod.VolumeName,
}, du.Spec.DataMoverConfig, nil); err != nil {
return errors.Wrapf(err, "error starting async backup for pod %s, volume %s", res.ByPod.HostingPod.Name, res.ByPod.VolumeName)
}
log.WithField("path", path.ByPath).Info("fs init")
tags := map[string]string{
velerov1api.AsyncOperationIDLabel: du.Labels[velerov1api.AsyncOperationIDLabel],
}
if err := fsBackup.StartBackup(path, du.Spec.DataMoverConfig, &datapath.FSBRStartParam{
RealSource: datamover.GetRealSource(du.Spec.SourceNamespace, du.Spec.SourcePVC),
ParentSnapshot: "",
ForceFull: false,
Tags: tags,
}); err != nil {
return r.errorOut(ctx, du, err, "error starting data path backup", log)
}
log.WithField("path", path.ByPath).Info("Async fs backup data path started")
return ctrl.Result{}, nil
log.Infof("Async backup started for pod %s, volume %s", res.ByPod.HostingPod.Name, res.ByPod.VolumeName)
return nil
}
func (r *DataUploadReconciler) OnDataUploadCompleted(ctx context.Context, namespace string, duName string, result datapath.Result) {
defer r.closeDataPath(ctx, duName)
defer r.dataPathMgr.RemoveAsyncBR(duName)
log := r.logger.WithField("dataupload", duName)
@ -422,7 +427,7 @@ func (r *DataUploadReconciler) OnDataUploadCompleted(ctx context.Context, namesp
}
func (r *DataUploadReconciler) OnDataUploadFailed(ctx context.Context, namespace, duName string, err error) {
defer r.closeDataPath(ctx, duName)
defer r.dataPathMgr.RemoveAsyncBR(duName)
log := r.logger.WithField("dataupload", duName)
@ -432,14 +437,12 @@ func (r *DataUploadReconciler) OnDataUploadFailed(ctx context.Context, namespace
if getErr := r.client.Get(ctx, types.NamespacedName{Name: duName, Namespace: namespace}, &du); getErr != nil {
log.WithError(getErr).Warn("Failed to get dataupload on failure")
} else {
if _, errOut := r.errorOut(ctx, &du, err, "data path backup failed", log); err != nil {
log.WithError(err).Warnf("Failed to patch dataupload with err %v", errOut)
}
_, _ = r.errorOut(ctx, &du, err, "data path backup failed", log)
}
}
func (r *DataUploadReconciler) OnDataUploadCancelled(ctx context.Context, namespace string, duName string) {
defer r.closeDataPath(ctx, duName)
defer r.dataPathMgr.RemoveAsyncBR(duName)
log := r.logger.WithField("dataupload", duName)
@ -465,17 +468,19 @@ func (r *DataUploadReconciler) OnDataUploadCancelled(ctx context.Context, namesp
}
}
// tryCancelAcceptedDataUpload cleans up resources only when the status update succeeds
func (r *DataUploadReconciler) TryCancelDataUpload(ctx context.Context, du *velerov2alpha1api.DataUpload, message string) {
func (r *DataUploadReconciler) tryCancelAcceptedDataUpload(ctx context.Context, du *velerov2alpha1api.DataUpload, message string) {
log := r.logger.WithField("dataupload", du.Name)
log.Warn("Async fs backup data path canceled")
log.Warn("Accepted data upload is canceled")
succeeded, err := r.exclusiveUpdateDataUpload(ctx, du, func(dataUpload *velerov2alpha1api.DataUpload) {
dataUpload.Status.Phase = velerov2alpha1api.DataUploadPhaseCanceled
if dataUpload.Status.StartTimestamp.IsZero() {
dataUpload.Status.StartTimestamp = &metav1.Time{Time: r.Clock.Now()}
}
dataUpload.Status.CompletionTimestamp = &metav1.Time{Time: r.Clock.Now()}
dataUpload.Status.Message = message
if message != "" {
dataUpload.Status.Message = message
}
})
if err != nil {
@ -490,10 +495,9 @@ func (r *DataUploadReconciler) TryCancelDataUpload(ctx context.Context, du *vele
r.metrics.RegisterDataUploadCancel(r.nodeName)
// cleans up any objects generated during the snapshot expose
r.cleanUp(ctx, du, log)
r.closeDataPath(ctx, du.Name)
}
func (r *DataUploadReconciler) cleanUp(ctx context.Context, du *velerov2alpha1api.DataUpload, log *logrus.Entry) {
func (r *DataUploadReconciler) cleanUp(ctx context.Context, du *velerov2alpha1api.DataUpload, log logrus.FieldLogger) {
ep, ok := r.snapshotExposerList[du.Spec.SnapshotType]
if !ok {
log.WithError(fmt.Errorf("%v type of snapshot exposer is not exist", du.Spec.SnapshotType)).
@ -599,16 +603,22 @@ func (r *DataUploadReconciler) findDataUploadForPod(ctx context.Context, podObj
}
} else if unrecoverable, reason := kube.IsPodUnrecoverable(pod, log); unrecoverable { // let the abnormal backup pod fail early
err := UpdateDataUploadWithRetry(context.Background(), r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, r.logger.WithField("dataupload", du.Name),
func(dataUpload *velerov2alpha1api.DataUpload) {
func(dataUpload *velerov2alpha1api.DataUpload) bool {
if dataUpload.Spec.Cancel {
return false
}
dataUpload.Spec.Cancel = true
dataUpload.Status.Message = fmt.Sprintf("dataupload mark as cancel to failed early for exposing pod %s/%s is in abnormal status for reason %s", pod.Namespace, pod.Name, reason)
dataUpload.Status.Message = fmt.Sprintf("Cancel dataupload because the exposing pod %s/%s is in abnormal status for reason %s", pod.Namespace, pod.Name, reason)
return true
})
if err != nil {
log.WithError(err).Warn("failed to cancel dataupload, and it will wait for prepare timeout")
return []reconcile.Request{}
}
log.Info("Exposed pod is in abnormal status and dataupload is marked as cancel")
log.Infof("Exposed pod is in abnormal status(reason %s) and dataupload is marked as cancel", reason)
} else {
return []reconcile.Request{}
}
@ -622,75 +632,6 @@ func (r *DataUploadReconciler) findDataUploadForPod(ctx context.Context, podObj
return []reconcile.Request{request}
}
func (r *DataUploadReconciler) FindDataUploadsByPod(ctx context.Context, cli client.Client, ns string) ([]velerov2alpha1api.DataUpload, error) {
pods := &corev1.PodList{}
var dataUploads []velerov2alpha1api.DataUpload
if err := cli.List(ctx, pods, &client.ListOptions{Namespace: ns}); err != nil {
r.logger.WithError(errors.WithStack(err)).Error("failed to list pods on current node")
return nil, errors.Wrapf(err, "failed to list pods on current node")
}
for _, pod := range pods.Items {
if pod.Spec.NodeName != r.nodeName {
r.logger.Debugf("Pod %s related data upload will not handled by %s nodes", pod.GetName(), r.nodeName)
continue
}
du, err := findDataUploadByPod(cli, pod)
if err != nil {
r.logger.WithError(errors.WithStack(err)).Error("failed to get dataUpload by pod")
continue
} else if du != nil {
dataUploads = append(dataUploads, *du)
}
}
return dataUploads, nil
}
func (r *DataUploadReconciler) findAcceptDataUploadsByNodeLabel(ctx context.Context, cli client.Client, ns string) ([]velerov2alpha1api.DataUpload, error) {
dataUploads := &velerov2alpha1api.DataUploadList{}
if err := cli.List(ctx, dataUploads, &client.ListOptions{Namespace: ns}); err != nil {
r.logger.WithError(errors.WithStack(err)).Error("failed to list datauploads")
return nil, errors.Wrapf(err, "failed to list datauploads")
}
var result []velerov2alpha1api.DataUpload
for _, du := range dataUploads.Items {
if du.Status.Phase != velerov2alpha1api.DataUploadPhaseAccepted {
continue
}
if du.Labels[acceptNodeLabelKey] == r.nodeName {
result = append(result, du)
}
}
return result, nil
}
func (r *DataUploadReconciler) CancelAcceptedDataupload(ctx context.Context, cli client.Client, ns string) {
r.logger.Infof("Reset accepted dataupload for node %s", r.nodeName)
dataUploads, err := r.findAcceptDataUploadsByNodeLabel(ctx, cli, ns)
if err != nil {
r.logger.WithError(err).Error("failed to find dataupload")
return
}
for _, du := range dataUploads {
if du.Spec.Cancel {
continue
}
err = UpdateDataUploadWithRetry(ctx, cli, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, r.logger.WithField("dataupload", du.Name),
func(dataUpload *velerov2alpha1api.DataUpload) {
dataUpload.Spec.Cancel = true
dataUpload.Status.Message = fmt.Sprintf("found a dataupload with status %q during the node-agent starting, mark it as cancel", du.Status.Phase)
})
r.logger.WithField("dataupload", du.GetName()).Warn(du.Status.Message)
if err != nil {
r.logger.WithError(errors.WithStack(err)).Errorf("failed to mark dataupload %q cancel", du.GetName())
continue
}
}
}
func (r *DataUploadReconciler) prepareDataUpload(du *velerov2alpha1api.DataUpload) {
du.Status.Phase = velerov2alpha1api.DataUploadPhasePrepared
du.Status.Node = r.nodeName
@ -704,7 +645,7 @@ func (r *DataUploadReconciler) errorOut(ctx context.Context, du *velerov2alpha1a
}
se.CleanUp(ctx, getOwnerObject(du), volumeSnapshotName, du.Spec.SourceNamespace)
} else {
err = errors.Wrapf(err, "failed to clean up exposed snapshot with could not find %s snapshot exposer", du.Spec.SnapshotType)
log.Errorf("failed to clean up exposed snapshot could not find %s snapshot exposer", du.Spec.SnapshotType)
}
return ctrl.Result{}, r.updateStatusToFailed(ctx, du, err, msg, log)
@ -820,9 +761,9 @@ func (r *DataUploadReconciler) exclusiveUpdateDataUpload(ctx context.Context, du
}
func (r *DataUploadReconciler) closeDataPath(ctx context.Context, duName string) {
fsBackup := r.dataPathMgr.GetAsyncBR(duName)
if fsBackup != nil {
fsBackup.Close(ctx)
asyncBR := r.dataPathMgr.GetAsyncBR(duName)
if asyncBR != nil {
asyncBR.Close(ctx)
}
r.dataPathMgr.RemoveAsyncBR(duName)
@ -855,6 +796,8 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload
ExposeTimeout: r.preparingTimeout,
VolumeSize: pvc.Spec.Resources.Requests[corev1.ResourceStorage],
Affinity: r.loadAffinity,
BackupPVCConfig: r.backupPVCConfig,
Resources: r.podResources,
}, nil
}
return nil, nil
@ -902,54 +845,146 @@ func isDataUploadInFinalState(du *velerov2alpha1api.DataUpload) bool {
du.Status.Phase == velerov2alpha1api.DataUploadPhaseCompleted
}
func UpdateDataUploadWithRetry(ctx context.Context, client client.Client, namespacedName types.NamespacedName, log *logrus.Entry, updateFunc func(dataUpload *velerov2alpha1api.DataUpload)) error {
return wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (done bool, err error) {
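// UpdateDataUploadWithRetry polls until the update succeeds, retrying on conflicts; updateFunc returns false when no change is needed and the update is skipped.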
func UpdateDataUploadWithRetry(ctx context.Context, client client.Client, namespacedName types.NamespacedName, log *logrus.Entry, updateFunc func(*velerov2alpha1api.DataUpload) bool) error {
return wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) {
du := &velerov2alpha1api.DataUpload{}
if err := client.Get(ctx, namespacedName, du); err != nil {
return false, errors.Wrap(err, "getting DataUpload")
}
updateFunc(du)
updateErr := client.Update(ctx, du)
if updateErr != nil {
if apierrors.IsConflict(updateErr) {
log.Warnf("failed to update dataupload for %s/%s and will retry it", du.Namespace, du.Name)
return false, nil
if updateFunc(du) {
err := client.Update(ctx, du)
if err != nil {
if apierrors.IsConflict(err) {
log.Warnf("failed to update dataupload for %s/%s and will retry it", du.Namespace, du.Name)
return false, nil
} else {
return false, errors.Wrapf(err, "error updating dataupload with error %s/%s", du.Namespace, du.Name)
}
}
log.Errorf("failed to update dataupload with error %s for %s/%s", updateErr.Error(), du.Namespace, du.Name)
return false, err
}
return true, nil
})
}
func (r *DataUploadReconciler) AttemptDataUploadResume(ctx context.Context, cli client.Client, logger *logrus.Entry, ns string) error {
if dataUploads, err := r.FindDataUploadsByPod(ctx, cli, ns); err != nil {
return errors.Wrap(err, "failed to find data uploads")
} else {
for _, du := range dataUploads {
if du.Status.Phase == velerov2alpha1api.DataUploadPhasePrepared {
// keep doing nothing let controller re-download the data
// the Prepared CR could be still handled by dataupload controller after node-agent restart
logger.WithField("dataupload", du.GetName()).Debug("find a dataupload with status prepared")
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress {
err = UpdateDataUploadWithRetry(ctx, cli, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, logger.WithField("dataupload", du.Name),
func(dataUpload *velerov2alpha1api.DataUpload) {
dataUpload.Spec.Cancel = true
dataUpload.Status.Message = fmt.Sprintf("found a dataupload with status %q during the node-agent starting, mark it as cancel", du.Status.Phase)
})
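// funcResumeCancellableDataBackup is a package-level indirection so the resume path can be stubbed in tests.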
var funcResumeCancellableDataBackup = (*DataUploadReconciler).resumeCancellableDataPath
if err != nil {
logger.WithError(errors.WithStack(err)).Errorf("failed to mark dataupload %q into canceled", du.GetName())
continue
}
logger.WithField("dataupload", du.GetName()).Debug("mark dataupload into canceled")
func (r *DataUploadReconciler) AttemptDataUploadResume(ctx context.Context, logger *logrus.Entry, ns string) error {
dataUploads := &velerov2alpha1api.DataUploadList{}
if err := r.client.List(ctx, dataUploads, &client.ListOptions{Namespace: ns}); err != nil {
r.logger.WithError(errors.WithStack(err)).Error("failed to list datauploads")
return errors.Wrapf(err, "error to list datauploads")
}
for i := range dataUploads.Items {
du := &dataUploads.Items[i]
if du.Status.Phase == velerov2alpha1api.DataUploadPhasePrepared {
// keep doing nothing and let the controller re-download the data;
// the Prepared CR could still be handled by the dataupload controller after the node-agent restarts
logger.WithField("dataupload", du.GetName()).Debug("found a dataupload with status prepared")
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress {
if du.Status.Node != r.nodeName {
logger.WithField("du", du.Name).WithField("current node", r.nodeName).Infof("DU should be resumed by another node %s", du.Status.Node)
continue
}
err := funcResumeCancellableDataBackup(r, ctx, du, logger)
if err == nil {
logger.WithField("du", du.Name).WithField("current node", r.nodeName).Info("Completed to resume in progress DU")
continue
}
logger.WithField("dataupload", du.GetName()).WithError(err).Warn("Failed to resume data path for du, have to cancel it")
resumeErr := err
err = UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, logger.WithField("dataupload", du.Name),
func(dataUpload *velerov2alpha1api.DataUpload) bool {
if dataUpload.Spec.Cancel {
return false
}
dataUpload.Spec.Cancel = true
dataUpload.Status.Message = fmt.Sprintf("Resume InProgress dataupload failed with error %v, mark it as cancel", resumeErr)
return true
})
if err != nil {
logger.WithField("dataupload", du.GetName()).WithError(errors.WithStack(err)).Error("Failed to trigger dataupload cancel")
}
} else if du.Status.Phase == velerov2alpha1api.DataUploadPhaseAccepted {
r.logger.WithField("dataupload", du.GetName()).Warn("Cancel du under Accepted phase")
err := UpdateDataUploadWithRetry(ctx, r.client, types.NamespacedName{Namespace: du.Namespace, Name: du.Name}, r.logger.WithField("dataupload", du.Name),
func(dataUpload *velerov2alpha1api.DataUpload) bool {
if dataUpload.Spec.Cancel {
return false
}
dataUpload.Spec.Cancel = true
dataUpload.Status.Message = "Dataupload is in Accepted status during the node-agent starting, mark it as cancel"
return true
})
if err != nil {
r.logger.WithField("dataupload", du.GetName()).WithError(errors.WithStack(err)).Error("Failed to trigger dataupload cancel")
}
}
}
//If the data upload is in Accepted status, the volume snapshot may be deleted and the exposed pod may not be created
// so we need to mark the data upload as canceled for it may not be recoverable
r.CancelAcceptedDataupload(ctx, cli, ns)
return nil
}
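// resumeCancellableDataPath re-attaches to an existing backup pod after a restart by recreating the asyncBR watcher in resume mode and restarting the backup watch.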
func (r *DataUploadReconciler) resumeCancellableDataPath(ctx context.Context, du *velerov2alpha1api.DataUpload, log logrus.FieldLogger) error {
log.Info("Resume cancelable dataUpload")
ep, ok := r.snapshotExposerList[du.Spec.SnapshotType]
if !ok {
return errors.Errorf("error to find exposer for du %s", du.Name)
}
waitExposePara := r.setupWaitExposePara(du)
res, err := ep.GetExposed(ctx, getOwnerObject(du), du.Spec.OperationTimeout.Duration, waitExposePara)
if err != nil {
return errors.Wrapf(err, "error to get exposed snapshot for du %s", du.Name)
}
if res == nil {
return errors.Errorf("expose info missed for du %s", du.Name)
}
callbacks := datapath.Callbacks{
OnCompleted: r.OnDataUploadCompleted,
OnFailed: r.OnDataUploadFailed,
OnCancelled: r.OnDataUploadCancelled,
OnProgress: r.OnDataUploadProgress,
}
asyncBR, err := r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, r.kubeClient, r.mgr, datapath.TaskTypeBackup, du.Name, du.Namespace, res.ByPod.HostingPod.Name, res.ByPod.HostingContainer, du.Name, callbacks, true, log)
if err != nil {
return errors.Wrapf(err, "error to create asyncBR watcher for du %s", du.Name)
}
resumeComplete := false
defer func() {
if !resumeComplete {
r.closeDataPath(ctx, du.Name)
}
}()
if err := asyncBR.Init(ctx, nil); err != nil {
return errors.Wrapf(err, "error to init asyncBR watcher for du %s", du.Name)
}
if err := asyncBR.StartBackup(datapath.AccessPoint{
ByPath: res.ByPod.VolumeName,
}, du.Spec.DataMoverConfig, nil); err != nil {
return errors.Wrapf(err, "error to resume asyncBR watcher for du %s", du.Name)
}
resumeComplete = true
log.Infof("asyncBR is resumed for du %s", du.Name)
return nil
}

View File

@ -22,11 +22,14 @@ import (
"testing"
"time"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1"
snapshotFake "github.com/kubernetes-csi/external-snapshotter/client/v7/clientset/versioned/fake"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
@ -35,6 +38,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
clientgofake "k8s.io/client-go/kubernetes/fake"
"k8s.io/utils/clock"
testclocks "k8s.io/utils/clock/testing"
@ -42,13 +46,14 @@ import (
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"github.com/vmware-tanzu/velero/internal/credentials"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/builder"
"github.com/vmware-tanzu/velero/pkg/datapath"
datapathmocks "github.com/vmware-tanzu/velero/pkg/datapath/mocks"
"github.com/vmware-tanzu/velero/pkg/exposer"
"github.com/vmware-tanzu/velero/pkg/metrics"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
@ -169,7 +174,17 @@ func initDataUploaderReconcilerWithError(needError ...error) (*DataUploadReconci
Kind: "DaemonSet",
APIVersion: appsv1.SchemeGroupVersion.String(),
},
Spec: appsv1.DaemonSetSpec{},
Spec: appsv1.DaemonSetSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Image: "fake-image",
},
},
},
},
},
}
dataPathMgr := datapath.NewManager(1)
@ -215,24 +230,9 @@ func initDataUploaderReconcilerWithError(needError ...error) (*DataUploadReconci
fakeSnapshotClient := snapshotFake.NewSimpleClientset(vsObject, vscObj)
fakeKubeClient := clientgofake.NewSimpleClientset(daemonSet)
fakeFS := velerotest.NewFakeFileSystem()
pathGlob := fmt.Sprintf("/host_pods/%s/volumes/*/%s", "", dataUploadName)
_, err = fakeFS.Create(pathGlob)
if err != nil {
return nil, err
}
credentialFileStore, err := credentials.NewNamespacedFileStore(
fakeClient,
velerov1api.DefaultNamespace,
"/tmp/credentials",
fakeFS,
)
if err != nil {
return nil, err
}
return NewDataUploadReconciler(fakeClient, fakeKubeClient, fakeSnapshotClient.SnapshotV1(), dataPathMgr, nil, nil,
testclocks.NewFakeClock(now), &credentials.CredentialGetter{FromFile: credentialFileStore}, "test-node", fakeFS, time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics()), nil
return NewDataUploadReconciler(fakeClient, nil, fakeKubeClient, fakeSnapshotClient.SnapshotV1(), dataPathMgr, nil, map[string]nodeagent.BackupPVC{},
corev1.ResourceRequirements{}, testclocks.NewFakeClock(now), "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics()), nil
}
func dataUploadBuilder() *builder.DataUploadBuilder {
@ -294,20 +294,16 @@ type fakeDataUploadFSBR struct {
du *velerov2alpha1api.DataUpload
kubeClient kbclient.Client
clock clock.WithTickerAndDelayedExecution
initErr error
startErr error
}
func (f *fakeDataUploadFSBR) Init(ctx context.Context, param interface{}) error {
return nil
return f.initErr
}
func (f *fakeDataUploadFSBR) StartBackup(source datapath.AccessPoint, uploaderConfigs map[string]string, param interface{}) error {
du := f.du
original := f.du.DeepCopy()
du.Status.Phase = velerov2alpha1api.DataUploadPhaseCompleted
du.Status.CompletionTimestamp = &metav1.Time{Time: f.clock.Now()}
f.kubeClient.Patch(context.Background(), du, kbclient.MergeFrom(original))
return nil
return f.startErr
}
func (f *fakeDataUploadFSBR) StartRestore(snapshotID string, target datapath.AccessPoint, uploaderConfigs map[string]string) error {
@ -336,27 +332,24 @@ func TestReconcile(t *testing.T) {
needErrs []bool
peekErr error
notCreateFSBR bool
fsBRInitErr error
fsBRStartErr error
}{
{
name: "Dataupload is not initialized",
du: builder.ForDataUpload("unknown-ns", "unknown-name").Result(),
expectedProcessed: false,
expected: nil,
expectedRequeue: ctrl.Result{},
name: "Dataupload is not initialized",
du: builder.ForDataUpload("unknown-ns", "unknown-name").Result(),
expectedRequeue: ctrl.Result{},
}, {
name: "Error get Dataupload",
du: builder.ForDataUpload(velerov1api.DefaultNamespace, "unknown-name").Result(),
expectedProcessed: false,
expected: nil,
expectedRequeue: ctrl.Result{},
expectedErrMsg: "getting DataUpload: Get error",
needErrs: []bool{true, false, false, false},
name: "Error get Dataupload",
du: builder.ForDataUpload(velerov1api.DefaultNamespace, "unknown-name").Result(),
expectedRequeue: ctrl.Result{},
expectedErrMsg: "getting DataUpload: Get error",
needErrs: []bool{true, false, false, false},
}, {
name: "Unsupported data mover type",
du: dataUploadBuilder().DataMover("unknown type").Result(),
expectedProcessed: false,
expected: dataUploadBuilder().Phase("").Result(),
expectedRequeue: ctrl.Result{},
name: "Unsupported data mover type",
du: dataUploadBuilder().DataMover("unknown type").Result(),
expected: dataUploadBuilder().Phase("").Result(),
expectedRequeue: ctrl.Result{},
}, {
name: "Unknown type of snapshot exposer is not initialized",
du: dataUploadBuilder().SnapshotType("unknown type").Result(),
@ -365,13 +358,12 @@ func TestReconcile(t *testing.T) {
expectedRequeue: ctrl.Result{},
expectedErrMsg: "unknown type type of snapshot exposer is not exist",
}, {
name: "Dataupload should be accepted",
du: dataUploadBuilder().Result(),
pod: builder.ForPod("fake-ns", dataUploadName).Volumes(&corev1.Volume{Name: "test-pvc"}).Result(),
pvc: builder.ForPersistentVolumeClaim("fake-ns", "test-pvc").Result(),
expectedProcessed: false,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
expectedRequeue: ctrl.Result{},
name: "Dataupload should be accepted",
du: dataUploadBuilder().Result(),
pod: builder.ForPod("fake-ns", dataUploadName).Volumes(&corev1.Volume{Name: "test-pvc"}).Result(),
pvc: builder.ForPersistentVolumeClaim("fake-ns", "test-pvc").Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
expectedRequeue: ctrl.Result{},
},
{
name: "Dataupload should fail to get PVC information",
@ -383,34 +375,31 @@ func TestReconcile(t *testing.T) {
expectedErrMsg: "failed to get PVC",
},
{
name: "Dataupload should be prepared",
du: dataUploadBuilder().SnapshotType(fakeSnapshotType).Result(),
expectedProcessed: false,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
expectedRequeue: ctrl.Result{},
}, {
name: "Dataupload prepared should be completed",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCompleted).Result(),
expectedRequeue: ctrl.Result{},
name: "Dataupload should be prepared",
du: dataUploadBuilder().SnapshotType(fakeSnapshotType).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
expectedRequeue: ctrl.Result{},
},
{
name: "Dataupload with not enabled cancel",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(fakeSnapshotType).Cancel(false).Result(),
expectedProcessed: false,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectedRequeue: ctrl.Result{},
name: "Dataupload prepared should be completed",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectedRequeue: ctrl.Result{},
},
{
name: "Dataupload should be cancel",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(fakeSnapshotType).Cancel(true).Result(),
expectedProcessed: false,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceling).Result(),
expectedRequeue: ctrl.Result{},
name: "Dataupload with not enabled cancel",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(fakeSnapshotType).Cancel(false).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectedRequeue: ctrl.Result{},
},
{
name: "Dataupload should be cancel",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(fakeSnapshotType).Cancel(true).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseCanceling).Result(),
expectedRequeue: ctrl.Result{},
},
{
name: "Dataupload should be cancel with match node",
@ -433,19 +422,43 @@ func TestReconcile(t *testing.T) {
du.Status.Node = "different_node"
return du
}(),
expectedProcessed: false,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectedRequeue: ctrl.Result{},
notCreateFSBR: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
expectedRequeue: ctrl.Result{},
notCreateFSBR: true,
},
{
name: "runCancelableDataUpload is concurrent limited",
dataMgr: datapath.NewManager(0),
name: "runCancelableDataUpload is concurrent limited",
dataMgr: datapath.NewManager(0),
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
expectedRequeue: ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
},
{
name: "data path init error",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
expectedProcessed: false,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
expectedRequeue: ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
fsBRInitErr: errors.New("fake-data-path-init-error"),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).SnapshotType(fakeSnapshotType).Result(),
expectedErrMsg: "error initializing asyncBR: fake-data-path-init-error",
},
{
name: "Unable to update status to in progress for data download",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
needErrs: []bool{false, false, false, true},
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
expectedRequeue: ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5},
},
{
name: "data path start error",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).SnapshotType(fakeSnapshotType).Result(),
fsBRStartErr: errors.New("fake-data-path-start-error"),
expectedProcessed: true,
expected: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseFailed).SnapshotType(fakeSnapshotType).Result(),
expectedErrMsg: "error starting async backup for pod dataupload-1, volume dataupload-1: fake-data-path-start-error",
},
{
name: "prepare timeout",
@ -468,7 +481,6 @@ func TestReconcile(t *testing.T) {
du.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return du
}(),
expectedProcessed: false,
checkFunc: func(du velerov2alpha1api.DataUpload) bool {
return du.Spec.Cancel
},
@ -484,7 +496,6 @@ func TestReconcile(t *testing.T) {
du.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return du
}(),
expectedProcessed: false,
checkFunc: func(du velerov2alpha1api.DataUpload) bool {
return !controllerutil.ContainsFinalizer(&du, DataUploadDownloadFinalizer)
},
@ -538,18 +549,22 @@ func TestReconcile(t *testing.T) {
r.snapshotExposerList = map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer{velerov2alpha1api.SnapshotTypeCSI: exposer.NewCSISnapshotExposer(r.kubeClient, r.csiSnapshotClient, velerotest.NewLogger())}
}
if !test.notCreateFSBR {
datapath.FSBRCreator = func(string, string, kbclient.Client, string, datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
datapath.MicroServiceBRWatcherCreator = func(kbclient.Client, kubernetes.Interface, manager.Manager, string, string, string, string, string, string, datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
return &fakeDataUploadFSBR{
du: test.du,
kubeClient: r.client,
clock: r.Clock,
initErr: test.fsBRInitErr,
startErr: test.fsBRStartErr,
}
}
}
testCreateFsBR := false
if test.du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress && !test.notCreateFSBR {
if fsBR := r.dataPathMgr.GetAsyncBR(test.du.Name); fsBR == nil {
_, err := r.dataPathMgr.CreateFileSystemBR(test.du.Name, pVBRRequestor, ctx, r.client, velerov1api.DefaultNamespace, datapath.Callbacks{OnCancelled: r.OnDataUploadCancelled}, velerotest.NewLogger())
testCreateFsBR = true
_, err := r.dataPathMgr.CreateMicroServiceBRWatcher(ctx, r.client, nil, nil, datapath.TaskTypeBackup, test.du.Name, velerov1api.DefaultNamespace, "", "", "", datapath.Callbacks{OnCancelled: r.OnDataUploadCancelled}, false, velerotest.NewLogger())
require.NoError(t, err)
}
}
@ -561,11 +576,11 @@ func TestReconcile(t *testing.T) {
},
})
assert.Equal(t, actualResult, test.expectedRequeue)
assert.Equal(t, test.expectedRequeue, actualResult)
if test.expectedErrMsg == "" {
require.NoError(t, err)
} else {
assert.Contains(t, err.Error(), test.expectedErrMsg)
require.ErrorContains(t, err, test.expectedErrMsg)
}
du := velerov2alpha1api.DataUpload{}
@ -593,6 +608,10 @@ func TestReconcile(t *testing.T) {
if test.checkFunc != nil {
assert.True(t, test.checkFunc(du))
}
if !testCreateFsBR && du.Status.Phase != velerov2alpha1api.DataUploadPhaseInProgress {
assert.Nil(t, r.dataPathMgr.GetAsyncBR(test.du.Name))
}
})
}
}
@ -914,7 +933,7 @@ func TestTryCancelDataUpload(t *testing.T) {
err = r.client.Create(ctx, test.dd)
require.NoError(t, err)
r.TryCancelDataUpload(ctx, test.dd, "")
r.tryCancelAcceptedDataUpload(ctx, test.dd, "")
if test.expectedErr == "" {
assert.NoError(t, err)
@ -934,12 +953,11 @@ func TestUpdateDataUploadWithRetry(t *testing.T) {
testCases := []struct {
Name string
needErrs []bool
noChange bool
ExpectErr bool
}{
{
Name: "SuccessOnFirstAttempt",
needErrs: []bool{false, false, false, false},
ExpectErr: false,
Name: "SuccessOnFirstAttempt",
},
{
Name: "Error get",
@ -951,6 +969,11 @@ func TestUpdateDataUploadWithRetry(t *testing.T) {
needErrs: []bool{false, false, true, false, false},
ExpectErr: true,
},
{
Name: "no change",
noChange: true,
needErrs: []bool{false, false, true, false, false},
},
{
Name: "Conflict with error timeout",
needErrs: []bool{false, false, false, false, true},
@ -966,8 +989,13 @@ func TestUpdateDataUploadWithRetry(t *testing.T) {
require.NoError(t, err)
err = r.client.Create(ctx, dataUploadBuilder().Result())
require.NoError(t, err)
updateFunc := func(dataDownload *velerov2alpha1api.DataUpload) {
updateFunc := func(dataUpload *velerov2alpha1api.DataUpload) bool {
if tc.noChange {
return false
}
dataUpload.Spec.Cancel = true
return true
}
err = UpdateDataUploadWithRetry(ctx, r.client, namespacedName, velerotest.NewLogger().WithField("name", tc.Name), updateFunc)
if tc.ExpectErr {
@ -979,135 +1007,107 @@ func TestUpdateDataUploadWithRetry(t *testing.T) {
}
}
func TestFindDataUploads(t *testing.T) {
tests := []struct {
name string
pod corev1.Pod
du *velerov2alpha1api.DataUpload
expectedUploads []velerov2alpha1api.DataUpload
expectedError bool
}{
// Test case 1: Pod with matching nodeName and DataUpload label
{
name: "MatchingPod",
pod: corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: "pod-1",
Labels: map[string]string{
velerov1api.DataUploadLabel: dataUploadName,
},
},
Spec: corev1.PodSpec{
NodeName: "node-1",
},
},
du: dataUploadBuilder().Result(),
expectedUploads: []velerov2alpha1api.DataUpload{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: dataUploadName,
},
},
},
expectedError: false,
},
// Test case 2: Pod with non-matching nodeName
{
name: "NonMatchingNodePod",
pod: corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: "pod-2",
Labels: map[string]string{
velerov1api.DataUploadLabel: dataUploadName,
},
},
Spec: corev1.PodSpec{
NodeName: "node-2",
},
},
du: dataUploadBuilder().Result(),
expectedUploads: []velerov2alpha1api.DataUpload{},
expectedError: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r, err := initDataUploaderReconcilerWithError()
require.NoError(t, err)
r.nodeName = "node-1"
err = r.client.Create(ctx, test.du)
require.NoError(t, err)
err = r.client.Create(ctx, &test.pod)
require.NoError(t, err)
uploads, err := r.FindDataUploadsByPod(context.Background(), r.client, "velero")
if test.expectedError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
assert.Equal(t, len(test.expectedUploads), len(uploads))
}
})
}
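// duResumeTestHelper stubs the resume path, the snapshot exposer and the asyncBR watcher creator used by the resume tests.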
type duResumeTestHelper struct {
resumeErr error
getExposeErr error
exposeResult *exposer.ExposeResult
asyncBR datapath.AsyncBR
}
func (dt *duResumeTestHelper) resumeCancellableDataPath(_ *DataUploadReconciler, _ context.Context, _ *velerov2alpha1api.DataUpload, _ logrus.FieldLogger) error {
return dt.resumeErr
}
func (dt *duResumeTestHelper) Expose(context.Context, corev1.ObjectReference, interface{}) error {
return nil
}
func (dt *duResumeTestHelper) GetExposed(context.Context, corev1.ObjectReference, time.Duration, interface{}) (*exposer.ExposeResult, error) {
return dt.exposeResult, dt.getExposeErr
}
func (dt *duResumeTestHelper) PeekExposed(context.Context, corev1.ObjectReference) error {
return nil
}
func (dt *duResumeTestHelper) CleanUp(context.Context, corev1.ObjectReference, string, string) {}
func (dt *duResumeTestHelper) newMicroServiceBRWatcher(kbclient.Client, kubernetes.Interface, manager.Manager, string, string, string, string, string, string,
datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
return dt.asyncBR
}
func TestAttemptDataUploadResume(t *testing.T) {
tests := []struct {
name string
dataUploads []velerov2alpha1api.DataUpload
du *velerov2alpha1api.DataUpload
pod *corev1.Pod
needErrs []bool
acceptedDataUploads []string
prepareddDataUploads []string
cancelledDataUploads []string
expectedError bool
name string
dataUploads []velerov2alpha1api.DataUpload
du *velerov2alpha1api.DataUpload
needErrs []bool
acceptedDataUploads []string
prepareddDataUploads []string
cancelledDataUploads []string
inProgressDataUploads []string
resumeErr error
expectedError string
}{
// Test case 1: Process Accepted DataUpload
{
name: "AcceptedDataUpload",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).NodeName("node-1").Labels(map[string]string{
velerov1api.DataUploadLabel: dataUploadName,
}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
name: "accepted DataUpload in other node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but canceled",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Cancel(true).Result(),
cancelledDataUploads: []string{dataUploadName},
acceptedDataUploads: []string{dataUploadName},
},
{
name: "accepted DataUpload in the current node but update error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Labels(map[string]string{acceptNodeLabelKey: "node-1"}).Result(),
needErrs: []bool{false, false, true, false, false, false},
acceptedDataUploads: []string{dataUploadName},
expectedError: false,
},
// Test case 2: Cancel an Accepted DataUpload
{
name: "CancelAcceptedDataUpload",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Result(),
},
// Test case 3: Process Accepted Prepared DataUpload
{
name: "PreparedDataUpload",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).NodeName("node-1").Labels(map[string]string{
velerov1api.DataUploadLabel: dataUploadName,
}).Result(),
name: "prepared DataUpload",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
prepareddDataUploads: []string{dataUploadName},
},
// Test case 4: Process Accepted InProgress DataUpload
{
name: "InProgressDataUpload",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).NodeName("node-1").Labels(map[string]string{
velerov1api.DataUploadLabel: dataUploadName,
}).Result(),
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
prepareddDataUploads: []string{dataUploadName},
name: "InProgress DataUpload, not the current node",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result(),
inProgressDataUploads: []string{dataUploadName},
},
// Test case 5: get resume error
{
name: "ResumeError",
pod: builder.ForPod(velerov1api.DefaultNamespace, dataUploadName).Volumes(&corev1.Volume{Name: "dataupload-1"}).NodeName("node-1").Labels(map[string]string{
velerov1api.DataUploadLabel: dataUploadName,
}).Result(),
name: "InProgress DataUpload, resume error and update error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Node("node-1").Result(),
needErrs: []bool{false, false, true, false, false, false},
resumeErr: errors.New("fake-resume-error"),
inProgressDataUploads: []string{dataUploadName},
},
{
name: "InProgress DataUpload, resume error and update succeed",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Node("node-1").Result(),
resumeErr: errors.New("fake-resume-error"),
cancelledDataUploads: []string{dataUploadName},
inProgressDataUploads: []string{dataUploadName},
},
{
name: "InProgress DataUpload and resume succeed",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).Node("node-1").Result(),
inProgressDataUploads: []string{dataUploadName},
},
{
name: "Error",
needErrs: []bool{false, false, false, false, false, true},
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhasePrepared).Result(),
expectedError: true,
expectedError: "error to list datauploads: List error",
},
}
@ -1117,22 +1117,20 @@ func TestAttemptDataUploadResume(t *testing.T) {
r, err := initDataUploaderReconciler(test.needErrs...)
r.nodeName = "node-1"
require.NoError(t, err)
defer func() {
r.client.Delete(ctx, test.du, &kbclient.DeleteOptions{})
if test.pod != nil {
r.client.Delete(ctx, test.pod, &kbclient.DeleteOptions{})
}
}()
assert.NoError(t, r.client.Create(ctx, test.du))
if test.pod != nil {
assert.NoError(t, r.client.Create(ctx, test.pod))
}
// Run the test
err = r.AttemptDataUploadResume(ctx, r.client, r.logger.WithField("name", test.name), test.du.Namespace)
if test.expectedError {
assert.Error(t, err)
dt := &duResumeTestHelper{
resumeErr: test.resumeErr,
}
funcResumeCancellableDataBackup = dt.resumeCancellableDataPath
// Run the test
err = r.AttemptDataUploadResume(ctx, r.logger.WithField("name", test.name), test.du.Namespace)
if test.expectedError != "" {
assert.EqualError(t, err, test.expectedError)
} else {
assert.NoError(t, err)
@ -1141,7 +1139,7 @@ func TestAttemptDataUploadResume(t *testing.T) {
dataUpload := &velerov2alpha1api.DataUpload{}
err := r.client.Get(context.Background(), types.NamespacedName{Namespace: "velero", Name: duName}, dataUpload)
require.NoError(t, err)
assert.Equal(t, velerov2alpha1api.DataUploadPhaseCanceled, dataUpload.Status.Phase)
assert.True(t, dataUpload.Spec.Cancel)
}
// Verify DataUploads marked as Accepted
for _, duName := range test.acceptedDataUploads {
@ -1157,6 +1155,123 @@ func TestAttemptDataUploadResume(t *testing.T) {
require.NoError(t, err)
assert.Equal(t, velerov2alpha1api.DataUploadPhasePrepared, dataUpload.Status.Phase)
}
// Verify DataUploads marked as InProgress
for _, duName := range test.inProgressDataUploads {
dataUpload := &velerov2alpha1api.DataUpload{}
err := r.client.Get(context.Background(), types.NamespacedName{Namespace: "velero", Name: duName}, dataUpload)
require.NoError(t, err)
assert.Equal(t, velerov2alpha1api.DataUploadPhaseInProgress, dataUpload.Status.Phase)
}
}
})
}
}
func TestResumeCancellableBackup(t *testing.T) {
tests := []struct {
name string
dataUploads []velerov2alpha1api.DataUpload
du *velerov2alpha1api.DataUpload
getExposeErr error
exposeResult *exposer.ExposeResult
createWatcherErr error
initWatcherErr error
startWatcherErr error
mockInit bool
mockStart bool
mockClose bool
expectedError string
}{
{
name: "not find exposer",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType("").Result(),
expectedError: fmt.Sprintf("error to find exposer for du %s", dataUploadName),
},
{
name: "get expose failed",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseInProgress).SnapshotType(velerov2alpha1api.SnapshotTypeCSI).Result(),
getExposeErr: errors.New("fake-expose-error"),
expectedError: fmt.Sprintf("error to get exposed snapshot for du %s: fake-expose-error", dataUploadName),
},
{
name: "no expose",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Node("node-1").Result(),
expectedError: fmt.Sprintf("expose info missed for du %s", dataUploadName),
},
{
name: "watcher init error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Node("node-1").Result(),
exposeResult: &exposer.ExposeResult{
ByPod: exposer.ExposeByPod{
HostingPod: &corev1.Pod{},
},
},
mockInit: true,
mockClose: true,
initWatcherErr: errors.New("fake-init-watcher-error"),
expectedError: fmt.Sprintf("error to init asyncBR watcher for du %s: fake-init-watcher-error", dataUploadName),
},
{
name: "start watcher error",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Node("node-1").Result(),
exposeResult: &exposer.ExposeResult{
ByPod: exposer.ExposeByPod{
HostingPod: &corev1.Pod{},
},
},
mockInit: true,
mockStart: true,
mockClose: true,
startWatcherErr: errors.New("fake-start-watcher-error"),
expectedError: fmt.Sprintf("error to resume asyncBR watcher for du %s: fake-start-watcher-error", dataUploadName),
},
{
name: "succeed",
du: dataUploadBuilder().Phase(velerov2alpha1api.DataUploadPhaseAccepted).Node("node-1").Result(),
exposeResult: &exposer.ExposeResult{
ByPod: exposer.ExposeByPod{
HostingPod: &corev1.Pod{},
},
},
mockInit: true,
mockStart: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
ctx := context.TODO()
r, err := initDataUploaderReconciler()
r.nodeName = "node-1"
require.NoError(t, err)
mockAsyncBR := datapathmocks.NewAsyncBR(t)
if test.mockInit {
mockAsyncBR.On("Init", mock.Anything, mock.Anything).Return(test.initWatcherErr)
}
if test.mockStart {
mockAsyncBR.On("StartBackup", mock.Anything, mock.Anything, mock.Anything).Return(test.startWatcherErr)
}
if test.mockClose {
mockAsyncBR.On("Close", mock.Anything).Return()
}
dt := &duResumeTestHelper{
getExposeErr: test.getExposeErr,
exposeResult: test.exposeResult,
asyncBR: mockAsyncBR,
}
r.snapshotExposerList[velerov2alpha1api.SnapshotTypeCSI] = dt
datapath.MicroServiceBRWatcherCreator = dt.newMicroServiceBRWatcher
err = r.resumeCancellableDataPath(ctx, test.du, velerotest.NewLogger())
if test.expectedError != "" {
assert.EqualError(t, err, test.expectedError)
}
})
}

View File

@ -19,6 +19,7 @@ package controller
import (
"context"
"fmt"
"strings"
"time"
"github.com/pkg/errors"
@ -201,7 +202,7 @@ func (r *PodVolumeBackupReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
func (r *PodVolumeBackupReconciler) OnDataPathCompleted(ctx context.Context, namespace string, pvbName string, result datapath.Result) {
defer r.closeDataPath(ctx, pvbName)
defer r.dataPathMgr.RemoveAsyncBR(pvbName)
log := r.logger.WithField("pvb", pvbName)
@ -239,7 +240,7 @@ func (r *PodVolumeBackupReconciler) OnDataPathCompleted(ctx context.Context, nam
}
func (r *PodVolumeBackupReconciler) OnDataPathFailed(ctx context.Context, namespace, pvbName string, err error) {
defer r.closeDataPath(ctx, pvbName)
defer r.dataPathMgr.RemoveAsyncBR(pvbName)
log := r.logger.WithField("pvb", pvbName)
@ -254,7 +255,7 @@ func (r *PodVolumeBackupReconciler) OnDataPathFailed(ctx context.Context, namesp
}
func (r *PodVolumeBackupReconciler) OnDataPathCancelled(ctx context.Context, namespace string, pvbName string) {
defer r.closeDataPath(ctx, pvbName)
defer r.dataPathMgr.RemoveAsyncBR(pvbName)
log := r.logger.WithField("pvb", pvbName)
@ -373,7 +374,11 @@ func UpdatePVBStatusToFailed(ctx context.Context, c client.Client, pvb *velerov1
if dataPathError, ok := errOut.(datapath.DataPathError); ok {
pvb.Status.SnapshotID = dataPathError.GetSnapshotID()
}
pvb.Status.Message = errors.WithMessage(errOut, msg).Error()
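// With an empty message, errors.WithMessage would prefix the error with ": ", so use the raw error instead.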
if len(strings.TrimSpace(msg)) == 0 {
pvb.Status.Message = errOut.Error()
} else {
pvb.Status.Message = errors.WithMessage(errOut, msg).Error()
}
err := c.Patch(ctx, pvb, client.MergeFrom(original))
if err != nil {
log.WithError(err).Error("error updating PodVolumeBackup status")

View File

@ -265,7 +265,7 @@ func getInitContainerIndex(pod *corev1api.Pod) int {
}
func (c *PodVolumeRestoreReconciler) OnDataPathCompleted(ctx context.Context, namespace string, pvrName string, result datapath.Result) {
defer c.closeDataPath(ctx, pvrName)
defer c.dataPathMgr.RemoveAsyncBR(pvrName)
log := c.logger.WithField("pvr", pvrName)
@ -325,7 +325,7 @@ func (c *PodVolumeRestoreReconciler) OnDataPathCompleted(ctx context.Context, na
}
func (c *PodVolumeRestoreReconciler) OnDataPathFailed(ctx context.Context, namespace string, pvrName string, err error) {
defer c.closeDataPath(ctx, pvrName)
defer c.dataPathMgr.RemoveAsyncBR(pvrName)
log := c.logger.WithField("pvr", pvrName)
@ -340,7 +340,7 @@ func (c *PodVolumeRestoreReconciler) OnDataPathFailed(ctx context.Context, names
}
func (c *PodVolumeRestoreReconciler) OnDataPathCancelled(ctx context.Context, namespace string, pvrName string) {
defer c.closeDataPath(ctx, pvrName)
defer c.dataPathMgr.RemoveAsyncBR(pvrName)
log := c.logger.WithField("pvr", pvrName)

View File

@ -613,12 +613,6 @@ func TestRestoreReconcile(t *testing.T) {
},
}
if test.restore.Spec.ScheduleName != "" && test.backup != nil {
expected.Spec = SpecPatch{
BackupName: test.backup.Name,
}
}
if test.expectedStartTime != nil {
expected.Status.StartTimestamp = test.expectedStartTime
}

View File

@ -45,10 +45,6 @@ import (
"github.com/vmware-tanzu/velero/pkg/util/results"
)
const (
PVPatchMaximumDuration = 10 * time.Minute
)
type restoreFinalizerReconciler struct {
client.Client
namespace string
@ -59,6 +55,7 @@ type restoreFinalizerReconciler struct {
clock clock.WithTickerAndDelayedExecution
crClient client.Client
multiHookTracker *hook.MultiHookTracker
resourceTimeout time.Duration
}
func NewRestoreFinalizerReconciler(
@ -70,6 +67,7 @@ func NewRestoreFinalizerReconciler(
metrics *metrics.ServerMetrics,
crClient client.Client,
multiHookTracker *hook.MultiHookTracker,
resourceTimeout time.Duration,
) *restoreFinalizerReconciler {
return &restoreFinalizerReconciler{
Client: client,
@ -81,6 +79,7 @@ func NewRestoreFinalizerReconciler(
clock: &clock.RealClock{},
crClient: crClient,
multiHookTracker: multiHookTracker,
resourceTimeout: resourceTimeout,
}
}
@ -163,6 +162,7 @@ func (r *restoreFinalizerReconciler) Reconcile(ctx context.Context, req ctrl.Req
volumeInfo: volumeInfo,
restoredPVCList: restoredPVCList,
multiHookTracker: r.multiHookTracker,
resourceTimeout: r.resourceTimeout,
}
warnings, errs := finalizerCtx.execute()
@ -246,6 +246,7 @@ type finalizerContext struct {
volumeInfo []*volume.BackupVolumeInfo
restoredPVCList map[string]struct{}
multiHookTracker *hook.MultiHookTracker
resourceTimeout time.Duration
}
func (ctx *finalizerContext) execute() (results.Result, results.Result) { //nolint:unparam //temporarily ignore the lint report: result 0 is always nil (unparam)
@ -268,6 +269,7 @@ func (ctx *finalizerContext) patchDynamicPVWithVolumeInfo() (errs results.Result
var pvWaitGroup sync.WaitGroup
var resultLock sync.Mutex
maxConcurrency := 3
semaphore := make(chan struct{}, maxConcurrency)
@ -294,7 +296,7 @@ func (ctx *finalizerContext) patchDynamicPVWithVolumeInfo() (errs results.Result
log := ctx.logger.WithField("PVC", volInfo.PVCName).WithField("PVCNamespace", restoredNamespace)
log.Debug("patching dynamic PV is in progress")
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, PVPatchMaximumDuration, true, func(context.Context) (bool, error) {
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, ctx.resourceTimeout, true, func(context.Context) (bool, error) {
// wait for PVC to be bound
pvc := &v1.PersistentVolumeClaim{}
err := ctx.crClient.Get(context.Background(), client.ObjectKey{Name: volInfo.PVCName, Namespace: restoredNamespace}, pvc)
@ -309,7 +311,7 @@ func (ctx *finalizerContext) patchDynamicPVWithVolumeInfo() (errs results.Result
// We are handling a common but specific scenario where a PVC is in a pending state and uses a storage class with
// VolumeBindingMode set to WaitForFirstConsumer. In this case, the PV patch step is skipped to avoid
// failures due to the PVC not being bound, which could cause a timeout and result in a failed restore.
if pvc != nil && pvc.Status.Phase == v1.ClaimPending {
if pvc.Status.Phase == v1.ClaimPending {
// check if storage class used has VolumeBindingMode as WaitForFirstConsumer
scName := *pvc.Spec.StorageClassName
sc := &storagev1api.StorageClass{}

View File

@ -138,6 +138,7 @@ func TestRestoreFinalizerReconcile(t *testing.T) {
metrics.NewServerMetrics(),
fakeClient,
hook.NewMultiHookTracker(),
10*time.Minute,
)
r.clock = testclocks.NewFakeClock(now)
@ -200,6 +201,7 @@ func TestUpdateResult(t *testing.T) {
metrics.NewServerMetrics(),
fakeClient,
hook.NewMultiHookTracker(),
10*time.Minute,
)
restore := builder.ForRestore(velerov1api.DefaultNamespace, "restore-1").Result()
res := map[string]results.Result{"warnings": {}, "errors": {}}

View File

@ -0,0 +1,314 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"context"
"encoding/json"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
cachetool "k8s.io/client-go/tools/cache"
"sigs.k8s.io/controller-runtime/pkg/cache"
"github.com/vmware-tanzu/velero/internal/credentials"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/datapath"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/kube"
apierrors "k8s.io/apimachinery/pkg/api/errors"
)
const (
dataUploadDownloadRequestor = "snapshot-data-upload-download"
)
// BackupMicroService processes data mover backups inside the backup pod
type BackupMicroService struct {
ctx context.Context
client client.Client
kubeClient kubernetes.Interface
repoEnsurer *repository.Ensurer
credentialGetter *credentials.CredentialGetter
logger logrus.FieldLogger
dataPathMgr *datapath.Manager
eventRecorder kube.EventRecorder
namespace string
dataUploadName string
dataUpload *velerov2alpha1api.DataUpload
sourceTargetPath datapath.AccessPoint
resultSignal chan dataPathResult
duInformer cache.Informer
duHandler cachetool.ResourceEventHandlerRegistration
nodeName string
}
type dataPathResult struct {
err error
result string
}
func NewBackupMicroService(ctx context.Context, client client.Client, kubeClient kubernetes.Interface, dataUploadName string, namespace string, nodeName string,
sourceTargetPath datapath.AccessPoint, dataPathMgr *datapath.Manager, repoEnsurer *repository.Ensurer, cred *credentials.CredentialGetter,
duInformer cache.Informer, log logrus.FieldLogger) *BackupMicroService {
return &BackupMicroService{
ctx: ctx,
client: client,
kubeClient: kubeClient,
credentialGetter: cred,
logger: log,
repoEnsurer: repoEnsurer,
dataPathMgr: dataPathMgr,
namespace: namespace,
dataUploadName: dataUploadName,
sourceTargetPath: sourceTargetPath,
nodeName: nodeName,
resultSignal: make(chan dataPathResult),
duInformer: duInformer,
}
}
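// Init sets up the event recorder and registers an informer handler that watches the target DataUpload and cancels the running data path when a cancel request is observed.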
func (r *BackupMicroService) Init() error {
r.eventRecorder = kube.NewEventRecorder(r.kubeClient, r.client.Scheme(), r.dataUploadName, r.nodeName)
handler, err := r.duInformer.AddEventHandler(
cachetool.ResourceEventHandlerFuncs{
UpdateFunc: func(oldObj interface{}, newObj interface{}) {
oldDu := oldObj.(*velerov2alpha1api.DataUpload)
newDu := newObj.(*velerov2alpha1api.DataUpload)
if newDu.Name != r.dataUploadName {
return
}
if newDu.Status.Phase != velerov2alpha1api.DataUploadPhaseInProgress {
return
}
if newDu.Spec.Cancel && !oldDu.Spec.Cancel {
r.cancelDataUpload(newDu)
}
},
},
)
if err != nil {
return errors.Wrap(err, "error adding du handler")
}
r.duHandler = handler
return err
}
func (r *BackupMicroService) RunCancelableDataPath(ctx context.Context) (string, error) {
log := r.logger.WithFields(logrus.Fields{
"dataupload": r.dataUploadName,
})
du := &velerov2alpha1api.DataUpload{}
err := wait.PollUntilContextCancel(ctx, 500*time.Millisecond, true, func(ctx context.Context) (bool, error) {
err := r.client.Get(ctx, types.NamespacedName{
Namespace: r.namespace,
Name: r.dataUploadName,
}, du)
if apierrors.IsNotFound(err) {
return false, nil
}
if err != nil {
return true, errors.Wrapf(err, "error to get du %s", r.dataUploadName)
}
if du.Status.Phase == velerov2alpha1api.DataUploadPhaseInProgress {
return true, nil
} else {
return false, nil
}
})
if err != nil {
log.WithError(err).Error("Failed to wait du")
return "", errors.Wrap(err, "error waiting for du")
}
r.dataUpload = du
log.Info("Run cancelable dataUpload")
callbacks := datapath.Callbacks{
OnCompleted: r.OnDataUploadCompleted,
OnFailed: r.OnDataUploadFailed,
OnCancelled: r.OnDataUploadCancelled,
OnProgress: r.OnDataUploadProgress,
}
fsBackup, err := r.dataPathMgr.CreateFileSystemBR(du.Name, dataUploadDownloadRequestor, ctx, r.client, du.Namespace, callbacks, log)
if err != nil {
return "", errors.Wrap(err, "error to create data path")
}
log.Debug("Async fs br created")
if err := fsBackup.Init(ctx, &datapath.FSBRInitParam{
BSLName: du.Spec.BackupStorageLocation,
SourceNamespace: du.Spec.SourceNamespace,
UploaderType: GetUploaderType(du.Spec.DataMover),
RepositoryType: velerov1api.BackupRepositoryTypeKopia,
RepoIdentifier: "",
RepositoryEnsurer: r.repoEnsurer,
CredentialGetter: r.credentialGetter,
}); err != nil {
return "", errors.Wrap(err, "error to initialize data path")
}
log.Info("Async fs br init")
tags := map[string]string{
velerov1api.AsyncOperationIDLabel: du.Labels[velerov1api.AsyncOperationIDLabel],
}
if err := fsBackup.StartBackup(r.sourceTargetPath, du.Spec.DataMoverConfig, &datapath.FSBRStartParam{
RealSource: GetRealSource(du.Spec.SourceNamespace, du.Spec.SourcePVC),
ParentSnapshot: "",
ForceFull: false,
Tags: tags,
}); err != nil {
return "", errors.Wrap(err, "error starting data path backup")
}
log.Info("Async fs backup data path started")
r.eventRecorder.Event(du, false, datapath.EventReasonStarted, "Data path for %s started", du.Name)
result := ""
select {
case <-ctx.Done():
err = errors.New("timed out waiting for fs backup to complete")
break
case res := <-r.resultSignal:
err = res.err
result = res.result
break
}
if err != nil {
log.WithError(err).Error("Async fs backup was not completed")
}
return result, err
}
func (r *BackupMicroService) Shutdown() {
r.eventRecorder.Shutdown()
r.closeDataPath(r.ctx, r.dataUploadName)
if r.duHandler != nil {
if err := r.duInformer.RemoveEventHandler(r.duHandler); err != nil {
r.logger.WithError(err).Warn("Failed to remove pod handler")
}
}
}
var funcMarshal = json.Marshal
func (r *BackupMicroService) OnDataUploadCompleted(ctx context.Context, namespace string, duName string, result datapath.Result) {
log := r.logger.WithField("dataupload", duName)
backupBytes, err := funcMarshal(result.Backup)
if err != nil {
log.WithError(err).Errorf("Failed to marshal backup result %v", result.Backup)
r.resultSignal <- dataPathResult{
err: errors.Wrapf(err, "Failed to marshal backup result %v", result.Backup),
}
} else {
r.eventRecorder.Event(r.dataUpload, false, datapath.EventReasonCompleted, string(backupBytes))
r.resultSignal <- dataPathResult{
result: string(backupBytes),
}
}
log.Info("Async fs backup completed")
}
func (r *BackupMicroService) OnDataUploadFailed(ctx context.Context, namespace string, duName string, err error) {
log := r.logger.WithField("dataupload", duName)
log.WithError(err).Error("Async fs backup data path failed")
r.eventRecorder.Event(r.dataUpload, false, datapath.EventReasonFailed, "Data path for data upload %s failed, error %v", r.dataUploadName, err)
r.resultSignal <- dataPathResult{
err: errors.Wrapf(err, "Data path for data upload %s failed", r.dataUploadName),
}
}
func (r *BackupMicroService) OnDataUploadCancelled(ctx context.Context, namespace string, duName string) {
log := r.logger.WithField("dataupload", duName)
log.Warn("Async fs backup data path canceled")
r.eventRecorder.Event(r.dataUpload, false, datapath.EventReasonCancelled, "Data path for data upload %s canceled", duName)
r.resultSignal <- dataPathResult{
err: errors.New(datapath.ErrCancelled),
}
}
func (r *BackupMicroService) OnDataUploadProgress(ctx context.Context, namespace string, duName string, progress *uploader.Progress) {
log := r.logger.WithFields(logrus.Fields{
"dataupload": duName,
})
progressBytes, err := funcMarshal(progress)
if err != nil {
log.WithError(err).Errorf("Failed to marshal progress %v", progress)
return
}
r.eventRecorder.Event(r.dataUpload, false, datapath.EventReasonProgress, string(progressBytes))
}
func (r *BackupMicroService) closeDataPath(ctx context.Context, duName string) {
fsBackup := r.dataPathMgr.GetAsyncBR(duName)
if fsBackup != nil {
fsBackup.Close(ctx)
}
r.dataPathMgr.RemoveAsyncBR(duName)
}
func (r *BackupMicroService) cancelDataUpload(du *velerov2alpha1api.DataUpload) {
r.logger.WithField("DataUpload", du.Name).Info("Data upload is being canceled")
r.eventRecorder.Event(du, false, datapath.EventReasonCancelling, "Canceling for data upload %s", du.Name)
fsBackup := r.dataPathMgr.GetAsyncBR(du.Name)
if fsBackup == nil {
r.OnDataUploadCancelled(r.ctx, du.GetNamespace(), du.GetName())
} else {
fsBackup.Cancel()
}
}
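A minimal sketch (not part of this change) of how the backup pod is expected to drive this type end to end, assuming the caller already has the client, informer, and credential plumbing shown in the constructor; the helper name is hypothetical:
// Illustrative only: hypothetical wiring of the BackupMicroService lifecycle
// (New -> Init -> RunCancelableDataPath -> Shutdown), reusing the imports above.
func runBackupMicroServiceSketch(ctx context.Context, cli client.Client, kubeCli kubernetes.Interface,
duName string, ns string, node string, accessPoint datapath.AccessPoint, mgr *datapath.Manager,
ensurer *repository.Ensurer, cred *credentials.CredentialGetter, duInformer cache.Informer,
log logrus.FieldLogger) (string, error) {
ms := NewBackupMicroService(ctx, cli, kubeCli, duName, ns, node, accessPoint, mgr, ensurer, cred, duInformer, log)
if err := ms.Init(); err != nil {
return "", errors.Wrap(err, "error initializing backup micro service")
}
// Shutdown stops the event recorder, closes the data path, and removes the du handler.
defer ms.Shutdown()
// Blocks until the data path completes, fails, is canceled, or ctx expires.
return ms.RunCancelableDataPath(ctx)
}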

View File

@ -0,0 +1,439 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"context"
"fmt"
"sync"
"testing"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"k8s.io/apimachinery/pkg/runtime"
"github.com/vmware-tanzu/velero/pkg/builder"
"github.com/vmware-tanzu/velero/pkg/datapath"
"github.com/vmware-tanzu/velero/pkg/uploader"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
datapathmockes "github.com/vmware-tanzu/velero/pkg/datapath/mocks"
)
type backupMsTestHelper struct {
eventReason string
eventMsg string
marshalErr error
marshalBytes []byte
withEvent bool
eventLock sync.Mutex
}
func (bt *backupMsTestHelper) Event(_ runtime.Object, _ bool, reason string, message string, a ...any) {
bt.eventLock.Lock()
defer bt.eventLock.Unlock()
bt.withEvent = true
bt.eventReason = reason
bt.eventMsg = fmt.Sprintf(message, a...)
}
func (bt *backupMsTestHelper) Shutdown() {}
func (bt *backupMsTestHelper) Marshal(v any) ([]byte, error) {
if bt.marshalErr != nil {
return nil, bt.marshalErr
}
return bt.marshalBytes, nil
}
func (bt *backupMsTestHelper) EventReason() string {
bt.eventLock.Lock()
defer bt.eventLock.Unlock()
return bt.eventReason
}
func (bt *backupMsTestHelper) EventMessage() string {
bt.eventLock.Lock()
defer bt.eventLock.Unlock()
return bt.eventMsg
}
func TestOnDataUploadFailed(t *testing.T) {
dataUploadName := "fake-data-upload"
bt := &backupMsTestHelper{}
bs := &BackupMicroService{
dataUploadName: dataUploadName,
dataPathMgr: datapath.NewManager(1),
eventRecorder: bt,
resultSignal: make(chan dataPathResult),
logger: velerotest.NewLogger(),
}
expectedErr := "Data path for data upload fake-data-upload failed: fake-error"
expectedEventReason := datapath.EventReasonFailed
expectedEventMsg := "Data path for data upload fake-data-upload failed, error fake-error"
go bs.OnDataUploadFailed(context.TODO(), velerov1api.DefaultNamespace, dataUploadName, errors.New("fake-error"))
result := <-bs.resultSignal
assert.EqualError(t, result.err, expectedErr)
assert.Equal(t, expectedEventReason, bt.EventReason())
assert.Equal(t, expectedEventMsg, bt.EventMessage())
}
func TestOnDataUploadCancelled(t *testing.T) {
dataUploadName := "fake-data-upload"
bt := &backupMsTestHelper{}
bs := &BackupMicroService{
dataUploadName: dataUploadName,
dataPathMgr: datapath.NewManager(1),
eventRecorder: bt,
resultSignal: make(chan dataPathResult),
logger: velerotest.NewLogger(),
}
expectedErr := datapath.ErrCancelled
expectedEventReason := datapath.EventReasonCancelled
expectedEventMsg := "Data path for data upload fake-data-upload canceled"
go bs.OnDataUploadCancelled(context.TODO(), velerov1api.DefaultNamespace, dataUploadName)
result := <-bs.resultSignal
assert.EqualError(t, result.err, expectedErr)
assert.Equal(t, expectedEventReason, bt.EventReason())
assert.Equal(t, expectedEventMsg, bt.EventMessage())
}
func TestOnDataUploadCompleted(t *testing.T) {
tests := []struct {
name string
expectedErr string
expectedEventReason string
expectedEventMsg string
marshalErr error
marshallStr string
}{
{
name: "marshal fail",
marshalErr: errors.New("fake-marshal-error"),
expectedErr: "Failed to marshal backup result { false { }}: fake-marshal-error",
},
{
name: "succeed",
marshallStr: "fake-complete-string",
expectedEventReason: datapath.EventReasonCompleted,
expectedEventMsg: "fake-complete-string",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
dataUploadName := "fake-data-upload"
bt := &backupMsTestHelper{
marshalErr: test.marshalErr,
marshalBytes: []byte(test.marshallStr),
}
bs := &BackupMicroService{
dataPathMgr: datapath.NewManager(1),
eventRecorder: bt,
resultSignal: make(chan dataPathResult),
logger: velerotest.NewLogger(),
}
funcMarshal = bt.Marshal
go bs.OnDataUploadCompleted(context.TODO(), velerov1api.DefaultNamespace, dataUploadName, datapath.Result{})
result := <-bs.resultSignal
if test.marshalErr != nil {
assert.EqualError(t, result.err, test.expectedErr)
} else {
assert.NoError(t, result.err)
assert.Equal(t, test.expectedEventReason, bt.EventReason())
assert.Equal(t, test.expectedEventMsg, bt.EventMessage())
}
})
}
}
func TestOnDataUploadProgress(t *testing.T) {
tests := []struct {
name string
expectedErr string
expectedEventReason string
expectedEventMsg string
marshalErr error
marshallStr string
}{
{
name: "marshal fail",
marshalErr: errors.New("fake-marshal-error"),
expectedErr: "Failed to marshal backup result",
},
{
name: "succeed",
marshallStr: "fake-progress-string",
expectedEventReason: datapath.EventReasonProgress,
expectedEventMsg: "fake-progress-string",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
dataUploadName := "fake-data-upload"
bt := &backupMsTestHelper{
marshalErr: test.marshalErr,
marshalBytes: []byte(test.marshallStr),
}
bs := &BackupMicroService{
dataPathMgr: datapath.NewManager(1),
eventRecorder: bt,
logger: velerotest.NewLogger(),
}
funcMarshal = bt.Marshal
bs.OnDataUploadProgress(context.TODO(), velerov1api.DefaultNamespace, dataUploadName, &uploader.Progress{})
if test.marshalErr != nil {
assert.False(t, bt.withEvent)
} else {
assert.True(t, bt.withEvent)
assert.Equal(t, test.expectedEventReason, bt.EventReason())
assert.Equal(t, test.expectedEventMsg, bt.EventMessage())
}
})
}
}
func TestCancelDataUpload(t *testing.T) {
tests := []struct {
name string
expectedEventReason string
expectedEventMsg string
expectedErr string
}{
{
name: "no fs backup",
expectedEventReason: datapath.EventReasonCancelled,
expectedEventMsg: "Data path for data upload fake-data-upload canceled",
expectedErr: datapath.ErrCancelled,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
dataUploadName := "fake-data-upload"
du := builder.ForDataUpload(velerov1api.DefaultNamespace, dataUploadName).Result()
bt := &backupMsTestHelper{}
bs := &BackupMicroService{
dataPathMgr: datapath.NewManager(1),
eventRecorder: bt,
resultSignal: make(chan dataPathResult),
logger: velerotest.NewLogger(),
}
go bs.cancelDataUpload(du)
result := <-bs.resultSignal
assert.EqualError(t, result.err, test.expectedErr)
assert.True(t, bt.withEvent)
assert.Equal(t, test.expectedEventReason, bt.EventReason())
assert.Equal(t, test.expectedEventMsg, bt.EventMessage())
})
}
}
func TestRunCancelableDataPath(t *testing.T) {
dataUploadName := "fake-data-upload"
du := builder.ForDataUpload(velerov1api.DefaultNamespace, dataUploadName).Phase(velerov2alpha1api.DataUploadPhaseNew).Result()
duInProgress := builder.ForDataUpload(velerov1api.DefaultNamespace, dataUploadName).Phase(velerov2alpha1api.DataUploadPhaseInProgress).Result()
ctxTimeout, cancel := context.WithTimeout(context.Background(), time.Second)
tests := []struct {
name string
ctx context.Context
result *dataPathResult
dataPathMgr *datapath.Manager
kubeClientObj []runtime.Object
initErr error
startErr error
dataPathStarted bool
expectedEventMsg string
expectedErr string
}{
{
name: "no du",
ctx: ctxTimeout,
expectedErr: "error waiting for du: context deadline exceeded",
},
{
name: "du not in in-progress",
ctx: ctxTimeout,
kubeClientObj: []runtime.Object{du},
expectedErr: "error waiting for du: context deadline exceeded",
},
{
name: "create data path fail",
ctx: context.Background(),
kubeClientObj: []runtime.Object{duInProgress},
dataPathMgr: datapath.NewManager(0),
expectedErr: "error to create data path: Concurrent number exceeds",
},
{
name: "init data path fail",
ctx: context.Background(),
kubeClientObj: []runtime.Object{duInProgress},
initErr: errors.New("fake-init-error"),
expectedErr: "error to initialize data path: fake-init-error",
},
{
name: "start data path fail",
ctx: context.Background(),
kubeClientObj: []runtime.Object{duInProgress},
startErr: errors.New("fake-start-error"),
expectedErr: "error starting data path backup: fake-start-error",
},
{
name: "data path timeout",
ctx: ctxTimeout,
kubeClientObj: []runtime.Object{duInProgress},
dataPathStarted: true,
expectedEventMsg: fmt.Sprintf("Data path for %s started", dataUploadName),
expectedErr: "timed out waiting for fs backup to complete",
},
{
name: "data path returns error",
ctx: context.Background(),
kubeClientObj: []runtime.Object{duInProgress},
dataPathStarted: true,
result: &dataPathResult{
err: errors.New("fake-data-path-error"),
},
expectedEventMsg: fmt.Sprintf("Data path for %s started", dataUploadName),
expectedErr: "fake-data-path-error",
},
{
name: "succeed",
ctx: context.Background(),
kubeClientObj: []runtime.Object{duInProgress},
dataPathStarted: true,
result: &dataPathResult{
result: "fake-succeed-result",
},
expectedEventMsg: fmt.Sprintf("Data path for %s started", dataUploadName),
},
}
scheme := runtime.NewScheme()
velerov2alpha1api.AddToScheme(scheme)
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fakeClientBuilder := clientFake.NewClientBuilder()
fakeClientBuilder = fakeClientBuilder.WithScheme(scheme)
fakeClient := fakeClientBuilder.WithRuntimeObjects(test.kubeClientObj...).Build()
bt := &backupMsTestHelper{}
bs := &BackupMicroService{
namespace: velerov1api.DefaultNamespace,
dataUploadName: dataUploadName,
ctx: context.Background(),
client: fakeClient,
dataPathMgr: datapath.NewManager(1),
eventRecorder: bt,
resultSignal: make(chan dataPathResult),
logger: velerotest.NewLogger(),
}
if test.ctx != nil {
bs.ctx = test.ctx
}
if test.dataPathMgr != nil {
bs.dataPathMgr = test.dataPathMgr
}
datapath.FSBRCreator = func(string, string, kbclient.Client, string, datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
fsBR := datapathmockes.NewAsyncBR(t)
if test.initErr != nil {
fsBR.On("Init", mock.Anything, mock.Anything).Return(test.initErr)
}
if test.startErr != nil {
fsBR.On("Init", mock.Anything, mock.Anything).Return(nil)
fsBR.On("StartBackup", mock.Anything, mock.Anything, mock.Anything).Return(test.startErr)
}
if test.dataPathStarted {
fsBR.On("Init", mock.Anything, mock.Anything).Return(nil)
fsBR.On("StartBackup", mock.Anything, mock.Anything, mock.Anything).Return(nil)
}
return fsBR
}
if test.result != nil {
go func() {
time.Sleep(time.Millisecond * 500)
bs.resultSignal <- *test.result
}()
}
result, err := bs.RunCancelableDataPath(test.ctx)
if test.expectedErr != "" {
assert.EqualError(t, err, test.expectedErr)
} else {
assert.NoError(t, err)
assert.Equal(t, test.result.result, result)
}
if test.expectedEventMsg != "" {
assert.True(t, bt.withEvent)
assert.Equal(t, test.expectedEventMsg, bt.EventMessage())
}
})
}
cancel()
}
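Because the cases above replace the package-level datapath.FSBRCreator hook without restoring it, here is a small sketch of how a test could scope that override; the helper name is hypothetical and t.Cleanup plus the hook signature shown above are the only assumptions:
// Illustrative only: restore datapath.FSBRCreator after a test overrides it,
// so later tests in the package see the original constructor again.
func withFakeFSBR(t *testing.T, fake datapath.AsyncBR) {
original := datapath.FSBRCreator
t.Cleanup(func() { datapath.FSBRCreator = original })
datapath.FSBRCreator = func(string, string, kbclient.Client, string, datapath.Callbacks, logrus.FieldLogger) datapath.AsyncBR {
return fake
}
}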

View File

@ -0,0 +1,289 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package datamover
import (
"context"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/vmware-tanzu/velero/internal/credentials"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/datapath"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/kube"
cachetool "k8s.io/client-go/tools/cache"
)
// RestoreMicroService processes data mover restores inside the restore pod
type RestoreMicroService struct {
ctx context.Context
client client.Client
kubeClient kubernetes.Interface
repoEnsurer *repository.Ensurer
credentialGetter *credentials.CredentialGetter
logger logrus.FieldLogger
dataPathMgr *datapath.Manager
eventRecorder kube.EventRecorder
namespace string
dataDownloadName string
dataDownload *velerov2alpha1api.DataDownload
sourceTargetPath datapath.AccessPoint
resultSignal chan dataPathResult
ddInformer cache.Informer
ddHandler cachetool.ResourceEventHandlerRegistration
nodeName string
}
func NewRestoreMicroService(ctx context.Context, client client.Client, kubeClient kubernetes.Interface, dataDownloadName string, namespace string, nodeName string,
sourceTargetPath datapath.AccessPoint, dataPathMgr *datapath.Manager, repoEnsurer *repository.Ensurer, cred *credentials.CredentialGetter,
ddInformer cache.Informer, log logrus.FieldLogger) *RestoreMicroService {
return &RestoreMicroService{
ctx: ctx,
client: client,
kubeClient: kubeClient,
credentialGetter: cred,
logger: log,
repoEnsurer: repoEnsurer,
dataPathMgr: dataPathMgr,
namespace: namespace,
dataDownloadName: dataDownloadName,
sourceTargetPath: sourceTargetPath,
nodeName: nodeName,
resultSignal: make(chan dataPathResult),
ddInformer: ddInformer,
}
}
func (r *RestoreMicroService) Init() error {
r.eventRecorder = kube.NewEventRecorder(r.kubeClient, r.client.Scheme(), r.dataDownloadName, r.nodeName)
handler, err := r.ddInformer.AddEventHandler(
cachetool.ResourceEventHandlerFuncs{
UpdateFunc: func(oldObj interface{}, newObj interface{}) {
oldDd := oldObj.(*velerov2alpha1api.DataDownload)
newDd := newObj.(*velerov2alpha1api.DataDownload)
if newDd.Name != r.dataDownloadName {
return
}
if newDd.Status.Phase != velerov2alpha1api.DataDownloadPhaseInProgress {
return
}
if newDd.Spec.Cancel && !oldDd.Spec.Cancel {
r.cancelDataDownload(newDd)
}
},
},
)
if err != nil {
return errors.Wrap(err, "error adding dd handler")
}
r.ddHandler = handler
return err
}
func (r *RestoreMicroService) RunCancelableDataPath(ctx context.Context) (string, error) {
log := r.logger.WithFields(logrus.Fields{
"datadownload": r.dataDownloadName,
})
dd := &velerov2alpha1api.DataDownload{}
err := wait.PollUntilContextCancel(ctx, 500*time.Millisecond, true, func(ctx context.Context) (bool, error) {
err := r.client.Get(ctx, types.NamespacedName{
Namespace: r.namespace,
Name: r.dataDownloadName,
}, dd)
if apierrors.IsNotFound(err) {
return false, nil
}
if err != nil {
return true, errors.Wrapf(err, "error to get dd %s", r.dataDownloadName)
}
if dd.Status.Phase == velerov2alpha1api.DataDownloadPhaseInProgress {
return true, nil
} else {
return false, nil
}
})
if err != nil {
log.WithError(err).Error("Failed to wait dd")
return "", errors.Wrap(err, "error waiting for dd")
}
r.dataDownload = dd
log.Info("Run cancelable dataDownload")
callbacks := datapath.Callbacks{
OnCompleted: r.OnDataDownloadCompleted,
OnFailed: r.OnDataDownloadFailed,
OnCancelled: r.OnDataDownloadCancelled,
OnProgress: r.OnDataDownloadProgress,
}
fsRestore, err := r.dataPathMgr.CreateFileSystemBR(dd.Name, dataUploadDownloadRequestor, ctx, r.client, dd.Namespace, callbacks, log)
if err != nil {
return "", errors.Wrap(err, "error to create data path")
}
log.Debug("Found volume path")
if err := fsRestore.Init(ctx,
&datapath.FSBRInitParam{
BSLName: dd.Spec.BackupStorageLocation,
SourceNamespace: dd.Spec.SourceNamespace,
UploaderType: GetUploaderType(dd.Spec.DataMover),
RepositoryType: velerov1api.BackupRepositoryTypeKopia,
RepoIdentifier: "",
RepositoryEnsurer: r.repoEnsurer,
CredentialGetter: r.credentialGetter,
}); err != nil {
return "", errors.Wrap(err, "error to initialize data path")
}
log.Info("fs init")
if err := fsRestore.StartRestore(dd.Spec.SnapshotID, r.sourceTargetPath, dd.Spec.DataMoverConfig); err != nil {
return "", errors.Wrap(err, "error starting data path restore")
}
log.Info("Async fs restore data path started")
r.eventRecorder.Event(dd, false, datapath.EventReasonStarted, "Data path for %s started", dd.Name)
result := ""
select {
case <-ctx.Done():
err = errors.New("timed out waiting for fs restore to complete")
break
case res := <-r.resultSignal:
err = res.err
result = res.result
break
}
if err != nil {
log.WithError(err).Error("Async fs restore was not completed")
}
return result, err
}
func (r *RestoreMicroService) Shutdown() {
r.eventRecorder.Shutdown()
r.closeDataPath(r.ctx, r.dataDownloadName)
if r.ddHandler != nil {
if err := r.ddInformer.RemoveEventHandler(r.ddHandler); err != nil {
r.logger.WithError(err).Warn("Failed to remove pod handler")
}
}
}
func (r *RestoreMicroService) OnDataDownloadCompleted(ctx context.Context, namespace string, ddName string, result datapath.Result) {
log := r.logger.WithField("datadownload", ddName)
restoreBytes, err := funcMarshal(result.Restore)
if err != nil {
log.WithError(err).Errorf("Failed to marshal restore result %v", result.Restore)
r.resultSignal <- dataPathResult{
err: errors.Wrapf(err, "Failed to marshal restore result %v", result.Restore),
}
} else {
r.eventRecorder.Event(r.dataDownload, false, datapath.EventReasonCompleted, string(restoreBytes))
r.resultSignal <- dataPathResult{
result: string(restoreBytes),
}
}
log.Info("Async fs restore data path completed")
}
func (r *RestoreMicroService) OnDataDownloadFailed(ctx context.Context, namespace string, ddName string, err error) {
log := r.logger.WithField("datadownload", ddName)
log.WithError(err).Error("Async fs restore data path failed")
r.eventRecorder.Event(r.dataDownload, false, datapath.EventReasonFailed, "Data path for data download %s failed, error %v", r.dataDownloadName, err)
r.resultSignal <- dataPathResult{
err: errors.Wrapf(err, "Data path for data download %s failed", r.dataDownloadName),
}
}
func (r *RestoreMicroService) OnDataDownloadCancelled(ctx context.Context, namespace string, ddName string) {
log := r.logger.WithField("datadownload", ddName)
log.Warn("Async fs restore data path canceled")
r.eventRecorder.Event(r.dataDownload, false, datapath.EventReasonCancelled, "Data path for data download %s canceled", ddName)
r.resultSignal <- dataPathResult{
err: errors.New(datapath.ErrCancelled),
}
}
func (r *RestoreMicroService) OnDataDownloadProgress(ctx context.Context, namespace string, ddName string, progress *uploader.Progress) {
log := r.logger.WithFields(logrus.Fields{
"datadownload": ddName,
})
progressBytes, err := funcMarshal(progress)
if err != nil {
log.WithError(err).Errorf("Failed to marshal progress %v", progress)
return
}
r.eventRecorder.Event(r.dataDownload, false, datapath.EventReasonProgress, string(progressBytes))
}
func (r *RestoreMicroService) closeDataPath(ctx context.Context, ddName string) {
fsRestore := r.dataPathMgr.GetAsyncBR(ddName)
if fsRestore != nil {
fsRestore.Close(ctx)
}
r.dataPathMgr.RemoveAsyncBR(ddName)
}
func (r *RestoreMicroService) cancelDataDownload(dd *velerov2alpha1api.DataDownload) {
r.logger.WithField("DataDownload", dd.Name).Info("Data download is being canceled")
r.eventRecorder.Event(dd, false, datapath.EventReasonCancelling, "Canceling for data download %s", dd.Name)
fsBackup := r.dataPathMgr.GetAsyncBR(dd.Name)
if fsBackup == nil {
r.OnDataDownloadCancelled(r.ctx, dd.GetNamespace(), dd.GetName())
} else {
fsBackup.Cancel()
}
}
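To make the cancellation path concrete: the UpdateFunc registered in Init reacts when Spec.Cancel flips from false to true on an in-progress DataDownload. A minimal sketch of how a controller outside the restore pod might request that; the helper name is hypothetical and conflict/patch handling is omitted:
// Illustrative only: flipping Spec.Cancel on the DataDownload is the signal the
// informer handler above watches for before calling cancelDataDownload.
func requestDataDownloadCancelSketch(ctx context.Context, cli client.Client, namespace string, name string) error {
dd := &velerov2alpha1api.DataDownload{}
if err := cli.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, dd); err != nil {
return errors.Wrapf(err, "error getting dd %s", name)
}
dd.Spec.Cancel = true
return cli.Update(ctx, dd)
}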

Some files were not shown because too many files have changed in this diff.