From bec569caf94a3c2a0395bdade742bc9cd6dd89f7 Mon Sep 17 00:00:00 2001 From: Josef Zweck Date: Fri, 7 Feb 2025 16:06:33 +0100 Subject: [PATCH] Use separate metadata files for onedrive (#137549) --- homeassistant/components/onedrive/__init__.py | 43 ++++++++- homeassistant/components/onedrive/backup.py | 95 ++++++++++++------- .../components/onedrive/strings.json | 3 + tests/components/onedrive/conftest.py | 10 +- tests/components/onedrive/const.py | 25 ++++- tests/components/onedrive/test_backup.py | 2 +- tests/components/onedrive/test_init.py | 37 ++++++++ 7 files changed, 178 insertions(+), 37 deletions(-) diff --git a/homeassistant/components/onedrive/__init__.py b/homeassistant/components/onedrive/__init__.py index 5feefb2cf7d..9716f692ec8 100644 --- a/homeassistant/components/onedrive/__init__.py +++ b/homeassistant/components/onedrive/__init__.py @@ -4,6 +4,8 @@ from __future__ import annotations from collections.abc import Awaitable, Callable from dataclasses import dataclass +from html import unescape +from json import dumps, loads import logging from typing import cast @@ -13,6 +15,7 @@ from onedrive_personal_sdk.exceptions import ( HttpRequestException, OneDriveException, ) +from onedrive_personal_sdk.models.items import ItemUpdate from homeassistant.config_entries import ConfigEntry from homeassistant.const import CONF_ACCESS_TOKEN @@ -45,7 +48,6 @@ _LOGGER = logging.getLogger(__name__) async def async_setup_entry(hass: HomeAssistant, entry: OneDriveConfigEntry) -> bool: """Set up OneDrive from a config entry.""" implementation = await async_get_config_entry_implementation(hass, entry) - session = OAuth2Session(hass, entry, implementation) async def get_access_token() -> str: @@ -89,6 +91,14 @@ async def async_setup_entry(hass: HomeAssistant, entry: OneDriveConfigEntry) -> backup_folder_id=backup_folder.id, ) + try: + await _migrate_backup_files(client, backup_folder.id) + except OneDriveException as err: + raise ConfigEntryNotReady( + translation_domain=DOMAIN, + translation_key="failed_to_migrate_files", + ) from err + _async_notify_backup_listeners_soon(hass) return True @@ -108,3 +118,34 @@ def _async_notify_backup_listeners(hass: HomeAssistant) -> None: @callback def _async_notify_backup_listeners_soon(hass: HomeAssistant) -> None: hass.loop.call_soon(_async_notify_backup_listeners, hass) + + +async def _migrate_backup_files(client: OneDriveClient, backup_folder_id: str) -> None: + """Migrate backup files to metadata version 2.""" + files = await client.list_drive_items(backup_folder_id) + for file in files: + if file.description and '"metadata_version": 1' in ( + metadata_json := unescape(file.description) + ): + metadata = loads(metadata_json) + del metadata["metadata_version"] + metadata_filename = file.name.rsplit(".", 1)[0] + ".metadata.json" + metadata_file = await client.upload_file( + backup_folder_id, + metadata_filename, + dumps(metadata), # type: ignore[arg-type] + ) + metadata_description = { + "metadata_version": 2, + "backup_id": metadata["backup_id"], + "backup_file_id": file.id, + } + await client.update_drive_item( + path_or_id=metadata_file.id, + data=ItemUpdate(description=dumps(metadata_description)), + ) + await client.update_drive_item( + path_or_id=file.id, + data=ItemUpdate(description=""), + ) + _LOGGER.debug("Migrated backup file %s", file.name) diff --git a/homeassistant/components/onedrive/backup.py b/homeassistant/components/onedrive/backup.py index 78bdcb24b8c..182e29aa63f 100644 --- a/homeassistant/components/onedrive/backup.py +++ b/homeassistant/components/onedrive/backup.py @@ -4,8 +4,8 @@ from __future__ import annotations from collections.abc import AsyncIterator, Callable, Coroutine from functools import wraps -import html -import json +from html import unescape +from json import dumps, loads import logging from typing import Any, Concatenate @@ -34,6 +34,7 @@ from .const import DATA_BACKUP_AGENT_LISTENERS, DOMAIN _LOGGER = logging.getLogger(__name__) UPLOAD_CHUNK_SIZE = 16 * 320 * 1024 # 5.2MB TIMEOUT = ClientTimeout(connect=10, total=43200) # 12 hours +METADATA_VERSION = 2 async def async_get_backup_agents( @@ -120,11 +121,19 @@ class OneDriveBackupAgent(BackupAgent): self, backup_id: str, **kwargs: Any ) -> AsyncIterator[bytes]: """Download a backup file.""" - item = await self._find_item_by_backup_id(backup_id) - if item is None: + metadata_item = await self._find_item_by_backup_id(backup_id) + if ( + metadata_item is None + or metadata_item.description is None + or "backup_file_id" not in metadata_item.description + ): raise BackupAgentError("Backup not found") - stream = await self._client.download_drive_item(item.id, timeout=TIMEOUT) + metadata_info = loads(unescape(metadata_item.description)) + + stream = await self._client.download_drive_item( + metadata_info["backup_file_id"], timeout=TIMEOUT + ) return stream.iter_chunked(1024) @handle_backup_errors @@ -136,15 +145,15 @@ class OneDriveBackupAgent(BackupAgent): **kwargs: Any, ) -> None: """Upload a backup.""" - + filename = suggested_filename(backup) file = FileInfo( - suggested_filename(backup), + filename, backup.size, self._folder_id, await open_stream(), ) try: - item = await LargeFileUploadClient.upload( + backup_file = await LargeFileUploadClient.upload( self._token_function, file, session=async_get_clientsession(self._hass) ) except HashMismatchError as err: @@ -152,15 +161,25 @@ class OneDriveBackupAgent(BackupAgent): "Hash validation failed, backup file might be corrupt" ) from err - # store metadata in description - backup_dict = backup.as_dict() - backup_dict["metadata_version"] = 1 # version of the backup metadata - description = json.dumps(backup_dict) + # store metadata in metadata file + description = dumps(backup.as_dict()) _LOGGER.debug("Creating metadata: %s", description) + metadata_filename = filename.rsplit(".", 1)[0] + ".metadata.json" + metadata_file = await self._client.upload_file( + self._folder_id, + metadata_filename, + description, # type: ignore[arg-type] + ) + # add metadata to the metadata file + metadata_description = { + "metadata_version": METADATA_VERSION, + "backup_id": backup.backup_id, + "backup_file_id": backup_file.id, + } await self._client.update_drive_item( - path_or_id=item.id, - data=ItemUpdate(description=description), + path_or_id=metadata_file.id, + data=ItemUpdate(description=dumps(metadata_description)), ) @handle_backup_errors @@ -170,18 +189,28 @@ class OneDriveBackupAgent(BackupAgent): **kwargs: Any, ) -> None: """Delete a backup file.""" - item = await self._find_item_by_backup_id(backup_id) - if item is None: + metadata_item = await self._find_item_by_backup_id(backup_id) + if ( + metadata_item is None + or metadata_item.description is None + or "backup_file_id" not in metadata_item.description + ): return - await self._client.delete_drive_item(item.id) + metadata_info = loads(unescape(metadata_item.description)) + + await self._client.delete_drive_item(metadata_info["backup_file_id"]) + await self._client.delete_drive_item(metadata_item.id) @handle_backup_errors async def async_list_backups(self, **kwargs: Any) -> list[AgentBackup]: """List backups.""" + items = await self._client.list_drive_items(self._folder_id) return [ - self._backup_from_description(item.description) - for item in await self._client.list_drive_items(self._folder_id) - if item.description and "homeassistant_version" in item.description + await self._download_backup_metadata(item.id) + for item in items + if item.description + and "backup_id" in item.description + and f'"metadata_version": {METADATA_VERSION}' in unescape(item.description) ] @handle_backup_errors @@ -189,19 +218,11 @@ class OneDriveBackupAgent(BackupAgent): self, backup_id: str, **kwargs: Any ) -> AgentBackup | None: """Return a backup.""" - item = await self._find_item_by_backup_id(backup_id) - return ( - self._backup_from_description(item.description) - if item and item.description - else None - ) + metadata_file = await self._find_item_by_backup_id(backup_id) + if metadata_file is None or metadata_file.description is None: + return None - def _backup_from_description(self, description: str) -> AgentBackup: - """Create a backup object from a description.""" - description = html.unescape( - description - ) # OneDrive encodes the description on save automatically - return AgentBackup.from_dict(json.loads(description)) + return await self._download_backup_metadata(metadata_file.id) async def _find_item_by_backup_id(self, backup_id: str) -> File | Folder | None: """Find an item by backup ID.""" @@ -209,7 +230,15 @@ class OneDriveBackupAgent(BackupAgent): ( item for item in await self._client.list_drive_items(self._folder_id) - if item.description and backup_id in item.description + if item.description + and backup_id in item.description + and f'"metadata_version": {METADATA_VERSION}' + in unescape(item.description) ), None, ) + + async def _download_backup_metadata(self, item_id: str) -> AgentBackup: + metadata_stream = await self._client.download_drive_item(item_id) + metadata_json = loads(await metadata_stream.read()) + return AgentBackup.from_dict(metadata_json) diff --git a/homeassistant/components/onedrive/strings.json b/homeassistant/components/onedrive/strings.json index 7686e83e2a5..ebc46d3eb12 100644 --- a/homeassistant/components/onedrive/strings.json +++ b/homeassistant/components/onedrive/strings.json @@ -35,6 +35,9 @@ }, "failed_to_get_folder": { "message": "Failed to get {folder} folder" + }, + "failed_to_migrate_files": { + "message": "Failed to migrate metadata to separate files" } } } diff --git a/tests/components/onedrive/conftest.py b/tests/components/onedrive/conftest.py index 0d6ee09d587..8a0da9f584e 100644 --- a/tests/components/onedrive/conftest.py +++ b/tests/components/onedrive/conftest.py @@ -1,6 +1,7 @@ """Fixtures for OneDrive tests.""" from collections.abc import AsyncIterator, Generator +from json import dumps import time from unittest.mock import AsyncMock, MagicMock, patch @@ -15,11 +16,13 @@ from homeassistant.core import HomeAssistant from homeassistant.setup import async_setup_component from .const import ( + BACKUP_METADATA, CLIENT_ID, CLIENT_SECRET, MOCK_APPROOT, MOCK_BACKUP_FILE, MOCK_BACKUP_FOLDER, + MOCK_METADATA_FILE, ) from tests.common import MockConfigEntry @@ -89,13 +92,17 @@ def mock_onedrive_client(mock_onedrive_client_init: MagicMock) -> Generator[Magi client = mock_onedrive_client_init.return_value client.get_approot.return_value = MOCK_APPROOT client.create_folder.return_value = MOCK_BACKUP_FOLDER - client.list_drive_items.return_value = [MOCK_BACKUP_FILE] + client.list_drive_items.return_value = [MOCK_BACKUP_FILE, MOCK_METADATA_FILE] client.get_drive_item.return_value = MOCK_BACKUP_FILE + client.upload_file.return_value = MOCK_METADATA_FILE class MockStreamReader: async def iter_chunked(self, chunk_size: int) -> AsyncIterator[bytes]: yield b"backup data" + async def read(self) -> bytes: + return dumps(BACKUP_METADATA).encode() + client.download_drive_item.return_value = MockStreamReader() return client @@ -107,6 +114,7 @@ def mock_large_file_upload_client() -> Generator[AsyncMock]: with patch( "homeassistant.components.onedrive.backup.LargeFileUploadClient.upload" ) as mock_upload: + mock_upload.return_value = MOCK_BACKUP_FILE yield mock_upload diff --git a/tests/components/onedrive/const.py b/tests/components/onedrive/const.py index ee3a5ce3dc4..3739369887d 100644 --- a/tests/components/onedrive/const.py +++ b/tests/components/onedrive/const.py @@ -72,6 +72,29 @@ MOCK_BACKUP_FILE = File( quick_xor_hash="hash", ), mime_type="application/x-tar", - description=escape(dumps(BACKUP_METADATA)), + description="", + created_by=CONTRIBUTOR, +) + +MOCK_METADATA_FILE = File( + id="id", + name="23e64aec.tar", + size=34519040, + parent_reference=ItemParentReference( + drive_id="mock_drive_id", id="id", path="path" + ), + hashes=Hashes( + quick_xor_hash="hash", + ), + mime_type="application/x-tar", + description=escape( + dumps( + { + "metadata_version": 2, + "backup_id": "23e64aec", + "backup_file_id": "id", + } + ) + ), created_by=CONTRIBUTOR, ) diff --git a/tests/components/onedrive/test_backup.py b/tests/components/onedrive/test_backup.py index 0277c3da02e..dd4f4d253d0 100644 --- a/tests/components/onedrive/test_backup.py +++ b/tests/components/onedrive/test_backup.py @@ -152,7 +152,7 @@ async def test_agents_delete( assert response["success"] assert response["result"] == {"agent_errors": {}} - mock_onedrive_client.delete_drive_item.assert_called_once() + assert mock_onedrive_client.delete_drive_item.call_count == 2 async def test_agents_upload( diff --git a/tests/components/onedrive/test_init.py b/tests/components/onedrive/test_init.py index a6ad55442aa..7ceab98ff21 100644 --- a/tests/components/onedrive/test_init.py +++ b/tests/components/onedrive/test_init.py @@ -1,5 +1,7 @@ """Test the OneDrive setup.""" +from html import escape +from json import dumps from unittest.mock import MagicMock from onedrive_personal_sdk.exceptions import AuthenticationError, OneDriveException @@ -9,6 +11,7 @@ from homeassistant.config_entries import ConfigEntryState from homeassistant.core import HomeAssistant from . import setup_integration +from .const import BACKUP_METADATA, MOCK_BACKUP_FILE from tests.common import MockConfigEntry @@ -17,6 +20,7 @@ async def test_load_unload_config_entry( hass: HomeAssistant, mock_config_entry: MockConfigEntry, mock_onedrive_client_init: MagicMock, + mock_onedrive_client: MagicMock, ) -> None: """Test loading and unloading the integration.""" await setup_integration(hass, mock_config_entry) @@ -25,6 +29,10 @@ async def test_load_unload_config_entry( token_callback = mock_onedrive_client_init.call_args[0][0] assert await token_callback() == "mock-access-token" + # make sure metadata migration is not called + assert mock_onedrive_client.upload_file.call_count == 0 + assert mock_onedrive_client.update_drive_item.call_count == 0 + assert mock_config_entry.state is ConfigEntryState.LOADED await hass.config_entries.async_unload(mock_config_entry.entry_id) @@ -64,3 +72,32 @@ async def test_get_integration_folder_error( await setup_integration(hass, mock_config_entry) assert mock_config_entry.state is ConfigEntryState.SETUP_RETRY assert "Failed to get backups_9f86d081 folder" in caplog.text + + +async def test_migrate_metadata_files( + hass: HomeAssistant, + mock_config_entry: MockConfigEntry, + mock_onedrive_client: MagicMock, +) -> None: + """Test migration of metadata files.""" + MOCK_BACKUP_FILE.description = escape( + dumps({**BACKUP_METADATA, "metadata_version": 1}) + ) + await setup_integration(hass, mock_config_entry) + await hass.async_block_till_done() + + mock_onedrive_client.upload_file.assert_called_once() + assert mock_onedrive_client.update_drive_item.call_count == 2 + assert mock_onedrive_client.update_drive_item.call_args[1]["data"].description == "" + + +async def test_migrate_metadata_files_errors( + hass: HomeAssistant, + mock_config_entry: MockConfigEntry, + mock_onedrive_client: MagicMock, +) -> None: + """Test migration of metadata files errors.""" + mock_onedrive_client.list_drive_items.side_effect = OneDriveException() + await setup_integration(hass, mock_config_entry) + + assert mock_config_entry.state is ConfigEntryState.SETUP_RETRY