Use separate metadata files for onedrive (#137549)

pull/137688/head
Josef Zweck 2025-02-07 16:06:33 +01:00 committed by Franck Nijhof
parent 3390fb32a8
commit bec569caf9
No known key found for this signature in database
GPG Key ID: D62583BA8AB11CA3
7 changed files with 178 additions and 37 deletions

View File

@ -4,6 +4,8 @@ from __future__ import annotations
from collections.abc import Awaitable, Callable from collections.abc import Awaitable, Callable
from dataclasses import dataclass from dataclasses import dataclass
from html import unescape
from json import dumps, loads
import logging import logging
from typing import cast from typing import cast
@ -13,6 +15,7 @@ from onedrive_personal_sdk.exceptions import (
HttpRequestException, HttpRequestException,
OneDriveException, OneDriveException,
) )
from onedrive_personal_sdk.models.items import ItemUpdate
from homeassistant.config_entries import ConfigEntry from homeassistant.config_entries import ConfigEntry
from homeassistant.const import CONF_ACCESS_TOKEN from homeassistant.const import CONF_ACCESS_TOKEN
@ -45,7 +48,6 @@ _LOGGER = logging.getLogger(__name__)
async def async_setup_entry(hass: HomeAssistant, entry: OneDriveConfigEntry) -> bool: async def async_setup_entry(hass: HomeAssistant, entry: OneDriveConfigEntry) -> bool:
"""Set up OneDrive from a config entry.""" """Set up OneDrive from a config entry."""
implementation = await async_get_config_entry_implementation(hass, entry) implementation = await async_get_config_entry_implementation(hass, entry)
session = OAuth2Session(hass, entry, implementation) session = OAuth2Session(hass, entry, implementation)
async def get_access_token() -> str: async def get_access_token() -> str:
@ -89,6 +91,14 @@ async def async_setup_entry(hass: HomeAssistant, entry: OneDriveConfigEntry) ->
backup_folder_id=backup_folder.id, backup_folder_id=backup_folder.id,
) )
try:
await _migrate_backup_files(client, backup_folder.id)
except OneDriveException as err:
raise ConfigEntryNotReady(
translation_domain=DOMAIN,
translation_key="failed_to_migrate_files",
) from err
_async_notify_backup_listeners_soon(hass) _async_notify_backup_listeners_soon(hass)
return True return True
@ -108,3 +118,34 @@ def _async_notify_backup_listeners(hass: HomeAssistant) -> None:
@callback @callback
def _async_notify_backup_listeners_soon(hass: HomeAssistant) -> None: def _async_notify_backup_listeners_soon(hass: HomeAssistant) -> None:
hass.loop.call_soon(_async_notify_backup_listeners, hass) hass.loop.call_soon(_async_notify_backup_listeners, hass)
async def _migrate_backup_files(client: OneDriveClient, backup_folder_id: str) -> None:
    """Migrate backup files to metadata version 2.

    Version 1 stored the backup metadata as JSON in the description of the
    backup file itself. Version 2 stores it in a separate
    ``<name>.metadata.json`` file whose description holds a small JSON pointer
    (``metadata_version``, ``backup_id``, ``backup_file_id``) to the backup.

    Files whose description cannot be parsed as a version 1 metadata document
    are skipped with a warning, so a single malformed description does not
    abort the migration of the remaining files.

    Args:
        client: OneDrive client used to list, upload and update drive items.
        backup_folder_id: ID of the folder that contains the backup files.

    Raises:
        OneDriveException: Propagated unchanged from the client calls.
    """
    files = await client.list_drive_items(backup_folder_id)
    for file in files:
        if not file.description:
            continue

        # The description is stored HTML-escaped, so undo that before
        # inspecting or parsing it.
        metadata_json = unescape(file.description)

        # Cheap substring pre-filter so unrelated descriptions are never parsed.
        if '"metadata_version": 1' not in metadata_json:
            continue

        try:
            metadata = loads(metadata_json)
        except ValueError:
            # json.JSONDecodeError is a subclass of ValueError.
            _LOGGER.warning(
                "Skipping migration of %s: description is not valid JSON", file.name
            )
            continue

        # The substring also matches e.g. version 10 or a nested key, so
        # confirm the parsed top-level value before touching anything remote.
        if (
            not isinstance(metadata, dict)
            or metadata.pop("metadata_version", None) != 1
            or "backup_id" not in metadata
        ):
            _LOGGER.warning(
                "Skipping migration of %s: unexpected metadata format", file.name
            )
            continue

        metadata_filename = file.name.rsplit(".", 1)[0] + ".metadata.json"
        metadata_file = await client.upload_file(
            backup_folder_id,
            metadata_filename,
            dumps(metadata),  # type: ignore[arg-type]
        )
        metadata_description = {
            "metadata_version": 2,
            "backup_id": metadata["backup_id"],
            "backup_file_id": file.id,
        }
        await client.update_drive_item(
            path_or_id=metadata_file.id,
            data=ItemUpdate(description=dumps(metadata_description)),
        )
        # Clear the old description last, so the file is picked up again on
        # the next setup attempt if an earlier step failed.
        await client.update_drive_item(
            path_or_id=file.id,
            data=ItemUpdate(description=""),
        )
        _LOGGER.debug("Migrated backup file %s", file.name)

View File

@ -4,8 +4,8 @@ from __future__ import annotations
from collections.abc import AsyncIterator, Callable, Coroutine from collections.abc import AsyncIterator, Callable, Coroutine
from functools import wraps from functools import wraps
import html from html import unescape
import json from json import dumps, loads
import logging import logging
from typing import Any, Concatenate from typing import Any, Concatenate
@ -34,6 +34,7 @@ from .const import DATA_BACKUP_AGENT_LISTENERS, DOMAIN
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
UPLOAD_CHUNK_SIZE = 16 * 320 * 1024 # 5.2MB UPLOAD_CHUNK_SIZE = 16 * 320 * 1024 # 5.2MB
TIMEOUT = ClientTimeout(connect=10, total=43200) # 12 hours TIMEOUT = ClientTimeout(connect=10, total=43200) # 12 hours
METADATA_VERSION = 2
async def async_get_backup_agents( async def async_get_backup_agents(
@ -120,11 +121,19 @@ class OneDriveBackupAgent(BackupAgent):
self, backup_id: str, **kwargs: Any self, backup_id: str, **kwargs: Any
) -> AsyncIterator[bytes]: ) -> AsyncIterator[bytes]:
"""Download a backup file.""" """Download a backup file."""
item = await self._find_item_by_backup_id(backup_id) metadata_item = await self._find_item_by_backup_id(backup_id)
if item is None: if (
metadata_item is None
or metadata_item.description is None
or "backup_file_id" not in metadata_item.description
):
raise BackupAgentError("Backup not found") raise BackupAgentError("Backup not found")
stream = await self._client.download_drive_item(item.id, timeout=TIMEOUT) metadata_info = loads(unescape(metadata_item.description))
stream = await self._client.download_drive_item(
metadata_info["backup_file_id"], timeout=TIMEOUT
)
return stream.iter_chunked(1024) return stream.iter_chunked(1024)
@handle_backup_errors @handle_backup_errors
@ -136,15 +145,15 @@ class OneDriveBackupAgent(BackupAgent):
**kwargs: Any, **kwargs: Any,
) -> None: ) -> None:
"""Upload a backup.""" """Upload a backup."""
filename = suggested_filename(backup)
file = FileInfo( file = FileInfo(
suggested_filename(backup), filename,
backup.size, backup.size,
self._folder_id, self._folder_id,
await open_stream(), await open_stream(),
) )
try: try:
item = await LargeFileUploadClient.upload( backup_file = await LargeFileUploadClient.upload(
self._token_function, file, session=async_get_clientsession(self._hass) self._token_function, file, session=async_get_clientsession(self._hass)
) )
except HashMismatchError as err: except HashMismatchError as err:
@ -152,15 +161,25 @@ class OneDriveBackupAgent(BackupAgent):
"Hash validation failed, backup file might be corrupt" "Hash validation failed, backup file might be corrupt"
) from err ) from err
# store metadata in description # store metadata in metadata file
backup_dict = backup.as_dict() description = dumps(backup.as_dict())
backup_dict["metadata_version"] = 1 # version of the backup metadata
description = json.dumps(backup_dict)
_LOGGER.debug("Creating metadata: %s", description) _LOGGER.debug("Creating metadata: %s", description)
metadata_filename = filename.rsplit(".", 1)[0] + ".metadata.json"
metadata_file = await self._client.upload_file(
self._folder_id,
metadata_filename,
description, # type: ignore[arg-type]
)
# add metadata to the metadata file
metadata_description = {
"metadata_version": METADATA_VERSION,
"backup_id": backup.backup_id,
"backup_file_id": backup_file.id,
}
await self._client.update_drive_item( await self._client.update_drive_item(
path_or_id=item.id, path_or_id=metadata_file.id,
data=ItemUpdate(description=description), data=ItemUpdate(description=dumps(metadata_description)),
) )
@handle_backup_errors @handle_backup_errors
@ -170,18 +189,28 @@ class OneDriveBackupAgent(BackupAgent):
**kwargs: Any, **kwargs: Any,
) -> None: ) -> None:
"""Delete a backup file.""" """Delete a backup file."""
item = await self._find_item_by_backup_id(backup_id) metadata_item = await self._find_item_by_backup_id(backup_id)
if item is None: if (
metadata_item is None
or metadata_item.description is None
or "backup_file_id" not in metadata_item.description
):
return return
await self._client.delete_drive_item(item.id) metadata_info = loads(unescape(metadata_item.description))
await self._client.delete_drive_item(metadata_info["backup_file_id"])
await self._client.delete_drive_item(metadata_item.id)
@handle_backup_errors @handle_backup_errors
async def async_list_backups(self, **kwargs: Any) -> list[AgentBackup]: async def async_list_backups(self, **kwargs: Any) -> list[AgentBackup]:
"""List backups.""" """List backups."""
items = await self._client.list_drive_items(self._folder_id)
return [ return [
self._backup_from_description(item.description) await self._download_backup_metadata(item.id)
for item in await self._client.list_drive_items(self._folder_id) for item in items
if item.description and "homeassistant_version" in item.description if item.description
and "backup_id" in item.description
and f'"metadata_version": {METADATA_VERSION}' in unescape(item.description)
] ]
@handle_backup_errors @handle_backup_errors
@ -189,19 +218,11 @@ class OneDriveBackupAgent(BackupAgent):
self, backup_id: str, **kwargs: Any self, backup_id: str, **kwargs: Any
) -> AgentBackup | None: ) -> AgentBackup | None:
"""Return a backup.""" """Return a backup."""
item = await self._find_item_by_backup_id(backup_id) metadata_file = await self._find_item_by_backup_id(backup_id)
return ( if metadata_file is None or metadata_file.description is None:
self._backup_from_description(item.description) return None
if item and item.description
else None
)
def _backup_from_description(self, description: str) -> AgentBackup: return await self._download_backup_metadata(metadata_file.id)
"""Create a backup object from a description."""
description = html.unescape(
description
) # OneDrive encodes the description on save automatically
return AgentBackup.from_dict(json.loads(description))
async def _find_item_by_backup_id(self, backup_id: str) -> File | Folder | None: async def _find_item_by_backup_id(self, backup_id: str) -> File | Folder | None:
"""Find an item by backup ID.""" """Find an item by backup ID."""
@ -209,7 +230,15 @@ class OneDriveBackupAgent(BackupAgent):
( (
item item
for item in await self._client.list_drive_items(self._folder_id) for item in await self._client.list_drive_items(self._folder_id)
if item.description and backup_id in item.description if item.description
and backup_id in item.description
and f'"metadata_version": {METADATA_VERSION}'
in unescape(item.description)
), ),
None, None,
) )
async def _download_backup_metadata(self, item_id: str) -> AgentBackup:
    """Download the metadata file with the given item ID and build a backup from it."""
    stream = await self._client.download_drive_item(item_id)
    # The file body is the JSON document that describes the backup.
    raw_metadata = await stream.read()
    return AgentBackup.from_dict(loads(raw_metadata))

View File

@ -35,6 +35,9 @@
}, },
"failed_to_get_folder": { "failed_to_get_folder": {
"message": "Failed to get {folder} folder" "message": "Failed to get {folder} folder"
},
"failed_to_migrate_files": {
"message": "Failed to migrate metadata to separate files"
} }
} }
} }

View File

@ -1,6 +1,7 @@
"""Fixtures for OneDrive tests.""" """Fixtures for OneDrive tests."""
from collections.abc import AsyncIterator, Generator from collections.abc import AsyncIterator, Generator
from json import dumps
import time import time
from unittest.mock import AsyncMock, MagicMock, patch from unittest.mock import AsyncMock, MagicMock, patch
@ -15,11 +16,13 @@ from homeassistant.core import HomeAssistant
from homeassistant.setup import async_setup_component from homeassistant.setup import async_setup_component
from .const import ( from .const import (
BACKUP_METADATA,
CLIENT_ID, CLIENT_ID,
CLIENT_SECRET, CLIENT_SECRET,
MOCK_APPROOT, MOCK_APPROOT,
MOCK_BACKUP_FILE, MOCK_BACKUP_FILE,
MOCK_BACKUP_FOLDER, MOCK_BACKUP_FOLDER,
MOCK_METADATA_FILE,
) )
from tests.common import MockConfigEntry from tests.common import MockConfigEntry
@ -89,13 +92,17 @@ def mock_onedrive_client(mock_onedrive_client_init: MagicMock) -> Generator[Magi
client = mock_onedrive_client_init.return_value client = mock_onedrive_client_init.return_value
client.get_approot.return_value = MOCK_APPROOT client.get_approot.return_value = MOCK_APPROOT
client.create_folder.return_value = MOCK_BACKUP_FOLDER client.create_folder.return_value = MOCK_BACKUP_FOLDER
client.list_drive_items.return_value = [MOCK_BACKUP_FILE] client.list_drive_items.return_value = [MOCK_BACKUP_FILE, MOCK_METADATA_FILE]
client.get_drive_item.return_value = MOCK_BACKUP_FILE client.get_drive_item.return_value = MOCK_BACKUP_FILE
client.upload_file.return_value = MOCK_METADATA_FILE
class MockStreamReader: class MockStreamReader:
async def iter_chunked(self, chunk_size: int) -> AsyncIterator[bytes]: async def iter_chunked(self, chunk_size: int) -> AsyncIterator[bytes]:
yield b"backup data" yield b"backup data"
async def read(self) -> bytes:
return dumps(BACKUP_METADATA).encode()
client.download_drive_item.return_value = MockStreamReader() client.download_drive_item.return_value = MockStreamReader()
return client return client
@ -107,6 +114,7 @@ def mock_large_file_upload_client() -> Generator[AsyncMock]:
with patch( with patch(
"homeassistant.components.onedrive.backup.LargeFileUploadClient.upload" "homeassistant.components.onedrive.backup.LargeFileUploadClient.upload"
) as mock_upload: ) as mock_upload:
mock_upload.return_value = MOCK_BACKUP_FILE
yield mock_upload yield mock_upload

View File

@ -72,6 +72,29 @@ MOCK_BACKUP_FILE = File(
quick_xor_hash="hash", quick_xor_hash="hash",
), ),
mime_type="application/x-tar", mime_type="application/x-tar",
description=escape(dumps(BACKUP_METADATA)), description="",
created_by=CONTRIBUTOR,
)
MOCK_METADATA_FILE = File(
id="id",
name="23e64aec.tar",
size=34519040,
parent_reference=ItemParentReference(
drive_id="mock_drive_id", id="id", path="path"
),
hashes=Hashes(
quick_xor_hash="hash",
),
mime_type="application/x-tar",
description=escape(
dumps(
{
"metadata_version": 2,
"backup_id": "23e64aec",
"backup_file_id": "id",
}
)
),
created_by=CONTRIBUTOR, created_by=CONTRIBUTOR,
) )

View File

@ -152,7 +152,7 @@ async def test_agents_delete(
assert response["success"] assert response["success"]
assert response["result"] == {"agent_errors": {}} assert response["result"] == {"agent_errors": {}}
mock_onedrive_client.delete_drive_item.assert_called_once() assert mock_onedrive_client.delete_drive_item.call_count == 2
async def test_agents_upload( async def test_agents_upload(

View File

@ -1,5 +1,7 @@
"""Test the OneDrive setup.""" """Test the OneDrive setup."""
from html import escape
from json import dumps
from unittest.mock import MagicMock from unittest.mock import MagicMock
from onedrive_personal_sdk.exceptions import AuthenticationError, OneDriveException from onedrive_personal_sdk.exceptions import AuthenticationError, OneDriveException
@ -9,6 +11,7 @@ from homeassistant.config_entries import ConfigEntryState
from homeassistant.core import HomeAssistant from homeassistant.core import HomeAssistant
from . import setup_integration from . import setup_integration
from .const import BACKUP_METADATA, MOCK_BACKUP_FILE
from tests.common import MockConfigEntry from tests.common import MockConfigEntry
@ -17,6 +20,7 @@ async def test_load_unload_config_entry(
hass: HomeAssistant, hass: HomeAssistant,
mock_config_entry: MockConfigEntry, mock_config_entry: MockConfigEntry,
mock_onedrive_client_init: MagicMock, mock_onedrive_client_init: MagicMock,
mock_onedrive_client: MagicMock,
) -> None: ) -> None:
"""Test loading and unloading the integration.""" """Test loading and unloading the integration."""
await setup_integration(hass, mock_config_entry) await setup_integration(hass, mock_config_entry)
@ -25,6 +29,10 @@ async def test_load_unload_config_entry(
token_callback = mock_onedrive_client_init.call_args[0][0] token_callback = mock_onedrive_client_init.call_args[0][0]
assert await token_callback() == "mock-access-token" assert await token_callback() == "mock-access-token"
# make sure metadata migration is not called
assert mock_onedrive_client.upload_file.call_count == 0
assert mock_onedrive_client.update_drive_item.call_count == 0
assert mock_config_entry.state is ConfigEntryState.LOADED assert mock_config_entry.state is ConfigEntryState.LOADED
await hass.config_entries.async_unload(mock_config_entry.entry_id) await hass.config_entries.async_unload(mock_config_entry.entry_id)
@ -64,3 +72,32 @@ async def test_get_integration_folder_error(
await setup_integration(hass, mock_config_entry) await setup_integration(hass, mock_config_entry)
assert mock_config_entry.state is ConfigEntryState.SETUP_RETRY assert mock_config_entry.state is ConfigEntryState.SETUP_RETRY
assert "Failed to get backups_9f86d081 folder" in caplog.text assert "Failed to get backups_9f86d081 folder" in caplog.text
async def test_migrate_metadata_files(
    hass: HomeAssistant,
    mock_config_entry: MockConfigEntry,
    mock_onedrive_client: MagicMock,
) -> None:
    """Test migration of metadata files."""
    # MOCK_BACKUP_FILE is a module-level object shared with other tests, so
    # remember its description and restore it afterwards. Otherwise the
    # version 1 metadata leaks into whichever test runs next.
    original_description = MOCK_BACKUP_FILE.description
    MOCK_BACKUP_FILE.description = escape(
        dumps({**BACKUP_METADATA, "metadata_version": 1})
    )
    try:
        await setup_integration(hass, mock_config_entry)
        await hass.async_block_till_done()
    finally:
        MOCK_BACKUP_FILE.description = original_description

    # One metadata file is uploaded for the single version 1 backup file.
    mock_onedrive_client.upload_file.assert_called_once()
    # Two updates: the new metadata file's description, then the old file's.
    assert mock_onedrive_client.update_drive_item.call_count == 2
    # The last update clears the description of the original backup file.
    assert mock_onedrive_client.update_drive_item.call_args[1]["data"].description == ""
async def test_migrate_metadata_files_errors(
    hass: HomeAssistant,
    mock_config_entry: MockConfigEntry,
    mock_onedrive_client: MagicMock,
) -> None:
    """Test that a failing metadata migration leaves the entry in setup retry."""
    # Make listing the drive items raise a OneDrive error.
    list_items = mock_onedrive_client.list_drive_items
    list_items.side_effect = OneDriveException()

    await setup_integration(hass, mock_config_entry)

    expected_state = ConfigEntryState.SETUP_RETRY
    assert mock_config_entry.state is expected_state