diff --git a/homeassistant/components/homeassistant/strings.json b/homeassistant/components/homeassistant/strings.json index 791b1a21929..5404ee4af64 100644 --- a/homeassistant/components/homeassistant/strings.json +++ b/homeassistant/components/homeassistant/strings.json @@ -27,6 +27,17 @@ "no_platform_setup": { "title": "Unused YAML configuration for the {platform} integration", "description": "It's not possible to configure {platform} {domain} by adding `{platform_key}` to the {domain} configuration. Please check the documentation for more information on how to set up this integration.\n\nTo resolve this:\n1. Remove `{platform_key}` occurences from the `{domain}:` configuration in your YAML configuration file.\n2. Restart Home Assistant.\n\nExample that should be removed:\n{yaml_example}\n" + }, + "storage_corruption": { + "title": "Storage corruption detected for `{storage_key}`", + "fix_flow": { + "step": { + "confirm": { + "title": "[%key:component::homeassistant::issues::storage_corruption::title%]", + "description": "The `{storage_key}` storage could not be parsed and has been renamed to `{corrupt_path}` to allow Home Assistant to continue.\n\nA default `{storage_key}` may have been created automatically.\n\nIf you made manual edits to the storage file, fix any syntax errors in `{corrupt_path}`, restore the file to the original path `{original_path}`, and restart Home Assistant. Otherwise, restore the system from a backup.\n\nClick SUBMIT below to confirm you have repaired the file or restored from a backup.\n\nThe exact error was: {error}" + } + } + } } }, "system_health": { diff --git a/homeassistant/helpers/storage.py b/homeassistant/helpers/storage.py index 128a36e3e14..dd394c84f91 100644 --- a/homeassistant/helpers/storage.py +++ b/homeassistant/helpers/storage.py @@ -6,15 +6,24 @@ from collections.abc import Callable, Mapping, Sequence from contextlib import suppress from copy import deepcopy import inspect -from json import JSONEncoder +from json import JSONDecodeError, JSONEncoder import logging import os from typing import Any, Generic, TypeVar from homeassistant.const import EVENT_HOMEASSISTANT_FINAL_WRITE -from homeassistant.core import CALLBACK_TYPE, CoreState, Event, HomeAssistant, callback +from homeassistant.core import ( + CALLBACK_TYPE, + DOMAIN as HOMEASSISTANT_DOMAIN, + CoreState, + Event, + HomeAssistant, + callback, +) +from homeassistant.exceptions import HomeAssistantError from homeassistant.loader import MAX_LOAD_CONCURRENTLY, bind_hass from homeassistant.util import json as json_util +import homeassistant.util.dt as dt_util from homeassistant.util.file import WriteError from . import json as json_helper @@ -146,9 +155,63 @@ class Store(Generic[_T]): # and we don't want that to mess with what we're trying to store. data = deepcopy(data) else: - data = await self.hass.async_add_executor_job( - json_util.load_json, self.path - ) + try: + data = await self.hass.async_add_executor_job( + json_util.load_json, self.path + ) + except HomeAssistantError as err: + if isinstance(err.__cause__, JSONDecodeError): + # If we have a JSONDecodeError, it means the file is corrupt. + # We can't recover from this, so we'll log an error, rename the file and + # return None so that we can start with a clean slate which will + # allow startup to continue so they can restore from a backup. + isotime = dt_util.utcnow().isoformat() + corrupt_postfix = f".corrupt.{isotime}" + corrupt_path = f"{self.path}{corrupt_postfix}" + await self.hass.async_add_executor_job( + os.rename, self.path, corrupt_path + ) + storage_key = self.key + _LOGGER.error( + "Unrecoverable error decoding storage %s at %s; " + "This may indicate an unclean shutdown, invalid syntax " + "from manual edits, or disk corruption; " + "The corrupt file has been saved as %s; " + "It is recommended to restore from backup: %s", + storage_key, + self.path, + corrupt_path, + err, + ) + from .issue_registry import ( # pylint: disable=import-outside-toplevel + IssueSeverity, + async_create_issue, + ) + + issue_domain = HOMEASSISTANT_DOMAIN + if ( + domain := (storage_key.partition(".")[0]) + ) and domain in self.hass.config.components: + issue_domain = domain + + async_create_issue( + self.hass, + HOMEASSISTANT_DOMAIN, + f"storage_corruption_{storage_key}_{isotime}", + is_fixable=True, + issue_domain=issue_domain, + translation_key="storage_corruption", + is_persistent=True, + severity=IssueSeverity.CRITICAL, + translation_placeholders={ + "storage_key": storage_key, + "original_path": self.path, + "corrupt_path": corrupt_path, + "error": str(err), + }, + ) + return None + raise if data == {}: return None diff --git a/tests/helpers/test_storage.py b/tests/helpers/test_storage.py index 76dfbdbeb46..81953c7d785 100644 --- a/tests/helpers/test_storage.py +++ b/tests/helpers/test_storage.py @@ -2,6 +2,7 @@ import asyncio from datetime import timedelta import json +import os from typing import Any, NamedTuple from unittest.mock import Mock, patch @@ -12,8 +13,9 @@ from homeassistant.const import ( EVENT_HOMEASSISTANT_FINAL_WRITE, EVENT_HOMEASSISTANT_STOP, ) -from homeassistant.core import CoreState, HomeAssistant -from homeassistant.helpers import storage +from homeassistant.core import DOMAIN as HOMEASSISTANT_DOMAIN, CoreState, HomeAssistant +from homeassistant.exceptions import HomeAssistantError +from homeassistant.helpers import issue_registry as ir, storage from homeassistant.util import dt as dt_util from homeassistant.util.color import RGBColor @@ -548,3 +550,156 @@ async def test_saving_load_round_trip(tmpdir: py.path.local) -> None: } await hass.async_stop(force=True) + + +async def test_loading_corrupt_core_file( + tmpdir: py.path.local, caplog: pytest.LogCaptureFixture +) -> None: + """Test we handle unrecoverable corruption in a core file.""" + loop = asyncio.get_running_loop() + hass = await async_test_home_assistant(loop) + + tmp_storage = await hass.async_add_executor_job(tmpdir.mkdir, "temp_storage") + hass.config.config_dir = tmp_storage + + storage_key = "core.anything" + store = storage.Store( + hass, MOCK_VERSION_2, storage_key, minor_version=MOCK_MINOR_VERSION_1 + ) + await store.async_save({"hello": "world"}) + storage_path = os.path.join(tmp_storage, ".storage") + store_file = os.path.join(storage_path, store.key) + + data = await store.async_load() + assert data == {"hello": "world"} + + def _corrupt_store(): + with open(store_file, "w") as f: + f.write("corrupt") + + await hass.async_add_executor_job(_corrupt_store) + + data = await store.async_load() + assert data is None + assert "Unrecoverable error decoding storage" in caplog.text + + issue_registry = ir.async_get(hass) + found_issue = None + issue_entry = None + for (domain, issue), entry in issue_registry.issues.items(): + if domain == HOMEASSISTANT_DOMAIN and issue.startswith( + f"storage_corruption_{storage_key}_" + ): + found_issue = issue + issue_entry = entry + break + + assert found_issue is not None + assert issue_entry is not None + assert issue_entry.is_fixable is True + assert issue_entry.translation_placeholders["storage_key"] == storage_key + assert issue_entry.issue_domain == HOMEASSISTANT_DOMAIN + assert ( + issue_entry.translation_placeholders["error"] + == "unexpected character: line 1 column 1 (char 0)" + ) + + files = await hass.async_add_executor_job( + os.listdir, os.path.join(tmp_storage, ".storage") + ) + assert ".corrupt" in files[0] + + await hass.async_stop(force=True) + + +async def test_loading_corrupt_file_known_domain( + tmpdir: py.path.local, caplog: pytest.LogCaptureFixture +) -> None: + """Test we handle unrecoverable corruption for a known domain.""" + loop = asyncio.get_running_loop() + hass = await async_test_home_assistant(loop) + hass.config.components.add("testdomain") + storage_key = "testdomain.testkey" + + tmp_storage = await hass.async_add_executor_job(tmpdir.mkdir, "temp_storage") + hass.config.config_dir = tmp_storage + + store = storage.Store( + hass, MOCK_VERSION_2, storage_key, minor_version=MOCK_MINOR_VERSION_1 + ) + await store.async_save({"hello": "world"}) + storage_path = os.path.join(tmp_storage, ".storage") + store_file = os.path.join(storage_path, store.key) + + data = await store.async_load() + assert data == {"hello": "world"} + + def _corrupt_store(): + with open(store_file, "w") as f: + f.write('{"valid":"json"}..with..corrupt') + + await hass.async_add_executor_job(_corrupt_store) + + data = await store.async_load() + assert data is None + assert "Unrecoverable error decoding storage" in caplog.text + + issue_registry = ir.async_get(hass) + found_issue = None + issue_entry = None + for (domain, issue), entry in issue_registry.issues.items(): + if domain == HOMEASSISTANT_DOMAIN and issue.startswith( + f"storage_corruption_{storage_key}_" + ): + found_issue = issue + issue_entry = entry + break + + assert found_issue is not None + assert issue_entry is not None + assert issue_entry.is_fixable is True + assert issue_entry.translation_placeholders["storage_key"] == storage_key + assert issue_entry.issue_domain == "testdomain" + assert ( + issue_entry.translation_placeholders["error"] + == "unexpected content after document: line 1 column 17 (char 16)" + ) + + files = await hass.async_add_executor_job( + os.listdir, os.path.join(tmp_storage, ".storage") + ) + assert ".corrupt" in files[0] + + await hass.async_stop(force=True) + + +async def test_os_error_is_fatal(tmpdir: py.path.local) -> None: + """Test OSError during load is fatal.""" + loop = asyncio.get_running_loop() + hass = await async_test_home_assistant(loop) + + tmp_storage = await hass.async_add_executor_job(tmpdir.mkdir, "temp_storage") + hass.config.config_dir = tmp_storage + + store = storage.Store( + hass, MOCK_VERSION_2, MOCK_KEY, minor_version=MOCK_MINOR_VERSION_1 + ) + await store.async_save({"hello": "world"}) + + with pytest.raises(OSError), patch( + "homeassistant.helpers.storage.json_util.load_json", side_effect=OSError + ): + await store.async_load() + + base_os_error = OSError() + base_os_error.errno = 30 + home_assistant_error = HomeAssistantError() + home_assistant_error.__cause__ = base_os_error + + with pytest.raises(HomeAssistantError), patch( + "homeassistant.helpers.storage.json_util.load_json", + side_effect=home_assistant_error, + ): + await store.async_load() + + await hass.async_stop(force=True)