core/homeassistant/util/json.py

179 lines
6.1 KiB
Python
Raw Normal View History

"""JSON utility functions."""
2021-03-17 20:46:07 +00:00
from __future__ import annotations
from collections.abc import Callable
import json
import logging
from os import PathLike
from typing import Any
Initial orjson support take 3 (#73849) * Initial orjson support take 2 Still need to work out problem building wheels -- Redux of #72754 / #32153 Now possible since the following is solved: ijl/orjson#220 (comment) This implements orjson where we use our default encoder. This does not implement orjson where `ExtendedJSONEncoder` is used as these areas tend to be called far less frequently. If its desired, this could be done in a followup, but it seemed like a case of diminishing returns (except maybe for large diagnostics files, or traces, but those are not expected to be downloaded frequently). Areas where this makes a perceptible difference: - Anything that subscribes to entities (Initial subscribe_entities payload) - Initial download of registries on first connection / restore - History queries - Saving states to the database - Large logbook queries - Anything that subscribes to events (appdaemon) Cavets: orjson supports serializing dataclasses natively (and much faster) which eliminates the need to implement `as_dict` in many places when the data is already in a dataclass. This works well as long as all the data in the dataclass can also be serialized. I audited all places where we have an `as_dict` for a dataclass and found only backups needs to be adjusted (support for `Path` needed to be added for backups). I was a little bit worried about `SensorExtraStoredData` with `Decimal` but it all seems to work out from since it converts it before it gets to the json encoding cc @dgomes If it turns out to be a problem we can disable this with option |= [orjson.OPT_PASSTHROUGH_DATACLASS](https://github.com/ijl/orjson#opt_passthrough_dataclass) and it will fallback to `as_dict` Its quite impressive for history queries <img width="1271" alt="Screen_Shot_2022-05-30_at_23_46_30" src="https://user-images.githubusercontent.com/663432/171145699-661ad9db-d91d-4b2d-9c1a-9d7866c03a73.png"> * use for views as well * handle UnicodeEncodeError * tweak * DRY * DRY * not needed * fix tests * Update tests/components/http/test_view.py * Update tests/components/http/test_view.py * black * templates
2022-06-22 19:59:51 +00:00
import orjson
from homeassistant.exceptions import HomeAssistantError
from .file import WriteError # pylint: disable=unused-import # noqa: F401
2021-11-11 06:19:56 +00:00
_SENTINEL = object()
_LOGGER = logging.getLogger(__name__)
JsonValueType = (
dict[str, "JsonValueType"] | list["JsonValueType"] | str | int | float | bool | None
)
"""Any data that can be returned by the standard JSON deserializing process."""
JsonArrayType = list[JsonValueType]
"""List that can be returned by the standard JSON deserializing process."""
JsonObjectType = dict[str, JsonValueType]
"""Dictionary that can be returned by the standard JSON deserializing process."""
JSON_ENCODE_EXCEPTIONS = (TypeError, ValueError)
JSON_DECODE_EXCEPTIONS = (orjson.JSONDecodeError,)
class SerializationError(HomeAssistantError):
"""Error serializing the data to JSON."""
json_loads: Callable[[bytes | bytearray | memoryview | str], JsonValueType]
json_loads = orjson.loads
"""Parse JSON data."""
def json_loads_array(__obj: bytes | bytearray | memoryview | str) -> JsonArrayType:
"""Parse JSON data and ensure result is a list."""
value: JsonValueType = json_loads(__obj)
# Avoid isinstance overhead as we are not interested in list subclasses
if type(value) is list: # pylint: disable=unidiomatic-typecheck
return value
raise ValueError(f"Expected JSON to be parsed as a list got {type(value)}")
def json_loads_object(__obj: bytes | bytearray | memoryview | str) -> JsonObjectType:
"""Parse JSON data and ensure result is a dictionary."""
value: JsonValueType = json_loads(__obj)
# Avoid isinstance overhead as we are not interested in dict subclasses
if type(value) is dict: # pylint: disable=unidiomatic-typecheck
return value
raise ValueError(f"Expected JSON to be parsed as a dict got {type(value)}")
def load_json(
filename: str | PathLike, default: JsonValueType = _SENTINEL # type: ignore[assignment]
) -> JsonValueType:
"""Load JSON data from a file.
Defaults to returning empty dict if file is not found.
"""
try:
2019-07-31 19:25:30 +00:00
with open(filename, encoding="utf-8") as fdesc:
Initial orjson support take 3 (#73849) * Initial orjson support take 2 Still need to work out problem building wheels -- Redux of #72754 / #32153 Now possible since the following is solved: ijl/orjson#220 (comment) This implements orjson where we use our default encoder. This does not implement orjson where `ExtendedJSONEncoder` is used as these areas tend to be called far less frequently. If its desired, this could be done in a followup, but it seemed like a case of diminishing returns (except maybe for large diagnostics files, or traces, but those are not expected to be downloaded frequently). Areas where this makes a perceptible difference: - Anything that subscribes to entities (Initial subscribe_entities payload) - Initial download of registries on first connection / restore - History queries - Saving states to the database - Large logbook queries - Anything that subscribes to events (appdaemon) Cavets: orjson supports serializing dataclasses natively (and much faster) which eliminates the need to implement `as_dict` in many places when the data is already in a dataclass. This works well as long as all the data in the dataclass can also be serialized. I audited all places where we have an `as_dict` for a dataclass and found only backups needs to be adjusted (support for `Path` needed to be added for backups). I was a little bit worried about `SensorExtraStoredData` with `Decimal` but it all seems to work out from since it converts it before it gets to the json encoding cc @dgomes If it turns out to be a problem we can disable this with option |= [orjson.OPT_PASSTHROUGH_DATACLASS](https://github.com/ijl/orjson#opt_passthrough_dataclass) and it will fallback to `as_dict` Its quite impressive for history queries <img width="1271" alt="Screen_Shot_2022-05-30_at_23_46_30" src="https://user-images.githubusercontent.com/663432/171145699-661ad9db-d91d-4b2d-9c1a-9d7866c03a73.png"> * use for views as well * handle UnicodeEncodeError * tweak * DRY * DRY * not needed * fix tests * Update tests/components/http/test_view.py * Update tests/components/http/test_view.py * black * templates
2022-06-22 19:59:51 +00:00
return orjson.loads(fdesc.read()) # type: ignore[no-any-return]
except FileNotFoundError:
# This is not a fatal error
2019-07-31 19:25:30 +00:00
_LOGGER.debug("JSON file not found: %s", filename)
except ValueError as error:
2019-07-31 19:25:30 +00:00
_LOGGER.exception("Could not parse JSON content: %s", filename)
raise HomeAssistantError(error) from error
except OSError as error:
2019-07-31 19:25:30 +00:00
_LOGGER.exception("JSON file reading failed: %s", filename)
raise HomeAssistantError(error) from error
return {} if default is _SENTINEL else default
def load_json_array(
filename: str | PathLike, default: JsonArrayType = _SENTINEL # type: ignore[assignment]
) -> JsonArrayType:
"""Load JSON data from a file and return as list.
Defaults to returning empty list if file is not found.
"""
if default is _SENTINEL:
default = []
value: JsonValueType = load_json(filename, default=default)
# Avoid isinstance overhead as we are not interested in list subclasses
if type(value) is list: # pylint: disable=unidiomatic-typecheck
return value
_LOGGER.exception(
"Expected JSON to be parsed as a list got %s in: %s", {type(value)}, filename
)
raise HomeAssistantError(f"Expected JSON to be parsed as a list got {type(value)}")
def load_json_object(
filename: str | PathLike, default: JsonObjectType = _SENTINEL # type: ignore[assignment]
) -> JsonObjectType:
"""Load JSON data from a file and return as dict.
Defaults to returning empty dict if file is not found.
"""
if default is _SENTINEL:
default = {}
value: JsonValueType = load_json(filename, default=default)
# Avoid isinstance overhead as we are not interested in dict subclasses
if type(value) is dict: # pylint: disable=unidiomatic-typecheck
return value
_LOGGER.exception(
"Expected JSON to be parsed as a dict got %s in: %s", {type(value)}, filename
)
raise HomeAssistantError(f"Expected JSON to be parsed as a dict got {type(value)}")
2019-07-31 19:25:30 +00:00
def save_json(
filename: str,
2021-03-17 20:46:07 +00:00
data: list | dict,
2019-07-31 19:25:30 +00:00
private: bool = False,
*,
2021-03-17 20:46:07 +00:00
encoder: type[json.JSONEncoder] | None = None,
atomic_writes: bool = False,
2019-07-31 19:25:30 +00:00
) -> None:
"""Save JSON data to a file."""
# pylint: disable-next=import-outside-toplevel
from homeassistant.helpers.frame import report
report(
(
"uses save_json from homeassistant.util.json module."
" This is deprecated and will stop working in Home Assistant 2022.4, it"
" should be updated to use homeassistant.helpers.json module instead"
),
error_if_core=False,
)
# pylint: disable-next=import-outside-toplevel
import homeassistant.helpers.json as json_helper
json_helper.save_json(
filename, data, private, encoder=encoder, atomic_writes=atomic_writes
)
2021-03-17 20:46:07 +00:00
def format_unserializable_data(data: dict[str, Any]) -> str:
"""Format output of find_paths in a friendly way.
Format is comma separated: <path>=<value>(<type>)
"""
return ", ".join(f"{path}={value}({type(value)}" for path, value in data.items())
def find_paths_unserializable_data(
bad_data: Any, *, dump: Callable[[Any], str] = json.dumps
2021-03-17 20:46:07 +00:00
) -> dict[str, Any]:
"""Find the paths to unserializable data.
This method is slow! Only use for error handling.
"""
# pylint: disable-next=import-outside-toplevel
from homeassistant.helpers.frame import report
report(
(
"uses find_paths_unserializable_data from homeassistant.util.json module."
" This is deprecated and will stop working in Home Assistant 2022.4, it"
" should be updated to use homeassistant.helpers.json module instead"
),
error_if_core=False,
)
# pylint: disable-next=import-outside-toplevel
import homeassistant.helpers.json as json_helper
return json_helper.find_paths_unserializable_data(bad_data, dump=dump)