core/homeassistant/util/json.py

132 lines
4.8 KiB
Python
Raw Normal View History

"""JSON utility functions."""
2021-03-17 20:46:07 +00:00
from __future__ import annotations
import logging
from os import PathLike
from typing import Any
Initial orjson support take 3 (#73849) * Initial orjson support take 2 Still need to work out problem building wheels -- Redux of #72754 / #32153 Now possible since the following is solved: ijl/orjson#220 (comment) This implements orjson where we use our default encoder. This does not implement orjson where `ExtendedJSONEncoder` is used as these areas tend to be called far less frequently. If it's desired, this could be done in a followup, but it seemed like a case of diminishing returns (except maybe for large diagnostics files, or traces, but those are not expected to be downloaded frequently). Areas where this makes a perceptible difference: - Anything that subscribes to entities (Initial subscribe_entities payload) - Initial download of registries on first connection / restore - History queries - Saving states to the database - Large logbook queries - Anything that subscribes to events (appdaemon) Caveats: orjson supports serializing dataclasses natively (and much faster) which eliminates the need to implement `as_dict` in many places when the data is already in a dataclass. This works well as long as all the data in the dataclass can also be serialized. I audited all places where we have an `as_dict` for a dataclass and found that only backups need to be adjusted (support for `Path` needed to be added for backups). 
I was a little bit worried about `SensorExtraStoredData` with `Decimal` but it all seems to work out since it converts it before it gets to the json encoding cc @dgomes If it turns out to be a problem we can disable this with option |= [orjson.OPT_PASSTHROUGH_DATACLASS](https://github.com/ijl/orjson#opt_passthrough_dataclass) and it will fall back to `as_dict` It's quite impressive for history queries <img width="1271" alt="Screen_Shot_2022-05-30_at_23_46_30" src="https://user-images.githubusercontent.com/663432/171145699-661ad9db-d91d-4b2d-9c1a-9d7866c03a73.png"> * use for views as well * handle UnicodeEncodeError * tweak * DRY * DRY * not needed * fix tests * Update tests/components/http/test_view.py * Update tests/components/http/test_view.py * black * templates
2022-06-22 19:59:51 +00:00
import orjson
from homeassistant.exceptions import HomeAssistantError
# Unique marker used as the default value of the ``default`` parameter of the
# load_json* helpers, so an omitted argument can be told apart from any value
# a caller passes explicitly (including None).
_SENTINEL = object()
# Module-level logger named after this module.
_LOGGER = logging.getLogger(__name__)
2024-06-02 13:28:24 +00:00
# Recursive alias: the dict and list members refer back to JsonValueType.
type JsonValueType = (
    dict[str, JsonValueType] | list[JsonValueType] | str | int | float | bool | None
)
"""Any data that can be returned by the standard JSON deserializing process."""
2024-06-02 13:28:24 +00:00
# Return annotation of json_loads_array and load_json_array.
type JsonArrayType = list[JsonValueType]
"""List that can be returned by the standard JSON deserializing process."""
2024-06-02 13:28:24 +00:00
# Return annotation of json_loads_object and load_json_object.
type JsonObjectType = dict[str, JsonValueType]
"""Dictionary that can be returned by the standard JSON deserializing process."""
# Exception types grouped as tuples so they can be used directly in an
# ``except`` clause (load_json does this with JSON_DECODE_EXCEPTIONS).
# NOTE(review): JSON_ENCODE_EXCEPTIONS is not used in this section of the
# file; presumably it lists the errors raised when encoding fails -- confirm
# against the callers.
JSON_ENCODE_EXCEPTIONS = (TypeError, ValueError)
JSON_DECODE_EXCEPTIONS = (orjson.JSONDecodeError,)
class SerializationError(HomeAssistantError):
    """Raised when data cannot be serialized to JSON."""
2024-11-22 15:53:26 +00:00
def json_loads(obj: bytes | bytearray | memoryview | str, /) -> JsonValueType:
    """Deserialize a JSON document with orjson.

    orjson does not handle subclasses of str
    (https://github.com/ijl/orjson/issues/445), so such inputs are converted
    to a plain str before being parsed.
    """
    # The exact-type lookup is evaluated first so that the common case never
    # pays for the isinstance call.
    if type(obj) in (bytes, bytearray, memoryview, str) or not isinstance(obj, str):
        return orjson.loads(obj)  # type:ignore[no-any-return]
    # Only str subclasses reach this point.
    return orjson.loads(str(obj))  # type:ignore[no-any-return]
2024-11-22 15:53:26 +00:00
def json_loads_array(obj: bytes | bytearray | memoryview | str, /) -> JsonArrayType:
    """Deserialize JSON data, requiring the top-level value to be a list.

    Raises ValueError when the parsed value is anything other than a list.
    """
    parsed: JsonValueType = json_loads(obj)
    # Exact type comparison: list subclasses are of no interest here, and it
    # avoids the isinstance overhead.
    if type(parsed) is not list:  # noqa: E721
        raise ValueError(f"Expected JSON to be parsed as a list got {type(parsed)}")
    return parsed
2024-11-22 15:53:26 +00:00
def json_loads_object(obj: bytes | bytearray | memoryview | str, /) -> JsonObjectType:
    """Deserialize JSON data, requiring the top-level value to be a dictionary.

    Raises ValueError when the parsed value is anything other than a dict.
    """
    parsed: JsonValueType = json_loads(obj)
    # Exact type comparison: dict subclasses are of no interest here, and it
    # avoids the isinstance overhead.
    if type(parsed) is not dict:  # noqa: E721
        raise ValueError(f"Expected JSON to be parsed as a dict got {type(parsed)}")
    return parsed
def load_json(
    filename: str | PathLike[str],
    default: JsonValueType = _SENTINEL,  # type: ignore[assignment]
) -> JsonValueType:
    """Read a file and parse its contents as JSON.

    When the file does not exist, ``default`` is returned; if no default was
    supplied, an empty dict is returned instead.

    Raises HomeAssistantError when the content cannot be parsed or the file
    cannot be read.
    """
    try:
        with open(filename, mode="rb") as json_file:
            raw = json_file.read()
            return orjson.loads(raw)  # type: ignore[no-any-return]
    except FileNotFoundError:
        # A missing file is not fatal; fall through to the default below.
        _LOGGER.debug("JSON file not found: %s", filename)
    except JSON_DECODE_EXCEPTIONS as err:
        _LOGGER.exception("Could not parse JSON content: %s", filename)
        raise HomeAssistantError(f"Error while loading {filename}: {err}") from err
    except OSError as err:
        _LOGGER.exception("JSON file reading failed: %s", filename)
        raise HomeAssistantError(f"Error while loading {filename}: {err}") from err

    if default is _SENTINEL:
        return {}
    return default
def load_json_array(
    filename: str | PathLike[str],
    default: JsonArrayType = _SENTINEL,  # type: ignore[assignment]
) -> JsonArrayType:
    """Load JSON data from a file and return as list.

    Defaults to returning empty list if file is not found.

    Raises HomeAssistantError if the loaded value is not a list. Errors
    raised by load_json propagate unchanged.
    """
    if default is _SENTINEL:
        # A fresh list per call, so callers never share a mutable default.
        default = []
    value: JsonValueType = load_json(filename, default=default)
    # Avoid isinstance overhead as we are not interested in list subclasses
    if type(value) is list:  # noqa: E721
        return value
    # No exception is being handled at this point, so log with error():
    # exception() would append a meaningless "NoneType: None" traceback.
    # The type is passed directly rather than wrapped in a set literal.
    _LOGGER.error(
        "Expected JSON to be parsed as a list got %s in: %s", type(value), filename
    )
    raise HomeAssistantError(f"Expected JSON to be parsed as a list got {type(value)}")
def load_json_object(
    filename: str | PathLike[str],
    default: JsonObjectType = _SENTINEL,  # type: ignore[assignment]
) -> JsonObjectType:
    """Load JSON data from a file and return as dict.

    Defaults to returning empty dict if file is not found.

    Raises HomeAssistantError if the loaded value is not a dict. Errors
    raised by load_json propagate unchanged.
    """
    if default is _SENTINEL:
        # A fresh dict per call, so callers never share a mutable default.
        default = {}
    value: JsonValueType = load_json(filename, default=default)
    # Avoid isinstance overhead as we are not interested in dict subclasses
    if type(value) is dict:  # noqa: E721
        return value
    # No exception is being handled at this point, so log with error():
    # exception() would append a meaningless "NoneType: None" traceback.
    # The type is passed directly rather than wrapped in a set literal.
    _LOGGER.error(
        "Expected JSON to be parsed as a dict got %s in: %s", type(value), filename
    )
    raise HomeAssistantError(f"Expected JSON to be parsed as a dict got {type(value)}")
2021-03-17 20:46:07 +00:00
def format_unserializable_data(data: dict[str, Any]) -> str:
    """Format output of find_paths in a friendly way.

    Format is comma separated: <path>=<value>(<type>)

    Each key of ``data`` is used as the path and each value is rendered
    together with its type. An empty mapping yields an empty string.
    """
    # The closing parenthesis after the type was previously missing, leaving
    # every entry unbalanced ("path=value(<type>").
    return ", ".join(f"{path}={value}({type(value)})" for path, value in data.items())