core/homeassistant/util/json.py

152 lines
4.8 KiB
Python
Raw Normal View History

"""JSON utility functions."""
2021-03-17 20:46:07 +00:00
from __future__ import annotations
from collections import deque
from collections.abc import Callable
import json
import logging
from typing import Any
Initial orjson support take 3 (#73849) * Initial orjson support take 2 Still need to work out problem building wheels -- Redux of #72754 / #32153 Now possible since the following is solved: ijl/orjson#220 (comment) This implements orjson where we use our default encoder. This does not implement orjson where `ExtendedJSONEncoder` is used as these areas tend to be called far less frequently. If its desired, this could be done in a followup, but it seemed like a case of diminishing returns (except maybe for large diagnostics files, or traces, but those are not expected to be downloaded frequently). Areas where this makes a perceptible difference: - Anything that subscribes to entities (Initial subscribe_entities payload) - Initial download of registries on first connection / restore - History queries - Saving states to the database - Large logbook queries - Anything that subscribes to events (appdaemon) Cavets: orjson supports serializing dataclasses natively (and much faster) which eliminates the need to implement `as_dict` in many places when the data is already in a dataclass. This works well as long as all the data in the dataclass can also be serialized. I audited all places where we have an `as_dict` for a dataclass and found only backups needs to be adjusted (support for `Path` needed to be added for backups). I was a little bit worried about `SensorExtraStoredData` with `Decimal` but it all seems to work out from since it converts it before it gets to the json encoding cc @dgomes If it turns out to be a problem we can disable this with option |= [orjson.OPT_PASSTHROUGH_DATACLASS](https://github.com/ijl/orjson#opt_passthrough_dataclass) and it will fallback to `as_dict` Its quite impressive for history queries <img width="1271" alt="Screen_Shot_2022-05-30_at_23_46_30" src="https://user-images.githubusercontent.com/663432/171145699-661ad9db-d91d-4b2d-9c1a-9d7866c03a73.png"> * use for views as well * handle UnicodeEncodeError * tweak * DRY * DRY * not needed * fix tests * Update tests/components/http/test_view.py * Update tests/components/http/test_view.py * black * templates
2022-06-22 19:59:51 +00:00
import orjson
from homeassistant.core import Event, State
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers.json import (
JSONEncoder as DefaultHASSJSONEncoder,
json_encoder_default as default_hass_orjson_encoder,
)
from .file import write_utf8_file, write_utf8_file_atomic
2021-11-11 06:19:56 +00:00
_LOGGER = logging.getLogger(__name__)
class SerializationError(HomeAssistantError):
"""Error serializing the data to JSON."""
class WriteError(HomeAssistantError):
"""Error writing the data."""
2021-03-17 20:46:07 +00:00
def load_json(filename: str, default: list | dict | None = None) -> list | dict:
"""Load JSON data from a file and return as dict or list.
Defaults to returning empty dict if file is not found.
"""
try:
2019-07-31 19:25:30 +00:00
with open(filename, encoding="utf-8") as fdesc:
Initial orjson support take 3 (#73849) * Initial orjson support take 2 Still need to work out problem building wheels -- Redux of #72754 / #32153 Now possible since the following is solved: ijl/orjson#220 (comment) This implements orjson where we use our default encoder. This does not implement orjson where `ExtendedJSONEncoder` is used as these areas tend to be called far less frequently. If its desired, this could be done in a followup, but it seemed like a case of diminishing returns (except maybe for large diagnostics files, or traces, but those are not expected to be downloaded frequently). Areas where this makes a perceptible difference: - Anything that subscribes to entities (Initial subscribe_entities payload) - Initial download of registries on first connection / restore - History queries - Saving states to the database - Large logbook queries - Anything that subscribes to events (appdaemon) Cavets: orjson supports serializing dataclasses natively (and much faster) which eliminates the need to implement `as_dict` in many places when the data is already in a dataclass. This works well as long as all the data in the dataclass can also be serialized. I audited all places where we have an `as_dict` for a dataclass and found only backups needs to be adjusted (support for `Path` needed to be added for backups). I was a little bit worried about `SensorExtraStoredData` with `Decimal` but it all seems to work out from since it converts it before it gets to the json encoding cc @dgomes If it turns out to be a problem we can disable this with option |= [orjson.OPT_PASSTHROUGH_DATACLASS](https://github.com/ijl/orjson#opt_passthrough_dataclass) and it will fallback to `as_dict` Its quite impressive for history queries <img width="1271" alt="Screen_Shot_2022-05-30_at_23_46_30" src="https://user-images.githubusercontent.com/663432/171145699-661ad9db-d91d-4b2d-9c1a-9d7866c03a73.png"> * use for views as well * handle UnicodeEncodeError * tweak * DRY * DRY * not needed * fix tests * Update tests/components/http/test_view.py * Update tests/components/http/test_view.py * black * templates
2022-06-22 19:59:51 +00:00
return orjson.loads(fdesc.read()) # type: ignore[no-any-return]
except FileNotFoundError:
# This is not a fatal error
2019-07-31 19:25:30 +00:00
_LOGGER.debug("JSON file not found: %s", filename)
except ValueError as error:
2019-07-31 19:25:30 +00:00
_LOGGER.exception("Could not parse JSON content: %s", filename)
raise HomeAssistantError(error) from error
except OSError as error:
2019-07-31 19:25:30 +00:00
_LOGGER.exception("JSON file reading failed: %s", filename)
raise HomeAssistantError(error) from error
return {} if default is None else default
def _orjson_default_encoder(data: Any) -> str:
"""JSON encoder that uses orjson with hass defaults."""
return orjson.dumps(
data,
option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS,
default=default_hass_orjson_encoder,
).decode("utf-8")
2019-07-31 19:25:30 +00:00
def save_json(
filename: str,
2021-03-17 20:46:07 +00:00
data: list | dict,
2019-07-31 19:25:30 +00:00
private: bool = False,
*,
2021-03-17 20:46:07 +00:00
encoder: type[json.JSONEncoder] | None = None,
atomic_writes: bool = False,
2019-07-31 19:25:30 +00:00
) -> None:
"""Save JSON data to a file.
Returns True on success.
"""
dump: Callable[[Any], Any]
try:
# For backwards compatibility, if they pass in the
# default json encoder we use _orjson_default_encoder
# which is the orjson equivalent to the default encoder.
if encoder and encoder is not DefaultHASSJSONEncoder:
# If they pass a custom encoder that is not the
# DefaultHASSJSONEncoder, we use the slow path of json.dumps
dump = json.dumps
json_data = json.dumps(data, indent=2, cls=encoder)
Initial orjson support take 3 (#73849) * Initial orjson support take 2 Still need to work out problem building wheels -- Redux of #72754 / #32153 Now possible since the following is solved: ijl/orjson#220 (comment) This implements orjson where we use our default encoder. This does not implement orjson where `ExtendedJSONEncoder` is used as these areas tend to be called far less frequently. If its desired, this could be done in a followup, but it seemed like a case of diminishing returns (except maybe for large diagnostics files, or traces, but those are not expected to be downloaded frequently). Areas where this makes a perceptible difference: - Anything that subscribes to entities (Initial subscribe_entities payload) - Initial download of registries on first connection / restore - History queries - Saving states to the database - Large logbook queries - Anything that subscribes to events (appdaemon) Cavets: orjson supports serializing dataclasses natively (and much faster) which eliminates the need to implement `as_dict` in many places when the data is already in a dataclass. This works well as long as all the data in the dataclass can also be serialized. I audited all places where we have an `as_dict` for a dataclass and found only backups needs to be adjusted (support for `Path` needed to be added for backups). I was a little bit worried about `SensorExtraStoredData` with `Decimal` but it all seems to work out from since it converts it before it gets to the json encoding cc @dgomes If it turns out to be a problem we can disable this with option |= [orjson.OPT_PASSTHROUGH_DATACLASS](https://github.com/ijl/orjson#opt_passthrough_dataclass) and it will fallback to `as_dict` Its quite impressive for history queries <img width="1271" alt="Screen_Shot_2022-05-30_at_23_46_30" src="https://user-images.githubusercontent.com/663432/171145699-661ad9db-d91d-4b2d-9c1a-9d7866c03a73.png"> * use for views as well * handle UnicodeEncodeError * tweak * DRY * DRY * not needed * fix tests * Update tests/components/http/test_view.py * Update tests/components/http/test_view.py * black * templates
2022-06-22 19:59:51 +00:00
else:
dump = _orjson_default_encoder
json_data = _orjson_default_encoder(data)
except TypeError as error:
msg = f"Failed to serialize to JSON: {filename}. Bad data at {format_unserializable_data(find_paths_unserializable_data(data, dump=dump))}"
_LOGGER.error(msg)
raise SerializationError(msg) from error
if atomic_writes:
write_utf8_file_atomic(filename, json_data, private)
else:
write_utf8_file(filename, json_data, private)
2021-03-17 20:46:07 +00:00
def format_unserializable_data(data: dict[str, Any]) -> str:
"""Format output of find_paths in a friendly way.
Format is comma separated: <path>=<value>(<type>)
"""
return ", ".join(f"{path}={value}({type(value)}" for path, value in data.items())
def find_paths_unserializable_data(
bad_data: Any, *, dump: Callable[[Any], str] = json.dumps
2021-03-17 20:46:07 +00:00
) -> dict[str, Any]:
"""Find the paths to unserializable data.
This method is slow! Only use for error handling.
"""
to_process = deque([(bad_data, "$")])
invalid = {}
while to_process:
obj, obj_path = to_process.popleft()
try:
dump(obj)
2020-02-19 00:06:13 +00:00
continue
except (ValueError, TypeError):
2020-02-19 00:06:13 +00:00
pass
# We convert objects with as_dict to their dict values so we can find bad data inside it
if hasattr(obj, "as_dict"):
desc = obj.__class__.__name__
if isinstance(obj, State):
desc += f": {obj.entity_id}"
elif isinstance(obj, Event):
desc += f": {obj.event_type}"
obj_path += f"({desc})"
obj = obj.as_dict()
if isinstance(obj, dict):
for key, value in obj.items():
try:
# Is key valid?
dump({key: None})
except TypeError:
invalid[f"{obj_path}<key: {key}>"] = key
else:
# Process value
to_process.append((value, f"{obj_path}.{key}"))
elif isinstance(obj, list):
for idx, value in enumerate(obj):
to_process.append((value, f"{obj_path}[{idx}]"))
2020-02-19 00:06:13 +00:00
else:
invalid[obj_path] = obj
return invalid