Optimize statistics generation (#56821)

* Optimize statistics generation

* pylint
Erik Montnemery 2021-09-30 17:14:36 +02:00 committed by GitHub
parent d5bda3ac14
commit 6af1a835e6
6 changed files with 101 additions and 64 deletions

View File

@@ -54,7 +54,7 @@ CONFIG_SCHEMA = vol.Schema(
 @deprecated_function("homeassistant.components.recorder.history.get_significant_states")
 def get_significant_states(hass, *args, **kwargs):
-    """Wrap _get_significant_states with an sql session."""
+    """Wrap get_significant_states_with_session with an sql session."""
     return history.get_significant_states(hass, *args, **kwargs)
@@ -268,18 +268,16 @@ class HistoryPeriodView(HomeAssistantView):
         timer_start = time.perf_counter()
 
         with session_scope(hass=hass) as session:
-            result = (
-                history._get_significant_states(  # pylint: disable=protected-access
-                    hass,
-                    session,
-                    start_time,
-                    end_time,
-                    entity_ids,
-                    self.filters,
-                    include_start_time_state,
-                    significant_changes_only,
-                    minimal_response,
-                )
-            )
+            result = history.get_significant_states_with_session(
+                hass,
+                session,
+                start_time,
+                end_time,
+                entity_ids,
+                self.filters,
+                include_start_time_state,
+                significant_changes_only,
+                minimal_response,
+            )
 
         result = list(result.values())

View File

@@ -61,12 +61,12 @@ def async_setup(hass):
 def get_significant_states(hass, *args, **kwargs):
-    """Wrap _get_significant_states with a sql session."""
+    """Wrap get_significant_states_with_session with an sql session."""
     with session_scope(hass=hass) as session:
-        return _get_significant_states(hass, session, *args, **kwargs)
+        return get_significant_states_with_session(hass, session, *args, **kwargs)
 
 
-def _get_significant_states(
+def get_significant_states_with_session(
     hass,
     session,
     start_time,
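
The change above splits the history query into a thin public wrapper and a session-aware worker. A minimal sketch of that pattern, with simplified names and a placeholder in place of the real SQLAlchemy session that recorder.util.session_scope yields:

from contextlib import contextmanager


@contextmanager
def session_scope():
    # Stand-in for homeassistant.components.recorder.util.session_scope,
    # which yields a real SQLAlchemy Session bound to the recorder database.
    session = {}
    yield session


def get_significant_states(hass, *args, **kwargs):
    # Public entry point: open a session, then delegate to the session-aware variant.
    with session_scope() as session:
        return get_significant_states_with_session(hass, session, *args, **kwargs)


def get_significant_states_with_session(hass, session, *args, **kwargs):
    # Session-aware variant for callers that already hold an open session;
    # the real function runs its history queries against `session`.
    return {}

Callers such as the history API view above and the statistics compiler below can then reuse the session they already hold instead of opening a new one per call.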

View File

@@ -24,7 +24,7 @@ from .models import (
     StatisticsShortTerm,
     process_timestamp,
 )
-from .statistics import _get_metadata, get_start_time
+from .statistics import get_metadata_with_session, get_start_time
 from .util import session_scope
 
 _LOGGER = logging.getLogger(__name__)
@@ -564,7 +564,7 @@ def _apply_update(instance, session, new_version, old_version):  # noqa: C901
             fake_start_time += timedelta(minutes=5)
 
         # Copy last hourly statistic to the newly created 5-minute statistics table
-        sum_statistics = _get_metadata(
+        sum_statistics = get_metadata_with_session(
             instance.hass, session, None, statistic_type="sum"
         )
         for metadata_id in sum_statistics:

View File

@@ -195,7 +195,7 @@ def _update_or_add_metadata(
     Updating metadata source is not possible.
     """
     statistic_id = new_metadata["statistic_id"]
-    old_metadata_dict = _get_metadata(hass, session, [statistic_id], None)
+    old_metadata_dict = get_metadata_with_session(hass, session, [statistic_id], None)
     if not old_metadata_dict:
         unit = new_metadata["unit_of_measurement"]
         has_mean = new_metadata["has_mean"]
@@ -210,7 +210,7 @@ def _update_or_add_metadata(
         )
         return meta.id  # type: ignore[no-any-return]
 
-    metadata_id, old_metadata = next(iter(old_metadata_dict.items()))
+    metadata_id, old_metadata = old_metadata_dict[statistic_id]
     if (
         old_metadata["has_mean"] != new_metadata["has_mean"]
         or old_metadata["has_sum"] != new_metadata["has_sum"]
@@ -361,13 +361,15 @@ def compile_statistics(instance: Recorder, start: datetime) -> bool:
     return True
 
 
-def _get_metadata(
+def get_metadata_with_session(
     hass: HomeAssistant,
     session: scoped_session,
-    statistic_ids: list[str] | None,
+    statistic_ids: Iterable[str] | None,
     statistic_type: Literal["mean"] | Literal["sum"] | None,
-) -> dict[int, StatisticMetaData]:
-    """Fetch meta data, returns a dict of StatisticMetaData indexed by statistic_id.
+) -> dict[str, tuple[int, StatisticMetaData]]:
+    """Fetch meta data.
+
+    Returns a dict of (metadata_id, StatisticMetaData) indexed by statistic_id.
 
     If statistic_ids is given, fetch metadata only for the listed statistics_ids.
     If statistic_type is given, fetch metadata only for statistic_ids supporting it.
@@ -403,24 +405,21 @@ def _get_metadata(
         metadata_ids = [metadata[0] for metadata in result]
 
     # Prepare the result dict
-    metadata: dict[int, StatisticMetaData] = {}
+    metadata: dict[str, tuple[int, StatisticMetaData]] = {}
     for _id in metadata_ids:
         meta = _meta(result, _id)
         if meta:
-            metadata[_id] = meta
+            metadata[meta["statistic_id"]] = (_id, meta)
 
     return metadata
 
 
 def get_metadata(
     hass: HomeAssistant,
-    statistic_id: str,
-) -> StatisticMetaData | None:
-    """Return metadata for a statistic_id."""
+    statistic_ids: Iterable[str],
+) -> dict[str, tuple[int, StatisticMetaData]]:
+    """Return metadata for statistic_ids."""
     with session_scope(hass=hass) as session:
-        metadata = _get_metadata(hass, session, [statistic_id], None)
-        if not metadata:
-            return None
-        return next(iter(metadata.values()))
+        return get_metadata_with_session(hass, session, statistic_ids, None)
 
 
 def _configured_unit(unit: str, units: UnitSystem) -> str:
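
Both get_metadata_with_session and get_metadata now return a mapping keyed by statistic_id, where each value is a (metadata_id, StatisticMetaData) tuple. A small illustration of the new shape, using made-up sample data rather than real recorder output:

metadata = {
    "sensor.energy": (
        1,
        {
            "statistic_id": "sensor.energy",
            "unit_of_measurement": "kWh",
            "has_mean": False,
            "has_sum": True,
        },
    ),
}

# Row id and metadata for a single statistic_id:
metadata_id, meta = metadata["sensor.energy"]

# Iterate over the metadata while ignoring the row ids:
units = {
    meta["statistic_id"]: meta["unit_of_measurement"]
    for _, meta in metadata.values()
}

This is why the callers below switch from next(iter(metadata.keys())) and plain metadata.values() to metadata[statistic_id][0] and for _, meta in metadata.values().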
@@ -469,9 +468,9 @@ def list_statistic_ids(
 
     # Query the database
     with session_scope(hass=hass) as session:
-        metadata = _get_metadata(hass, session, None, statistic_type)
+        metadata = get_metadata_with_session(hass, session, None, statistic_type)
 
-        for meta in metadata.values():
+        for _, meta in metadata.values():
             unit = meta["unit_of_measurement"]
             if unit is not None:
                 # Display unit according to user settings
@@ -480,7 +479,7 @@ def list_statistic_ids(
 
         statistic_ids = {
             meta["statistic_id"]: meta["unit_of_measurement"]
-            for meta in metadata.values()
+            for _, meta in metadata.values()
         }
 
     # Query all integrations with a registered recorder platform
@@ -548,13 +547,13 @@ def statistics_during_period(
     metadata = None
     with session_scope(hass=hass) as session:
         # Fetch metadata for the given (or all) statistic_ids
-        metadata = _get_metadata(hass, session, statistic_ids, None)
+        metadata = get_metadata_with_session(hass, session, statistic_ids, None)
         if not metadata:
             return {}
 
         metadata_ids = None
         if statistic_ids is not None:
-            metadata_ids = list(metadata.keys())
+            metadata_ids = [metadata_id for metadata_id, _ in metadata.values()]
 
         if period == "hour":
             bakery = STATISTICS_BAKERY
@@ -589,7 +588,7 @@ def get_last_statistics(
     statistic_ids = [statistic_id]
 
     with session_scope(hass=hass) as session:
         # Fetch metadata for the given statistic_id
-        metadata = _get_metadata(hass, session, statistic_ids, None)
+        metadata = get_metadata_with_session(hass, session, statistic_ids, None)
         if not metadata:
             return {}
@@ -598,7 +597,7 @@ def get_last_statistics(
         )
 
         baked_query += lambda q: q.filter_by(metadata_id=bindparam("metadata_id"))
-        metadata_id = next(iter(metadata.keys()))
+        metadata_id = metadata[statistic_id][0]
 
         baked_query += lambda q: q.order_by(
             StatisticsShortTerm.metadata_id, StatisticsShortTerm.start.desc()
@@ -629,7 +628,7 @@ def _sorted_statistics_to_dict(
     hass: HomeAssistant,
     stats: list,
     statistic_ids: list[str] | None,
-    metadata: dict[int, StatisticMetaData],
+    _metadata: dict[str, tuple[int, StatisticMetaData]],
     convert_units: bool,
     duration: timedelta,
 ) -> dict[str, list[dict]]:
@@ -646,6 +645,8 @@ def _sorted_statistics_to_dict(
     for stat_id in statistic_ids:
         result[stat_id] = []
 
+    metadata = dict(_metadata.values())
+
     # Append all statistic entries, and optionally do unit conversion
     for meta_id, group in groupby(stats, lambda stat: stat.metadata_id):  # type: ignore
         unit = metadata[meta_id]["unit_of_measurement"]
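
The added metadata = dict(_metadata.values()) line re-keys the statistic_id-indexed mapping by metadata_id, which is what the groupby over stat.metadata_id needs. A tiny worked example with made-up sample data:

_metadata = {
    "sensor.energy": (1, {"statistic_id": "sensor.energy", "unit_of_measurement": "kWh"}),
    "sensor.temp": (2, {"statistic_id": "sensor.temp", "unit_of_measurement": "°C"}),
}
metadata = dict(_metadata.values())
# -> {1: {"statistic_id": "sensor.energy", "unit_of_measurement": "kWh"},
#     2: {"statistic_id": "sensor.temp", "unit_of_measurement": "°C"}}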

View File

@@ -8,7 +8,14 @@ import itertools
 import logging
 import math
 
-from homeassistant.components.recorder import history, is_entity_recorded, statistics
+from sqlalchemy.orm.session import Session
+
+from homeassistant.components.recorder import (
+    history,
+    is_entity_recorded,
+    statistics,
+    util as recorder_util,
+)
 from homeassistant.components.recorder.models import (
     StatisticData,
     StatisticMetaData,
@@ -196,6 +203,8 @@ def _parse_float(state: str) -> float:
 
 def _normalize_states(
     hass: HomeAssistant,
+    session: Session,
+    old_metadatas: dict[str, tuple[int, StatisticMetaData]],
     entity_history: Iterable[State],
     device_class: str | None,
     entity_id: str,
@@ -221,10 +230,10 @@ def _normalize_states(
                 if entity_id not in hass.data[WARN_UNSTABLE_UNIT]:
                     hass.data[WARN_UNSTABLE_UNIT].add(entity_id)
                     extra = ""
-                    if old_metadata := statistics.get_metadata(hass, entity_id):
+                    if old_metadata := old_metadatas.get(entity_id):
                         extra = (
                             " and matches the unit of already compiled statistics "
-                            f"({old_metadata['unit_of_measurement']})"
+                            f"({old_metadata[1]['unit_of_measurement']})"
                         )
                     _LOGGER.warning(
                         "The unit of %s is changing, got multiple %s, generation of long term "
@@ -368,17 +377,32 @@ def _last_reset_as_utc_isoformat(
     return dt_util.as_utc(last_reset).isoformat()
 
 
-def compile_statistics(  # noqa: C901
+def compile_statistics(
     hass: HomeAssistant, start: datetime.datetime, end: datetime.datetime
 ) -> list[StatisticResult]:
     """Compile statistics for all entities during start-end.
 
     Note: This will query the database and must not be run in the event loop
     """
+    with recorder_util.session_scope(hass=hass) as session:
+        result = _compile_statistics(hass, session, start, end)
+    return result
+
+
+def _compile_statistics(  # noqa: C901
+    hass: HomeAssistant,
+    session: Session,
+    start: datetime.datetime,
+    end: datetime.datetime,
+) -> list[StatisticResult]:
+    """Compile statistics for all entities during start-end."""
     result: list[StatisticResult] = []
 
     sensor_states = _get_sensor_states(hass)
     wanted_statistics = _wanted_statistics(sensor_states)
+    old_metadatas = statistics.get_metadata_with_session(
+        hass, session, [i.entity_id for i in sensor_states], None
+    )
 
     # Get history between start and end
     entities_full_history = [
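
This hunk carries the core of the optimization: compile_statistics now opens a single recorder session for the whole run, and metadata for every recorded sensor is fetched in one get_metadata_with_session call instead of a separate statistics.get_metadata query per entity. A rough sketch of the before/after query pattern (fetch_one and fetch_many are hypothetical stand-ins, not recorder functions):

def compile_per_entity(entity_ids, fetch_one):
    # Old approach: one metadata query (and session) per entity.
    return {eid: fetch_one(eid) for eid in entity_ids}


def compile_batched(entity_ids, fetch_many):
    # New approach: one batched query, then cheap in-memory lookups.
    old_metadatas = fetch_many(entity_ids)
    return {eid: old_metadatas.get(eid) for eid in entity_ids}

The history queries below likewise go through get_significant_states_with_session, so the whole compile pass shares the one session opened here.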
@@ -386,8 +410,9 @@
     ]
     history_list = {}
     if entities_full_history:
-        history_list = history.get_significant_states(  # type: ignore
+        history_list = history.get_significant_states_with_session(  # type: ignore
             hass,
+            session,
             start - datetime.timedelta.resolution,
             end,
             entity_ids=entities_full_history,
@@ -399,8 +424,9 @@
         if "sum" not in wanted_statistics[i.entity_id]
     ]
     if entities_significant_history:
-        _history_list = history.get_significant_states(  # type: ignore
+        _history_list = history.get_significant_states_with_session(  # type: ignore
             hass,
+            session,
             start - datetime.timedelta.resolution,
             end,
             entity_ids=entities_significant_history,
@@ -420,14 +446,16 @@
         state_class = _state.attributes[ATTR_STATE_CLASS]
         device_class = _state.attributes.get(ATTR_DEVICE_CLASS)
         entity_history = history_list[entity_id]
-        unit, fstates = _normalize_states(hass, entity_history, device_class, entity_id)
+        unit, fstates = _normalize_states(
+            hass, session, old_metadatas, entity_history, device_class, entity_id
+        )
 
         if not fstates:
             continue
 
         # Check metadata
-        if old_metadata := statistics.get_metadata(hass, entity_id):
-            if old_metadata["unit_of_measurement"] != unit:
+        if old_metadata := old_metadatas.get(entity_id):
+            if old_metadata[1]["unit_of_measurement"] != unit:
                 if WARN_UNSTABLE_UNIT not in hass.data:
                     hass.data[WARN_UNSTABLE_UNIT] = set()
                 if entity_id not in hass.data[WARN_UNSTABLE_UNIT]:
@@ -438,8 +466,8 @@
                         "will be suppressed unless the unit changes back to %s",
                         entity_id,
                         unit,
-                        old_metadata["unit_of_measurement"],
-                        old_metadata["unit_of_measurement"],
+                        old_metadata[1]["unit_of_measurement"],
+                        old_metadata[1]["unit_of_measurement"],
                     )
                continue
@@ -617,10 +645,10 @@ def validate_statistics(
             state_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
             if device_class not in UNIT_CONVERSIONS:
-                metadata = statistics.get_metadata(hass, entity_id)
+                metadata = statistics.get_metadata(hass, (entity_id,))
                 if not metadata:
                     continue
-                metadata_unit = metadata["unit_of_measurement"]
+                metadata_unit = metadata[entity_id][1]["unit_of_measurement"]
                 if state_unit != metadata_unit:
                     validation_result[entity_id].append(
                         statistics.ValidationIssue(

View File

@@ -1725,12 +1725,17 @@ def test_compile_hourly_statistics_changing_statistics(
     assert statistic_ids == [
         {"statistic_id": "sensor.test1", "unit_of_measurement": None}
     ]
-    metadata = get_metadata(hass, "sensor.test1")
+    metadata = get_metadata(hass, ("sensor.test1",))
     assert metadata == {
-        "has_mean": True,
-        "has_sum": False,
-        "statistic_id": "sensor.test1",
-        "unit_of_measurement": None,
+        "sensor.test1": (
+            1,
+            {
+                "has_mean": True,
+                "has_sum": False,
+                "statistic_id": "sensor.test1",
+                "unit_of_measurement": None,
+            },
+        )
     }
 
     # Add more states, with changed state class
@@ -1745,12 +1750,17 @@ def test_compile_hourly_statistics_changing_statistics(
     assert statistic_ids == [
         {"statistic_id": "sensor.test1", "unit_of_measurement": None}
     ]
-    metadata = get_metadata(hass, "sensor.test1")
+    metadata = get_metadata(hass, ("sensor.test1",))
     assert metadata == {
-        "has_mean": False,
-        "has_sum": True,
-        "statistic_id": "sensor.test1",
-        "unit_of_measurement": None,
+        "sensor.test1": (
+            1,
+            {
+                "has_mean": False,
+                "has_sum": True,
+                "statistic_id": "sensor.test1",
+                "unit_of_measurement": None,
+            },
+        )
     }
 
     stats = statistics_during_period(hass, period0, period="5minute")
     assert stats == {