Speed up formatting statistics data (#88228)
* Speed up fetching statistics by using attrgetter (the faster native code in CPython's Modules/_operator.c vs a lambda)
* avoid dict lookups
* avoid dict lookups
* naming
* split
* Revert "split" (this reverts commit 0ead89603d)
* Revert "Revert "split"" (this reverts commit 20014af168)
* tweak
* tweak
* tweak
* tweak
* Revert "tweak" (this reverts commit d5e4aac7c5)
* Revert "tweak" (this reverts commit 0cebae33f8)
* Revert "tweak" (this reverts commit 0184e47e24)
* comment
* comment
* comment
* reduce local vars
* less
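
For context on the first bullet: operator.attrgetter and operator.itemgetter are implemented in C (CPython's Modules/_operator.c), so they avoid the per-call Python frame that a lambda key function incurs. A minimal standalone sketch (illustrative, not part of this commit) comparing the two as sort keys:

```python
from operator import itemgetter
from timeit import timeit

rows = [(i % 5, i) for i in range(1000)]

# Same key, two implementations: a Python lambda vs the C-level itemgetter.
by_lambda = timeit(lambda: sorted(rows, key=lambda r: r[0]), number=1000)
by_getter = timeit(lambda: sorted(rows, key=itemgetter(0)), number=1000)
print(f"lambda key: {by_lambda:.3f}s  itemgetter key: {by_getter:.3f}s")
```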
parent 792538c124
commit 6c542bd314
@@ -10,6 +10,7 @@ from functools import lru_cache, partial
 from itertools import chain, groupby
 import json
 import logging
+from operator import itemgetter
 import os
 import re
 from statistics import mean
@@ -1726,7 +1727,7 @@ def _statistics_during_period_with_session(
     stmt = _statistics_during_period_stmt(
         start_time, end_time, metadata_ids, table, types
     )
-    stats = execute_stmt_lambda_element(session, stmt)
+    stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))

     if not stats:
         return {}
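
The cast(...) added above only informs the type checker; it copies and converts nothing at runtime, so the hot path is unaffected. A tiny sketch (not from this commit) demonstrating that typing.cast is a runtime no-op:

```python
from typing import cast

value: object = [1, 2, 3]
typed = cast(list[int], value)  # informs the checker; returns value unchanged
assert typed is value
```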
@@ -1843,7 +1844,7 @@ def _get_last_statistics(
         stmt = _get_last_statistics_stmt(metadata_id, number_of_stats)
     else:
         stmt = _get_last_statistics_short_term_stmt(metadata_id, number_of_stats)
-    stats = execute_stmt_lambda_element(session, stmt)
+    stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))

     if not stats:
         return {}
@@ -1939,7 +1940,7 @@ def get_latest_short_term_statistics(
         if statistic_id in metadata
     ]
     stmt = _latest_short_term_statistics_stmt(metadata_ids)
-    stats = execute_stmt_lambda_element(session, stmt)
+    stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
     if not stats:
         return {}

@@ -2003,7 +2004,7 @@ def _statistics_at_time(
 def _sorted_statistics_to_dict(
     hass: HomeAssistant,
     session: Session,
-    stats: Iterable[Row],
+    stats: Sequence[Row[Any]],
     statistic_ids: list[str] | None,
     _metadata: dict[str, tuple[int, StatisticMetaData]],
     convert_units: bool,
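
The Iterable to Sequence change above is what allows the code below to index stats[0] to read the column layout; a plain Iterable (e.g. a generator) supports only one-shot iteration. A small sketch of the distinction (illustrative only):

```python
from collections.abc import Iterable, Sequence

def first_indexed(items: Sequence[int]) -> int:
    return items[0]  # Sequence guarantees __getitem__ and __len__

def first_iterated(items: Iterable[int]) -> int:
    return next(iter(items))  # an Iterable can only be consumed, not indexed
```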
@@ -2013,20 +2014,22 @@ def _sorted_statistics_to_dict(
     types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]],
 ) -> dict[str, list[dict]]:
     """Convert SQL results into JSON friendly data structure."""
+    assert stats, "stats must not be empty"  # Guard against implementation error
     result: dict = defaultdict(list)
     metadata = dict(_metadata.values())
     need_stat_at_start_time: set[int] = set()
     start_time_ts = start_time.timestamp() if start_time else None
     # Identify metadata IDs for which no data was available at the requested start time
+    field_map: dict[str, int] = {key: idx for idx, key in enumerate(stats[0]._fields)}
+    metadata_id_idx = field_map["metadata_id"]
+    start_ts_idx = field_map["start_ts"]
     stats_by_meta_id: dict[int, list[Row]] = {}
     seen_statistic_ids: set[str] = set()
-    for meta_id, group in groupby(
-        stats,
-        lambda stat: stat.metadata_id,  # type: ignore[no-any-return]
-    ):
+    key_func = itemgetter(metadata_id_idx)
+    for meta_id, group in groupby(stats, key_func):
         stats_list = stats_by_meta_id[meta_id] = list(group)
         seen_statistic_ids.add(metadata[meta_id]["statistic_id"])
-        first_start_time_ts = stats_list[0].start_ts
+        first_start_time_ts = stats_list[0][start_ts_idx]
         if start_time_ts and first_start_time_ts > start_time_ts:
             need_stat_at_start_time.add(meta_id)
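
The hunk above resolves column names to positions once via Row._fields, then hands groupby a C-level itemgetter key. A self-contained sketch of the technique, using a namedtuple to stand in for a SQLAlchemy Row (both expose ._fields and positional indexing; the column set here is trimmed for illustration):

```python
from collections import namedtuple
from itertools import groupby
from operator import itemgetter

Row = namedtuple("Row", ["metadata_id", "start_ts", "mean"])
stats = [Row(1, 100.0, 1.0), Row(1, 200.0, 2.0), Row(2, 100.0, 3.0)]

# Resolve names to indices once, outside the hot loop.
field_map = {key: idx for idx, key in enumerate(stats[0]._fields)}
metadata_id_idx = field_map["metadata_id"]

# groupby expects input sorted by the key, which the SQL query provides here.
for meta_id, group in groupby(stats, itemgetter(metadata_id_idx)):
    print(meta_id, list(group))
```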
@@ -2046,14 +2049,17 @@ def _sorted_statistics_to_dict(
         session, need_stat_at_start_time, table, start_time, types
     ):
         for stat in tmp:
-            stats_by_meta_id[stat.metadata_id].insert(0, stat)
+            stats_by_meta_id[stat[metadata_id_idx]].insert(0, stat)

-    _want_mean = "mean" in types
-    _want_min = "min" in types
-    _want_max = "max" in types
-    _want_last_reset = "last_reset" in types
-    _want_state = "state" in types
-    _want_sum = "sum" in types
+    # Figure out which fields we need to extract from the SQL result
+    # and which indices they have in the result so we can avoid the overhead
+    # of doing a dict lookup for each row
+    mean_idx = field_map["mean"] if "mean" in types else None
+    min_idx = field_map["min"] if "min" in types else None
+    max_idx = field_map["max"] if "max" in types else None
+    last_reset_ts_idx = field_map["last_reset_ts"] if "last_reset" in types else None
+    state_idx = field_map["state"] if "state" in types else None
+    sum_idx = field_map["sum"] if "sum" in types else None
     # Append all statistic entries, and optionally do unit conversion
     table_duration_seconds = table.duration.total_seconds()
     for meta_id, stats_list in stats_by_meta_id.items():
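
Hoisting the `"mean" in types` style membership tests out of the row loop leaves only cheap `is not None` checks and tuple indexing per row. A reduced sketch of the same hoisting (field positions invented for illustration):

```python
types = {"mean", "sum"}
field_map = {"metadata_id": 0, "start_ts": 1, "mean": 2, "sum": 3}

# Decide once which columns to emit; None means "not requested".
mean_idx = field_map["mean"] if "mean" in types else None
sum_idx = field_map["sum"] if "sum" in types else None

rows = [(1, 100.0, 1.5, 10.0), (1, 200.0, 2.5, 20.0)]
for db_state in rows:
    out = {"start": db_state[1]}
    if mean_idx is not None:
        out["mean"] = db_state[mean_idx]
    if sum_idx is not None:
        out["sum"] = db_state[sum_idx]
    print(out)
```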
@@ -2066,25 +2072,44 @@ def _sorted_statistics_to_dict(
             convert = _get_statistic_to_display_unit_converter(unit, state_unit, units)
         else:
             convert = None
-        ent_results = result[statistic_id]
+        ent_results_append = result[statistic_id].append
+        #
+        # The below loop is a red hot path for energy, and every
+        # optimization counts in here.
+        #
+        # Specifically, we want to avoid function calls,
+        # attribute lookups, and dict lookups as much as possible.
+        #
         for db_state in stats_list:
             row: dict[str, Any] = {
-                "start": (start_ts := db_state.start_ts),
+                "start": (start_ts := db_state[start_ts_idx]),
                 "end": start_ts + table_duration_seconds,
             }
-            if _want_mean:
-                row["mean"] = convert(db_state.mean) if convert else db_state.mean
-            if _want_min:
-                row["min"] = convert(db_state.min) if convert else db_state.min
-            if _want_max:
-                row["max"] = convert(db_state.max) if convert else db_state.max
-            if _want_last_reset:
-                row["last_reset"] = db_state.last_reset_ts
-            if _want_state:
-                row["state"] = convert(db_state.state) if convert else db_state.state
-            if _want_sum:
-                row["sum"] = convert(db_state.sum) if convert else db_state.sum
-            ent_results.append(row)
+            if last_reset_ts_idx is not None:
+                row["last_reset"] = db_state[last_reset_ts_idx]
+            if convert:
+                if mean_idx is not None:
+                    row["mean"] = convert(db_state[mean_idx])
+                if min_idx is not None:
+                    row["min"] = convert(db_state[min_idx])
+                if max_idx is not None:
+                    row["max"] = convert(db_state[max_idx])
+                if state_idx is not None:
+                    row["state"] = convert(db_state[state_idx])
+                if sum_idx is not None:
+                    row["sum"] = convert(db_state[sum_idx])
+            else:
+                if mean_idx is not None:
+                    row["mean"] = db_state[mean_idx]
+                if min_idx is not None:
+                    row["min"] = db_state[min_idx]
+                if max_idx is not None:
+                    row["max"] = db_state[max_idx]
+                if state_idx is not None:
+                    row["state"] = db_state[state_idx]
+                if sum_idx is not None:
+                    row["sum"] = db_state[sum_idx]
+            ent_results_append(row)

     return result
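
The `ent_results_append = result[statistic_id].append` change caches the bound method so the loop body skips one attribute lookup per row. A standalone sketch of that micro-optimization (timings will vary by machine):

```python
from timeit import timeit

def with_attribute_lookup(n: int) -> list[int]:
    out: list[int] = []
    for i in range(n):
        out.append(i)  # resolves .append on every iteration
    return out

def with_bound_method(n: int) -> list[int]:
    out: list[int] = []
    out_append = out.append  # resolved once, reused in the loop
    for i in range(n):
        out_append(i)
    return out

print("attribute lookup:", timeit(lambda: with_attribute_lookup(10_000), number=200))
print("bound method:   ", timeit(lambda: with_bound_method(10_000), number=200))
```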