Speed up formatting statistics data (#88228)

* Speed up fetching statistics by using attrgetter

Uses the faster native code vs a lambda
4aeae28671/Modules/_operator.c (L1406)

* avoid dict lookups

* avoid dict lookups

* naming

* split

* Revert "split"

This reverts commit 0ead89603d.

* Revert "Revert "split""

This reverts commit 20014af168.

* tweak

* tweak

* tweak

* tweak

* Revert "tweak"

This reverts commit d5e4aac7c5.

* Revert "tweak"

This reverts commit 0cebae33f8.

* Revert "tweak"

This reverts commit 0184e47e24.

* comment

* comment

* comment

* reduce local vars

* less
pull/88225/head^2
J. Nick Koston 2023-02-16 11:34:20 -06:00 committed by GitHub
parent 792538c124
commit 6c542bd314
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 56 additions and 31 deletions

View File

@@ -10,6 +10,7 @@ from functools import lru_cache, partial
from itertools import chain, groupby from itertools import chain, groupby
import json import json
import logging import logging
from operator import itemgetter
import os import os
import re import re
from statistics import mean from statistics import mean
@@ -1726,7 +1727,7 @@ def _statistics_during_period_with_session(
stmt = _statistics_during_period_stmt( stmt = _statistics_during_period_stmt(
start_time, end_time, metadata_ids, table, types start_time, end_time, metadata_ids, table, types
) )
stats = execute_stmt_lambda_element(session, stmt) stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
if not stats: if not stats:
return {} return {}
@@ -1843,7 +1844,7 @@ def _get_last_statistics(
stmt = _get_last_statistics_stmt(metadata_id, number_of_stats) stmt = _get_last_statistics_stmt(metadata_id, number_of_stats)
else: else:
stmt = _get_last_statistics_short_term_stmt(metadata_id, number_of_stats) stmt = _get_last_statistics_short_term_stmt(metadata_id, number_of_stats)
stats = execute_stmt_lambda_element(session, stmt) stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
if not stats: if not stats:
return {} return {}
@@ -1939,7 +1940,7 @@ def get_latest_short_term_statistics(
if statistic_id in metadata if statistic_id in metadata
] ]
stmt = _latest_short_term_statistics_stmt(metadata_ids) stmt = _latest_short_term_statistics_stmt(metadata_ids)
stats = execute_stmt_lambda_element(session, stmt) stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
if not stats: if not stats:
return {} return {}
@@ -2003,7 +2004,7 @@ def _statistics_at_time(
def _sorted_statistics_to_dict( def _sorted_statistics_to_dict(
hass: HomeAssistant, hass: HomeAssistant,
session: Session, session: Session,
stats: Iterable[Row], stats: Sequence[Row[Any]],
statistic_ids: list[str] | None, statistic_ids: list[str] | None,
_metadata: dict[str, tuple[int, StatisticMetaData]], _metadata: dict[str, tuple[int, StatisticMetaData]],
convert_units: bool, convert_units: bool,
@@ -2013,20 +2014,22 @@ def _sorted_statistics_to_dict(
types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]], types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]],
) -> dict[str, list[dict]]: ) -> dict[str, list[dict]]:
"""Convert SQL results into JSON friendly data structure.""" """Convert SQL results into JSON friendly data structure."""
assert stats, "stats must not be empty" # Guard against implementation error
result: dict = defaultdict(list) result: dict = defaultdict(list)
metadata = dict(_metadata.values()) metadata = dict(_metadata.values())
need_stat_at_start_time: set[int] = set() need_stat_at_start_time: set[int] = set()
start_time_ts = start_time.timestamp() if start_time else None start_time_ts = start_time.timestamp() if start_time else None
# Identify metadata IDs for which no data was available at the requested start time # Identify metadata IDs for which no data was available at the requested start time
field_map: dict[str, int] = {key: idx for idx, key in enumerate(stats[0]._fields)}
metadata_id_idx = field_map["metadata_id"]
start_ts_idx = field_map["start_ts"]
stats_by_meta_id: dict[int, list[Row]] = {} stats_by_meta_id: dict[int, list[Row]] = {}
seen_statistic_ids: set[str] = set() seen_statistic_ids: set[str] = set()
for meta_id, group in groupby( key_func = itemgetter(metadata_id_idx)
stats, for meta_id, group in groupby(stats, key_func):
lambda stat: stat.metadata_id, # type: ignore[no-any-return]
):
stats_list = stats_by_meta_id[meta_id] = list(group) stats_list = stats_by_meta_id[meta_id] = list(group)
seen_statistic_ids.add(metadata[meta_id]["statistic_id"]) seen_statistic_ids.add(metadata[meta_id]["statistic_id"])
first_start_time_ts = stats_list[0].start_ts first_start_time_ts = stats_list[0][start_ts_idx]
if start_time_ts and first_start_time_ts > start_time_ts: if start_time_ts and first_start_time_ts > start_time_ts:
need_stat_at_start_time.add(meta_id) need_stat_at_start_time.add(meta_id)
@@ -2046,14 +2049,17 @@ def _sorted_statistics_to_dict(
session, need_stat_at_start_time, table, start_time, types session, need_stat_at_start_time, table, start_time, types
): ):
for stat in tmp: for stat in tmp:
stats_by_meta_id[stat.metadata_id].insert(0, stat) stats_by_meta_id[stat[metadata_id_idx]].insert(0, stat)
_want_mean = "mean" in types # Figure out which fields we need to extract from the SQL result
_want_min = "min" in types # and which indices they have in the result so we can avoid the overhead
_want_max = "max" in types # of doing a dict lookup for each row
_want_last_reset = "last_reset" in types mean_idx = field_map["mean"] if "mean" in types else None
_want_state = "state" in types min_idx = field_map["min"] if "min" in types else None
_want_sum = "sum" in types max_idx = field_map["max"] if "max" in types else None
last_reset_ts_idx = field_map["last_reset_ts"] if "last_reset" in types else None
state_idx = field_map["state"] if "state" in types else None
sum_idx = field_map["sum"] if "sum" in types else None
# Append all statistic entries, and optionally do unit conversion # Append all statistic entries, and optionally do unit conversion
table_duration_seconds = table.duration.total_seconds() table_duration_seconds = table.duration.total_seconds()
for meta_id, stats_list in stats_by_meta_id.items(): for meta_id, stats_list in stats_by_meta_id.items():
@@ -2066,25 +2072,44 @@ def _sorted_statistics_to_dict(
convert = _get_statistic_to_display_unit_converter(unit, state_unit, units) convert = _get_statistic_to_display_unit_converter(unit, state_unit, units)
else: else:
convert = None convert = None
ent_results = result[statistic_id] ent_results_append = result[statistic_id].append
#
# The below loop is a red hot path for energy, and every
# optimization counts in here.
#
# Specifically, we want to avoid function calls,
# attribute lookups, and dict lookups as much as possible.
#
for db_state in stats_list: for db_state in stats_list:
row: dict[str, Any] = { row: dict[str, Any] = {
"start": (start_ts := db_state.start_ts), "start": (start_ts := db_state[start_ts_idx]),
"end": start_ts + table_duration_seconds, "end": start_ts + table_duration_seconds,
} }
if _want_mean: if last_reset_ts_idx is not None:
row["mean"] = convert(db_state.mean) if convert else db_state.mean row["last_reset"] = db_state[last_reset_ts_idx]
if _want_min: if convert:
row["min"] = convert(db_state.min) if convert else db_state.min if mean_idx is not None:
if _want_max: row["mean"] = convert(db_state[mean_idx])
row["max"] = convert(db_state.max) if convert else db_state.max if min_idx is not None:
if _want_last_reset: row["min"] = convert(db_state[min_idx])
row["last_reset"] = db_state.last_reset_ts if max_idx is not None:
if _want_state: row["max"] = convert(db_state[max_idx])
row["state"] = convert(db_state.state) if convert else db_state.state if state_idx is not None:
if _want_sum: row["state"] = convert(db_state[state_idx])
row["sum"] = convert(db_state.sum) if convert else db_state.sum if sum_idx is not None:
ent_results.append(row) row["sum"] = convert(db_state[sum_idx])
else:
if mean_idx is not None:
row["mean"] = db_state[mean_idx]
if min_idx is not None:
row["min"] = db_state[min_idx]
if max_idx is not None:
row["max"] = db_state[max_idx]
if state_idx is not None:
row["state"] = db_state[state_idx]
if sum_idx is not None:
row["sum"] = db_state[sum_idx]
ent_results_append(row)
return result return result