From 6c542bd314d16b2fb282cf641a1a8e4e8744aeff Mon Sep 17 00:00:00 2001
From: "J. Nick Koston"
Date: Thu, 16 Feb 2023 11:34:20 -0600
Subject: [PATCH] Speed up formatting statistics data (#88228)

* Speed up fetching statistics by using attrgetter

Uses the faster native code vs a lambda
https://github.com/python/cpython/blob/4aeae286715a3a9fa624429733582917606000c3/Modules/_operator.c#L1406

* avoid dict lookups

* avoid dict lookups

* naming

* split

* Revert "split"

This reverts commit 0ead89603d1570554148f580948e9bce9eca6d38.

* Revert "Revert "split""

This reverts commit 20014af16862d067b7f807f1eb0f909e89fd4b6a.

* tweak

* tweak

* tweak

* tweak

* Revert "tweak"

This reverts commit d5e4aac7c5cfc22fdabb016cefb9da6510578cd3.

* Revert "tweak"

This reverts commit 0cebae33f817e0a075dcb3f11d9c479cdc50d3dd.

* Revert "tweak"

This reverts commit 0184e47e2473b9855513bd45b601247f1d8be85e.

* comment

* comment

* comment

* reduce local vars

* less
---
 .../components/recorder/statistics.py        | 87 ++++++++++++-------
 1 file changed, 56 insertions(+), 31 deletions(-)

diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py
index 4f812265f11..1e2f6fb8ab9 100644
--- a/homeassistant/components/recorder/statistics.py
+++ b/homeassistant/components/recorder/statistics.py
@@ -10,6 +10,7 @@ from functools import lru_cache, partial
 from itertools import chain, groupby
 import json
 import logging
+from operator import itemgetter
 import os
 import re
 from statistics import mean
@@ -1726,7 +1727,7 @@ def _statistics_during_period_with_session(
         stmt = _statistics_during_period_stmt(
             start_time, end_time, metadata_ids, table, types
         )
-        stats = execute_stmt_lambda_element(session, stmt)
+        stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
         if not stats:
             return {}
@@ -1843,7 +1844,7 @@ def _get_last_statistics(
             stmt = _get_last_statistics_stmt(metadata_id, number_of_stats)
         else:
             stmt = _get_last_statistics_short_term_stmt(metadata_id, number_of_stats)
-        stats = execute_stmt_lambda_element(session, stmt)
+        stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
         if not stats:
             return {}
@@ -1939,7 +1940,7 @@ def get_latest_short_term_statistics(
             if statistic_id in metadata
         ]
         stmt = _latest_short_term_statistics_stmt(metadata_ids)
-        stats = execute_stmt_lambda_element(session, stmt)
+        stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
         if not stats:
             return {}
@@ -2003,7 +2004,7 @@ def _statistics_at_time(
 def _sorted_statistics_to_dict(
     hass: HomeAssistant,
     session: Session,
-    stats: Iterable[Row],
+    stats: Sequence[Row[Any]],
     statistic_ids: list[str] | None,
     _metadata: dict[str, tuple[int, StatisticMetaData]],
     convert_units: bool,
@@ -2013,20 +2014,22 @@ def _sorted_statistics_to_dict(
     types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]],
 ) -> dict[str, list[dict]]:
     """Convert SQL results into JSON friendly data structure."""
+    assert stats, "stats must not be empty"  # Guard against implementation error
     result: dict = defaultdict(list)
     metadata = dict(_metadata.values())
     need_stat_at_start_time: set[int] = set()
     start_time_ts = start_time.timestamp() if start_time else None
     # Identify metadata IDs for which no data was available at the requested start time
+    field_map: dict[str, int] = {key: idx for idx, key in enumerate(stats[0]._fields)}
+    metadata_id_idx = field_map["metadata_id"]
+    start_ts_idx = field_map["start_ts"]
     stats_by_meta_id: dict[int, list[Row]] = {}
     seen_statistic_ids: set[str] = set()
-    for meta_id, group in groupby(
-        stats,
-        lambda stat: stat.metadata_id,  # type: ignore[no-any-return]
-    ):
+    key_func = itemgetter(metadata_id_idx)
+    for meta_id, group in groupby(stats, key_func):
         stats_list = stats_by_meta_id[meta_id] = list(group)
         seen_statistic_ids.add(metadata[meta_id]["statistic_id"])
-        first_start_time_ts = stats_list[0].start_ts
+        first_start_time_ts = stats_list[0][start_ts_idx]
         if start_time_ts and first_start_time_ts > start_time_ts:
             need_stat_at_start_time.add(meta_id)
 
@@ -2046,14 +2049,17 @@ def _sorted_statistics_to_dict(
             session, need_stat_at_start_time, table, start_time, types
         ):
             for stat in tmp:
-                stats_by_meta_id[stat.metadata_id].insert(0, stat)
+                stats_by_meta_id[stat[metadata_id_idx]].insert(0, stat)
-    _want_mean = "mean" in types
-    _want_min = "min" in types
-    _want_max = "max" in types
-    _want_last_reset = "last_reset" in types
-    _want_state = "state" in types
-    _want_sum = "sum" in types
+    # Figure out which fields we need to extract from the SQL result
+    # and which indices they have in the result so we can avoid the overhead
+    # of doing a dict lookup for each row
+    mean_idx = field_map["mean"] if "mean" in types else None
+    min_idx = field_map["min"] if "min" in types else None
+    max_idx = field_map["max"] if "max" in types else None
+    last_reset_ts_idx = field_map["last_reset_ts"] if "last_reset" in types else None
+    state_idx = field_map["state"] if "state" in types else None
+    sum_idx = field_map["sum"] if "sum" in types else None
 
     # Append all statistic entries, and optionally do unit conversion
     table_duration_seconds = table.duration.total_seconds()
     for meta_id, stats_list in stats_by_meta_id.items():
@@ -2066,25 +2072,44 @@ def _sorted_statistics_to_dict(
             convert = _get_statistic_to_display_unit_converter(unit, state_unit, units)
         else:
             convert = None
-        ent_results = result[statistic_id]
+        ent_results_append = result[statistic_id].append
+        #
+        # The below loop is a red hot path for energy, and every
+        # optimization counts in here.
+        #
+        # Specifically, we want to avoid function calls,
+        # attribute lookups, and dict lookups as much as possible.
+        #
         for db_state in stats_list:
             row: dict[str, Any] = {
-                "start": (start_ts := db_state.start_ts),
+                "start": (start_ts := db_state[start_ts_idx]),
                 "end": start_ts + table_duration_seconds,
             }
-            if _want_mean:
-                row["mean"] = convert(db_state.mean) if convert else db_state.mean
-            if _want_min:
-                row["min"] = convert(db_state.min) if convert else db_state.min
-            if _want_max:
-                row["max"] = convert(db_state.max) if convert else db_state.max
-            if _want_last_reset:
-                row["last_reset"] = db_state.last_reset_ts
-            if _want_state:
-                row["state"] = convert(db_state.state) if convert else db_state.state
-            if _want_sum:
-                row["sum"] = convert(db_state.sum) if convert else db_state.sum
-            ent_results.append(row)
+            if last_reset_ts_idx is not None:
+                row["last_reset"] = db_state[last_reset_ts_idx]
+            if convert:
+                if mean_idx is not None:
+                    row["mean"] = convert(db_state[mean_idx])
+                if min_idx is not None:
+                    row["min"] = convert(db_state[min_idx])
+                if max_idx is not None:
+                    row["max"] = convert(db_state[max_idx])
+                if state_idx is not None:
+                    row["state"] = convert(db_state[state_idx])
+                if sum_idx is not None:
+                    row["sum"] = convert(db_state[sum_idx])
+            else:
+                if mean_idx is not None:
+                    row["mean"] = db_state[mean_idx]
+                if min_idx is not None:
+                    row["min"] = db_state[min_idx]
+                if max_idx is not None:
+                    row["max"] = db_state[max_idx]
+                if state_idx is not None:
+                    row["state"] = db_state[state_idx]
+                if sum_idx is not None:
+                    row["sum"] = db_state[sum_idx]
+            ent_results_append(row)
 
     return result
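
The keyfunc swap is easy to demonstrate in isolation. Below is a minimal standalone sketch, not part of the patch: a namedtuple stands in for SQLAlchemy's Row (both expose `_fields` and support index access), and the row counts are arbitrary. It times a lambda key against `operator.itemgetter` as the `groupby` key; itemgetter is implemented in C (the `_operator.c` linked in the commit message), so each key extraction skips a Python-level function call.

    from collections import namedtuple
    from itertools import groupby
    from operator import itemgetter
    from timeit import timeit

    # Stand-in for a SQLAlchemy Row: supports ._fields and row[idx].
    Row = namedtuple("Row", ["metadata_id", "start_ts", "mean"])
    # groupby requires input already grouped/sorted by the key.
    rows = [
        Row(meta_id, float(i), i * 0.5)
        for meta_id in (1, 2, 3)
        for i in range(100_000)
    ]

    # Resolve the column name to an integer index once, up front.
    field_map = {key: idx for idx, key in enumerate(rows[0]._fields)}
    metadata_id_idx = field_map["metadata_id"]

    def group_with_lambda():
        return [(k, sum(1 for _ in g)) for k, g in groupby(rows, lambda r: r.metadata_id)]

    def group_with_itemgetter():
        return [(k, sum(1 for _ in g)) for k, g in groupby(rows, itemgetter(metadata_id_idx))]

    print("lambda keyfunc:    ", timeit(group_with_lambda, number=10))
    print("itemgetter keyfunc:", timeit(group_with_itemgetter, number=10))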
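The shape of the hot loop can be sketched standalone as well. The field names, rows, and converter below are hypothetical, but the pattern mirrors the patch: resolve each optional column to an integer index once (None when the caller did not request it), capture the result list's bound `.append` outside the loop, and branch on `convert` once per row rather than once per field.

    from collections import namedtuple

    Row = namedtuple("Row", ["metadata_id", "start_ts", "mean", "sum"])
    rows = [Row(1, 1000.0 + i, 21.5 + i, 100.0 + i) for i in range(5)]
    types = {"mean", "sum"}  # which statistics the caller asked for

    field_map = {key: idx for idx, key in enumerate(Row._fields)}
    start_ts_idx = field_map["start_ts"]
    mean_idx = field_map["mean"] if "mean" in types else None
    sum_idx = field_map["sum"] if "sum" in types else None

    convert = None  # swap in a unit converter, e.g. lambda kwh: kwh * 1000
    results: list[dict] = []
    results_append = results.append  # bind once: no attribute lookup per row
    for db_state in rows:
        row = {"start": db_state[start_ts_idx]}
        if convert:
            if mean_idx is not None:
                row["mean"] = convert(db_state[mean_idx])
            if sum_idx is not None:
                row["sum"] = convert(db_state[sum_idx])
        else:
            if mean_idx is not None:
                row["mean"] = db_state[mean_idx]
            if sum_idx is not None:
                row["sum"] = db_state[sum_idx]
        results_append(row)
    print(results[0])  # {'start': 1000.0, 'mean': 21.5, 'sum': 100.0}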