From 6c542bd314d16b2fb282cf641a1a8e4e8744aeff Mon Sep 17 00:00:00 2001
From: "J. Nick Koston"
Date: Thu, 16 Feb 2023 11:34:20 -0600
Subject: [PATCH] Speed up formatting statistics data (#88228)

* Speed up fetching statistics by using attrgetter

Uses the faster native code vs a lambda
https://github.com/python/cpython/blob/4aeae286715a3a9fa624429733582917606000c3/Modules/_operator.c#L1406

* avoid dict lookups

* avoid dict lookups

* naming

* split

* Revert "split"

This reverts commit 0ead89603d1570554148f580948e9bce9eca6d38.

* Revert "Revert "split""

This reverts commit 20014af16862d067b7f807f1eb0f909e89fd4b6a.

* tweak

* tweak

* tweak

* tweak

* Revert "tweak"

This reverts commit d5e4aac7c5cfc22fdabb016cefb9da6510578cd3.

* Revert "tweak"

This reverts commit 0cebae33f817e0a075dcb3f11d9c479cdc50d3dd.

* Revert "tweak"

This reverts commit 0184e47e2473b9855513bd45b601247f1d8be85e.

* comment

* comment

* comment

* reduce local vars

* less
---
 .../components/recorder/statistics.py        | 87 ++++++++++++-------
 1 file changed, 56 insertions(+), 31 deletions(-)

diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py
index 4f812265f11..1e2f6fb8ab9 100644
--- a/homeassistant/components/recorder/statistics.py
+++ b/homeassistant/components/recorder/statistics.py
@@ -10,6 +10,7 @@ from functools import lru_cache, partial
 from itertools import chain, groupby
 import json
 import logging
+from operator import itemgetter
 import os
 import re
 from statistics import mean
@@ -1726,7 +1727,7 @@ def _statistics_during_period_with_session(
         stmt = _statistics_during_period_stmt(
             start_time, end_time, metadata_ids, table, types
         )
-        stats = execute_stmt_lambda_element(session, stmt)
+        stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
         if not stats:
             return {}
@@ -1843,7 +1844,7 @@ def _get_last_statistics(
             stmt = _get_last_statistics_stmt(metadata_id, number_of_stats)
         else:
             stmt = _get_last_statistics_short_term_stmt(metadata_id, number_of_stats)
-        stats = execute_stmt_lambda_element(session, stmt)
+        stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
         if not stats:
             return {}
@@ -1939,7 +1940,7 @@ def get_latest_short_term_statistics(
             if statistic_id in metadata
         ]
         stmt = _latest_short_term_statistics_stmt(metadata_ids)
-        stats = execute_stmt_lambda_element(session, stmt)
+        stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))
         if not stats:
             return {}
@@ -2003,7 +2004,7 @@ def _statistics_at_time(
 def _sorted_statistics_to_dict(
     hass: HomeAssistant,
     session: Session,
-    stats: Iterable[Row],
+    stats: Sequence[Row[Any]],
     statistic_ids: list[str] | None,
     _metadata: dict[str, tuple[int, StatisticMetaData]],
     convert_units: bool,
@@ -2013,20 +2014,22 @@ def _sorted_statistics_to_dict(
     types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]],
 ) -> dict[str, list[dict]]:
     """Convert SQL results into JSON friendly data structure."""
+    assert stats, "stats must not be empty"  # Guard against implementation error
     result: dict = defaultdict(list)
     metadata = dict(_metadata.values())
     need_stat_at_start_time: set[int] = set()
     start_time_ts = start_time.timestamp() if start_time else None
     # Identify metadata IDs for which no data was available at the requested start time
+    field_map: dict[str, int] = {key: idx for idx, key in enumerate(stats[0]._fields)}
+    metadata_id_idx = field_map["metadata_id"]
+    start_ts_idx = field_map["start_ts"]
     stats_by_meta_id: dict[int, list[Row]] = {}
     seen_statistic_ids: set[str] = set()
-    for meta_id, group in groupby(
-        stats,
-        lambda stat: stat.metadata_id,  # type: ignore[no-any-return]
-    ):
+    key_func = itemgetter(metadata_id_idx)
+    for meta_id, group in groupby(stats, key_func):
         stats_list = stats_by_meta_id[meta_id] = list(group)
         seen_statistic_ids.add(metadata[meta_id]["statistic_id"])
-        first_start_time_ts = stats_list[0].start_ts
+        first_start_time_ts = stats_list[0][start_ts_idx]
         if start_time_ts and first_start_time_ts > start_time_ts:
             need_stat_at_start_time.add(meta_id)
 
@@ -2046,14 +2049,17 @@ def _sorted_statistics_to_dict(
             session, need_stat_at_start_time, table, start_time, types
         ):
             for stat in tmp:
-                stats_by_meta_id[stat.metadata_id].insert(0, stat)
+                stats_by_meta_id[stat[metadata_id_idx]].insert(0, stat)
-    _want_mean = "mean" in types
-    _want_min = "min" in types
-    _want_max = "max" in types
-    _want_last_reset = "last_reset" in types
-    _want_state = "state" in types
-    _want_sum = "sum" in types
+    # Figure out which fields we need to extract from the SQL result
+    # and which indices they have in the result so we can avoid the overhead
+    # of doing a dict lookup for each row
+    mean_idx = field_map["mean"] if "mean" in types else None
+    min_idx = field_map["min"] if "min" in types else None
+    max_idx = field_map["max"] if "max" in types else None
+    last_reset_ts_idx = field_map["last_reset_ts"] if "last_reset" in types else None
+    state_idx = field_map["state"] if "state" in types else None
+    sum_idx = field_map["sum"] if "sum" in types else None
 
     # Append all statistic entries, and optionally do unit conversion
     table_duration_seconds = table.duration.total_seconds()
     for meta_id, stats_list in stats_by_meta_id.items():
@@ -2066,25 +2072,44 @@ def _sorted_statistics_to_dict(
             convert = _get_statistic_to_display_unit_converter(unit, state_unit, units)
         else:
             convert = None
-        ent_results = result[statistic_id]
+        ent_results_append = result[statistic_id].append
+        #
+        # The below loop is a red hot path for energy, and every
+        # optimization counts in here.
+        #
+        # Specifically, we want to avoid function calls,
+        # attribute lookups, and dict lookups as much as possible.
+        #
         for db_state in stats_list:
             row: dict[str, Any] = {
-                "start": (start_ts := db_state.start_ts),
+                "start": (start_ts := db_state[start_ts_idx]),
                 "end": start_ts + table_duration_seconds,
             }
-            if _want_mean:
-                row["mean"] = convert(db_state.mean) if convert else db_state.mean
-            if _want_min:
-                row["min"] = convert(db_state.min) if convert else db_state.min
-            if _want_max:
-                row["max"] = convert(db_state.max) if convert else db_state.max
-            if _want_last_reset:
-                row["last_reset"] = db_state.last_reset_ts
-            if _want_state:
-                row["state"] = convert(db_state.state) if convert else db_state.state
-            if _want_sum:
-                row["sum"] = convert(db_state.sum) if convert else db_state.sum
-            ent_results.append(row)
+            if last_reset_ts_idx is not None:
+                row["last_reset"] = db_state[last_reset_ts_idx]
+            if convert:
+                if mean_idx is not None:
+                    row["mean"] = convert(db_state[mean_idx])
+                if min_idx is not None:
+                    row["min"] = convert(db_state[min_idx])
+                if max_idx is not None:
+                    row["max"] = convert(db_state[max_idx])
+                if state_idx is not None:
+                    row["state"] = convert(db_state[state_idx])
+                if sum_idx is not None:
+                    row["sum"] = convert(db_state[sum_idx])
+            else:
+                if mean_idx is not None:
+                    row["mean"] = db_state[mean_idx]
+                if min_idx is not None:
+                    row["min"] = db_state[min_idx]
+                if max_idx is not None:
+                    row["max"] = db_state[max_idx]
+                if state_idx is not None:
+                    row["state"] = db_state[state_idx]
+                if sum_idx is not None:
+                    row["sum"] = db_state[sum_idx]
+            ent_results_append(row)
 
     return result
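
The keyfunc swap is easy to demonstrate in isolation. Below is a minimal standalone sketch, not part of the patch: a namedtuple stands in for SQLAlchemy's Row (both expose `_fields` and support index access), and the row counts are arbitrary. It times a lambda key against `operator.itemgetter` as the `groupby` key; itemgetter is implemented in C (the `_operator.c` linked in the commit message), so each key extraction skips a Python-level function call.

    from collections import namedtuple
    from itertools import groupby
    from operator import itemgetter
    from timeit import timeit

    # Stand-in for a SQLAlchemy Row: supports ._fields and row[idx].
    Row = namedtuple("Row", ["metadata_id", "start_ts", "mean"])
    # groupby requires input already grouped/sorted by the key.
    rows = [
        Row(meta_id, float(i), i * 0.5)
        for meta_id in (1, 2, 3)
        for i in range(100_000)
    ]

    # Resolve the column name to an integer index once, up front.
    field_map = {key: idx for idx, key in enumerate(rows[0]._fields)}
    metadata_id_idx = field_map["metadata_id"]

    def group_with_lambda():
        return [(k, sum(1 for _ in g)) for k, g in groupby(rows, lambda r: r.metadata_id)]

    def group_with_itemgetter():
        return [(k, sum(1 for _ in g)) for k, g in groupby(rows, itemgetter(metadata_id_idx))]

    print("lambda keyfunc:    ", timeit(group_with_lambda, number=10))
    print("itemgetter keyfunc:", timeit(group_with_itemgetter, number=10))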
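The shape of the hot loop can be sketched standalone as well. The field names, rows, and converter below are hypothetical, but the pattern mirrors the patch: resolve each optional column to an integer index once (None when the caller did not request it), capture the result list's bound `.append` outside the loop, and branch on `convert` once per row rather than once per field.

    from collections import namedtuple

    Row = namedtuple("Row", ["metadata_id", "start_ts", "mean", "sum"])
    rows = [Row(1, 1000.0 + i, 21.5 + i, 100.0 + i) for i in range(5)]
    types = {"mean", "sum"}  # which statistics the caller asked for

    field_map = {key: idx for idx, key in enumerate(Row._fields)}
    start_ts_idx = field_map["start_ts"]
    mean_idx = field_map["mean"] if "mean" in types else None
    sum_idx = field_map["sum"] if "sum" in types else None

    convert = None  # swap in a unit converter, e.g. lambda kwh: kwh * 1000
    results: list[dict] = []
    results_append = results.append  # bind once: no attribute lookup per row
    for db_state in rows:
        row = {"start": db_state[start_ts_idx]}
        if convert:
            if mean_idx is not None:
                row["mean"] = convert(db_state[mean_idx])
            if sum_idx is not None:
                row["sum"] = convert(db_state[sum_idx])
        else:
            if mean_idx is not None:
                row["mean"] = db_state[mean_idx]
            if sum_idx is not None:
                row["sum"] = db_state[sum_idx]
        results_append(row)
    print(results[0])  # {'start': 1000.0, 'mean': 21.5, 'sum': 100.0}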