From e10e3ee7cc3e37746242dd2584b24998db3a340f Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 2 Apr 2023 14:51:25 -1000 Subject: [PATCH] Fix memory churn in state templates (#90685) * Fix memory churn in state templates The LRU for state templates was limited to 512 states. As soon as it was exhausted, system performance would tank as each template that iterated all states would have to create and GC any state > 512 * does it scale? * avoid copy on all * comment * preen * cover * cover * comments * comments * comments * preen * preen --- homeassistant/bootstrap.py | 1 + homeassistant/helpers/template.py | 98 +++++++++++++++++++++++++++---- tests/helpers/test_template.py | 40 ++++++++++++- 3 files changed, 128 insertions(+), 11 deletions(-) diff --git a/homeassistant/bootstrap.py b/homeassistant/bootstrap.py index 445ff35793c..d98680c70d4 100644 --- a/homeassistant/bootstrap.py +++ b/homeassistant/bootstrap.py @@ -239,6 +239,7 @@ async def load_registries(hass: core.HomeAssistant) -> None: # Load the registries and cache the result of platform.uname().processor entity.async_setup(hass) + template.async_setup(hass) await asyncio.gather( area_registry.async_load(hass), device_registry.async_load(hass), diff --git a/homeassistant/helpers/template.py b/homeassistant/helpers/template.py index 8e5951488ba..fb693d6957d 100644 --- a/homeassistant/helpers/template.py +++ b/homeassistant/helpers/template.py @@ -5,7 +5,7 @@ from ast import literal_eval import asyncio import base64 import collections.abc -from collections.abc import Callable, Collection, Generator, Iterable +from collections.abc import Callable, Collection, Generator, Iterable, MutableMapping from contextlib import contextmanager, suppress from contextvars import ContextVar from datetime import datetime, timedelta @@ -41,6 +41,7 @@ from jinja2 import pass_context, pass_environment, pass_eval_context from jinja2.runtime import AsyncLoopContext, LoopContext from jinja2.sandbox import
ImmutableSandboxedEnvironment from jinja2.utils import Namespace +from lru import LRU # pylint: disable=no-name-in-module import voluptuous as vol from homeassistant.const import ( @@ -49,6 +50,8 @@ from homeassistant.const import ( ATTR_LONGITUDE, ATTR_PERSONS, ATTR_UNIT_OF_MEASUREMENT, + EVENT_HOMEASSISTANT_START, + EVENT_HOMEASSISTANT_STOP, STATE_UNAVAILABLE, STATE_UNKNOWN, UnitOfLength, @@ -121,11 +124,77 @@ template_cv: ContextVar[tuple[str, str] | None] = ContextVar( "template_cv", default=None ) +# +# CACHED_TEMPLATE_STATES is a rough estimate of the number of entities +# on a typical system. It is used as the initial size of the LRU cache +# for TemplateState objects. +# +# If the cache is too small we will end up creating and destroying +# TemplateState objects too often which will cause a lot of GC activity +# and slow down the system. For systems with a lot of entities and +# templates, this can reach 100000s of object creations and destructions +# per minute. +# +# Since entity counts may grow over time, we will increase +# the size if the number of entities grows via _async_adjust_lru_sizes +# at the start of the system and every 10 minutes if needed. 
+# CACHED_TEMPLATE_STATES = 512 EVAL_CACHE_SIZE = 512 MAX_CUSTOM_TEMPLATE_SIZE = 5 * 1024 * 1024 +CACHED_TEMPLATE_LRU: MutableMapping[State, TemplateState] = LRU(CACHED_TEMPLATE_STATES) +CACHED_TEMPLATE_NO_COLLECT_LRU: MutableMapping[State, TemplateState] = LRU( + CACHED_TEMPLATE_STATES +) +ENTITY_COUNT_GROWTH_FACTOR = 1.2 + + +def _template_state_no_collect(hass: HomeAssistant, state: State) -> TemplateState: + """Return a TemplateState for a state without collecting.""" + if template_state := CACHED_TEMPLATE_NO_COLLECT_LRU.get(state): + return template_state + template_state = _create_template_state_no_collect(hass, state) + CACHED_TEMPLATE_NO_COLLECT_LRU[state] = template_state + return template_state + + +def _template_state(hass: HomeAssistant, state: State) -> TemplateState: + """Return a TemplateState for a state that collects.""" + if template_state := CACHED_TEMPLATE_LRU.get(state): + return template_state + template_state = TemplateState(hass, state) + CACHED_TEMPLATE_LRU[state] = template_state + return template_state + + +def async_setup(hass: HomeAssistant) -> bool: + """Set up tracking the template LRUs.""" + + @callback + def _async_adjust_lru_sizes(_: Any) -> None: + """Adjust the lru cache sizes.""" + new_size = int( + round(hass.states.async_entity_ids_count() * ENTITY_COUNT_GROWTH_FACTOR) + ) + for lru in (CACHED_TEMPLATE_LRU, CACHED_TEMPLATE_NO_COLLECT_LRU): + # There is no typing for LRU + current_size = lru.get_size() # type: ignore[attr-defined] + if new_size > current_size: + lru.set_size(new_size) # type: ignore[attr-defined] + + from .event import ( # pylint: disable=import-outside-toplevel + async_track_time_interval, + ) + + cancel = async_track_time_interval( + hass, _async_adjust_lru_sizes, timedelta(minutes=10) + ) + hass.bus.async_listen_once(EVENT_HOMEASSISTANT_START, _async_adjust_lru_sizes) + hass.bus.async_listen_once(EVENT_HOMEASSISTANT_STOP, callback(lambda _: cancel())) + return True + @bind_hass def attach(hass: 
HomeAssistant, obj: Any) -> None: @@ -969,21 +1038,33 @@ class TemplateStateFromEntityId(TemplateStateBase): return f"