Fix memory churn in state templates (#90685)
* Fix memory churn in state templates The LRU for state templates was limited to 512 states. As soon as it was exhausted, system performance would tank as each template that iterated all states would have to create and GC any state > 512 * does it scale? * avoid copy on all * comment * preen * cover * cover * comments * comments * comments * preen * preen pull/90855/head
parent
83b7018be2
commit
e10e3ee7cc
|
@ -239,6 +239,7 @@ async def load_registries(hass: core.HomeAssistant) -> None:
|
|||
|
||||
# Load the registries and cache the result of platform.uname().processor
|
||||
entity.async_setup(hass)
|
||||
template.async_setup(hass)
|
||||
await asyncio.gather(
|
||||
area_registry.async_load(hass),
|
||||
device_registry.async_load(hass),
|
||||
|
|
|
@ -5,7 +5,7 @@ from ast import literal_eval
|
|||
import asyncio
|
||||
import base64
|
||||
import collections.abc
|
||||
from collections.abc import Callable, Collection, Generator, Iterable
|
||||
from collections.abc import Callable, Collection, Generator, Iterable, MutableMapping
|
||||
from contextlib import contextmanager, suppress
|
||||
from contextvars import ContextVar
|
||||
from datetime import datetime, timedelta
|
||||
|
@ -41,6 +41,7 @@ from jinja2 import pass_context, pass_environment, pass_eval_context
|
|||
from jinja2.runtime import AsyncLoopContext, LoopContext
|
||||
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
||||
from jinja2.utils import Namespace
|
||||
from lru import LRU # pylint: disable=no-name-in-module
|
||||
import voluptuous as vol
|
||||
|
||||
from homeassistant.const import (
|
||||
|
@ -49,6 +50,8 @@ from homeassistant.const import (
|
|||
ATTR_LONGITUDE,
|
||||
ATTR_PERSONS,
|
||||
ATTR_UNIT_OF_MEASUREMENT,
|
||||
EVENT_HOMEASSISTANT_START,
|
||||
EVENT_HOMEASSISTANT_STOP,
|
||||
STATE_UNAVAILABLE,
|
||||
STATE_UNKNOWN,
|
||||
UnitOfLength,
|
||||
|
@ -121,11 +124,77 @@ template_cv: ContextVar[tuple[str, str] | None] = ContextVar(
|
|||
"template_cv", default=None
|
||||
)
|
||||
|
||||
#
|
||||
# CACHED_TEMPLATE_STATES is a rough estimate of the number of entities
|
||||
# on a typical system. It is used as the initial size of the LRU cache
|
||||
# for TemplateState objects.
|
||||
#
|
||||
# If the cache is too small we will end up creating and destroying
|
||||
# TemplateState objects too often which will cause a lot of GC activity
|
||||
# and slow down the system. For systems with a lot of entities and
|
||||
# templates, this can reach 100000s of object creations and destructions
|
||||
# per minute.
|
||||
#
|
||||
# Since entity counts may grow over time, we will increase
|
||||
# the size if the number of entities grows via _async_adjust_lru_sizes
|
||||
# at the start of the system and every 10 minutes if needed.
|
||||
#
|
||||
CACHED_TEMPLATE_STATES = 512
|
||||
EVAL_CACHE_SIZE = 512
|
||||
|
||||
MAX_CUSTOM_TEMPLATE_SIZE = 5 * 1024 * 1024
|
||||
|
||||
CACHED_TEMPLATE_LRU: MutableMapping[State, TemplateState] = LRU(CACHED_TEMPLATE_STATES)
|
||||
CACHED_TEMPLATE_NO_COLLECT_LRU: MutableMapping[State, TemplateState] = LRU(
|
||||
CACHED_TEMPLATE_STATES
|
||||
)
|
||||
ENTITY_COUNT_GROWTH_FACTOR = 1.2
|
||||
|
||||
|
||||
def _template_state_no_collect(hass: HomeAssistant, state: State) -> TemplateState:
    """Look up, or build and cache, the non-collecting TemplateState for a state.

    The wrapper is keyed by the State object in CACHED_TEMPLATE_NO_COLLECT_LRU,
    so repeated requests for the same state reuse one TemplateState instead
    of allocating a new one each time.
    """
    cached = CACHED_TEMPLATE_NO_COLLECT_LRU.get(state)
    if cached:
        return cached
    # Cache miss: build the wrapper and remember it for the next lookup.
    fresh = _create_template_state_no_collect(hass, state)
    CACHED_TEMPLATE_NO_COLLECT_LRU[state] = fresh
    return fresh
|
||||
|
||||
|
||||
def _template_state(hass: HomeAssistant, state: State) -> TemplateState:
    """Look up, or build and cache, the collecting TemplateState for a state.

    The wrapper is keyed by the State object in CACHED_TEMPLATE_LRU, so
    repeated requests for the same state reuse one TemplateState instead of
    allocating a new one each time.
    """
    cached = CACHED_TEMPLATE_LRU.get(state)
    if cached:
        return cached
    # Cache miss: build the wrapper and remember it for the next lookup.
    fresh = TemplateState(hass, state)
    CACHED_TEMPLATE_LRU[state] = fresh
    return fresh
|
||||
|
||||
|
||||
def async_setup(hass: HomeAssistant) -> bool:
    """Keep the template LRU caches at least as large as the entity count.

    Registers a size check that runs once on EVENT_HOMEASSISTANT_START and
    then every 10 minutes. The periodic check is cancelled on
    EVENT_HOMEASSISTANT_STOP. Always returns True.
    """
    from .event import (  # pylint: disable=import-outside-toplevel
        async_track_time_interval,
    )

    @callback
    def _async_adjust_lru_sizes(_: Any) -> None:
        """Grow each LRU when the entity count has outgrown it."""
        entity_count = hass.states.async_entity_ids_count()
        wanted_size = int(round(entity_count * ENTITY_COUNT_GROWTH_FACTOR))
        for cache in (CACHED_TEMPLATE_LRU, CACHED_TEMPLATE_NO_COLLECT_LRU):
            # There is no typing for LRU
            existing_size = cache.get_size()  # type: ignore[attr-defined]
            # Caches only ever grow here; a smaller target leaves them alone.
            if existing_size < wanted_size:
                cache.set_size(wanted_size)  # type: ignore[attr-defined]

    cancel_interval = async_track_time_interval(
        hass, _async_adjust_lru_sizes, timedelta(minutes=10)
    )
    hass.bus.async_listen_once(EVENT_HOMEASSISTANT_START, _async_adjust_lru_sizes)
    hass.bus.async_listen_once(
        EVENT_HOMEASSISTANT_STOP, callback(lambda _: cancel_interval())
    )
    return True
|
||||
|
||||
|
||||
@bind_hass
|
||||
def attach(hass: HomeAssistant, obj: Any) -> None:
|
||||
|
@ -969,21 +1038,33 @@ class TemplateStateFromEntityId(TemplateStateBase):
|
|||
return f"<template TemplateStateFromEntityId({self._entity_id})>"
|
||||
|
||||
|
||||
_create_template_state_no_collect = partial(TemplateState, collect=False)
|
||||
|
||||
|
||||
def _collect_state(hass: HomeAssistant, entity_id: str) -> None:
    """Add ``entity_id`` to the entities of the current render info, if any.

    Does nothing when no render info is stored under _RENDER_INFO in
    ``hass.data``.
    """
    render_info = hass.data.get(_RENDER_INFO)
    if render_info is None:
        return
    render_info.entities.add(entity_id)
|
||||
|
||||
|
||||
_template_state_no_collect = lru_cache(maxsize=CACHED_TEMPLATE_STATES)(
|
||||
partial(TemplateState, collect=False)
|
||||
)
|
||||
|
||||
|
||||
def _state_generator(
    hass: HomeAssistant, domain: str | None
) -> Generator[TemplateState, None, None]:
    """State generator for a domain or all states.

    Yields one non-collecting TemplateState per state, obtained through
    _template_state_no_collect.

    The stale pre-change loop header over ``hass.states.async_all(domain)``
    is dropped: left in place it would wrap the loop below, reintroducing
    the list copy and yielding every state once per state.
    """
    states = hass.states
    # If domain is None, we want to iterate over all states, but making
    # a copy of the dict is expensive. So we iterate over the protected
    # _states dict instead. This is safe because we're not modifying it
    # and everything is happening in the same thread (MainThread).
    #
    # We do not want to expose this method in the public API though to
    # ensure it does not get misused.
    #
    container: Iterable[State]
    if domain is None:
        container = states._states.values()  # pylint: disable=protected-access
    else:
        container = states.async_all(domain)
    for state in container:
        yield _template_state_no_collect(hass, state)
|
||||
|
||||
|
||||
|
@ -998,9 +1079,6 @@ def _get_state(hass: HomeAssistant, entity_id: str) -> TemplateState | None:
|
|||
return _get_template_state_from_state(hass, entity_id, hass.states.get(entity_id))
|
||||
|
||||
|
||||
_template_state = lru_cache(maxsize=CACHED_TEMPLATE_STATES)(TemplateState)
|
||||
|
||||
|
||||
def _get_template_state_from_state(
|
||||
hass: HomeAssistant, entity_id: str, state: State | None
|
||||
) -> TemplateState | None:
|
||||
|
|
|
@ -43,7 +43,7 @@ from homeassistant.setup import async_setup_component
|
|||
import homeassistant.util.dt as dt_util
|
||||
from homeassistant.util.unit_system import UnitSystem
|
||||
|
||||
from tests.common import MockConfigEntry
|
||||
from tests.common import MockConfigEntry, async_fire_time_changed
|
||||
|
||||
|
||||
def _set_up_units(hass: HomeAssistant) -> None:
|
||||
|
@ -4497,3 +4497,41 @@ async def test_render_to_info_with_exception(hass: HomeAssistant) -> None:
|
|||
|
||||
assert info.all_states is False
|
||||
assert info.entities == {"test_domain.object"}
|
||||
|
||||
|
||||
async def test_lru_increases_with_many_entities(hass: HomeAssistant) -> None:
    """Test that the template LRU caches grow with the entity count.

    Also checks that they stop growing once Home Assistant has stopped.
    """
    # We do not actually want to record 4096 entities so we mock the entity count
    mock_entity_count = 4096
    caches = (template.CACHED_TEMPLATE_LRU, template.CACHED_TEMPLATE_NO_COLLECT_LRU)

    # Both caches start at the default size.
    for cache in caches:
        assert cache.get_size() == template.CACHED_TEMPLATE_STATES

    template.async_setup(hass)
    with patch.object(
        hass.states, "async_entity_ids_count", return_value=mock_entity_count
    ):
        async_fire_time_changed(hass, dt_util.utcnow() + timedelta(minutes=10))
        await hass.async_block_till_done()

    grown_size = int(round(mock_entity_count * template.ENTITY_COUNT_GROWTH_FACTOR))
    for cache in caches:
        assert cache.get_size() == grown_size

    # After stop, a larger entity count must no longer resize the caches.
    await hass.async_stop()
    with patch.object(hass.states, "async_entity_ids_count", return_value=8192):
        async_fire_time_changed(hass, dt_util.utcnow() + timedelta(minutes=20))
        await hass.async_block_till_done()

    for cache in caches:
        assert cache.get_size() == grown_size
|
||||
|
|
Loading…
Reference in New Issue