"""The profiler integration."""
|
|
import asyncio
|
|
from contextlib import suppress
|
|
from datetime import timedelta
|
|
from functools import _lru_cache_wrapper
|
|
import logging
|
|
import reprlib
|
|
import sys
|
|
import threading
|
|
import time
|
|
import traceback
|
|
from typing import Any, cast
|
|
|
|
from lru import LRU
|
|
import voluptuous as vol
|
|
|
|
from homeassistant.components import persistent_notification
|
|
from homeassistant.config_entries import ConfigEntry
|
|
from homeassistant.const import CONF_SCAN_INTERVAL, CONF_TYPE
|
|
from homeassistant.core import HomeAssistant, ServiceCall, callback
|
|
from homeassistant.exceptions import HomeAssistantError
|
|
import homeassistant.helpers.config_validation as cv
|
|
from homeassistant.helpers.event import async_track_time_interval
|
|
from homeassistant.helpers.service import async_register_admin_service
|
|
|
|
from .const import DOMAIN
|
|
|
|
SERVICE_START = "start"
SERVICE_MEMORY = "memory"
SERVICE_START_LOG_OBJECTS = "start_log_objects"
SERVICE_STOP_LOG_OBJECTS = "stop_log_objects"
SERVICE_START_LOG_OBJECT_SOURCES = "start_log_object_sources"
SERVICE_STOP_LOG_OBJECT_SOURCES = "stop_log_object_sources"
SERVICE_DUMP_LOG_OBJECTS = "dump_log_objects"
SERVICE_LRU_STATS = "lru_stats"
SERVICE_LOG_THREAD_FRAMES = "log_thread_frames"
SERVICE_LOG_EVENT_LOOP_SCHEDULED = "log_event_loop_scheduled"

_LRU_CACHE_WRAPPER_OBJECT = _lru_cache_wrapper.__name__
_SQLALCHEMY_LRU_OBJECT = "LRUCache"

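# Classes known to keep an lru.LRU cache as an instance attribute (the recorder's
# table managers and similar); the lru_stats service looks these up by class name
# and inspects their attributes.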
_KNOWN_LRU_CLASSES = (
    "EventDataManager",
    "EventTypeManager",
    "StatesMetaManager",
    "StateAttributesManager",
    "StatisticsMetaManager",
    "IntegrationMatcher",
)

SERVICES = (
    SERVICE_START,
    SERVICE_MEMORY,
    SERVICE_START_LOG_OBJECTS,
    SERVICE_STOP_LOG_OBJECTS,
    SERVICE_DUMP_LOG_OBJECTS,
    SERVICE_LRU_STATS,
    SERVICE_LOG_THREAD_FRAMES,
    SERVICE_LOG_EVENT_LOOP_SCHEDULED,
)

DEFAULT_SCAN_INTERVAL = timedelta(seconds=30)

DEFAULT_MAX_OBJECTS = 5

CONF_SECONDS = "seconds"
CONF_MAX_OBJECTS = "max_objects"

LOG_INTERVAL_SUB = "log_interval_subscription"

_LOGGER = logging.getLogger(__name__)


async def async_setup_entry(  # noqa: C901
    hass: HomeAssistant, entry: ConfigEntry
) -> bool:
    """Set up Profiler from a config entry."""
    # A single lock serializes the cProfile and memory profile services so only
    # one profile run is active at a time.
    lock = asyncio.Lock()
    domain_data = hass.data[DOMAIN] = {}

    async def _async_run_profile(call: ServiceCall) -> None:
        """Run a cProfile, serialized behind the shared lock."""
        async with lock:
            await _async_generate_profile(hass, call)

    async def _async_run_memory_profile(call: ServiceCall) -> None:
        """Run a guppy heap profile, serialized behind the shared lock."""
        async with lock:
            await _async_generate_memory_profile(hass, call)

    async def _async_start_log_objects(call: ServiceCall) -> None:
        if LOG_INTERVAL_SUB in domain_data:
            raise HomeAssistantError("Object logging already started")

        persistent_notification.async_create(
            hass,
            (
                "Object growth logging has started. See [the logs](/config/logs) to"
                " track the growth of new objects."
            ),
            title="Object growth logging started",
            notification_id="profile_object_logging",
        )
        # Log once immediately (in the executor, since objgraph walks all live
        # objects), then keep logging on the requested interval.
        await hass.async_add_executor_job(_log_objects)
        domain_data[LOG_INTERVAL_SUB] = async_track_time_interval(
            hass, _log_objects, call.data[CONF_SCAN_INTERVAL]
        )

    async def _async_stop_log_objects(call: ServiceCall) -> None:
        if LOG_INTERVAL_SUB not in domain_data:
            raise HomeAssistantError("Object logging not running")

        persistent_notification.async_dismiss(hass, "profile_object_logging")
        # Remove the stored unsubscribe callback and call it to cancel the interval.
        domain_data.pop(LOG_INTERVAL_SUB)()

    async def _async_start_object_sources(call: ServiceCall) -> None:
        if LOG_INTERVAL_SUB in domain_data:
            raise HomeAssistantError("Object logging already started")

        persistent_notification.async_create(
            hass,
            (
                "Object source logging has started. See [the logs](/config/logs) to"
                " track the growth of new objects."
            ),
            title="Object source logging started",
            notification_id="profile_object_source_logging",
        )

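        # Mutable state shared with _log_object_sources across interval runs so
        # that only objects not seen on the previous run are reported as new.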
        last_ids: set[int] = set()
        last_stats: dict[str, int] = {}

        async def _log_object_sources_with_max(*_: Any) -> None:
            await hass.async_add_executor_job(
                _log_object_sources, call.data[CONF_MAX_OBJECTS], last_ids, last_stats
            )

        await _log_object_sources_with_max()
        cancel_track = async_track_time_interval(
            hass, _log_object_sources_with_max, call.data[CONF_SCAN_INTERVAL]
        )

        @callback
        def _cancel():
            cancel_track()
            last_ids.clear()
            last_stats.clear()

        domain_data[LOG_INTERVAL_SUB] = _cancel

    @callback
    def _async_stop_object_sources(call: ServiceCall) -> None:
        if LOG_INTERVAL_SUB not in domain_data:
            raise HomeAssistantError("Object logging not running")

        persistent_notification.async_dismiss(hass, "profile_object_source_logging")
        domain_data.pop(LOG_INTERVAL_SUB)()

    def _dump_log_objects(call: ServiceCall) -> None:
        """Dump the repr of all objects of a given type to the log."""
        # Imports deferred to avoid loading modules
        # in memory since usually only one part of this
        # integration is used at a time
        import objgraph  # pylint: disable=import-outside-toplevel

        obj_type = call.data[CONF_TYPE]

        for obj in objgraph.by_type(obj_type):
            _LOGGER.critical(
                "%s object in memory: %s",
                obj_type,
                _safe_repr(obj),
            )

        persistent_notification.create(
            hass,
            (
                f"Objects with type {obj_type} have been dumped to the log. See [the"
                " logs](/config/logs) to review the repr of the objects."
            ),
            title="Object dump completed",
            notification_id="profile_object_dump",
        )

    def _lru_stats(call: ServiceCall) -> None:
        """Log the stats of all LRU caches."""
        # Imports deferred to avoid loading modules
        # in memory since usually only one part of this
        # integration is used at a time
        import objgraph  # pylint: disable=import-outside-toplevel

        # functools.lru_cache wrappers expose cache_info() directly.
        for lru in objgraph.by_type(_LRU_CACHE_WRAPPER_OBJECT):
            lru = cast(_lru_cache_wrapper, lru)
            _LOGGER.critical(
                "Cache stats for lru_cache %s at %s: %s",
                lru.__wrapped__,
                _get_function_absfile(lru.__wrapped__) or "unknown",
                lru.cache_info(),
            )

        # Known classes that keep an lru.LRU cache as an instance attribute.
        for _class in _KNOWN_LRU_CLASSES:
            for class_with_lru_attr in objgraph.by_type(_class):
                for maybe_lru in class_with_lru_attr.__dict__.values():
                    if isinstance(maybe_lru, LRU):
                        _LOGGER.critical(
                            "Cache stats for LRU %s at %s: %s",
                            type(class_with_lru_attr),
                            _get_function_absfile(class_with_lru_attr) or "unknown",
                            maybe_lru.get_stats(),
                        )

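        # SQLAlchemy's LRUCache is only matched by class name, so duck-type its
        # private _data mapping and dump the cached entries it holds.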
        for lru in objgraph.by_type(_SQLALCHEMY_LRU_OBJECT):
            if (data := getattr(lru, "_data", None)) and isinstance(data, dict):
                for key, value in dict(data).items():
                    _LOGGER.critical(
                        "Cache data for sqlalchemy LRUCache %s: %s: %s", lru, key, value
                    )

        persistent_notification.create(
            hass,
            (
                "LRU cache states have been dumped to the log. See [the"
                " logs](/config/logs) to review the stats."
            ),
            title="LRU stats completed",
            notification_id="profile_lru_stats",
        )

    async def _async_dump_thread_frames(call: ServiceCall) -> None:
        """Log all thread frames."""
        frames = sys._current_frames()  # pylint: disable=protected-access
        main_thread = threading.main_thread()
        for thread in threading.enumerate():
            if thread == main_thread:
                continue
            ident = cast(int, thread.ident)
            _LOGGER.critical(
                "Thread [%s]: %s",
                thread.name,
                "".join(traceback.format_stack(frames.get(ident))).strip(),
            )

    async def _async_dump_scheduled(call: ServiceCall) -> None:
        """Log all callbacks scheduled in the event loop."""
        # Temporarily raise reprlib's limits so scheduled handles are not truncated.
        arepr = reprlib.aRepr
        original_maxstring = arepr.maxstring
        original_maxother = arepr.maxother
        arepr.maxstring = 300
        arepr.maxother = 300
        handle: asyncio.Handle
        try:
            for handle in getattr(hass.loop, "_scheduled"):
                if not handle.cancelled():
                    _LOGGER.critical("Scheduled: %s", handle)
        finally:
            arepr.maxstring = original_maxstring
            arepr.maxother = original_maxother

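    # Register the profiler services; async_register_admin_service restricts the
    # calls to admin users.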
    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_START,
        _async_run_profile,
        schema=vol.Schema(
            {vol.Optional(CONF_SECONDS, default=60.0): vol.Coerce(float)}
        ),
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_MEMORY,
        _async_run_memory_profile,
        schema=vol.Schema(
            {vol.Optional(CONF_SECONDS, default=60.0): vol.Coerce(float)}
        ),
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_START_LOG_OBJECTS,
        _async_start_log_objects,
        schema=vol.Schema(
            {
                vol.Optional(
                    CONF_SCAN_INTERVAL, default=DEFAULT_SCAN_INTERVAL
                ): cv.time_period
            }
        ),
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_STOP_LOG_OBJECTS,
        _async_stop_log_objects,
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_START_LOG_OBJECT_SOURCES,
        _async_start_object_sources,
        schema=vol.Schema(
            {
                vol.Optional(
                    CONF_SCAN_INTERVAL, default=DEFAULT_SCAN_INTERVAL
                ): cv.time_period,
                vol.Optional(CONF_MAX_OBJECTS, default=DEFAULT_MAX_OBJECTS): vol.Range(
                    min=1, max=1024
                ),
            }
        ),
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_STOP_LOG_OBJECT_SOURCES,
        _async_stop_object_sources,
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_DUMP_LOG_OBJECTS,
        _dump_log_objects,
        schema=vol.Schema({vol.Required(CONF_TYPE): str}),
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_LRU_STATS,
        _lru_stats,
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_LOG_THREAD_FRAMES,
        _async_dump_thread_frames,
    )

    async_register_admin_service(
        hass,
        DOMAIN,
        SERVICE_LOG_EVENT_LOOP_SCHEDULED,
        _async_dump_scheduled,
    )

    return True


async def async_unload_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
    """Unload a config entry."""
    for service in SERVICES:
        hass.services.async_remove(domain=DOMAIN, service=service)
    # Cancel any running object logging before dropping the domain data.
    if LOG_INTERVAL_SUB in hass.data[DOMAIN]:
        hass.data[DOMAIN][LOG_INTERVAL_SUB]()
    hass.data.pop(DOMAIN)
    return True


async def _async_generate_profile(hass: HomeAssistant, call: ServiceCall):
    """Run a cProfile for the requested number of seconds and write the results."""
    # Imports deferred to avoid loading modules
    # in memory since usually only one part of this
    # integration is used at a time
    import cProfile  # pylint: disable=import-outside-toplevel

    start_time = int(time.time() * 1000000)
    persistent_notification.async_create(
        hass,
        (
            "The profile has started. This notification will be updated when it is"
            " complete."
        ),
        title="Profile Started",
        notification_id=f"profiler_{start_time}",
    )
    profiler = cProfile.Profile()
    profiler.enable()
    await asyncio.sleep(float(call.data[CONF_SECONDS]))
    profiler.disable()

    cprofile_path = hass.config.path(f"profile.{start_time}.cprof")
    callgrind_path = hass.config.path(f"callgrind.out.{start_time}")
    await hass.async_add_executor_job(
        _write_profile, profiler, cprofile_path, callgrind_path
    )
    persistent_notification.async_create(
        hass,
        (
            f"Wrote cProfile data to {cprofile_path} and callgrind data to"
            f" {callgrind_path}"
        ),
        title="Profile Complete",
        notification_id=f"profiler_{start_time}",
    )


async def _async_generate_memory_profile(hass: HomeAssistant, call: ServiceCall):
    """Take a guppy heap profile for the requested number of seconds."""
    # Imports deferred to avoid loading modules
    # in memory since usually only one part of this
    # integration is used at a time
    from guppy import hpy  # pylint: disable=import-outside-toplevel

    start_time = int(time.time() * 1000000)
    persistent_notification.async_create(
        hass,
        (
            "The memory profile has started. This notification will be updated when it"
            " is complete."
        ),
        title="Profile Started",
        notification_id=f"memory_profiler_{start_time}",
    )
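    # setref() sets guppy's reference point, so the heap() call below should only
    # reflect objects allocated while the profile was running.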
    heap_profiler = hpy()
    heap_profiler.setref()
    await asyncio.sleep(float(call.data[CONF_SECONDS]))
    heap = heap_profiler.heap()

    heap_path = hass.config.path(f"heap_profile.{start_time}.hpy")
    await hass.async_add_executor_job(_write_memory_profile, heap, heap_path)
    persistent_notification.async_create(
        hass,
        f"Wrote heapy memory profile to {heap_path}",
        title="Profile Complete",
        notification_id=f"memory_profiler_{start_time}",
    )


def _write_profile(profiler, cprofile_path, callgrind_path):
    """Write the cProfile data as both pstats and callgrind files."""
    # Imports deferred to avoid loading modules
    # in memory since usually only one part of this
    # integration is used at a time
    from pyprof2calltree import convert  # pylint: disable=import-outside-toplevel

    profiler.create_stats()
    profiler.dump_stats(cprofile_path)
    convert(profiler.getstats(), callgrind_path)


def _write_memory_profile(heap, heap_path):
    """Write the heap profile to a file."""
    heap.byrcs.dump(heap_path)


def _log_objects(*_):
    """Log the growth in object counts since the last call."""
    # Imports deferred to avoid loading modules
    # in memory since usually only one part of this
    # integration is used at a time
    import objgraph  # pylint: disable=import-outside-toplevel

    _LOGGER.critical("Memory Growth: %s", objgraph.growth(limit=1000))


def _get_function_absfile(func: Any) -> str | None:
    """Get the absolute file path of a function."""
    import inspect  # pylint: disable=import-outside-toplevel

    abs_file: str | None = None
    with suppress(Exception):
        abs_file = inspect.getabsfile(func)
    return abs_file


def _safe_repr(obj: Any) -> str:
    """Get the repr of an object but keep going if there is an exception.

    We wrap repr to ensure if one object cannot be serialized, we can
    still get the rest.
    """
    try:
        return repr(obj)
    except Exception:  # pylint: disable=broad-except
        return f"Failed to serialize {type(obj)}"


def _find_backrefs_not_to_self(_object: Any) -> list[str]:
    """Return the reprs of a chain of objects that refer to _object."""
    import objgraph  # pylint: disable=import-outside-toplevel

    return [
        _safe_repr(backref)
        for backref in objgraph.find_backref_chain(
            _object, lambda obj: obj is not _object
        )
    ]


def _log_object_sources(
    max_objects: int, last_ids: set[int], last_stats: dict[str, int]
) -> None:
    """Log the source of objects that are new since the last call."""
    # Imports deferred to avoid loading modules
    # in memory since usually only one part of this
    # integration is used at a time
    import gc  # pylint: disable=import-outside-toplevel

    gc.collect()

    objects = gc.get_objects()
    new_objects: list[object] = []
    new_objects_overflow: dict[str, int] = {}
    current_ids = set()
    new_stats: dict[str, int] = {}
    had_new_object_growth = False
    try:
        # First pass: count the live objects of each type.
        for _object in objects:
            object_type = type(_object).__name__
            new_stats[object_type] = new_stats.get(object_type, 0) + 1

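        # Second pass: collect objects whose id was not seen on the previous run
        # and whose per-type count has grown, keeping at most max_objects of them.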
        for _object in objects:
            id_ = id(_object)
            current_ids.add(id_)
            if id_ in last_ids:
                continue
            object_type = type(_object).__name__
            if last_stats.get(object_type, 0) < new_stats[object_type]:
                if len(new_objects) < max_objects:
                    new_objects.append(_object)
                else:
                    new_objects_overflow.setdefault(object_type, 0)
                    new_objects_overflow[object_type] += 1

        for _object in new_objects:
            had_new_object_growth = True
            object_type = type(_object).__name__
            _LOGGER.critical(
                "New object %s (%s/%s) at %s: %s",
                object_type,
                last_stats.get(object_type, 0),
                new_stats[object_type],
                _get_function_absfile(_object) or _find_backrefs_not_to_self(_object),
                _safe_repr(_object),
            )

        for object_type, count in last_stats.items():
            new_stats[object_type] = max(new_stats.get(object_type, 0), count)
    finally:
        # Break reference cycles
        del objects
        del new_objects
        last_ids.clear()
        last_ids.update(current_ids)
        last_stats.clear()
        last_stats.update(new_stats)
        del new_stats
        del current_ids

    if new_objects_overflow:
        _LOGGER.critical("New objects overflowed by %s", new_objects_overflow)
    elif not had_new_object_growth:
        _LOGGER.critical("No new object growth found")