diff --git a/homeassistant/components/recorder/models/database.py b/homeassistant/components/recorder/models/database.py index 94c5a7cc027..b86fd299793 100644 --- a/homeassistant/components/recorder/models/database.py +++ b/homeassistant/components/recorder/models/database.py @@ -32,4 +32,8 @@ class DatabaseOptimizer: # # https://jira.mariadb.org/browse/MDEV-25020 # + # PostgreSQL does not support a skip/loose index scan so it's + # also slow for large distinct queries: + # https://wiki.postgresql.org/wiki/Loose_indexscan + # https://github.com/home-assistant/core/issues/126084 slow_range_in_select: bool diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 11f5accc978..881952c390d 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -346,6 +346,10 @@ def _select_unused_attributes_ids( # We now break the query into groups of 100 and use a lambda_stmt to ensure # that the query is only cached once. # + # PostgreSQL also suffers from the same issue as older MariaDB with the distinct query + # when the database gets large because it doesn't support skip/loose index scan. 
+ # https://wiki.postgresql.org/wiki/Loose_indexscan + # https://github.com/home-assistant/core/issues/126084 groups = [iter(attributes_ids)] * 100 for attr_ids in zip_longest(*groups, fillvalue=None): seen_ids |= { diff --git a/homeassistant/components/recorder/queries.py b/homeassistant/components/recorder/queries.py index 34e9ec32f99..7ac4c19bc94 100644 --- a/homeassistant/components/recorder/queries.py +++ b/homeassistant/components/recorder/queries.py @@ -78,7 +78,7 @@ def find_states_metadata_ids(entity_ids: Iterable[str]) -> StatementLambdaElemen def _state_attrs_exist(attr: int | None) -> Select: """Check if a state attributes id exists in the states table.""" - return select(func.min(States.attributes_id)).where(States.attributes_id == attr) + return select(States.attributes_id).where(States.attributes_id == attr).limit(1) def attributes_ids_exist_in_states_with_fast_in_distinct( @@ -315,7 +315,7 @@ def data_ids_exist_in_events_with_fast_in_distinct( def _event_data_id_exist(data_id: int | None) -> Select: """Check if a event data id exists in the events table.""" - return select(func.min(Events.data_id)).where(Events.data_id == data_id) + return select(Events.data_id).where(Events.data_id == data_id).limit(1) def data_ids_exist_in_events( diff --git a/homeassistant/components/recorder/util.py b/homeassistant/components/recorder/util.py index ba4c5194689..4cf24eb79c5 100644 --- a/homeassistant/components/recorder/util.py +++ b/homeassistant/components/recorder/util.py @@ -600,6 +600,12 @@ def setup_connection_for_dialect( execute_on_connection(dbapi_connection, "SET time_zone = '+00:00'") elif dialect_name == SupportedDialect.POSTGRESQL: max_bind_vars = DEFAULT_MAX_BIND_VARS + # PostgreSQL does not support a skip/loose index scan so it's + # also slow for large distinct queries: + # https://wiki.postgresql.org/wiki/Loose_indexscan + # https://github.com/home-assistant/core/issues/126084 + # so we set slow_range_in_select to True + slow_range_in_select = 
True if first_connection: # server_version_num was added in 2006 result = query_on_connection(dbapi_connection, "SHOW server_version") diff --git a/tests/components/recorder/test_util.py b/tests/components/recorder/test_util.py index 99bd5083489..aeeeba1865a 100644 --- a/tests/components/recorder/test_util.py +++ b/tests/components/recorder/test_util.py @@ -502,7 +502,7 @@ def test_supported_pgsql(caplog: pytest.LogCaptureFixture, pgsql_version) -> Non assert "minimum supported version" not in caplog.text assert database_engine is not None - assert database_engine.optimizer.slow_range_in_select is False + assert database_engine.optimizer.slow_range_in_select is True @pytest.mark.parametrize(