Improve purge performance for PostgreSQL with large databases (#133699)

parent 02785a4ded
commit 43fab48d4e
@@ -32,4 +32,8 @@ class DatabaseOptimizer:
     #
     # https://jira.mariadb.org/browse/MDEV-25020
     #
+    # PostgreSQL does not support a skip/loose index scan, so it is
+    # also slow for large distinct queries:
+    # https://wiki.postgresql.org/wiki/Loose_indexscan
+    # https://github.com/home-assistant/core/issues/126084
     slow_range_in_select: bool
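
For context, a minimal sketch of how a flag like this can steer query generation. DatabaseOptimizer and slow_range_in_select come from the diff above; pick_strategy and the strategy names are hypothetical illustrations, not the recorder's actual code:

    from dataclasses import dataclass


    @dataclass
    class DatabaseOptimizer:
        """Flags describing per-engine query weaknesses (sketch)."""

        # True for engines (older MariaDB, PostgreSQL) where large
        # DISTINCT/range selects degrade without a skip/loose index scan.
        slow_range_in_select: bool


    def pick_strategy(optimizer: DatabaseOptimizer) -> str:
        """Choose a purge-query strategy from the flag (hypothetical helper)."""
        if optimizer.slow_range_in_select:
            # Fall back to chunked IN (...) lookups instead of one big
            # DISTINCT scan over the whole id range.
            return "chunked_in"
        return "fast_in_distinct"
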
@@ -346,6 +346,10 @@ def _select_unused_attributes_ids(
         # We now break the query into groups of 100 and use a lambda_stmt to ensure
         # that the query is only cached once.
         #
+        # PostgreSQL also suffers from the same issue as older MariaDB with the distinct
+        # query when the database gets large, because it does not support a skip/loose index scan.
+        # https://wiki.postgresql.org/wiki/Loose_indexscan
+        # https://github.com/home-assistant/core/issues/126084
         groups = [iter(attributes_ids)] * 100
         for attr_ids in zip_longest(*groups, fillvalue=None):
             seen_ids |= {
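
The grouping line above is the standard itertools idiom for batching an iterable into fixed-size chunks. A self-contained sketch with a chunk size of 3 instead of 100:

    from itertools import zip_longest

    attributes_ids = {10, 11, 12, 13, 14, 15, 16}

    # The same iterator object repeated 3 times: zip_longest pulls from it
    # round-robin, so consecutive items land in each size-3 tuple.
    groups = [iter(attributes_ids)] * 3
    for attr_ids in zip_longest(*groups, fillvalue=None):
        # The final tuple is padded with None, which the real query must
        # tolerate or filter out before binding the ids.
        print([attr_id for attr_id in attr_ids if attr_id is not None])
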
@@ -78,7 +78,7 @@ def find_states_metadata_ids(entity_ids: Iterable[str]) -> StatementLambdaElemen

 def _state_attrs_exist(attr: int | None) -> Select:
     """Check if a state attributes id exists in the states table."""
-    return select(func.min(States.attributes_id)).where(States.attributes_id == attr)
+    return select(States.attributes_id).where(States.attributes_id == attr).limit(1)


 def attributes_ids_exist_in_states_with_fast_in_distinct(
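
The next hunk applies the identical rewrite to Events.data_id. To see why .limit(1) helps for both, here is a hedged sketch comparing the SQL the two forms produce; the simplified States model below is an assumption, not the recorder's real schema:

    from sqlalchemy import Column, Integer, func, select
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()


    class States(Base):
        """Simplified stand-in for the recorder's States table."""

        __tablename__ = "states"
        state_id = Column(Integer, primary_key=True)
        attributes_id = Column(Integer, index=True)


    # Aggregate form: MIN() asks the engine for the smallest matching value,
    # which can touch more of the index than an existence check needs.
    old = select(func.min(States.attributes_id)).where(States.attributes_id == 1)

    # LIMIT 1 form: the engine may stop at the first index hit.
    new = select(States.attributes_id).where(States.attributes_id == 1).limit(1)

    print(old)  # SELECT min(states.attributes_id) AS min_1 FROM states WHERE ...
    print(new)  # SELECT states.attributes_id FROM states WHERE ... LIMIT :param_1
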
@@ -315,7 +315,7 @@ def data_ids_exist_in_events_with_fast_in_distinct(

 def _event_data_id_exist(data_id: int | None) -> Select:
     """Check if an event data id exists in the events table."""
-    return select(func.min(Events.data_id)).where(Events.data_id == data_id)
+    return select(Events.data_id).where(Events.data_id == data_id).limit(1)


 def data_ids_exist_in_events(
@@ -600,6 +600,12 @@ def setup_connection_for_dialect(
         execute_on_connection(dbapi_connection, "SET time_zone = '+00:00'")
     elif dialect_name == SupportedDialect.POSTGRESQL:
         max_bind_vars = DEFAULT_MAX_BIND_VARS
+        # PostgreSQL does not support a skip/loose index scan, so it is
+        # also slow for large distinct queries:
+        # https://wiki.postgresql.org/wiki/Loose_indexscan
+        # https://github.com/home-assistant/core/issues/126084
+        # so we set slow_range_in_select to True
+        slow_range_in_select = True
     if first_connection:
         # server_version_num was added in 2006
         result = query_on_connection(dbapi_connection, "SHOW server_version")
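
For reference, a minimal sketch of reading the server version over a raw DBAPI connection. The name query_on_connection comes from the diff, but this body is an assumption for illustration, not the recorder's implementation:

    def query_on_connection(dbapi_connection, sql: str):
        """Run a statement on a raw DBAPI connection and return all rows."""
        cursor = dbapi_connection.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
        cursor.close()
        return result


    # Usage with a live psycopg2 connection `conn` (hypothetical):
    #   (version,) = query_on_connection(conn, "SHOW server_version")[0]
    #   major = int(version.partition(".")[0])
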
@@ -502,7 +502,7 @@ def test_supported_pgsql(caplog: pytest.LogCaptureFixture, pgsql_version) -> Non

     assert "minimum supported version" not in caplog.text
     assert database_engine is not None
-    assert database_engine.optimizer.slow_range_in_select is False
+    assert database_engine.optimizer.slow_range_in_select is True


 @pytest.mark.parametrize(
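
The test now pins the new behavior across the parametrized PostgreSQL versions. A reduced, self-contained sketch of the assertion pattern; the stand-in helper and version strings below are assumptions, not the actual fixtures:

    from dataclasses import dataclass

    import pytest


    @dataclass
    class DatabaseOptimizer:
        slow_range_in_select: bool


    def optimizer_for_dialect(dialect_name: str) -> DatabaseOptimizer:
        # Reduced stand-in for setup_connection_for_dialect; the real
        # function also branches on MySQL/MariaDB server versions.
        return DatabaseOptimizer(slow_range_in_select=dialect_name == "postgresql")


    @pytest.mark.parametrize("pgsql_version", ["13.9", "16.1"])
    def test_pgsql_ranges_marked_slow(pgsql_version: str) -> None:
        # The flag no longer depends on the PostgreSQL server version.
        assert optimizer_for_dialect("postgresql").slow_range_in_select is True
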