From 91e0395c1c191e3f29983446e6b0f7e084a5ca48 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 23 Jun 2020 12:57:52 -0500 Subject: [PATCH] Optimize database indexes for existing queries (#37036) Cleanup indexes as >50% of the db size was indexes, many of them unused in any current query Logbook search was having to filter event_types without an index: Created ix_events_event_type_time_fired Dropped ix_events_event_type States had a redundant keys on composite index: Dropped ix_states_entity_id Its unused since we have ix_states_entity_id_last_updated De-duplicate storage of context in states as its always stored in events and can be found by joining the state on the event_id. Dropped ix_states_context_id Dropped ix_states_context_parent_id Dropped ix_states_context_user_id After schema v9: STATES............................................ 10186 40.9% EVENTS............................................ 5502 22.1% IX_STATES_ENTITY_ID_LAST_UPDATED.................. 2177 8.7% IX_EVENTS_EVENT_TYPE_TIME_FIRED................... 1910 7.7% IX_EVENTS_CONTEXT_ID.............................. 1592 6.4% IX_EVENTS_TIME_FIRED.............................. 1383 5.6% IX_STATES_LAST_UPDATED............................ 1079 4.3% IX_STATES_EVENT_ID................................ 375 1.5% IX_EVENTS_CONTEXT_PARENT_ID....................... 347 1.4% IX_EVENTS_CONTEXT_USER_ID......................... 346 1.4% IX_RECORDER_RUNS_START_END........................ 1 0.004% RECORDER_RUNS..................................... 1 0.004% SCHEMA_CHANGES.................................... 1 0.004% SQLITE_MASTER..................................... 1 0.004% --- homeassistant/components/history/__init__.py | 7 +--- .../components/recorder/migration.py | 29 +++++++++++++--- homeassistant/components/recorder/models.py | 33 ++++++++++--------- tests/components/recorder/test_init.py | 24 +++++++++----- tests/components/recorder/test_models.py | 3 ++ 5 files changed, 63 insertions(+), 33 deletions(-) diff --git a/homeassistant/components/history/__init__.py b/homeassistant/components/history/__init__.py index 72f0fac481b..2f14f59f34f 100644 --- a/homeassistant/components/history/__init__.py +++ b/homeassistant/components/history/__init__.py @@ -76,8 +76,6 @@ QUERY_STATES = [ States.last_changed, States.last_updated, States.created, - States.context_id, - States.context_user_id, ] @@ -649,9 +647,7 @@ class LazyState(State): def context(self): """State context.""" if not self._context: - self._context = Context( - id=self._row.context_id, user_id=self._row.context_user_id - ) + self._context = Context(id=None) return self._context @property # type: ignore @@ -685,5 +681,4 @@ class LazyState(State): and self.entity_id == other.entity_id and self.state == other.state and self.attributes == other.attributes - and self.context == other.context ) diff --git a/homeassistant/components/recorder/migration.py b/homeassistant/components/recorder/migration.py index 061b568b4ae..15d179f7fd3 100644 --- a/homeassistant/components/recorder/migration.py +++ b/homeassistant/components/recorder/migration.py @@ -64,11 +64,15 @@ def _create_index(engine, table_name, index_name): within the table definition described in the models """ table = Table(table_name, Base.metadata) - _LOGGER.debug("Looking up index for table %s", table_name) + _LOGGER.debug("Looking up index %s for table %s", index_name, table_name) # Look up the index object by name from the table is the models - index = next(idx for idx in table.indexes if idx.name == index_name) + index_list = [idx for idx in table.indexes if idx.name == index_name] + if not index_list: + _LOGGER.debug("The index %s no longer exists", index_name) + return + index = index_list[0] _LOGGER.debug("Creating %s index", index_name) - _LOGGER.info( + _LOGGER.warning( "Adding index `%s` to database. Note: this can take several " "minutes on large databases and slow computers. Please " "be patient!", @@ -155,7 +159,7 @@ def _drop_index(engine, table_name, index_name): def _add_columns(engine, table_name, columns_def): """Add columns to a table.""" - _LOGGER.info( + _LOGGER.warning( "Adding columns %s to table %s. Note: this can take several " "minutes on large databases and slow computers. Please " "be patient!", @@ -254,6 +258,23 @@ def _apply_update(engine, new_version, old_version): _add_columns(engine, "states", ["old_state_id INTEGER"]) _create_index(engine, "states", "ix_states_context_parent_id") _create_index(engine, "events", "ix_events_context_parent_id") + elif new_version == 9: + # We now get the context from events with a join + # since its always there on state_changed events + # + # Ideally we would drop the columns from the states + # table as well but sqlite doesn't support that + # and we would have to move to something like + # sqlalchemy alembic to make that work + # + _drop_index(engine, "states", "ix_states_context_id") + _drop_index(engine, "states", "ix_states_context_user_id") + _drop_index(engine, "states", "ix_states_context_parent_id") + # Redundant keys on composite index: + # We already have ix_states_entity_id_last_updated + _drop_index(engine, "states", "ix_states_entity_id") + _create_index(engine, "events", "ix_events_event_type_time_fired") + _drop_index(engine, "events", "ix_events_event_type") else: raise ValueError(f"No schema migration defined for version {new_version}") diff --git a/homeassistant/components/recorder/models.py b/homeassistant/components/recorder/models.py index 3eac7a3cdb5..03c81726310 100644 --- a/homeassistant/components/recorder/models.py +++ b/homeassistant/components/recorder/models.py @@ -24,7 +24,7 @@ import homeassistant.util.dt as dt_util # pylint: disable=invalid-name Base = declarative_base() -SCHEMA_VERSION = 8 +SCHEMA_VERSION = 9 _LOGGER = logging.getLogger(__name__) @@ -36,7 +36,7 @@ class Events(Base): # type: ignore __tablename__ = "events" event_id = Column(Integer, primary_key=True) - event_type = Column(String(32), index=True) + event_type = Column(String(32)) event_data = Column(Text) origin = Column(String(32)) time_fired = Column(DateTime(timezone=True), index=True) @@ -45,6 +45,12 @@ class Events(Base): # type: ignore context_user_id = Column(String(36), index=True) context_parent_id = Column(String(36), index=True) + __table_args__ = ( + # Used for fetching events at a specific time + # see logbook + Index("ix_events_event_type_time_fired", "event_type", "time_fired"), + ) + @staticmethod def from_event(event): """Create an event database object from a native event.""" @@ -60,7 +66,11 @@ class Events(Base): # type: ignore def to_native(self): """Convert to a natve HA Event.""" - context = Context(id=self.context_id, user_id=self.context_user_id) + context = Context( + id=self.context_id, + user_id=self.context_user_id, + parent_id=self.context_parent_id, + ) try: return Event( self.event_type, @@ -81,16 +91,13 @@ class States(Base): # type: ignore __tablename__ = "states" state_id = Column(Integer, primary_key=True) domain = Column(String(64)) - entity_id = Column(String(255), index=True) + entity_id = Column(String(255)) state = Column(String(255)) attributes = Column(Text) event_id = Column(Integer, ForeignKey("events.event_id"), index=True) last_changed = Column(DateTime(timezone=True), default=dt_util.utcnow) last_updated = Column(DateTime(timezone=True), default=dt_util.utcnow, index=True) created = Column(DateTime(timezone=True), default=dt_util.utcnow) - context_id = Column(String(36), index=True) - context_user_id = Column(String(36), index=True) - context_parent_id = Column(String(36), index=True) old_state_id = Column(Integer) __table_args__ = ( @@ -105,12 +112,7 @@ class States(Base): # type: ignore entity_id = event.data["entity_id"] state = event.data.get("new_state") - dbstate = States( - entity_id=entity_id, - context_id=event.context.id, - context_user_id=event.context.user_id, - context_parent_id=event.context.parent_id, - ) + dbstate = States(entity_id=entity_id) # State got deleted if state is None: @@ -130,7 +132,6 @@ class States(Base): # type: ignore def to_native(self, validate_entity_id=True): """Convert to an HA state object.""" - context = Context(id=self.context_id, user_id=self.context_user_id) try: return State( self.entity_id, @@ -138,7 +139,9 @@ class States(Base): # type: ignore json.loads(self.attributes), process_timestamp(self.last_changed), process_timestamp(self.last_updated), - context=context, + # Join the events table on event_id to get the context instead + # as it will always be there for state_changed events + context=Context(id=None), validate_entity_id=validate_entity_id, ) except ValueError: diff --git a/tests/components/recorder/test_init.py b/tests/components/recorder/test_init.py index f525d2ce39c..862b60b8acb 100644 --- a/tests/components/recorder/test_init.py +++ b/tests/components/recorder/test_init.py @@ -15,7 +15,7 @@ from homeassistant.components.recorder.const import DATA_INSTANCE from homeassistant.components.recorder.models import Events, RecorderRuns, States from homeassistant.components.recorder.util import session_scope from homeassistant.const import MATCH_ALL -from homeassistant.core import ATTR_NOW, EVENT_TIME_CHANGED, callback +from homeassistant.core import ATTR_NOW, EVENT_TIME_CHANGED, Context, callback from homeassistant.setup import async_setup_component from homeassistant.util import dt as dt_util @@ -55,7 +55,7 @@ class TestRecorder(unittest.TestCase): assert db_states[0].event_id > 0 state = db_states[0].to_native() - assert state == self.hass.states.get(entity_id) + assert state == _state_empty_context(self.hass, entity_id) def test_saving_event(self): """Test saving and restoring an event.""" @@ -135,13 +135,21 @@ def _add_events(hass, events): return [ev.to_native() for ev in session.query(Events)] +def _state_empty_context(hass, entity_id): + # We don't restore context unless we need it by joining the + # events table on the event_id for state_changed events + state = hass.states.get(entity_id) + state.context = Context(id=None) + return state + + # pylint: disable=redefined-outer-name,invalid-name def test_saving_state_include_domains(hass_recorder): """Test saving and restoring a state.""" hass = hass_recorder({"include": {"domains": "test2"}}) states = _add_entities(hass, ["test.recorder", "test2.recorder"]) assert len(states) == 1 - assert hass.states.get("test2.recorder") == states[0] + assert _state_empty_context(hass, "test2.recorder") == states[0] def test_saving_state_incl_entities(hass_recorder): @@ -149,7 +157,7 @@ def test_saving_state_incl_entities(hass_recorder): hass = hass_recorder({"include": {"entities": "test2.recorder"}}) states = _add_entities(hass, ["test.recorder", "test2.recorder"]) assert len(states) == 1 - assert hass.states.get("test2.recorder") == states[0] + assert _state_empty_context(hass, "test2.recorder") == states[0] def test_saving_event_exclude_event_type(hass_recorder): @@ -165,7 +173,7 @@ def test_saving_state_exclude_domains(hass_recorder): hass = hass_recorder({"exclude": {"domains": "test"}}) states = _add_entities(hass, ["test.recorder", "test2.recorder"]) assert len(states) == 1 - assert hass.states.get("test2.recorder") == states[0] + assert _state_empty_context(hass, "test2.recorder") == states[0] def test_saving_state_exclude_entities(hass_recorder): @@ -173,7 +181,7 @@ def test_saving_state_exclude_entities(hass_recorder): hass = hass_recorder({"exclude": {"entities": "test.recorder"}}) states = _add_entities(hass, ["test.recorder", "test2.recorder"]) assert len(states) == 1 - assert hass.states.get("test2.recorder") == states[0] + assert _state_empty_context(hass, "test2.recorder") == states[0] def test_saving_state_exclude_domain_include_entity(hass_recorder): @@ -192,8 +200,8 @@ def test_saving_state_include_domain_exclude_entity(hass_recorder): ) states = _add_entities(hass, ["test.recorder", "test2.recorder", "test.ok"]) assert len(states) == 1 - assert hass.states.get("test.ok") == states[0] - assert hass.states.get("test.ok").state == "state2" + assert _state_empty_context(hass, "test.ok") == states[0] + assert _state_empty_context(hass, "test.ok").state == "state2" def test_recorder_setup_failure(): diff --git a/tests/components/recorder/test_models.py b/tests/components/recorder/test_models.py index e56c4193dd5..bf659282e3e 100644 --- a/tests/components/recorder/test_models.py +++ b/tests/components/recorder/test_models.py @@ -68,6 +68,9 @@ class TestStates(unittest.TestCase): {"entity_id": "sensor.temperature", "old_state": None, "new_state": state}, context=state.context, ) + # We don't restore context unless we need it by joining the + # events table on the event_id for state_changed events + state.context = ha.Context(id=None) assert state == States.from_event(event).to_native() def test_from_event_to_delete_state(self):