Optimize database indexes for existing queries (#37036)

Cleanup indexes as >50% of the db size was indexes,
many of them unused in any current query

Logbook search was having to filter event_types without
an index:

  Created ix_events_event_type_time_fired
  Dropped ix_events_event_type

States had a redundant keys on composite index:

  Dropped ix_states_entity_id
  Its unused since we have ix_states_entity_id_last_updated

De-duplicate storage of context in states as
its always stored in events and can be found
by joining the state on the event_id.

  Dropped ix_states_context_id
  Dropped ix_states_context_parent_id
  Dropped ix_states_context_user_id

After schema v9:

STATES............................................ 10186       40.9%
EVENTS............................................ 5502        22.1%
IX_STATES_ENTITY_ID_LAST_UPDATED.................. 2177         8.7%
IX_EVENTS_EVENT_TYPE_TIME_FIRED................... 1910         7.7%
IX_EVENTS_CONTEXT_ID.............................. 1592         6.4%
IX_EVENTS_TIME_FIRED.............................. 1383         5.6%
IX_STATES_LAST_UPDATED............................ 1079         4.3%
IX_STATES_EVENT_ID................................ 375          1.5%
IX_EVENTS_CONTEXT_PARENT_ID....................... 347          1.4%
IX_EVENTS_CONTEXT_USER_ID......................... 346          1.4%
IX_RECORDER_RUNS_START_END........................ 1            0.004%
RECORDER_RUNS..................................... 1            0.004%
SCHEMA_CHANGES.................................... 1            0.004%
SQLITE_MASTER..................................... 1            0.004%
pull/37033/head^2
J. Nick Koston 2020-06-23 12:57:52 -05:00 committed by GitHub
parent a71e0a4b29
commit 91e0395c1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 63 additions and 33 deletions

View File

@ -76,8 +76,6 @@ QUERY_STATES = [
States.last_changed,
States.last_updated,
States.created,
States.context_id,
States.context_user_id,
]
@ -649,9 +647,7 @@ class LazyState(State):
def context(self):
"""State context."""
if not self._context:
self._context = Context(
id=self._row.context_id, user_id=self._row.context_user_id
)
self._context = Context(id=None)
return self._context
@property # type: ignore
@ -685,5 +681,4 @@ class LazyState(State):
and self.entity_id == other.entity_id
and self.state == other.state
and self.attributes == other.attributes
and self.context == other.context
)

View File

@ -64,11 +64,15 @@ def _create_index(engine, table_name, index_name):
within the table definition described in the models
"""
table = Table(table_name, Base.metadata)
_LOGGER.debug("Looking up index for table %s", table_name)
_LOGGER.debug("Looking up index %s for table %s", index_name, table_name)
# Look up the index object by name from the table is the models
index = next(idx for idx in table.indexes if idx.name == index_name)
index_list = [idx for idx in table.indexes if idx.name == index_name]
if not index_list:
_LOGGER.debug("The index %s no longer exists", index_name)
return
index = index_list[0]
_LOGGER.debug("Creating %s index", index_name)
_LOGGER.info(
_LOGGER.warning(
"Adding index `%s` to database. Note: this can take several "
"minutes on large databases and slow computers. Please "
"be patient!",
@ -155,7 +159,7 @@ def _drop_index(engine, table_name, index_name):
def _add_columns(engine, table_name, columns_def):
"""Add columns to a table."""
_LOGGER.info(
_LOGGER.warning(
"Adding columns %s to table %s. Note: this can take several "
"minutes on large databases and slow computers. Please "
"be patient!",
@ -254,6 +258,23 @@ def _apply_update(engine, new_version, old_version):
_add_columns(engine, "states", ["old_state_id INTEGER"])
_create_index(engine, "states", "ix_states_context_parent_id")
_create_index(engine, "events", "ix_events_context_parent_id")
elif new_version == 9:
# We now get the context from events with a join
# since its always there on state_changed events
#
# Ideally we would drop the columns from the states
# table as well but sqlite doesn't support that
# and we would have to move to something like
# sqlalchemy alembic to make that work
#
_drop_index(engine, "states", "ix_states_context_id")
_drop_index(engine, "states", "ix_states_context_user_id")
_drop_index(engine, "states", "ix_states_context_parent_id")
# Redundant keys on composite index:
# We already have ix_states_entity_id_last_updated
_drop_index(engine, "states", "ix_states_entity_id")
_create_index(engine, "events", "ix_events_event_type_time_fired")
_drop_index(engine, "events", "ix_events_event_type")
else:
raise ValueError(f"No schema migration defined for version {new_version}")

View File

@ -24,7 +24,7 @@ import homeassistant.util.dt as dt_util
# pylint: disable=invalid-name
Base = declarative_base()
SCHEMA_VERSION = 8
SCHEMA_VERSION = 9
_LOGGER = logging.getLogger(__name__)
@ -36,7 +36,7 @@ class Events(Base): # type: ignore
__tablename__ = "events"
event_id = Column(Integer, primary_key=True)
event_type = Column(String(32), index=True)
event_type = Column(String(32))
event_data = Column(Text)
origin = Column(String(32))
time_fired = Column(DateTime(timezone=True), index=True)
@ -45,6 +45,12 @@ class Events(Base): # type: ignore
context_user_id = Column(String(36), index=True)
context_parent_id = Column(String(36), index=True)
__table_args__ = (
# Used for fetching events at a specific time
# see logbook
Index("ix_events_event_type_time_fired", "event_type", "time_fired"),
)
@staticmethod
def from_event(event):
"""Create an event database object from a native event."""
@ -60,7 +66,11 @@ class Events(Base): # type: ignore
def to_native(self):
"""Convert to a natve HA Event."""
context = Context(id=self.context_id, user_id=self.context_user_id)
context = Context(
id=self.context_id,
user_id=self.context_user_id,
parent_id=self.context_parent_id,
)
try:
return Event(
self.event_type,
@ -81,16 +91,13 @@ class States(Base): # type: ignore
__tablename__ = "states"
state_id = Column(Integer, primary_key=True)
domain = Column(String(64))
entity_id = Column(String(255), index=True)
entity_id = Column(String(255))
state = Column(String(255))
attributes = Column(Text)
event_id = Column(Integer, ForeignKey("events.event_id"), index=True)
last_changed = Column(DateTime(timezone=True), default=dt_util.utcnow)
last_updated = Column(DateTime(timezone=True), default=dt_util.utcnow, index=True)
created = Column(DateTime(timezone=True), default=dt_util.utcnow)
context_id = Column(String(36), index=True)
context_user_id = Column(String(36), index=True)
context_parent_id = Column(String(36), index=True)
old_state_id = Column(Integer)
__table_args__ = (
@ -105,12 +112,7 @@ class States(Base): # type: ignore
entity_id = event.data["entity_id"]
state = event.data.get("new_state")
dbstate = States(
entity_id=entity_id,
context_id=event.context.id,
context_user_id=event.context.user_id,
context_parent_id=event.context.parent_id,
)
dbstate = States(entity_id=entity_id)
# State got deleted
if state is None:
@ -130,7 +132,6 @@ class States(Base): # type: ignore
def to_native(self, validate_entity_id=True):
"""Convert to an HA state object."""
context = Context(id=self.context_id, user_id=self.context_user_id)
try:
return State(
self.entity_id,
@ -138,7 +139,9 @@ class States(Base): # type: ignore
json.loads(self.attributes),
process_timestamp(self.last_changed),
process_timestamp(self.last_updated),
context=context,
# Join the events table on event_id to get the context instead
# as it will always be there for state_changed events
context=Context(id=None),
validate_entity_id=validate_entity_id,
)
except ValueError:

View File

@ -15,7 +15,7 @@ from homeassistant.components.recorder.const import DATA_INSTANCE
from homeassistant.components.recorder.models import Events, RecorderRuns, States
from homeassistant.components.recorder.util import session_scope
from homeassistant.const import MATCH_ALL
from homeassistant.core import ATTR_NOW, EVENT_TIME_CHANGED, callback
from homeassistant.core import ATTR_NOW, EVENT_TIME_CHANGED, Context, callback
from homeassistant.setup import async_setup_component
from homeassistant.util import dt as dt_util
@ -55,7 +55,7 @@ class TestRecorder(unittest.TestCase):
assert db_states[0].event_id > 0
state = db_states[0].to_native()
assert state == self.hass.states.get(entity_id)
assert state == _state_empty_context(self.hass, entity_id)
def test_saving_event(self):
"""Test saving and restoring an event."""
@ -135,13 +135,21 @@ def _add_events(hass, events):
return [ev.to_native() for ev in session.query(Events)]
def _state_empty_context(hass, entity_id):
# We don't restore context unless we need it by joining the
# events table on the event_id for state_changed events
state = hass.states.get(entity_id)
state.context = Context(id=None)
return state
# pylint: disable=redefined-outer-name,invalid-name
def test_saving_state_include_domains(hass_recorder):
"""Test saving and restoring a state."""
hass = hass_recorder({"include": {"domains": "test2"}})
states = _add_entities(hass, ["test.recorder", "test2.recorder"])
assert len(states) == 1
assert hass.states.get("test2.recorder") == states[0]
assert _state_empty_context(hass, "test2.recorder") == states[0]
def test_saving_state_incl_entities(hass_recorder):
@ -149,7 +157,7 @@ def test_saving_state_incl_entities(hass_recorder):
hass = hass_recorder({"include": {"entities": "test2.recorder"}})
states = _add_entities(hass, ["test.recorder", "test2.recorder"])
assert len(states) == 1
assert hass.states.get("test2.recorder") == states[0]
assert _state_empty_context(hass, "test2.recorder") == states[0]
def test_saving_event_exclude_event_type(hass_recorder):
@ -165,7 +173,7 @@ def test_saving_state_exclude_domains(hass_recorder):
hass = hass_recorder({"exclude": {"domains": "test"}})
states = _add_entities(hass, ["test.recorder", "test2.recorder"])
assert len(states) == 1
assert hass.states.get("test2.recorder") == states[0]
assert _state_empty_context(hass, "test2.recorder") == states[0]
def test_saving_state_exclude_entities(hass_recorder):
@ -173,7 +181,7 @@ def test_saving_state_exclude_entities(hass_recorder):
hass = hass_recorder({"exclude": {"entities": "test.recorder"}})
states = _add_entities(hass, ["test.recorder", "test2.recorder"])
assert len(states) == 1
assert hass.states.get("test2.recorder") == states[0]
assert _state_empty_context(hass, "test2.recorder") == states[0]
def test_saving_state_exclude_domain_include_entity(hass_recorder):
@ -192,8 +200,8 @@ def test_saving_state_include_domain_exclude_entity(hass_recorder):
)
states = _add_entities(hass, ["test.recorder", "test2.recorder", "test.ok"])
assert len(states) == 1
assert hass.states.get("test.ok") == states[0]
assert hass.states.get("test.ok").state == "state2"
assert _state_empty_context(hass, "test.ok") == states[0]
assert _state_empty_context(hass, "test.ok").state == "state2"
def test_recorder_setup_failure():

View File

@ -68,6 +68,9 @@ class TestStates(unittest.TestCase):
{"entity_id": "sensor.temperature", "old_state": None, "new_state": state},
context=state.context,
)
# We don't restore context unless we need it by joining the
# events table on the event_id for state_changed events
state.context = ha.Context(id=None)
assert state == States.from_event(event).to_native()
def test_from_event_to_delete_state(self):