From 3d0073534199bac2af13d2c3e460aab2a0480b18 Mon Sep 17 00:00:00 2001 From: Sean Dague Date: Sat, 23 Jan 2016 07:00:52 -0500 Subject: [PATCH 1/2] Add recording of domain to state tables Some domains, like thermostat, need all state records, not just state change ones, to provide accurate graphs. This introduces a new db migration which adds a 'domain' column to the states table so that filtering by domain is a fast query. Indexes were added to help with query performance. This includes a data migration which retroactively populates domain for existing rows. On large HA dbs this might take noticeable time, as it has to touch every state row. A 100 MB db (91k states) updated in a couple of seconds on my reasonably fast server. Be forewarned. This is part of bug #881 --- homeassistant/components/recorder.py | 39 +++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/homeassistant/components/recorder.py b/homeassistant/components/recorder.py index 802634715e9..90a4b00cc87 100644 --- a/homeassistant/components/recorder.py +++ b/homeassistant/components/recorder.py @@ -226,22 +226,28 @@ class Recorder(threading.Thread): # State got deleted if state is None: state_state = '' + state_domain = '' state_attr = '{}' last_changed = last_updated = now else: + state_domain = state.domain state_state = state.state state_attr = json.dumps(state.attributes) last_changed = state.last_changed last_updated = state.last_updated info = ( - entity_id, state_state, state_attr, last_changed, last_updated, + entity_id, state_domain, state_state, state_attr, + last_changed, last_updated, now, self.utc_offset, event_id) self.query( - "INSERT INTO states (" - "entity_id, state, attributes, last_changed, last_updated," - "created, utc_offset, event_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + """ + INSERT INTO states ( + entity_id, domain, state, attributes, last_changed, last_updated, + created, utc_offset, event_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, info) def record_event(self, event): @@ -404,6 +410,31 @@ class Recorder(threading.Thread): save_migration(4) + if migration_id < 5: + # Add domain so that thermostat graphs look right + self.query(""" + ALTER TABLE states + ADD COLUMN domain text + """) + + # populate domain with defaults + rows = self.query("select distinct entity_id from states") + for row in rows: + entity_id = row[0] + domain = entity_id.split(".")[0] + self.query( + "UPDATE states set domain=? where entity_id=?", + domain, entity_id) + + # add indexes we are going to use a lot on selects + self.query(""" + CREATE INDEX states__state_changes ON + states (last_changed, last_updated, entity_id)""") + self.query(""" + CREATE INDEX states__significant_changes ON + states (domain, last_updated, entity_id)""") + save_migration(5) + def _close_connection(self): """ Close connection to the database. """ _LOGGER.info("Closing database") From abc253c4c582438c6a42135799393f285e23c2e7 Mon Sep 17 00:00:00 2001 From: Sean Dague Date: Sat, 23 Jan 2016 15:36:43 -0500 Subject: [PATCH 2/2] implement get_significant_states This adds a new function to history module which returns significant states. For most domains this is the list of state changes. For the thermostat domain this also includes attribute changes, so that changes in the current_temperature are exposed to the graphing layer. 
Closes #881 --- homeassistant/components/history.py | 77 +++++++++++++++++++++++------ tests/components/test_history.py | 59 +++++++++++++++++++++- 2 files changed, 120 insertions(+), 16 deletions(-) diff --git a/homeassistant/components/history.py b/homeassistant/components/history.py index b71f2de7398..d07fc518083 100644 --- a/homeassistant/components/history.py +++ b/homeassistant/components/history.py @@ -18,6 +18,8 @@ from homeassistant.const import HTTP_BAD_REQUEST DOMAIN = 'history' DEPENDENCIES = ['recorder', 'http'] +SIGNIFICANT_DOMAINS = ('thermostat',) + URL_HISTORY_PERIOD = re.compile( r'/api/history/period(?:/(?P\d{4}-\d{1,2}-\d{1,2})|)') @@ -35,6 +37,37 @@ def last_5_states(entity_id): return recorder.query_states(query, (entity_id, )) +def get_significant_states(start_time, end_time=None, entity_id=None): + """Return states changes during UTC period start_time - end_time. + + Significant states are all states where there is a state change, + as well as all states from certain domains (for instance + thermostat so that we get current temperature in our graphs). + + """ + where = """ + (domain in ({}) or last_changed=last_updated) + AND last_updated > ? + """.format(",".join(["'%s'" % x for x in SIGNIFICANT_DOMAINS])) + + data = [start_time] + + if end_time is not None: + where += "AND last_updated < ? " + data.append(end_time) + + if entity_id is not None: + where += "AND entity_id = ? " + data.append(entity_id.lower()) + + query = ("SELECT * FROM states WHERE {} " + "ORDER BY entity_id, last_updated ASC").format(where) + + states = recorder.query_states(query, data) + + return states_to_json(states, start_time, entity_id) + + def state_changes_during_period(start_time, end_time=None, entity_id=None): """ Return states changes during UTC period start_time - end_time. 
@@ -55,20 +88,7 @@ def state_changes_during_period(start_time, end_time=None, entity_id=None): states = recorder.query_states(query, data) - result = defaultdict(list) - - entity_ids = [entity_id] if entity_id is not None else None - - # Get the states at the start time - for state in get_states(start_time, entity_ids): - state.last_changed = start_time - result[state.entity_id].append(state) - - # Append all changes to it - for entity_id, group in groupby(states, lambda state: state.entity_id): - result[entity_id].extend(group) - - return result + return states_to_json(states, start_time, entity_id) def get_states(utc_point_in_time, entity_ids=None, run=None): @@ -100,6 +120,33 @@ def get_states(utc_point_in_time, entity_ids=None, run=None): return recorder.query_states(query, where_data) +def states_to_json(states, start_time, entity_id): + """Converts SQL results into JSON friendly data structure. + + This takes our state list and turns it into a JSON friendly data + structure {'entity_id': [list of states], 'entity_id2': [list of states]} + + We also need to go back and create a synthetic zero data point for + each list of states, otherwise our graphs won't start on the Y + axis correctly. + """ + + result = defaultdict(list) + + entity_ids = [entity_id] if entity_id is not None else None + + # Get the states at the start time + for state in get_states(start_time, entity_ids): + state.last_changed = start_time + state.last_updated = start_time + result[state.entity_id].append(state) + + # Append all changes to it + for entity_id, group in groupby(states, lambda state: state.entity_id): + result[entity_id].extend(group) + return result + + def get_state(utc_point_in_time, entity_id, run=None): """ Return a state at a specific point in time. 
""" states = get_states(utc_point_in_time, (entity_id,), run) @@ -152,4 +199,4 @@ def _api_history_period(handler, path_match, data): entity_id = data.get('filter_entity_id') handler.write_json( - state_changes_during_period(start_time, end_time, entity_id).values()) + get_significant_states(start_time, end_time, entity_id).values()) diff --git a/tests/components/test_history.py b/tests/components/test_history.py index f9e773c499a..f9b8e94d286 100644 --- a/tests/components/test_history.py +++ b/tests/components/test_history.py @@ -8,7 +8,7 @@ Tests the history component. from datetime import timedelta import os import unittest -from unittest.mock import patch +from unittest.mock import patch, sentinel import homeassistant.core as ha import homeassistant.util.dt as dt_util @@ -143,3 +143,60 @@ class TestComponentHistory(unittest.TestCase): hist = history.state_changes_during_period(start, end, entity_id) self.assertEqual(states, hist[entity_id]) + + def test_get_significant_states(self): + """test that only significant states are returned with + get_significant_states. + + We inject a bunch of state updates from media player and + thermostat. We should get back every thermostat change that + includes an attribute change, but only the state updates for + media player (attribute changes are not significant and not returned). 
+ + """ + self.init_recorder() + mp = 'media_player.test' + therm = 'thermostat.test' + + def set_state(entity_id, state, **kwargs): + self.hass.states.set(entity_id, state, **kwargs) + self.wait_recording_done() + return self.hass.states.get(entity_id) + + zero = dt_util.utcnow() + one = zero + timedelta(seconds=1) + two = one + timedelta(seconds=1) + three = two + timedelta(seconds=1) + four = three + timedelta(seconds=1) + + states = {therm: [], mp: []} + with patch('homeassistant.components.recorder.dt_util.utcnow', + return_value=one): + states[mp].append( + set_state(mp, 'idle', + attributes={'media_title': str(sentinel.mt1)})) + states[mp].append( + set_state(mp, 'YouTube', + attributes={'media_title': str(sentinel.mt2)})) + states[therm].append( + set_state(therm, 20, attributes={'current_temperature': 19.5})) + + with patch('homeassistant.components.recorder.dt_util.utcnow', + return_value=two): + # this state will be skipped only different in time + set_state(mp, 'YouTube', + attributes={'media_title': str(sentinel.mt3)}) + states[therm].append( + set_state(therm, 21, attributes={'current_temperature': 19.8})) + + with patch('homeassistant.components.recorder.dt_util.utcnow', + return_value=three): + states[mp].append( + set_state(mp, 'Netflix', + attributes={'media_title': str(sentinel.mt4)})) + # attributes changed even though state is the same + states[therm].append( + set_state(therm, 21, attributes={'current_temperature': 20})) + + hist = history.get_significant_states(zero, four) + self.assertEqual(states, hist)