2016-07-02 18:22:51 +00:00
|
|
|
"""Models for SQLAlchemy."""
|
2019-12-08 17:48:18 +00:00
|
|
|
import json
|
2016-07-02 18:22:51 +00:00
|
|
|
import logging
|
|
|
|
|
2017-05-02 16:18:47 +00:00
|
|
|
from sqlalchemy import (
|
2019-07-31 19:25:30 +00:00
|
|
|
Boolean,
|
|
|
|
Column,
|
|
|
|
DateTime,
|
|
|
|
ForeignKey,
|
|
|
|
Index,
|
|
|
|
Integer,
|
|
|
|
String,
|
|
|
|
Text,
|
|
|
|
distinct,
|
|
|
|
)
|
2016-07-02 18:22:51 +00:00
|
|
|
from sqlalchemy.ext.declarative import declarative_base
|
2019-10-18 17:14:54 +00:00
|
|
|
from sqlalchemy.orm.session import Session
|
2016-07-11 07:46:56 +00:00
|
|
|
|
2019-07-31 19:25:30 +00:00
|
|
|
from homeassistant.core import Context, Event, EventOrigin, State, split_entity_id
|
2018-08-21 13:49:58 +00:00
|
|
|
from homeassistant.helpers.json import JSONEncoder
|
2019-12-08 17:48:18 +00:00
|
|
|
import homeassistant.util.dt as dt_util
|
2016-07-02 18:22:51 +00:00
|
|
|
|
|
|
|
# SQLAlchemy Schema
# pylint: disable=invalid-name
Base = declarative_base()

# Version number of the schema described by the models in this module.
# NOTE(review): presumably the value recorded in SchemaChanges.schema_version
# by the migration code -- confirm against the caller.
SCHEMA_VERSION = 9

_LOGGER = logging.getLogger(__name__)

# UTC offset suffix appended to the ISO string of naive timestamps
# (see process_timestamp_to_utc_isoformat).
DB_TIMEZONE = "+00:00"
|
Improve history api performance (#35822)
* Improve history api performance
A new option "minimal_response" reduces the amount of data
sent between the first and last history states to only the
"last_changed" and "state" fields.
Calling to_native is now avoided where possible and only
done at the end for rows that will be returned in the response.
When sending the `minimal_response` option, the history
api now returns a json response similar to the following
for an entity
Testing:
History API Response time for 1 day
Average of 10 runs with minimal_response
Before: 19.89s. (content length : 3427428)
After: 8.44s (content length: 592199)
```
[{
"attributes": {--TRUNCATED--},
"context": {--TRUNCATED--},
"entity_id": "binary_sensor.powerwall_status",
"last_changed": "2020-05-18T23:20:03.213000+00:00",
"last_updated": "2020-05-18T23:20:03.213000+00:00",
"state": "on"
},
...
{
"last_changed": "2020-05-19T00:41:08Z",
"state": "unavailable"
},
...
{
"attributes": {--TRUNCATED--},
"context": {--TRUNCATED--},
"entity_id": "binary_sensor.powerwall_status",
"last_changed": "2020-05-19T00:42:08.069698+00:00",
"last_updated": "2020-05-19T00:42:08.069698+00:00",
"state": "on"
}]
```
* Remove impossible state check
* Remove another impossible state check
* Update homeassistant/components/history/__init__.py
Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
* Reorder to save some indent per review
* Make query response make sense with to_native=False
* Update test for 00:00 to Z change
* Update homeassistant/components/recorder/models.py
Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
2020-05-27 02:53:56 +00:00
|
|
|
|
2016-07-02 18:22:51 +00:00
|
|
|
|
2016-07-23 18:25:17 +00:00
|
|
|
class Events(Base):  # type: ignore
    """Event history data.

    One row of the ``events`` table. The event payload is stored as a JSON
    string in ``event_data`` and the context is flattened into the three
    ``context_*`` columns.
    """

    __tablename__ = "events"
    event_id = Column(Integer, primary_key=True)
    event_type = Column(String(32))
    event_data = Column(Text)
    origin = Column(String(32))
    time_fired = Column(DateTime(timezone=True), index=True)
    created = Column(DateTime(timezone=True), default=dt_util.utcnow)
    context_id = Column(String(36), index=True)
    context_user_id = Column(String(36), index=True)
    context_parent_id = Column(String(36), index=True)

    __table_args__ = (
        # Used for fetching events at a specific time
        # see logbook
        Index("ix_events_event_type_time_fired", "event_type", "time_fired"),
    )

    @staticmethod
    def from_event(event):
        """Create an event database object from a native event.

        The event data is serialized with ``json.dumps`` using ``JSONEncoder``
        and the origin is stored as its ``str()`` representation.
        """
        return Events(
            event_type=event.event_type,
            event_data=json.dumps(event.data, cls=JSONEncoder),
            origin=str(event.origin),
            time_fired=event.time_fired,
            context_id=event.context.id,
            context_user_id=event.context.user_id,
            context_parent_id=event.context.parent_id,
        )

    def to_native(self, validate_entity_id=True):
        """Convert to a native HA Event.

        ``validate_entity_id`` is not used by this method; it is accepted so
        the signature matches ``States.to_native``.

        Returns the rebuilt ``Event``, or ``None`` (after logging the
        exception) when a ``ValueError`` is raised while rebuilding it.
        """
        context = Context(
            id=self.context_id,
            user_id=self.context_user_id,
            parent_id=self.context_parent_id,
        )
        try:
            return Event(
                self.event_type,
                json.loads(self.event_data),
                EventOrigin(self.origin),
                process_timestamp(self.time_fired),
                context=context,
            )
        except ValueError:
            # When json.loads fails (json.JSONDecodeError is a ValueError)
            _LOGGER.exception("Error converting to event: %s", self)
            return None
|
|
|
|
|
|
|
|
|
2019-07-31 19:25:30 +00:00
|
|
|
class States(Base):  # type: ignore
    """State change history.

    One row of the ``states`` table. State attributes are stored as a JSON
    string in ``attributes``; ``event_id`` is a foreign key into
    ``events.event_id``.
    """

    __tablename__ = "states"
    state_id = Column(Integer, primary_key=True)
    domain = Column(String(64))
    entity_id = Column(String(255))
    state = Column(String(255))
    attributes = Column(Text)
    event_id = Column(Integer, ForeignKey("events.event_id"), index=True)
    last_changed = Column(DateTime(timezone=True), default=dt_util.utcnow)
    last_updated = Column(DateTime(timezone=True), default=dt_util.utcnow, index=True)
    created = Column(DateTime(timezone=True), default=dt_util.utcnow)
    old_state_id = Column(Integer)

    __table_args__ = (
        # Used for fetching the state of entities at a specific time
        # (get_states in history.py)
        Index("ix_states_entity_id_last_updated", "entity_id", "last_updated"),
    )

    @staticmethod
    def from_event(event):
        """Create object from a state_changed event.

        Reads ``entity_id`` and ``new_state`` from ``event.data``. When
        ``new_state`` is missing or ``None`` the row is written with an empty
        state, empty JSON attributes, and both timestamps set to
        ``event.time_fired``.
        """
        entity_id = event.data["entity_id"]
        state = event.data.get("new_state")

        dbstate = States(entity_id=entity_id)

        # State got deleted
        if state is None:
            dbstate.state = ""
            dbstate.domain = split_entity_id(entity_id)[0]
            dbstate.attributes = "{}"
            dbstate.last_changed = event.time_fired
            dbstate.last_updated = event.time_fired
        else:
            dbstate.domain = state.domain
            dbstate.state = state.state
            dbstate.attributes = json.dumps(dict(state.attributes), cls=JSONEncoder)
            dbstate.last_changed = state.last_changed
            dbstate.last_updated = state.last_updated

        return dbstate

    def to_native(self, validate_entity_id=True):
        """Convert to an HA state object.

        ``validate_entity_id`` is forwarded unchanged to the ``State``
        constructor.

        Returns the rebuilt ``State``, or ``None`` (after logging the
        exception) when a ``ValueError`` is raised while rebuilding it.
        """
        try:
            return State(
                self.entity_id,
                self.state,
                json.loads(self.attributes),
                process_timestamp(self.last_changed),
                process_timestamp(self.last_updated),
                # Join the events table on event_id to get the context instead
                # as it will always be there for state_changed events
                context=Context(id=None),
                validate_entity_id=validate_entity_id,
            )
        except ValueError:
            # When json.loads fails (json.JSONDecodeError is a ValueError)
            _LOGGER.exception("Error converting row to state: %s", self)
            return None
|
|
|
|
|
|
|
|
|
2019-07-31 19:25:30 +00:00
|
|
|
class RecorderRuns(Base):  # type: ignore
    """Representation of recorder run."""

    __tablename__ = "recorder_runs"
    run_id = Column(Integer, primary_key=True)
    start = Column(DateTime(timezone=True), default=dt_util.utcnow)
    end = Column(DateTime(timezone=True))
    closed_incorrect = Column(Boolean, default=False)
    created = Column(DateTime(timezone=True), default=dt_util.utcnow)

    __table_args__ = (Index("ix_recorder_runs_start_end", "start", "end"),)

    def entity_ids(self, point_in_time=None):
        """Return the entity ids that existed in this run.

        Specify point_in_time if you want to know which existed at that point
        in time inside the run.

        The result is the list of distinct ``States.entity_id`` values whose
        ``last_updated`` is at or after ``self.start`` and before the upper
        bound: ``point_in_time`` when given, otherwise ``self.end`` when it
        is set, otherwise no upper bound.
        """
        session = Session.object_session(self)

        # NOTE(review): assert is stripped under ``python -O``; kept as-is so
        # the exception type seen by callers does not change.
        assert session is not None, "RecorderRuns need to be persisted"

        query = session.query(distinct(States.entity_id)).filter(
            States.last_updated >= self.start
        )

        if point_in_time is not None:
            query = query.filter(States.last_updated < point_in_time)
        elif self.end is not None:
            query = query.filter(States.last_updated < self.end)

        # Each result row holds a single column: the distinct entity_id.
        return [row[0] for row in query]

    def to_native(self, validate_entity_id=True):
        """Return self, native format is this model.

        ``validate_entity_id`` is not used by this method; it is accepted so
        the signature matches the other models' ``to_native``.
        """
        return self
|
2016-07-11 07:46:56 +00:00
|
|
|
|
|
|
|
|
2019-07-31 19:25:30 +00:00
|
|
|
class SchemaChanges(Base):  # type: ignore
    """Representation of schema version changes."""

    __tablename__ = "schema_changes"
    change_id = Column(Integer, primary_key=True)
    schema_version = Column(Integer)
    # Defaults to the time the row is created (dt_util.utcnow is called then).
    changed = Column(DateTime(timezone=True), default=dt_util.utcnow)
|
2017-02-03 03:04:14 +00:00
|
|
|
|
|
|
|
|
Improve history api performance (#35822)
* Improve history api performance
A new option "minimal_response" reduces the amount of data
sent between the first and last history states to only the
"last_changed" and "state" fields.
Calling to_native is now avoided where possible and only
done at the end for rows that will be returned in the response.
When sending the `minimal_response` option, the history
api now returns a json response similar to the following
for an entity
Testing:
History API Response time for 1 day
Average of 10 runs with minimal_response
Before: 19.89s. (content length : 3427428)
After: 8.44s (content length: 592199)
```
[{
"attributes": {--TRUNCATED--},
"context": {--TRUNCATED--},
"entity_id": "binary_sensor.powerwall_status",
"last_changed": "2020-05-18T23:20:03.213000+00:00",
"last_updated": "2020-05-18T23:20:03.213000+00:00",
"state": "on"
},
...
{
"last_changed": "2020-05-19T00:41:08Z",
"state": "unavailable"
},
...
{
"attributes": {--TRUNCATED--},
"context": {--TRUNCATED--},
"entity_id": "binary_sensor.powerwall_status",
"last_changed": "2020-05-19T00:42:08.069698+00:00",
"last_updated": "2020-05-19T00:42:08.069698+00:00",
"state": "on"
}]
```
* Remove impossible state check
* Remove another impossible state check
* Update homeassistant/components/history/__init__.py
Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
* Reorder to save some indent per review
* Make query response make sense with to_native=False
* Update test for 00:00 to Z change
* Update homeassistant/components/recorder/models.py
Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
2020-05-27 02:53:56 +00:00
|
|
|
def process_timestamp(ts):
    """Process a timestamp into datetime object.

    Returns ``None`` when ``ts`` is ``None``. A naive datetime (no tzinfo) is
    returned with ``dt_util.UTC`` attached via ``replace`` -- the clock value
    is not shifted. A timezone-aware datetime is passed through
    ``dt_util.as_utc``.
    """
    if ts is None:
        return None
    if ts.tzinfo is None:
        # Naive value: label it as UTC without converting.
        return ts.replace(tzinfo=dt_util.UTC)

    return dt_util.as_utc(ts)
|
2020-06-22 17:06:02 +00:00
|
|
|
|
|
|
|
|
|
|
|
def process_timestamp_to_utc_isoformat(ts):
    """Process a timestamp into UTC isotime.

    Returns ``None`` when ``ts`` is ``None``. For a naive datetime the result
    is ``ts.isoformat()`` with ``DB_TIMEZONE`` appended. For a timezone-aware
    datetime the result is ``dt_util.as_utc(ts).isoformat()``.
    """
    if ts is None:
        return None
    if ts.tzinfo is None:
        # Naive value: append the fixed offset suffix instead of converting.
        return f"{ts.isoformat()}{DB_TIMEZONE}"

    return dt_util.as_utc(ts).isoformat()
|