Generate statistics for all sensors with a supported state_class (#54882)

* Generate statistics for all sensors

* Fix bugs, add tests

* Address review comments

* Cleanup warnings

* Simplify tests

* Simplify selection of statistics

* Fix tests
pull/55220/head
Erik Montnemery 2021-08-25 13:00:35 +02:00 committed by GitHub
parent 0d654fa6b3
commit ffbd2d79c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 272 additions and 62 deletions

View File

@ -227,6 +227,19 @@ def _get_metadata(
return metadata
def get_metadata(
    hass: HomeAssistant,
    statistic_id: str,
) -> dict[str, str] | None:
    """Return metadata for a statistic_id."""
    with session_scope(hass=hass) as session:
        # Resolve the statistic_id to its metadata row id; bail out early
        # when no metadata has been stored for it yet.
        metadata_ids = _get_metadata_ids(hass, session, [statistic_id])
        if not metadata_ids:
            return None
        # Fetch the full metadata map for this single id and pick our entry.
        metadata = _get_metadata(hass, session, [statistic_id], None)
        return metadata.get(metadata_ids[0])
def _configured_unit(unit: str, units: UnitSystem) -> str:
"""Return the pressure and temperature units configured by the user."""
if unit == PRESSURE_PA:

View File

@ -9,10 +9,8 @@ from typing import Callable
from homeassistant.components.recorder import history, statistics
from homeassistant.components.sensor import (
ATTR_STATE_CLASS,
DEVICE_CLASS_BATTERY,
DEVICE_CLASS_ENERGY,
DEVICE_CLASS_GAS,
DEVICE_CLASS_HUMIDITY,
DEVICE_CLASS_MONETARY,
DEVICE_CLASS_PRESSURE,
DEVICE_CLASS_TEMPERATURE,
@ -26,7 +24,6 @@ from homeassistant.const import (
DEVICE_CLASS_POWER,
ENERGY_KILO_WATT_HOUR,
ENERGY_WATT_HOUR,
PERCENTAGE,
POWER_KILO_WATT,
POWER_WATT,
PRESSURE_BAR,
@ -51,24 +48,18 @@ from . import ATTR_LAST_RESET, DOMAIN
_LOGGER = logging.getLogger(__name__)
DEVICE_CLASS_OR_UNIT_STATISTICS = {
DEVICE_CLASS_STATISTICS: dict[str, dict[str, set[str]]] = {
STATE_CLASS_MEASUREMENT: {
DEVICE_CLASS_BATTERY: {"mean", "min", "max"},
DEVICE_CLASS_HUMIDITY: {"mean", "min", "max"},
DEVICE_CLASS_POWER: {"mean", "min", "max"},
DEVICE_CLASS_PRESSURE: {"mean", "min", "max"},
DEVICE_CLASS_TEMPERATURE: {"mean", "min", "max"},
PERCENTAGE: {"mean", "min", "max"},
# Deprecated, support will be removed in Home Assistant 2021.11
DEVICE_CLASS_ENERGY: {"sum"},
DEVICE_CLASS_GAS: {"sum"},
DEVICE_CLASS_MONETARY: {"sum"},
},
STATE_CLASS_TOTAL_INCREASING: {
DEVICE_CLASS_ENERGY: {"sum"},
DEVICE_CLASS_GAS: {"sum"},
DEVICE_CLASS_MONETARY: {"sum"},
},
STATE_CLASS_TOTAL_INCREASING: {},
}
DEFAULT_STATISTICS = {
STATE_CLASS_MEASUREMENT: {"mean", "min", "max"},
STATE_CLASS_TOTAL_INCREASING: {"sum"},
}
# Normalized units which will be stored in the statistics table
@ -116,31 +107,20 @@ UNIT_CONVERSIONS: dict[str, dict[str, Callable]] = {
}
# Keep track of entities for which a warning about unsupported unit has been logged
WARN_UNSUPPORTED_UNIT = set()
WARN_UNSUPPORTED_UNIT = "sensor_warn_unsupported_unit"
WARN_UNSTABLE_UNIT = "sensor_warn_unstable_unit"
def _get_entities(hass: HomeAssistant) -> list[tuple[str, str, str]]:
"""Get (entity_id, state_class, key) of all sensors for which to compile statistics.
Key is either a device class or a unit and is used to index the
DEVICE_CLASS_OR_UNIT_STATISTICS map.
"""
def _get_entities(hass: HomeAssistant) -> list[tuple[str, str, str | None]]:
"""Get (entity_id, state_class, device_class) of all sensors for which to compile statistics."""
all_sensors = hass.states.all(DOMAIN)
entity_ids = []
for state in all_sensors:
if (state_class := state.attributes.get(ATTR_STATE_CLASS)) not in STATE_CLASSES:
continue
if (
key := state.attributes.get(ATTR_DEVICE_CLASS)
) in DEVICE_CLASS_OR_UNIT_STATISTICS[state_class]:
entity_ids.append((state.entity_id, state_class, key))
if (
key := state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
) in DEVICE_CLASS_OR_UNIT_STATISTICS[state_class]:
entity_ids.append((state.entity_id, state_class, key))
device_class = state.attributes.get(ATTR_DEVICE_CLASS)
entity_ids.append((state.entity_id, state_class, device_class))
return entity_ids
@ -190,18 +170,39 @@ def _time_weighted_average(
return accumulated / (end - start).total_seconds()
def _get_units(fstates: list[tuple[float, State]]) -> set[str | None]:
    """Return the set of units used by the given (value, State) pairs.

    More than one element in the result means the unit changed within the
    sampled history; None is included for states with no unit attribute.
    """
    return {item[1].attributes.get(ATTR_UNIT_OF_MEASUREMENT) for item in fstates}
def _normalize_states(
entity_history: list[State], key: str, entity_id: str
hass: HomeAssistant,
entity_history: list[State],
device_class: str | None,
entity_id: str,
) -> tuple[str | None, list[tuple[float, State]]]:
"""Normalize units."""
unit = None
if key not in UNIT_CONVERSIONS:
if device_class not in UNIT_CONVERSIONS:
# We're not normalizing this device class, return the state as they are
fstates = [
(float(el.state), el) for el in entity_history if _is_number(el.state)
]
if fstates:
all_units = _get_units(fstates)
if len(all_units) > 1:
if WARN_UNSTABLE_UNIT not in hass.data:
hass.data[WARN_UNSTABLE_UNIT] = set()
if entity_id not in hass.data[WARN_UNSTABLE_UNIT]:
hass.data[WARN_UNSTABLE_UNIT].add(entity_id)
_LOGGER.warning(
"The unit of %s is changing, got %s, generation of long term "
"statistics will be suppressed unless the unit is stable",
entity_id,
all_units,
)
return None, []
unit = fstates[0][1].attributes.get(ATTR_UNIT_OF_MEASUREMENT)
return unit, fstates
@ -215,15 +216,17 @@ def _normalize_states(
fstate = float(state.state)
unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
# Exclude unsupported units from statistics
if unit not in UNIT_CONVERSIONS[key]:
if entity_id not in WARN_UNSUPPORTED_UNIT:
WARN_UNSUPPORTED_UNIT.add(entity_id)
if unit not in UNIT_CONVERSIONS[device_class]:
if WARN_UNSUPPORTED_UNIT not in hass.data:
hass.data[WARN_UNSUPPORTED_UNIT] = set()
if entity_id not in hass.data[WARN_UNSUPPORTED_UNIT]:
hass.data[WARN_UNSUPPORTED_UNIT].add(entity_id)
_LOGGER.warning("%s has unknown unit %s", entity_id, unit)
continue
fstates.append((UNIT_CONVERSIONS[key][unit](fstate), state))
fstates.append((UNIT_CONVERSIONS[device_class][unit](fstate), state))
return DEVICE_CLASS_UNITS[key], fstates
return DEVICE_CLASS_UNITS[device_class], fstates
def reset_detected(state: float, previous_state: float | None) -> bool:
@ -247,18 +250,39 @@ def compile_statistics(
hass, start - datetime.timedelta.resolution, end, [i[0] for i in entities]
)
for entity_id, state_class, key in entities:
wanted_statistics = DEVICE_CLASS_OR_UNIT_STATISTICS[state_class][key]
for entity_id, state_class, device_class in entities:
if device_class in DEVICE_CLASS_STATISTICS[state_class]:
wanted_statistics = DEVICE_CLASS_STATISTICS[state_class][device_class]
else:
wanted_statistics = DEFAULT_STATISTICS[state_class]
if entity_id not in history_list:
continue
entity_history = history_list[entity_id]
unit, fstates = _normalize_states(entity_history, key, entity_id)
unit, fstates = _normalize_states(hass, entity_history, device_class, entity_id)
if not fstates:
continue
# Check metadata
if old_metadata := statistics.get_metadata(hass, entity_id):
if old_metadata["unit_of_measurement"] != unit:
if WARN_UNSTABLE_UNIT not in hass.data:
hass.data[WARN_UNSTABLE_UNIT] = set()
if entity_id not in hass.data[WARN_UNSTABLE_UNIT]:
hass.data[WARN_UNSTABLE_UNIT].add(entity_id)
_LOGGER.warning(
"The unit of %s (%s) does not match the unit of already "
"compiled statistics (%s). Generation of long term statistics "
"will be suppressed unless the unit changes back to %s",
entity_id,
unit,
old_metadata["unit_of_measurement"],
unit,
)
continue
result[entity_id] = {}
# Set meta data
@ -370,8 +394,11 @@ def list_statistic_ids(hass: HomeAssistant, statistic_type: str | None = None) -
statistic_ids = {}
for entity_id, state_class, key in entities:
provided_statistics = DEVICE_CLASS_OR_UNIT_STATISTICS[state_class][key]
for entity_id, state_class, device_class in entities:
if device_class in DEVICE_CLASS_STATISTICS[state_class]:
provided_statistics = DEVICE_CLASS_STATISTICS[state_class][device_class]
else:
provided_statistics = DEFAULT_STATISTICS[state_class]
if statistic_type is not None and statistic_type not in provided_statistics:
continue
@ -386,16 +413,20 @@ def list_statistic_ids(hass: HomeAssistant, statistic_type: str | None = None) -
):
continue
native_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
metadata = statistics.get_metadata(hass, entity_id)
if metadata:
native_unit: str | None = metadata["unit_of_measurement"]
else:
native_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
if key not in UNIT_CONVERSIONS:
if device_class not in UNIT_CONVERSIONS:
statistic_ids[entity_id] = native_unit
continue
if native_unit not in UNIT_CONVERSIONS[key]:
if native_unit not in UNIT_CONVERSIONS[device_class]:
continue
statistics_unit = DEVICE_CLASS_UNITS[key]
statistics_unit = DEVICE_CLASS_UNITS[device_class]
statistic_ids[entity_id] = statistics_unit
return statistic_ids

View File

@ -107,24 +107,34 @@ def test_compile_hourly_statistics(
@pytest.mark.parametrize("attributes", [TEMPERATURE_SENSOR_ATTRIBUTES])
def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes):
"""Test compiling hourly statistics for unsupported sensor."""
attributes = dict(attributes)
zero = dt_util.utcnow()
hass = hass_recorder()
recorder = hass.data[DATA_INSTANCE]
setup_component(hass, "sensor", {})
four, states = record_states(hass, zero, "sensor.test1", attributes)
if "unit_of_measurement" in attributes:
attributes["unit_of_measurement"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test2", attributes)
states = {**states, **_states}
attributes.pop("unit_of_measurement")
_, _states = record_states(hass, zero, "sensor.test3", attributes)
states = {**states, **_states}
attributes["state_class"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test4", attributes)
attributes_tmp = dict(attributes)
attributes_tmp["unit_of_measurement"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test2", attributes_tmp)
states = {**states, **_states}
attributes.pop("state_class")
_, _states = record_states(hass, zero, "sensor.test5", attributes)
attributes_tmp.pop("unit_of_measurement")
_, _states = record_states(hass, zero, "sensor.test3", attributes_tmp)
states = {**states, **_states}
attributes_tmp = dict(attributes)
attributes_tmp["state_class"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test4", attributes_tmp)
states = {**states, **_states}
attributes_tmp.pop("state_class")
_, _states = record_states(hass, zero, "sensor.test5", attributes_tmp)
states = {**states, **_states}
attributes_tmp = dict(attributes)
attributes_tmp["device_class"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test6", attributes_tmp)
states = {**states, **_states}
attributes_tmp.pop("device_class")
_, _states = record_states(hass, zero, "sensor.test7", attributes_tmp)
states = {**states, **_states}
hist = history.get_significant_states(hass, zero, four)
@ -134,7 +144,9 @@ def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes
wait_recording_done(hass)
statistic_ids = list_statistic_ids(hass)
assert statistic_ids == [
{"statistic_id": "sensor.test1", "unit_of_measurement": "°C"}
{"statistic_id": "sensor.test1", "unit_of_measurement": "°C"},
{"statistic_id": "sensor.test6", "unit_of_measurement": "°C"},
{"statistic_id": "sensor.test7", "unit_of_measurement": "°C"},
]
stats = statistics_during_period(hass, zero)
assert stats == {
@ -149,7 +161,31 @@ def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes
"state": None,
"sum": None,
}
]
],
"sensor.test6": [
{
"statistic_id": "sensor.test6",
"start": process_timestamp_to_utc_isoformat(zero),
"mean": approx(16.440677966101696),
"min": approx(10.0),
"max": approx(30.0),
"last_reset": None,
"state": None,
"sum": None,
}
],
"sensor.test7": [
{
"statistic_id": "sensor.test7",
"start": process_timestamp_to_utc_isoformat(zero),
"mean": approx(16.440677966101696),
"min": approx(10.0),
"max": approx(30.0),
"last_reset": None,
"state": None,
"sum": None,
}
],
}
assert "Error while processing event StatisticsTask" not in caplog.text
@ -859,6 +895,136 @@ def test_list_statistic_ids_unsupported(hass_recorder, caplog, _attributes):
hass.states.set("sensor.test6", 0, attributes=attributes)
@pytest.mark.parametrize(
    "device_class,unit,native_unit,mean,min,max",
    [
        (None, None, None, 16.440677, 10, 30),
        (None, "%", "%", 16.440677, 10, 30),
        ("battery", "%", "%", 16.440677, 10, 30),
        ("battery", None, None, 16.440677, 10, 30),
    ],
)
def test_compile_hourly_statistics_changing_units_1(
    hass_recorder, caplog, device_class, unit, native_unit, mean, min, max
):
    """Test compiling hourly statistics where units change from one hour to the next."""
    zero = dt_util.utcnow()
    hass = hass_recorder()
    recorder = hass.data[DATA_INSTANCE]
    setup_component(hass, "sensor", {})
    attributes = {
        "device_class": device_class,
        "state_class": "measurement",
        "unit_of_measurement": unit,
    }
    # First hour: states with the original (parametrized) unit.
    four, states = record_states(hass, zero, "sensor.test1", attributes)
    # Hours 2 and 3: same entity, but the unit switches to "cats".
    attributes["unit_of_measurement"] = "cats"
    four, _states = record_states(
        hass, zero + timedelta(hours=1), "sensor.test1", attributes
    )
    states["sensor.test1"] += _states["sensor.test1"]
    four, _states = record_states(
        hass, zero + timedelta(hours=2), "sensor.test1", attributes
    )
    states["sensor.test1"] += _states["sensor.test1"]
    hist = history.get_significant_states(hass, zero, four)
    assert dict(states) == dict(hist)
    # Compile statistics for the first hour: unit is stable there, so
    # statistics are generated and no warning is logged.
    recorder.do_adhoc_statistics(period="hourly", start=zero)
    wait_recording_done(hass)
    assert "does not match the unit of already compiled" not in caplog.text
    statistic_ids = list_statistic_ids(hass)
    assert statistic_ids == [
        {"statistic_id": "sensor.test1", "unit_of_measurement": native_unit}
    ]
    stats = statistics_during_period(hass, zero)
    assert stats == {
        "sensor.test1": [
            {
                "statistic_id": "sensor.test1",
                "start": process_timestamp_to_utc_isoformat(zero),
                "mean": approx(mean),
                "min": approx(min),
                "max": approx(max),
                "last_reset": None,
                "state": None,
                "sum": None,
            }
        ]
    }
    # Compile statistics for the third hour: the unit ("cats") no longer
    # matches the already-compiled metadata, so generation is suppressed
    # with a warning and the stored statistics are unchanged.
    recorder.do_adhoc_statistics(period="hourly", start=zero + timedelta(hours=2))
    wait_recording_done(hass)
    assert (
        "The unit of sensor.test1 (cats) does not match the unit of already compiled "
        f"statistics ({native_unit})" in caplog.text
    )
    statistic_ids = list_statistic_ids(hass)
    assert statistic_ids == [
        {"statistic_id": "sensor.test1", "unit_of_measurement": native_unit}
    ]
    stats = statistics_during_period(hass, zero)
    assert stats == {
        "sensor.test1": [
            {
                "statistic_id": "sensor.test1",
                "start": process_timestamp_to_utc_isoformat(zero),
                "mean": approx(mean),
                "min": approx(min),
                "max": approx(max),
                "last_reset": None,
                "state": None,
                "sum": None,
            }
        ]
    }
    assert "Error while processing event StatisticsTask" not in caplog.text
@pytest.mark.parametrize(
    "device_class,unit,native_unit,mean,min,max",
    [
        (None, None, None, 16.440677, 10, 30),
        (None, "%", "%", 16.440677, 10, 30),
        ("battery", "%", "%", 16.440677, 10, 30),
        ("battery", None, None, 16.440677, 10, 30),
    ],
)
def test_compile_hourly_statistics_changing_units_2(
    hass_recorder, caplog, device_class, unit, native_unit, mean, min, max
):
    """Test compiling hourly statistics where units change during an hour."""
    zero = dt_util.utcnow()
    hass = hass_recorder()
    recorder = hass.data[DATA_INSTANCE]
    setup_component(hass, "sensor", {})
    attributes = {
        "device_class": device_class,
        "state_class": "measurement",
        "unit_of_measurement": unit,
    }
    # Record states with the original unit, then more states with a
    # different unit ("cats") one hour later.
    four, states = record_states(hass, zero, "sensor.test1", attributes)
    attributes["unit_of_measurement"] = "cats"
    four, _states = record_states(
        hass, zero + timedelta(hours=1), "sensor.test1", attributes
    )
    states["sensor.test1"] += _states["sensor.test1"]
    hist = history.get_significant_states(hass, zero, four)
    assert dict(states) == dict(hist)
    # The statistics period starting at zero + 30 min straddles the unit
    # change, so the unit is not stable within the period: statistics
    # generation is suppressed and a warning is logged.
    recorder.do_adhoc_statistics(period="hourly", start=zero + timedelta(minutes=30))
    wait_recording_done(hass)
    assert "The unit of sensor.test1 is changing" in caplog.text
    # list_statistic_ids reports the entity's current unit ("cats") since
    # no statistics metadata was ever compiled for it.
    statistic_ids = list_statistic_ids(hass)
    assert statistic_ids == [
        {"statistic_id": "sensor.test1", "unit_of_measurement": "cats"}
    ]
    stats = statistics_during_period(hass, zero)
    assert stats == {}
    assert "Error while processing event StatisticsTask" not in caplog.text
def record_states(hass, zero, entity_id, attributes):
"""Record some test states.