Generate statistics for all sensors with a supported state_class (#54882)

* Generate statistics for all sensors

* Fix bugs, add tests

* Address review comments

* Cleanup warnings

* Simplify tests

* Simplify selection of statistics

* Fix tests
pull/55220/head
Erik Montnemery 2021-08-25 13:00:35 +02:00 committed by GitHub
parent 0d654fa6b3
commit ffbd2d79c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 272 additions and 62 deletions

View File

@ -227,6 +227,19 @@ def _get_metadata(
return metadata
def get_metadata(
    hass: HomeAssistant,
    statistic_id: str,
) -> dict[str, str] | None:
    """Return metadata for a statistic_id."""
    with session_scope(hass=hass) as session:
        # Resolve the statistic_id to its metadata row id; bail out early
        # when no metadata has been stored for it yet.
        metadata_ids = _get_metadata_ids(hass, session, [statistic_id])
        if not metadata_ids:
            return None
        # Fetch the full metadata map for this single id and pick our entry.
        metadata = _get_metadata(hass, session, [statistic_id], None)
        return metadata.get(metadata_ids[0])
def _configured_unit(unit: str, units: UnitSystem) -> str:
"""Return the pressure and temperature units configured by the user."""
if unit == PRESSURE_PA:

View File

@ -9,10 +9,8 @@ from typing import Callable
from homeassistant.components.recorder import history, statistics
from homeassistant.components.sensor import (
ATTR_STATE_CLASS,
DEVICE_CLASS_BATTERY,
DEVICE_CLASS_ENERGY,
DEVICE_CLASS_GAS,
DEVICE_CLASS_HUMIDITY,
DEVICE_CLASS_MONETARY,
DEVICE_CLASS_PRESSURE,
DEVICE_CLASS_TEMPERATURE,
@ -26,7 +24,6 @@ from homeassistant.const import (
DEVICE_CLASS_POWER,
ENERGY_KILO_WATT_HOUR,
ENERGY_WATT_HOUR,
PERCENTAGE,
POWER_KILO_WATT,
POWER_WATT,
PRESSURE_BAR,
@ -51,24 +48,18 @@ from . import ATTR_LAST_RESET, DOMAIN
_LOGGER = logging.getLogger(__name__)
DEVICE_CLASS_OR_UNIT_STATISTICS = {
DEVICE_CLASS_STATISTICS: dict[str, dict[str, set[str]]] = {
STATE_CLASS_MEASUREMENT: {
DEVICE_CLASS_BATTERY: {"mean", "min", "max"},
DEVICE_CLASS_HUMIDITY: {"mean", "min", "max"},
DEVICE_CLASS_POWER: {"mean", "min", "max"},
DEVICE_CLASS_PRESSURE: {"mean", "min", "max"},
DEVICE_CLASS_TEMPERATURE: {"mean", "min", "max"},
PERCENTAGE: {"mean", "min", "max"},
# Deprecated, support will be removed in Home Assistant 2021.11
DEVICE_CLASS_ENERGY: {"sum"},
DEVICE_CLASS_GAS: {"sum"},
DEVICE_CLASS_MONETARY: {"sum"},
},
STATE_CLASS_TOTAL_INCREASING: {
DEVICE_CLASS_ENERGY: {"sum"},
DEVICE_CLASS_GAS: {"sum"},
DEVICE_CLASS_MONETARY: {"sum"},
},
STATE_CLASS_TOTAL_INCREASING: {},
}
DEFAULT_STATISTICS = {
STATE_CLASS_MEASUREMENT: {"mean", "min", "max"},
STATE_CLASS_TOTAL_INCREASING: {"sum"},
}
# Normalized units which will be stored in the statistics table
@ -116,31 +107,20 @@ UNIT_CONVERSIONS: dict[str, dict[str, Callable]] = {
}
# Keep track of entities for which a warning about unsupported unit has been logged
WARN_UNSUPPORTED_UNIT = set()
WARN_UNSUPPORTED_UNIT = "sensor_warn_unsupported_unit"
WARN_UNSTABLE_UNIT = "sensor_warn_unstable_unit"
def _get_entities(hass: HomeAssistant) -> list[tuple[str, str, str]]:
"""Get (entity_id, state_class, key) of all sensors for which to compile statistics.
Key is either a device class or a unit and is used to index the
DEVICE_CLASS_OR_UNIT_STATISTICS map.
"""
def _get_entities(hass: HomeAssistant) -> list[tuple[str, str, str | None]]:
"""Get (entity_id, state_class, device_class) of all sensors for which to compile statistics."""
all_sensors = hass.states.all(DOMAIN)
entity_ids = []
for state in all_sensors:
if (state_class := state.attributes.get(ATTR_STATE_CLASS)) not in STATE_CLASSES:
continue
if (
key := state.attributes.get(ATTR_DEVICE_CLASS)
) in DEVICE_CLASS_OR_UNIT_STATISTICS[state_class]:
entity_ids.append((state.entity_id, state_class, key))
if (
key := state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
) in DEVICE_CLASS_OR_UNIT_STATISTICS[state_class]:
entity_ids.append((state.entity_id, state_class, key))
device_class = state.attributes.get(ATTR_DEVICE_CLASS)
entity_ids.append((state.entity_id, state_class, device_class))
return entity_ids
@ -190,18 +170,39 @@ def _time_weighted_average(
return accumulated / (end - start).total_seconds()
def _get_units(fstates: list[tuple[float, State]]) -> set[str | None]:
    """Return the set of units used by the given (value, State) pairs.

    More than one element in the result means the unit changed within the
    sampled history; None is included for states with no unit attribute.
    """
    return {item[1].attributes.get(ATTR_UNIT_OF_MEASUREMENT) for item in fstates}
def _normalize_states(
entity_history: list[State], key: str, entity_id: str
hass: HomeAssistant,
entity_history: list[State],
device_class: str | None,
entity_id: str,
) -> tuple[str | None, list[tuple[float, State]]]:
"""Normalize units."""
unit = None
if key not in UNIT_CONVERSIONS:
if device_class not in UNIT_CONVERSIONS:
# We're not normalizing this device class, return the state as they are
fstates = [
(float(el.state), el) for el in entity_history if _is_number(el.state)
]
if fstates:
all_units = _get_units(fstates)
if len(all_units) > 1:
if WARN_UNSTABLE_UNIT not in hass.data:
hass.data[WARN_UNSTABLE_UNIT] = set()
if entity_id not in hass.data[WARN_UNSTABLE_UNIT]:
hass.data[WARN_UNSTABLE_UNIT].add(entity_id)
_LOGGER.warning(
"The unit of %s is changing, got %s, generation of long term "
"statistics will be suppressed unless the unit is stable",
entity_id,
all_units,
)
return None, []
unit = fstates[0][1].attributes.get(ATTR_UNIT_OF_MEASUREMENT)
return unit, fstates
@ -215,15 +216,17 @@ def _normalize_states(
fstate = float(state.state)
unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
# Exclude unsupported units from statistics
if unit not in UNIT_CONVERSIONS[key]:
if entity_id not in WARN_UNSUPPORTED_UNIT:
WARN_UNSUPPORTED_UNIT.add(entity_id)
if unit not in UNIT_CONVERSIONS[device_class]:
if WARN_UNSUPPORTED_UNIT not in hass.data:
hass.data[WARN_UNSUPPORTED_UNIT] = set()
if entity_id not in hass.data[WARN_UNSUPPORTED_UNIT]:
hass.data[WARN_UNSUPPORTED_UNIT].add(entity_id)
_LOGGER.warning("%s has unknown unit %s", entity_id, unit)
continue
fstates.append((UNIT_CONVERSIONS[key][unit](fstate), state))
fstates.append((UNIT_CONVERSIONS[device_class][unit](fstate), state))
return DEVICE_CLASS_UNITS[key], fstates
return DEVICE_CLASS_UNITS[device_class], fstates
def reset_detected(state: float, previous_state: float | None) -> bool:
@ -247,18 +250,39 @@ def compile_statistics(
hass, start - datetime.timedelta.resolution, end, [i[0] for i in entities]
)
for entity_id, state_class, key in entities:
wanted_statistics = DEVICE_CLASS_OR_UNIT_STATISTICS[state_class][key]
for entity_id, state_class, device_class in entities:
if device_class in DEVICE_CLASS_STATISTICS[state_class]:
wanted_statistics = DEVICE_CLASS_STATISTICS[state_class][device_class]
else:
wanted_statistics = DEFAULT_STATISTICS[state_class]
if entity_id not in history_list:
continue
entity_history = history_list[entity_id]
unit, fstates = _normalize_states(entity_history, key, entity_id)
unit, fstates = _normalize_states(hass, entity_history, device_class, entity_id)
if not fstates:
continue
# Check metadata
if old_metadata := statistics.get_metadata(hass, entity_id):
if old_metadata["unit_of_measurement"] != unit:
if WARN_UNSTABLE_UNIT not in hass.data:
hass.data[WARN_UNSTABLE_UNIT] = set()
if entity_id not in hass.data[WARN_UNSTABLE_UNIT]:
hass.data[WARN_UNSTABLE_UNIT].add(entity_id)
_LOGGER.warning(
"The unit of %s (%s) does not match the unit of already "
"compiled statistics (%s). Generation of long term statistics "
"will be suppressed unless the unit changes back to %s",
entity_id,
unit,
old_metadata["unit_of_measurement"],
unit,
)
continue
result[entity_id] = {}
# Set meta data
@ -370,8 +394,11 @@ def list_statistic_ids(hass: HomeAssistant, statistic_type: str | None = None) -
statistic_ids = {}
for entity_id, state_class, key in entities:
provided_statistics = DEVICE_CLASS_OR_UNIT_STATISTICS[state_class][key]
for entity_id, state_class, device_class in entities:
if device_class in DEVICE_CLASS_STATISTICS[state_class]:
provided_statistics = DEVICE_CLASS_STATISTICS[state_class][device_class]
else:
provided_statistics = DEFAULT_STATISTICS[state_class]
if statistic_type is not None and statistic_type not in provided_statistics:
continue
@ -386,16 +413,20 @@ def list_statistic_ids(hass: HomeAssistant, statistic_type: str | None = None) -
):
continue
native_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
metadata = statistics.get_metadata(hass, entity_id)
if metadata:
native_unit: str | None = metadata["unit_of_measurement"]
else:
native_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
if key not in UNIT_CONVERSIONS:
if device_class not in UNIT_CONVERSIONS:
statistic_ids[entity_id] = native_unit
continue
if native_unit not in UNIT_CONVERSIONS[key]:
if native_unit not in UNIT_CONVERSIONS[device_class]:
continue
statistics_unit = DEVICE_CLASS_UNITS[key]
statistics_unit = DEVICE_CLASS_UNITS[device_class]
statistic_ids[entity_id] = statistics_unit
return statistic_ids

View File

@ -107,24 +107,34 @@ def test_compile_hourly_statistics(
@pytest.mark.parametrize("attributes", [TEMPERATURE_SENSOR_ATTRIBUTES])
def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes):
"""Test compiling hourly statistics for unsupported sensor."""
attributes = dict(attributes)
zero = dt_util.utcnow()
hass = hass_recorder()
recorder = hass.data[DATA_INSTANCE]
setup_component(hass, "sensor", {})
four, states = record_states(hass, zero, "sensor.test1", attributes)
if "unit_of_measurement" in attributes:
attributes["unit_of_measurement"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test2", attributes)
states = {**states, **_states}
attributes.pop("unit_of_measurement")
_, _states = record_states(hass, zero, "sensor.test3", attributes)
states = {**states, **_states}
attributes["state_class"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test4", attributes)
attributes_tmp = dict(attributes)
attributes_tmp["unit_of_measurement"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test2", attributes_tmp)
states = {**states, **_states}
attributes.pop("state_class")
_, _states = record_states(hass, zero, "sensor.test5", attributes)
attributes_tmp.pop("unit_of_measurement")
_, _states = record_states(hass, zero, "sensor.test3", attributes_tmp)
states = {**states, **_states}
attributes_tmp = dict(attributes)
attributes_tmp["state_class"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test4", attributes_tmp)
states = {**states, **_states}
attributes_tmp.pop("state_class")
_, _states = record_states(hass, zero, "sensor.test5", attributes_tmp)
states = {**states, **_states}
attributes_tmp = dict(attributes)
attributes_tmp["device_class"] = "invalid"
_, _states = record_states(hass, zero, "sensor.test6", attributes_tmp)
states = {**states, **_states}
attributes_tmp.pop("device_class")
_, _states = record_states(hass, zero, "sensor.test7", attributes_tmp)
states = {**states, **_states}
hist = history.get_significant_states(hass, zero, four)
@ -134,7 +144,9 @@ def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes
wait_recording_done(hass)
statistic_ids = list_statistic_ids(hass)
assert statistic_ids == [
{"statistic_id": "sensor.test1", "unit_of_measurement": "°C"}
{"statistic_id": "sensor.test1", "unit_of_measurement": "°C"},
{"statistic_id": "sensor.test6", "unit_of_measurement": "°C"},
{"statistic_id": "sensor.test7", "unit_of_measurement": "°C"},
]
stats = statistics_during_period(hass, zero)
assert stats == {
@ -149,7 +161,31 @@ def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes
"state": None,
"sum": None,
}
]
],
"sensor.test6": [
{
"statistic_id": "sensor.test6",
"start": process_timestamp_to_utc_isoformat(zero),
"mean": approx(16.440677966101696),
"min": approx(10.0),
"max": approx(30.0),
"last_reset": None,
"state": None,
"sum": None,
}
],
"sensor.test7": [
{
"statistic_id": "sensor.test7",
"start": process_timestamp_to_utc_isoformat(zero),
"mean": approx(16.440677966101696),
"min": approx(10.0),
"max": approx(30.0),
"last_reset": None,
"state": None,
"sum": None,
}
],
}
assert "Error while processing event StatisticsTask" not in caplog.text
@ -859,6 +895,136 @@ def test_list_statistic_ids_unsupported(hass_recorder, caplog, _attributes):
hass.states.set("sensor.test6", 0, attributes=attributes)
@pytest.mark.parametrize(
    "device_class,unit,native_unit,mean,min,max",
    [
        (None, None, None, 16.440677, 10, 30),
        (None, "%", "%", 16.440677, 10, 30),
        ("battery", "%", "%", 16.440677, 10, 30),
        ("battery", None, None, 16.440677, 10, 30),
    ],
)
def test_compile_hourly_statistics_changing_units_1(
    hass_recorder, caplog, device_class, unit, native_unit, mean, min, max
):
    """Test compiling hourly statistics where units change from one hour to the next."""
    zero = dt_util.utcnow()
    hass = hass_recorder()
    recorder = hass.data[DATA_INSTANCE]
    setup_component(hass, "sensor", {})
    attributes = {
        "device_class": device_class,
        "state_class": "measurement",
        "unit_of_measurement": unit,
    }
    # First hour: states with the original (parametrized) unit.
    four, states = record_states(hass, zero, "sensor.test1", attributes)
    # Hours 2 and 3: same entity, but the unit switches to "cats".
    attributes["unit_of_measurement"] = "cats"
    four, _states = record_states(
        hass, zero + timedelta(hours=1), "sensor.test1", attributes
    )
    states["sensor.test1"] += _states["sensor.test1"]
    four, _states = record_states(
        hass, zero + timedelta(hours=2), "sensor.test1", attributes
    )
    states["sensor.test1"] += _states["sensor.test1"]
    hist = history.get_significant_states(hass, zero, four)
    assert dict(states) == dict(hist)
    # Compile statistics for the first hour: unit is stable there, so
    # statistics are generated and no warning is logged.
    recorder.do_adhoc_statistics(period="hourly", start=zero)
    wait_recording_done(hass)
    assert "does not match the unit of already compiled" not in caplog.text
    statistic_ids = list_statistic_ids(hass)
    assert statistic_ids == [
        {"statistic_id": "sensor.test1", "unit_of_measurement": native_unit}
    ]
    stats = statistics_during_period(hass, zero)
    assert stats == {
        "sensor.test1": [
            {
                "statistic_id": "sensor.test1",
                "start": process_timestamp_to_utc_isoformat(zero),
                "mean": approx(mean),
                "min": approx(min),
                "max": approx(max),
                "last_reset": None,
                "state": None,
                "sum": None,
            }
        ]
    }
    # Compile statistics for the third hour: the unit ("cats") no longer
    # matches the already-compiled metadata, so generation is suppressed
    # with a warning and the stored statistics are unchanged.
    recorder.do_adhoc_statistics(period="hourly", start=zero + timedelta(hours=2))
    wait_recording_done(hass)
    assert (
        "The unit of sensor.test1 (cats) does not match the unit of already compiled "
        f"statistics ({native_unit})" in caplog.text
    )
    statistic_ids = list_statistic_ids(hass)
    assert statistic_ids == [
        {"statistic_id": "sensor.test1", "unit_of_measurement": native_unit}
    ]
    stats = statistics_during_period(hass, zero)
    assert stats == {
        "sensor.test1": [
            {
                "statistic_id": "sensor.test1",
                "start": process_timestamp_to_utc_isoformat(zero),
                "mean": approx(mean),
                "min": approx(min),
                "max": approx(max),
                "last_reset": None,
                "state": None,
                "sum": None,
            }
        ]
    }
    assert "Error while processing event StatisticsTask" not in caplog.text
@pytest.mark.parametrize(
    "device_class,unit,native_unit,mean,min,max",
    [
        (None, None, None, 16.440677, 10, 30),
        (None, "%", "%", 16.440677, 10, 30),
        ("battery", "%", "%", 16.440677, 10, 30),
        ("battery", None, None, 16.440677, 10, 30),
    ],
)
def test_compile_hourly_statistics_changing_units_2(
    hass_recorder, caplog, device_class, unit, native_unit, mean, min, max
):
    """Test compiling hourly statistics where units change during an hour."""
    zero = dt_util.utcnow()
    hass = hass_recorder()
    recorder = hass.data[DATA_INSTANCE]
    setup_component(hass, "sensor", {})
    attributes = {
        "device_class": device_class,
        "state_class": "measurement",
        "unit_of_measurement": unit,
    }
    # Record states with the original unit, then more states with a
    # different unit ("cats") one hour later.
    four, states = record_states(hass, zero, "sensor.test1", attributes)
    attributes["unit_of_measurement"] = "cats"
    four, _states = record_states(
        hass, zero + timedelta(hours=1), "sensor.test1", attributes
    )
    states["sensor.test1"] += _states["sensor.test1"]
    hist = history.get_significant_states(hass, zero, four)
    assert dict(states) == dict(hist)
    # The statistics period starting at zero + 30 min straddles the unit
    # change, so the unit is not stable within the period: statistics
    # generation is suppressed and a warning is logged.
    recorder.do_adhoc_statistics(period="hourly", start=zero + timedelta(minutes=30))
    wait_recording_done(hass)
    assert "The unit of sensor.test1 is changing" in caplog.text
    # list_statistic_ids reports the entity's current unit ("cats") since
    # no statistics metadata was ever compiled for it.
    statistic_ids = list_statistic_ids(hass)
    assert statistic_ids == [
        {"statistic_id": "sensor.test1", "unit_of_measurement": "cats"}
    ]
    stats = statistics_during_period(hass, zero)
    assert stats == {}
    assert "Error while processing event StatisticsTask" not in caplog.text
def record_states(hass, zero, entity_id, attributes):
"""Record some test states.