"""Data update coordinator for RSS/Atom feeds."""
 | 
						|
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
from calendar import timegm
 | 
						|
from datetime import datetime
 | 
						|
from logging import getLogger
 | 
						|
from time import gmtime, struct_time
 | 
						|
from urllib.error import URLError
 | 
						|
 | 
						|
import feedparser
 | 
						|
 | 
						|
from homeassistant.config_entries import ConfigEntry
 | 
						|
from homeassistant.core import HomeAssistant, callback
 | 
						|
from homeassistant.helpers.storage import Store
 | 
						|
from homeassistant.helpers.update_coordinator import DataUpdateCoordinator, UpdateFailed
 | 
						|
from homeassistant.util import dt as dt_util
 | 
						|
 | 
						|
from .const import DEFAULT_SCAN_INTERVAL, DOMAIN, EVENT_FEEDREADER
 | 
						|
 | 
						|
DELAY_SAVE = 30
 | 
						|
STORAGE_VERSION = 1
 | 
						|
 | 
						|
 | 
						|
_LOGGER = getLogger(__name__)
 | 
						|
 | 
						|
 | 
						|
class FeedReaderCoordinator(
 | 
						|
    DataUpdateCoordinator[list[feedparser.FeedParserDict] | None]
 | 
						|
):
 | 
						|
    """Abstraction over Feedparser module."""
 | 
						|
 | 
						|
    config_entry: ConfigEntry
 | 
						|
 | 
						|
    def __init__(
 | 
						|
        self,
 | 
						|
        hass: HomeAssistant,
 | 
						|
        url: str,
 | 
						|
        max_entries: int,
 | 
						|
        storage: StoredData,
 | 
						|
    ) -> None:
 | 
						|
        """Initialize the FeedManager object, poll as per scan interval."""
 | 
						|
        super().__init__(
 | 
						|
            hass=hass,
 | 
						|
            logger=_LOGGER,
 | 
						|
            name=f"{DOMAIN} {url}",
 | 
						|
            update_interval=DEFAULT_SCAN_INTERVAL,
 | 
						|
        )
 | 
						|
        self.url = url
 | 
						|
        self.feed_author: str | None = None
 | 
						|
        self.feed_version: str | None = None
 | 
						|
        self._max_entries = max_entries
 | 
						|
        self._storage = storage
 | 
						|
        self._last_entry_timestamp: struct_time | None = None
 | 
						|
        self._event_type = EVENT_FEEDREADER
 | 
						|
        self._feed: feedparser.FeedParserDict | None = None
 | 
						|
        self._feed_id = url
 | 
						|
 | 
						|
    @callback
 | 
						|
    def _log_no_entries(self) -> None:
 | 
						|
        """Send no entries log at debug level."""
 | 
						|
        _LOGGER.debug("No new entries to be published in feed %s", self.url)
 | 
						|
 | 
						|
    async def _async_fetch_feed(self) -> feedparser.FeedParserDict:
 | 
						|
        """Fetch the feed data."""
 | 
						|
        _LOGGER.debug("Fetching new data from feed %s", self.url)
 | 
						|
 | 
						|
        def _parse_feed() -> feedparser.FeedParserDict:
 | 
						|
            return feedparser.parse(
 | 
						|
                self.url,
 | 
						|
                etag=None if not self._feed else self._feed.get("etag"),
 | 
						|
                modified=None if not self._feed else self._feed.get("modified"),
 | 
						|
            )
 | 
						|
 | 
						|
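        # feedparser.parse does blocking network I/O, so it runs in the executor.
        # Passing the etag/modified values from the previous fetch turns this into
        # a conditional request; an unchanged feed comes back with no entries.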
        feed = await self.hass.async_add_executor_job(_parse_feed)

        if not feed:
            raise UpdateFailed(f"Error fetching feed data from {self.url}")

        # The 'bozo' flag really only indicates that there was an issue
        # during the initial parsing of the XML, but it doesn't indicate
        # whether this is an unrecoverable error. In this case the
        # feedparser lib is trying a less strict parsing approach.
        # If an error is detected here, log a warning message but continue
        # processing the feed entries if present.
        if feed.bozo != 0:
            if isinstance(feed.bozo_exception, URLError):
                raise UpdateFailed(
                    f"Error fetching feed data from {self.url} : {feed.bozo_exception}"
                )

            # no connection issue, but parsing issue
            _LOGGER.warning(
                "Possible issue parsing feed %s: %s",
                self.url,
                feed.bozo_exception,
            )
        return feed

    async def async_setup(self) -> None:
        """Set up the feed manager."""
        feed = await self._async_fetch_feed()
        self.logger.debug("Feed data fetched from %s : %s", self.url, feed["feed"])
        self.feed_author = feed["feed"].get("author")
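        # Map feedparser's internal version identifier (e.g. "rss20") to a
        # human-readable name via its SUPPORTED_VERSIONS table.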
        self.feed_version = feedparser.api.SUPPORTED_VERSIONS.get(feed["version"])
        self._feed = feed

    async def _async_update_data(self) -> list[feedparser.FeedParserDict] | None:
        """Update the feed and publish new entries to the event bus."""
        assert self._feed is not None
        # _last_entry_timestamp is not set during async_setup, but the feed data
        # has already been fetched there, so reuse it instead of fetching again
        if self._last_entry_timestamp:
            self._feed = await self._async_fetch_feed()

        # Using etag and modified, if there's no new data available,
        # the entries list will be empty
        _LOGGER.debug(
            "%s entri(es) available in feed %s",
            len(self._feed.entries),
            self.url,
        )
        if not isinstance(self._feed.entries, list):
            self._log_no_entries()
            return None

        self._filter_entries()
        self._publish_new_entries()

        _LOGGER.debug("Fetch from feed %s completed", self.url)

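        # Persist the newest entry timestamp so entries that were already
        # published are not fired again after a restart.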
        if self._last_entry_timestamp:
            self._storage.async_put_timestamp(
                self._feed_id, self._last_entry_timestamp
            )

        return self._feed.entries

    @callback
    def _filter_entries(self) -> None:
        """Filter the entries provided and return the ones to keep."""
        assert self._feed is not None
        if len(self._feed.entries) > self._max_entries:
            _LOGGER.debug(
                "Processing only the first %s entries in feed %s",
                self._max_entries,
                self.url,
            )
            self._feed.entries = self._feed.entries[0 : self._max_entries]

    @callback
    def _update_and_fire_entry(self, entry: feedparser.FeedParserDict) -> None:
        """Update last_entry_timestamp and fire entry."""
        # Check if the entry has an updated or published date.
        # Start from the updated date because generally `updated` > `published`.
        if time_stamp := entry.get("updated_parsed") or entry.get("published_parsed"):
            self._last_entry_timestamp = time_stamp
        else:
            _LOGGER.debug(
                "No updated_parsed or published_parsed info available for entry %s",
                entry,
            )
        entry["feed_url"] = self.url
        self.hass.bus.async_fire(self._event_type, entry)
        _LOGGER.debug("New event fired for entry %s", entry.get("link"))

    @callback
    def _publish_new_entries(self) -> None:
        """Publish new entries to the event bus."""
        assert self._feed is not None
        new_entry_count = 0
        firstrun = False
        self._last_entry_timestamp = self._storage.get_timestamp(self._feed_id)
        if not self._last_entry_timestamp:
            firstrun = True
            # Set last entry timestamp as epoch time if not available
            self._last_entry_timestamp = dt_util.utc_from_timestamp(0).timetuple()
        # Locally cache self._last_entry_timestamp so that entries published at
        # identical times can be processed
        last_entry_timestamp = self._last_entry_timestamp
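        # Publish an entry if this is the first run for the feed, or if its
        # updated/published timestamp is newer than the last one already fired.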
        for entry in self._feed.entries:
            if firstrun or (
                (
                    time_stamp := entry.get("updated_parsed")
                    or entry.get("published_parsed")
                )
                and time_stamp > last_entry_timestamp
            ):
                self._update_and_fire_entry(entry)
                new_entry_count += 1
            else:
                _LOGGER.debug("Already processed entry %s", entry.get("link"))
        if new_entry_count == 0:
            self._log_no_entries()
        else:
            _LOGGER.debug("%d entries published in feed %s", new_entry_count, self.url)


class StoredData:
    """Represent a data storage."""

    def __init__(self, hass: HomeAssistant) -> None:
        """Initialize data storage."""
        self._data: dict[str, struct_time] = {}
        self.hass = hass
        self._store: Store[dict[str, str]] = Store(hass, STORAGE_VERSION, DOMAIN)
        self.is_initialized = False

    async def async_setup(self) -> None:
        """Set up storage."""
        if (store_data := await self._store.async_load()) is not None:
            # Make sure that dst is set to 0, by using gmtime() on the timestamp.
            self._data = {
                feed_id: gmtime(datetime.fromisoformat(timestamp_string).timestamp())
                for feed_id, timestamp_string in store_data.items()
            }
        self.is_initialized = True

    def get_timestamp(self, feed_id: str) -> struct_time | None:
        """Return stored timestamp for given feed id."""
        return self._data.get(feed_id)

    @callback
    def async_put_timestamp(self, feed_id: str, timestamp: struct_time) -> None:
        """Update timestamp for given feed id."""
        self._data[feed_id] = timestamp
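        # Debounce disk writes: saving is deferred by DELAY_SAVE seconds, so a
        # burst of updates results in a single write to storage.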
        self._store.async_delay_save(self._async_save_data, DELAY_SAVE)

    @callback
    def _async_save_data(self) -> dict[str, str]:
        """Save feed data to storage."""
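        # struct_time values are stored in UTC (see async_setup), so timegm()
        # converts them back to an epoch timestamp for ISO serialization.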
        return {
            feed_id: dt_util.utc_from_timestamp(timegm(struct_utc)).isoformat()
            for feed_id, struct_utc in self._data.items()
        }