core/homeassistant/components/feedreader.py

"""
Support for RSS/Atom feeds.
For more details about this component, please refer to the documentation at
https://home-assistant.io/components/feedreader/
"""
from datetime import datetime
from logging import getLogger
from os.path import exists
from threading import Lock
import pickle
import voluptuous as vol
from homeassistant.const import EVENT_HOMEASSISTANT_START
from homeassistant.helpers.event import track_utc_time_change
import homeassistant.helpers.config_validation as cv
REQUIREMENTS = ['feedparser==5.2.1']
_LOGGER = getLogger(__name__)
CONF_URLS = 'urls'
DOMAIN = 'feedreader'
EVENT_FEEDREADER = 'feedreader'
MAX_ENTRIES = 20
CONFIG_SCHEMA = vol.Schema({
    DOMAIN: {
        vol.Required(CONF_URLS): vol.All(cv.ensure_list, [cv.url]),
    }
}, extra=vol.ALLOW_EXTRA)
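
# Example configuration.yaml entry (a minimal sketch; the feed URL is a
# placeholder):
#
# feedreader:
#   urls:
#     - https://home-assistant.io/atom.xml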


def setup(hass, config):
    """Set up the Feedreader component."""
    urls = config.get(DOMAIN)[CONF_URLS]
    data_file = hass.config.path("{}.pickle".format(DOMAIN))
    storage = StoredData(data_file)
    feeds = [FeedManager(url, hass, storage) for url in urls]
    return len(feeds) > 0
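
# Every new entry is fired on the event bus as a 'feedreader' event; the
# event data is the feedparser entry (typically title, link, summary,
# published_parsed, ...) plus the 'feed_url' key added in
# FeedManager._update_and_fire_entry. A minimal listener sketch (the
# callback name is hypothetical):
#
#     def print_new_entry(event):
#         print(event.data.get('feed_url'), event.data.get('title'))
#
#     hass.bus.listen(EVENT_FEEDREADER, print_new_entry)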


class FeedManager(object):
    """Abstraction over Feedparser module."""

    def __init__(self, url, hass, storage):
        """Initialize the FeedManager object, poll every hour."""
        self._url = url
        self._feed = None
        self._hass = hass
        self._firstrun = True
        self._storage = storage
        self._last_entry_timestamp = None
        self._has_published_parsed = False
        hass.bus.listen_once(
            EVENT_HOMEASSISTANT_START, lambda _: self._update())
        track_utc_time_change(
            hass, lambda now: self._update(), minute=0, second=0)
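        # The feed is fetched once when Home Assistant starts and then at the
        # top of every hour (track_utc_time_change with minute=0, second=0).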

    def _log_no_entries(self):
        """Send no entries log at debug level."""
        _LOGGER.debug("No new entries to be published in feed %s", self._url)

    def _update(self):
        """Update the feed and publish new entries to the event bus."""
        import feedparser

        _LOGGER.info("Fetching new data from feed %s", self._url)
        self._feed = feedparser.parse(self._url,
                                      etag=None if not self._feed
                                      else self._feed.get('etag'),
                                      modified=None if not self._feed
                                      else self._feed.get('modified'))
        if not self._feed:
            _LOGGER.error("Error fetching feed data from %s", self._url)
        else:
            if self._feed.bozo != 0:
                _LOGGER.error("Error parsing feed %s", self._url)
            # Using etag and modified, if there's no new data available,
            # the entries list will be empty
            elif self._feed.entries:
                _LOGGER.debug("%s entri(es) available in feed %s",
                              len(self._feed.entries), self._url)
                if len(self._feed.entries) > MAX_ENTRIES:
                    _LOGGER.debug("Processing only the first %s entries "
                                  "in feed %s", MAX_ENTRIES, self._url)
                    self._feed.entries = self._feed.entries[0:MAX_ENTRIES]
                self._publish_new_entries()
                if self._has_published_parsed:
                    self._storage.put_timestamp(
                        self._url, self._last_entry_timestamp)
            else:
                self._log_no_entries()
        _LOGGER.info("Fetch from feed %s completed", self._url)

    def _update_and_fire_entry(self, entry):
        """Update last_entry_timestamp and fire entry."""
        # If `published_parsed` is available, track the newest timestamp seen
        # so that only entries published since the last run are fired
        if 'published_parsed' in entry.keys():
            self._has_published_parsed = True
            self._last_entry_timestamp = max(
                entry.published_parsed, self._last_entry_timestamp)
        else:
            self._has_published_parsed = False
            _LOGGER.debug("No published_parsed info available for entry %s",
                          entry.title)
        entry.update({'feed_url': self._url})
        self._hass.bus.fire(EVENT_FEEDREADER, entry)

    def _publish_new_entries(self):
        """Publish new entries to the event bus."""
        new_entries = False
        self._last_entry_timestamp = self._storage.get_timestamp(self._url)
        if self._last_entry_timestamp:
            self._firstrun = False
        else:
            # Set last entry timestamp as epoch time if not available
            self._last_entry_timestamp = \
                datetime.utcfromtimestamp(0).timetuple()
        for entry in self._feed.entries:
            if self._firstrun or (
                    'published_parsed' in entry.keys() and
                    entry.published_parsed > self._last_entry_timestamp):
                self._update_and_fire_entry(entry)
                new_entries = True
            else:
                _LOGGER.debug("Entry %s already processed", entry.title)
        if not new_entries:
            self._log_no_entries()
        self._firstrun = False
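

# StoredData persists one "newest published entry" timestamp per feed URL in
# a pickle file, so entries that were already fired are not re-published
# after a Home Assistant restart.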
class StoredData(object):
    """Abstraction over pickle data storage."""

    def __init__(self, data_file):
        """Initialize pickle data storage."""
        self._data_file = data_file
        self._lock = Lock()
        self._cache_outdated = True
        self._data = {}
        self._fetch_data()

    def _fetch_data(self):
        """Fetch data stored in the pickle file."""
        if self._cache_outdated and exists(self._data_file):
            try:
                _LOGGER.debug("Fetching data from file %s", self._data_file)
                with self._lock, open(self._data_file, 'rb') as myfile:
                    self._data = pickle.load(myfile) or {}
                    self._cache_outdated = False
            except:  # noqa: E722  # pylint: disable=bare-except
                _LOGGER.error("Error loading data from pickled file %s",
                              self._data_file)

    def get_timestamp(self, url):
        """Return stored timestamp for given URL."""
        self._fetch_data()
        return self._data.get(url)

    def put_timestamp(self, url, timestamp):
        """Update timestamp for given URL."""
        self._fetch_data()
        with self._lock, open(self._data_file, 'wb') as myfile:
            self._data.update({url: timestamp})
            _LOGGER.debug("Overwriting feed %s timestamp in storage file %s",
                          url, self._data_file)
            try:
                pickle.dump(self._data, myfile)
            except:  # noqa: E722  # pylint: disable=bare-except
                _LOGGER.error(
                    "Error saving pickled data to %s", self._data_file)
        # Mark the cache stale so the next read reloads the file from disk
        self._cache_outdated = True
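

# The pickle file maps each feed URL to the `time.struct_time` of the newest
# published entry seen. A quick offline inspection sketch (assumes the
# default 'feedreader.pickle' name under the Home Assistant config
# directory):
#
#     import pickle
#     import time
#
#     with open('feedreader.pickle', 'rb') as stored:
#         for url, stamp in pickle.load(stored).items():
#             print(url, time.strftime('%Y-%m-%dT%H:%M:%SZ', stamp))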