""" Support for getting data from websites with scraping. For more details about this platform, please refer to the documentation at https://home-assistant.io/components/sensor.scrape/ """ import logging import voluptuous as vol from requests.auth import HTTPBasicAuth, HTTPDigestAuth from homeassistant.components.sensor import PLATFORM_SCHEMA from homeassistant.components.sensor.rest import RestData from homeassistant.const import ( CONF_NAME, CONF_RESOURCE, CONF_UNIT_OF_MEASUREMENT, CONF_VALUE_TEMPLATE, CONF_VERIFY_SSL, CONF_USERNAME, CONF_HEADERS, CONF_PASSWORD, CONF_AUTHENTICATION, HTTP_BASIC_AUTHENTICATION, HTTP_DIGEST_AUTHENTICATION) from homeassistant.helpers.entity import Entity from homeassistant.exceptions import PlatformNotReady import homeassistant.helpers.config_validation as cv REQUIREMENTS = ['beautifulsoup4==4.7.1'] _LOGGER = logging.getLogger(__name__) CONF_ATTR = 'attribute' CONF_SELECT = 'select' CONF_INDEX = 'index' DEFAULT_NAME = 'Web scrape' DEFAULT_VERIFY_SSL = True PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend({ vol.Required(CONF_RESOURCE): cv.string, vol.Required(CONF_SELECT): cv.string, vol.Optional(CONF_ATTR): cv.string, vol.Optional(CONF_INDEX, default=0): cv.positive_int, vol.Optional(CONF_AUTHENTICATION): vol.In([HTTP_BASIC_AUTHENTICATION, HTTP_DIGEST_AUTHENTICATION]), vol.Optional(CONF_HEADERS): vol.Schema({cv.string: cv.string}), vol.Optional(CONF_NAME, default=DEFAULT_NAME): cv.string, vol.Optional(CONF_PASSWORD): cv.string, vol.Optional(CONF_UNIT_OF_MEASUREMENT): cv.string, vol.Optional(CONF_USERNAME): cv.string, vol.Optional(CONF_VALUE_TEMPLATE): cv.template, vol.Optional(CONF_VERIFY_SSL, default=DEFAULT_VERIFY_SSL): cv.boolean, }) def setup_platform(hass, config, add_entities, discovery_info=None): """Set up the Web scrape sensor.""" name = config.get(CONF_NAME) resource = config.get(CONF_RESOURCE) method = 'GET' payload = None headers = config.get(CONF_HEADERS) verify_ssl = config.get(CONF_VERIFY_SSL) select = config.get(CONF_SELECT) attr = config.get(CONF_ATTR) index = config.get(CONF_INDEX) unit = config.get(CONF_UNIT_OF_MEASUREMENT) username = config.get(CONF_USERNAME) password = config.get(CONF_PASSWORD) value_template = config.get(CONF_VALUE_TEMPLATE) if value_template is not None: value_template.hass = hass if username and password: if config.get(CONF_AUTHENTICATION) == HTTP_DIGEST_AUTHENTICATION: auth = HTTPDigestAuth(username, password) else: auth = HTTPBasicAuth(username, password) else: auth = None rest = RestData(method, resource, auth, headers, payload, verify_ssl) rest.update() if rest.data is None: raise PlatformNotReady add_entities([ ScrapeSensor(rest, name, select, attr, index, value_template, unit)], True) class ScrapeSensor(Entity): """Representation of a web scrape sensor.""" def __init__(self, rest, name, select, attr, index, value_template, unit): """Initialize a web scrape sensor.""" self.rest = rest self._name = name self._state = None self._select = select self._attr = attr self._index = index self._value_template = value_template self._unit_of_measurement = unit @property def name(self): """Return the name of the sensor.""" return self._name @property def unit_of_measurement(self): """Return the unit the value is expressed in.""" return self._unit_of_measurement @property def state(self): """Return the state of the device.""" return self._state def update(self): """Get the latest data from the source and updates the state.""" self.rest.update() from bs4 import BeautifulSoup raw_data = BeautifulSoup(self.rest.data, 'html.parser') _LOGGER.debug(raw_data) try: if self._attr is not None: value = raw_data.select(self._select)[self._index][self._attr] else: value = raw_data.select(self._select)[self._index].text _LOGGER.debug(value) except IndexError: _LOGGER.error("Unable to extract data from HTML") return if self._value_template is not None: self._state = self._value_template.render_with_possible_json_value( value, None) else: self._state = value