""" Support for getting data from websites with scraping. For more details about this platform, please refer to the documentation at https://home-assistant.io/components/sensor.scrape/ """ import logging import voluptuous as vol from homeassistant.components.sensor import PLATFORM_SCHEMA from homeassistant.components.sensor.rest import RestData from homeassistant.const import ( CONF_NAME, CONF_RESOURCE, CONF_UNIT_OF_MEASUREMENT, STATE_UNKNOWN, CONF_VALUE_TEMPLATE, CONF_VERIFY_SSL) from homeassistant.helpers.entity import Entity import homeassistant.helpers.config_validation as cv REQUIREMENTS = ['beautifulsoup4==4.6.0'] _LOGGER = logging.getLogger(__name__) CONF_SELECT = 'select' DEFAULT_NAME = 'Web scrape' DEFAULT_VERIFY_SSL = True PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend({ vol.Required(CONF_RESOURCE): cv.string, vol.Required(CONF_SELECT): cv.string, vol.Optional(CONF_NAME, default=DEFAULT_NAME): cv.string, vol.Optional(CONF_UNIT_OF_MEASUREMENT): cv.string, vol.Optional(CONF_VALUE_TEMPLATE): cv.template, vol.Optional(CONF_VERIFY_SSL, default=DEFAULT_VERIFY_SSL): cv.boolean, }) def setup_platform(hass, config, add_devices, discovery_info=None): """Set up the Web scrape sensor.""" name = config.get(CONF_NAME) resource = config.get(CONF_RESOURCE) method = 'GET' payload = auth = headers = None verify_ssl = config.get(CONF_VERIFY_SSL) select = config.get(CONF_SELECT) unit = config.get(CONF_UNIT_OF_MEASUREMENT) value_template = config.get(CONF_VALUE_TEMPLATE) if value_template is not None: value_template.hass = hass rest = RestData(method, resource, auth, headers, payload, verify_ssl) rest.update() if rest.data is None: _LOGGER.error("Unable to fetch data from %s", resource) return False add_devices([ ScrapeSensor(hass, rest, name, select, value_template, unit) ], True) class ScrapeSensor(Entity): """Representation of a web scrape sensor.""" def __init__(self, hass, rest, name, select, value_template, unit): """Initialize a web scrape sensor.""" self.rest = rest self._name = name self._state = STATE_UNKNOWN self._select = select self._value_template = value_template self._unit_of_measurement = unit @property def name(self): """Return the name of the sensor.""" return self._name @property def unit_of_measurement(self): """Return the unit the value is expressed in.""" return self._unit_of_measurement @property def state(self): """Return the state of the device.""" return self._state def update(self): """Get the latest data from the source and updates the state.""" self.rest.update() from bs4 import BeautifulSoup raw_data = BeautifulSoup(self.rest.data, 'html.parser') _LOGGER.debug(raw_data) value = raw_data.select(self._select)[0].text _LOGGER.debug(value) if self._value_template is not None: self._state = self._value_template.render_with_possible_json_value( value, STATE_UNKNOWN) else: self._state = value