"""Support for getting data from websites with scraping."""
import logging
from bs4 import BeautifulSoup
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
import voluptuous as vol
from homeassistant.components.rest.sensor import RestData
from homeassistant.components.sensor import PLATFORM_SCHEMA
from homeassistant.const import (
    CONF_AUTHENTICATION,
    CONF_HEADERS,
    CONF_NAME,
    CONF_PASSWORD,
    CONF_RESOURCE,
    CONF_UNIT_OF_MEASUREMENT,
    CONF_USERNAME,
    CONF_VALUE_TEMPLATE,
    CONF_VERIFY_SSL,
    HTTP_BASIC_AUTHENTICATION,
    HTTP_DIGEST_AUTHENTICATION,
)
from homeassistant.exceptions import PlatformNotReady
import homeassistant.helpers.config_validation as cv
from homeassistant.helpers.entity import Entity
_LOGGER = logging.getLogger(__name__)

# Configuration keys specific to this platform.
CONF_ATTR = "attribute"
CONF_SELECT = "select"
CONF_INDEX = "index"

DEFAULT_NAME = "Web scrape"
DEFAULT_VERIFY_SSL = True

# Extend the base sensor schema with the scrape-specific options.
PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
    {
        vol.Required(CONF_RESOURCE): cv.string,
        vol.Required(CONF_SELECT): cv.string,
        vol.Optional(CONF_ATTR): cv.string,
        vol.Optional(CONF_INDEX, default=0): cv.positive_int,
        vol.Optional(CONF_AUTHENTICATION): vol.In(
            [HTTP_BASIC_AUTHENTICATION, HTTP_DIGEST_AUTHENTICATION]
        ),
        vol.Optional(CONF_HEADERS): vol.Schema({cv.string: cv.string}),
        vol.Optional(CONF_NAME, default=DEFAULT_NAME): cv.string,
        vol.Optional(CONF_PASSWORD): cv.string,
        vol.Optional(CONF_UNIT_OF_MEASUREMENT): cv.string,
        vol.Optional(CONF_USERNAME): cv.string,
        vol.Optional(CONF_VALUE_TEMPLATE): cv.template,
        vol.Optional(CONF_VERIFY_SSL, default=DEFAULT_VERIFY_SSL): cv.boolean,
    }
)
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the Web scrape sensor.

    Fetches the configured resource once up front; raises PlatformNotReady
    so Home Assistant retries later if the resource is unreachable.
    """
    name = config.get(CONF_NAME)
    resource = config.get(CONF_RESOURCE)
    method = "GET"  # scraping only ever reads the page
    payload = None
    headers = config.get(CONF_HEADERS)
    verify_ssl = config.get(CONF_VERIFY_SSL)
    select = config.get(CONF_SELECT)
    attr = config.get(CONF_ATTR)
    index = config.get(CONF_INDEX)
    unit = config.get(CONF_UNIT_OF_MEASUREMENT)
    username = config.get(CONF_USERNAME)
    password = config.get(CONF_PASSWORD)
    value_template = config.get(CONF_VALUE_TEMPLATE)
    if value_template is not None:
        value_template.hass = hass

    # Build a requests auth object only when credentials are configured;
    # basic auth is the default unless digest is explicitly requested.
    if username and password:
        if config.get(CONF_AUTHENTICATION) == HTTP_DIGEST_AUTHENTICATION:
            auth = HTTPDigestAuth(username, password)
        else:
            auth = HTTPBasicAuth(username, password)
    else:
        auth = None

    rest = RestData(method, resource, auth, headers, payload, verify_ssl)
    rest.update()
    if rest.data is None:
        # Initial fetch failed — defer setup and let HA retry the platform.
        raise PlatformNotReady

    add_entities(
        [ScrapeSensor(rest, name, select, attr, index, value_template, unit)], True
    )
class ScrapeSensor(Entity):
    """Representation of a web scrape sensor."""

    def __init__(self, rest, name, select, attr, index, value_template, unit):
        """Initialize a web scrape sensor."""
        self.rest = rest
        self._name = name
        self._state = None
        self._select = select  # CSS selector applied to the fetched page
        self._attr = attr  # optional tag attribute to extract instead of text
        self._index = index  # which match to use when the selector hits several
        self._value_template = value_template
        self._unit_of_measurement = unit

    @property
    def name(self):
        """Return the name of the sensor."""
        return self._name

    @property
    def unit_of_measurement(self):
        """Return the unit the value is expressed in."""
        return self._unit_of_measurement

    @property
    def state(self):
        """Return the state of the device."""
        return self._state

    def update(self):
        """Get the latest data from the source and updates the state."""
        self.rest.update()
        if self.rest.data is None:
            _LOGGER.error("Unable to retrieve data for %s", self.name)
            return

        raw_data = BeautifulSoup(self.rest.data, "html.parser")
        _LOGGER.debug(raw_data)

        try:
            if self._attr is not None:
                value = raw_data.select(self._select)[self._index][self._attr]
            else:
                tag = raw_data.select(self._select)[self._index]
                if tag.name in ("style", "script", "template"):
                    # .text is empty for these tags; .string holds the raw content
                    value = tag.string
                else:
                    value = tag.text
            _LOGGER.debug(value)
        except IndexError:
            # Selector matched fewer elements than the configured index.
            _LOGGER.error("Unable to extract data from HTML for %s", self.name)
            return
        except KeyError:
            # Matched tag does not carry the configured attribute
            # (tag[attr] raises KeyError) — log instead of crashing the update.
            _LOGGER.error("Unable to extract data from HTML for %s", self.name)
            return

        if self._value_template is not None:
            self._state = self._value_template.render_with_possible_json_value(
                value, None
            )
        else:
            self._state = value