Improve reliability of ONVIF subscription renewals (#92551)
* Improve reliablity of onvif subscription renewals
upstream changelog: https://github.com/hunterjm/python-onvif-zeep-async/compare/v2.0.0...v2.1.0
* ```
Traceback (most recent call last):
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/onvif/client.py", line 75, in _async_wrap_connection_error_retry
return await func(*args, **kwargs)
File "/Users/bdraco/home-assistant/homeassistant/components/onvif/event.py", line 441, in _async_call_pullpoint_subscription_renew
await self._pullpoint_subscription.Renew(SUBSCRIPTION_RELATIVE_TIME)
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/zeep/proxy.py", line 64, in __call__
return await self._proxy._binding.send_async(
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/zeep/wsdl/bindings/soap.py", line 156, in send_async
response = await client.transport.post_xml(
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/zeep/transports.py", line 235, in post_xml
response = await self.post(address, message, headers)
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/zeep/transports.py", line 220, in post
response = await self.client.post(
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/httpx/_client.py", line 1845, in post
return await self.request(
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/httpx/_client.py", line 1530, in request
return await self.send(request, auth=auth, follow_redirects=follow_redirects)
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/httpx/_client.py", line 1617, in send
response = await self._send_handling_auth(
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/httpx/_client.py", line 1645, in _send_handling_auth
response = await self._send_handling_redirects(
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/httpx/_client.py", line 1682, in _send_handling_redirects
response = await self._send_single_request(request)
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/httpx/_client.py", line 1719, in _send_single_request
response = await transport.handle_async_request(request)
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/httpx/_transports/default.py", line 352, in handle_async_request
with map_httpcore_exceptions():
File "/opt/homebrew/Cellar/python@3.10/3.10.10_1/Frameworks/Python.framework/Versions/3.10/lib/python3.10/contextlib.py", line 153, in __exit__
self.gen.throw(typ, value, traceback)
File "/Users/bdraco/home-assistant/venv/lib/python3.10/site-packages/httpx/_transports/default.py", line 77, in map_httpcore_exceptions
raise mapped_exc(message) from exc
httpx.ReadTimeout
```
* adjust timeouts for slower tplink cameras
* tweak
* more debug
* tweak
* adjust message
* tweak
* Revert "tweak"
This reverts commit 10ee2a8de7
.
* give time in seconds
* revert
* revert
* Update homeassistant/components/onvif/event.py
* Update homeassistant/components/onvif/event.py
pull/92627/head
parent
53e533af6b
commit
9a589a3a54
|
@ -9,7 +9,7 @@ import datetime as dt
|
|||
from aiohttp.web import Request
|
||||
from httpx import RemoteProtocolError, RequestError, TransportError
|
||||
from onvif import ONVIFCamera, ONVIFService
|
||||
from onvif.client import NotificationManager
|
||||
from onvif.client import NotificationManager, retry_connection_error
|
||||
from onvif.exceptions import ONVIFError
|
||||
from zeep.exceptions import Fault, ValidationError, XMLParseError
|
||||
|
||||
|
@ -40,8 +40,8 @@ SET_SYNCHRONIZATION_POINT_ERRORS = (*SUBSCRIPTION_ERRORS, TypeError)
|
|||
UNSUBSCRIBE_ERRORS = (XMLParseError, *SUBSCRIPTION_ERRORS)
|
||||
RENEW_ERRORS = (ONVIFError, RequestError, XMLParseError, *SUBSCRIPTION_ERRORS)
|
||||
#
|
||||
# We only keep the subscription alive for 3 minutes, and will keep
|
||||
# renewing it every 1.5 minutes. This is to avoid the camera
|
||||
# We only keep the subscription alive for 10 minutes, and will keep
|
||||
# renewing it every 8 minutes. This is to avoid the camera
|
||||
# accumulating subscriptions which will be impossible to clean up
|
||||
# since ONVIF does not provide a way to list existing subscriptions.
|
||||
#
|
||||
|
@ -49,12 +49,25 @@ RENEW_ERRORS = (ONVIFError, RequestError, XMLParseError, *SUBSCRIPTION_ERRORS)
|
|||
# sending events to us, and we will not be able to recover until
|
||||
# the subscriptions expire or the camera is rebooted.
|
||||
#
|
||||
SUBSCRIPTION_TIME = dt.timedelta(minutes=3)
|
||||
SUBSCRIPTION_RELATIVE_TIME = (
|
||||
"PT3M" # use relative time since the time on the camera is not reliable
|
||||
)
|
||||
SUBSCRIPTION_RENEW_INTERVAL = SUBSCRIPTION_TIME.total_seconds() / 2
|
||||
SUBSCRIPTION_RENEW_INTERVAL_ON_ERROR = 60.0
|
||||
SUBSCRIPTION_TIME = dt.timedelta(minutes=10)
|
||||
|
||||
# SUBSCRIPTION_RELATIVE_TIME uses a relative time since the time on the camera
|
||||
# is not reliable. We use 600 seconds (10 minutes) since some cameras cannot
|
||||
# parse time in the format "PT10M" (10 minutes).
|
||||
SUBSCRIPTION_RELATIVE_TIME = "PT600S"
|
||||
|
||||
# SUBSCRIPTION_RENEW_INTERVAL Must be less than the
|
||||
# overall timeout of 90 * (SUBSCRIPTION_ATTEMPTS) 2 = 180 seconds
|
||||
#
|
||||
# We use 8 minutes between renewals to make sure we never hit the
|
||||
# 10 minute limit even if the first renewal attempt fails
|
||||
SUBSCRIPTION_RENEW_INTERVAL = 8 * 60
|
||||
|
||||
# The number of attempts to make when creating or renewing a subscription
|
||||
SUBSCRIPTION_ATTEMPTS = 2
|
||||
|
||||
# The time to wait before trying to restart the subscription if it fails
|
||||
SUBSCRIPTION_RESTART_INTERVAL_ON_ERROR = 60
|
||||
|
||||
PULLPOINT_POLL_TIME = dt.timedelta(seconds=60)
|
||||
PULLPOINT_MESSAGE_LIMIT = 100
|
||||
|
@ -327,20 +340,7 @@ class PullPointManager:
|
|||
async def _async_start_pullpoint(self) -> bool:
|
||||
"""Start pullpoint subscription."""
|
||||
try:
|
||||
try:
|
||||
started = await self._async_create_pullpoint_subscription()
|
||||
except RequestError:
|
||||
#
|
||||
# We should only need to retry on RemoteProtocolError but some cameras
|
||||
# are flaky and sometimes do not respond to the Renew request so we
|
||||
# retry on RequestError as well.
|
||||
#
|
||||
# For RemoteProtocolError:
|
||||
# http://datatracker.ietf.org/doc/html/rfc2616#section-8.1.4 allows the server
|
||||
# to close the connection at any time, we treat this as a normal and try again
|
||||
# once since we do not want to declare the camera as not supporting PullPoint
|
||||
# if it just happened to close the connection at the wrong time.
|
||||
started = await self._async_create_pullpoint_subscription()
|
||||
started = await self._async_create_pullpoint_subscription()
|
||||
except CREATE_ERRORS as err:
|
||||
LOGGER.debug(
|
||||
"%s: Device does not support PullPoint service or has too many subscriptions: %s",
|
||||
|
@ -372,16 +372,16 @@ class PullPointManager:
|
|||
# scheduled when the current one is done if needed.
|
||||
return
|
||||
async with self._renew_lock:
|
||||
next_attempt = SUBSCRIPTION_RENEW_INTERVAL_ON_ERROR
|
||||
next_attempt = SUBSCRIPTION_RESTART_INTERVAL_ON_ERROR
|
||||
try:
|
||||
if (
|
||||
await self._async_renew_pullpoint()
|
||||
or await self._async_restart_pullpoint()
|
||||
):
|
||||
if await self._async_renew_pullpoint():
|
||||
next_attempt = SUBSCRIPTION_RENEW_INTERVAL
|
||||
else:
|
||||
await self._async_restart_pullpoint()
|
||||
finally:
|
||||
self.async_schedule_pullpoint_renew(next_attempt)
|
||||
|
||||
@retry_connection_error(SUBSCRIPTION_ATTEMPTS)
|
||||
async def _async_create_pullpoint_subscription(self) -> bool:
|
||||
"""Create pullpoint subscription."""
|
||||
|
||||
|
@ -447,6 +447,11 @@ class PullPointManager:
|
|||
)
|
||||
self._pullpoint_subscription = None
|
||||
|
||||
@retry_connection_error(SUBSCRIPTION_ATTEMPTS)
|
||||
async def _async_call_pullpoint_subscription_renew(self) -> None:
|
||||
"""Call PullPoint subscription Renew."""
|
||||
await self._pullpoint_subscription.Renew(SUBSCRIPTION_RELATIVE_TIME)
|
||||
|
||||
async def _async_renew_pullpoint(self) -> bool:
|
||||
"""Renew the PullPoint subscription."""
|
||||
if (
|
||||
|
@ -458,20 +463,7 @@ class PullPointManager:
|
|||
# The first time we renew, we may get a Fault error so we
|
||||
# suppress it. The subscription will be restarted in
|
||||
# async_restart later.
|
||||
try:
|
||||
await self._pullpoint_subscription.Renew(SUBSCRIPTION_RELATIVE_TIME)
|
||||
except RequestError:
|
||||
#
|
||||
# We should only need to retry on RemoteProtocolError but some cameras
|
||||
# are flaky and sometimes do not respond to the Renew request so we
|
||||
# retry on RequestError as well.
|
||||
#
|
||||
# For RemoteProtocolError:
|
||||
# http://datatracker.ietf.org/doc/html/rfc2616#section-8.1.4 allows the server
|
||||
# to close the connection at any time, we treat this as a normal and try again
|
||||
# once since we do not want to mark events as stale
|
||||
# if it just happened to close the connection at the wrong time.
|
||||
await self._pullpoint_subscription.Renew(SUBSCRIPTION_RELATIVE_TIME)
|
||||
await self._async_call_pullpoint_subscription_renew()
|
||||
LOGGER.debug("%s: Renewed PullPoint subscription", self._name)
|
||||
return True
|
||||
except RENEW_ERRORS as err:
|
||||
|
@ -521,7 +513,7 @@ class PullPointManager:
|
|||
stringify_onvif_error(err),
|
||||
)
|
||||
return True
|
||||
except (XMLParseError, *SUBSCRIPTION_ERRORS) as err:
|
||||
except Fault as err:
|
||||
# Device may not support subscriptions so log at debug level
|
||||
# when we get an XMLParseError
|
||||
LOGGER.debug(
|
||||
|
@ -532,6 +524,16 @@ class PullPointManager:
|
|||
# Treat errors as if the camera restarted. Assume that the pullpoint
|
||||
# subscription is no longer valid.
|
||||
return False
|
||||
except (XMLParseError, RequestError, TimeoutError, TransportError) as err:
|
||||
LOGGER.debug(
|
||||
"%s: PullPoint subscription encountered an unexpected error and will be retried "
|
||||
"(this is normal for some cameras): %s",
|
||||
self._name,
|
||||
stringify_onvif_error(err),
|
||||
)
|
||||
# Avoid renewing the subscription too often since it causes problems
|
||||
# for some cameras, mainly the Tapo ones.
|
||||
return True
|
||||
|
||||
if self.state != PullPointManagerState.STARTED:
|
||||
# If the webhook became started working during the long poll,
|
||||
|
@ -655,6 +657,7 @@ class WebHookManager:
|
|||
self._renew_or_restart_job,
|
||||
)
|
||||
|
||||
@retry_connection_error(SUBSCRIPTION_ATTEMPTS)
|
||||
async def _async_create_webhook_subscription(self) -> None:
|
||||
"""Create webhook subscription."""
|
||||
LOGGER.debug(
|
||||
|
@ -689,20 +692,7 @@ class WebHookManager:
|
|||
async def _async_start_webhook(self) -> bool:
|
||||
"""Start webhook."""
|
||||
try:
|
||||
try:
|
||||
await self._async_create_webhook_subscription()
|
||||
except RequestError:
|
||||
#
|
||||
# We should only need to retry on RemoteProtocolError but some cameras
|
||||
# are flaky and sometimes do not respond to the Renew request so we
|
||||
# retry on RequestError as well.
|
||||
#
|
||||
# For RemoteProtocolError:
|
||||
# http://datatracker.ietf.org/doc/html/rfc2616#section-8.1.4 allows the server
|
||||
# to close the connection at any time, we treat this as a normal and try again
|
||||
# once since we do not want to declare the camera as not supporting webhooks
|
||||
# if it just happened to close the connection at the wrong time.
|
||||
await self._async_create_webhook_subscription()
|
||||
await self._async_create_webhook_subscription()
|
||||
except CREATE_ERRORS as err:
|
||||
self._event_manager.async_webhook_failed()
|
||||
LOGGER.debug(
|
||||
|
@ -720,6 +710,12 @@ class WebHookManager:
|
|||
await self._async_unsubscribe_webhook()
|
||||
return await self._async_start_webhook()
|
||||
|
||||
@retry_connection_error(SUBSCRIPTION_ATTEMPTS)
|
||||
async def _async_call_webhook_subscription_renew(self) -> None:
|
||||
"""Call PullPoint subscription Renew."""
|
||||
assert self._webhook_subscription is not None
|
||||
await self._webhook_subscription.Renew(SUBSCRIPTION_RELATIVE_TIME)
|
||||
|
||||
async def _async_renew_webhook(self) -> bool:
|
||||
"""Renew webhook subscription."""
|
||||
if (
|
||||
|
@ -728,20 +724,7 @@ class WebHookManager:
|
|||
):
|
||||
return False
|
||||
try:
|
||||
try:
|
||||
await self._webhook_subscription.Renew(SUBSCRIPTION_RELATIVE_TIME)
|
||||
except RequestError:
|
||||
#
|
||||
# We should only need to retry on RemoteProtocolError but some cameras
|
||||
# are flaky and sometimes do not respond to the Renew request so we
|
||||
# retry on RequestError as well.
|
||||
#
|
||||
# For RemoteProtocolError:
|
||||
# http://datatracker.ietf.org/doc/html/rfc2616#section-8.1.4 allows the server
|
||||
# to close the connection at any time, we treat this as a normal and try again
|
||||
# once since we do not want to mark events as stale
|
||||
# if it just happened to close the connection at the wrong time.
|
||||
await self._webhook_subscription.Renew(SUBSCRIPTION_RELATIVE_TIME)
|
||||
await self._async_call_webhook_subscription_renew()
|
||||
LOGGER.debug("%s: Renewed Webhook subscription", self._name)
|
||||
return True
|
||||
except RENEW_ERRORS as err:
|
||||
|
@ -765,13 +748,12 @@ class WebHookManager:
|
|||
# scheduled when the current one is done if needed.
|
||||
return
|
||||
async with self._renew_lock:
|
||||
next_attempt = SUBSCRIPTION_RENEW_INTERVAL_ON_ERROR
|
||||
next_attempt = SUBSCRIPTION_RESTART_INTERVAL_ON_ERROR
|
||||
try:
|
||||
if (
|
||||
await self._async_renew_webhook()
|
||||
or await self._async_restart_webhook()
|
||||
):
|
||||
if await self._async_renew_webhook():
|
||||
next_attempt = SUBSCRIPTION_RENEW_INTERVAL
|
||||
else:
|
||||
await self._async_restart_webhook()
|
||||
finally:
|
||||
self._async_schedule_webhook_renew(next_attempt)
|
||||
|
||||
|
|
|
@ -8,5 +8,5 @@
|
|||
"documentation": "https://www.home-assistant.io/integrations/onvif",
|
||||
"iot_class": "local_push",
|
||||
"loggers": ["onvif", "wsdiscovery", "zeep"],
|
||||
"requirements": ["onvif-zeep-async==2.0.0", "WSDiscovery==2.0.0"]
|
||||
"requirements": ["onvif-zeep-async==2.1.1", "WSDiscovery==2.0.0"]
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ def stringify_onvif_error(error: Exception) -> str:
|
|||
message += f" (actor:{error.actor})"
|
||||
else:
|
||||
message = str(error)
|
||||
return message or "Device sent empty error"
|
||||
return message or f"Device sent empty error with type {type(error)}"
|
||||
|
||||
|
||||
def is_auth_error(error: Exception) -> bool:
|
||||
|
|
|
@ -1258,7 +1258,7 @@ ondilo==0.2.0
|
|||
onkyo-eiscp==1.2.7
|
||||
|
||||
# homeassistant.components.onvif
|
||||
onvif-zeep-async==2.0.0
|
||||
onvif-zeep-async==2.1.1
|
||||
|
||||
# homeassistant.components.opengarage
|
||||
open-garage==0.2.0
|
||||
|
|
|
@ -945,7 +945,7 @@ omnilogic==0.4.5
|
|||
ondilo==0.2.0
|
||||
|
||||
# homeassistant.components.onvif
|
||||
onvif-zeep-async==2.0.0
|
||||
onvif-zeep-async==2.1.1
|
||||
|
||||
# homeassistant.components.opengarage
|
||||
open-garage==0.2.0
|
||||
|
|
Loading…
Reference in New Issue