VoIP listening tone and "not configured" message (#91762)
* Play tone when starting a VoIP call * Play audio message when call is rejected * Add option to disable tone for tests * Send RTP audio in executor to reduce jitter * Don't start pipeline until speech * Bump voip utilspull/91783/head
parent
f4f3962ee9
commit
5080654776
|
@ -7,5 +7,5 @@
|
|||
"documentation": "https://www.home-assistant.io/integrations/voip",
|
||||
"iot_class": "local_push",
|
||||
"quality_scale": "internal",
|
||||
"requirements": ["voip-utils==0.0.2"]
|
||||
"requirements": ["voip-utils==0.0.5"]
|
||||
}
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -3,8 +3,10 @@ from __future__ import annotations
|
|||
|
||||
import asyncio
|
||||
from collections import deque
|
||||
from collections.abc import AsyncIterable
|
||||
from collections.abc import AsyncIterable, MutableSequence, Sequence
|
||||
from functools import partial
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
@ -22,6 +24,7 @@ from homeassistant.components.assist_pipeline import (
|
|||
from homeassistant.components.assist_pipeline.vad import VoiceCommandSegmenter
|
||||
from homeassistant.const import __version__
|
||||
from homeassistant.core import Context, HomeAssistant
|
||||
from homeassistant.util.ulid import ulid
|
||||
|
||||
from .const import DOMAIN
|
||||
|
||||
|
@ -29,6 +32,9 @@ if TYPE_CHECKING:
|
|||
from .devices import VoIPDevice, VoIPDevices
|
||||
|
||||
_BUFFERED_CHUNKS_BEFORE_SPEECH = 100 # ~2 seconds
|
||||
_TONE_DELAY = 0.2 # seconds before playing tone
|
||||
_MESSAGE_DELAY = 1.0 # seconds before playing "not configured" message
|
||||
_LOOP_DELAY = 2.0 # seconds before replaying not-configured message
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -44,11 +50,14 @@ class HassVoipDatagramProtocol(VoipDatagramProtocol):
|
|||
session_name="voip_hass",
|
||||
version=__version__,
|
||||
),
|
||||
protocol_factory=lambda call_info: PipelineRtpDatagramProtocol(
|
||||
valid_protocol_factory=lambda call_info: PipelineRtpDatagramProtocol(
|
||||
hass,
|
||||
hass.config.language,
|
||||
devices.async_get_or_create(call_info),
|
||||
),
|
||||
invalid_protocol_factory=lambda call_info: NotConfiguredRtpDatagramProtocol(
|
||||
hass,
|
||||
),
|
||||
)
|
||||
self.hass = hass
|
||||
self.devices = devices
|
||||
|
@ -69,6 +78,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
|
|||
voip_device: VoIPDevice,
|
||||
pipeline_timeout: float = 30.0,
|
||||
audio_timeout: float = 2.0,
|
||||
listening_tone_enabled: bool = True,
|
||||
) -> None:
|
||||
"""Set up pipeline RTP server."""
|
||||
# STT expects 16Khz mono with 16-bit samples
|
||||
|
@ -80,11 +90,14 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
|
|||
self.pipeline: Pipeline | None = None
|
||||
self.pipeline_timeout = pipeline_timeout
|
||||
self.audio_timeout = audio_timeout
|
||||
self.listening_tone_enabled = listening_tone_enabled
|
||||
|
||||
self._audio_queue: asyncio.Queue[bytes] = asyncio.Queue()
|
||||
self._context = Context()
|
||||
self._conversation_id: str | None = None
|
||||
self._pipeline_task: asyncio.Task | None = None
|
||||
self._session_id: str | None = None
|
||||
self._tone_bytes: bytes | None = None
|
||||
|
||||
def connection_made(self, transport):
|
||||
"""Server is ready."""
|
||||
|
@ -113,23 +126,42 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
|
|||
self,
|
||||
) -> None:
|
||||
"""Forward audio to pipeline STT and handle TTS."""
|
||||
_LOGGER.debug("Starting pipeline")
|
||||
|
||||
async def stt_stream():
|
||||
try:
|
||||
async for chunk in self._segment_audio():
|
||||
yield chunk
|
||||
except asyncio.TimeoutError:
|
||||
# Expected after caller hangs up
|
||||
_LOGGER.debug("Audio timeout")
|
||||
|
||||
if self.transport is not None:
|
||||
self.transport.close()
|
||||
self.transport = None
|
||||
finally:
|
||||
self._clear_audio_queue()
|
||||
if self._session_id is None:
|
||||
self._session_id = ulid()
|
||||
if self.listening_tone_enabled:
|
||||
await self._play_listening_tone()
|
||||
|
||||
try:
|
||||
# Wait for speech before starting pipeline
|
||||
segmenter = VoiceCommandSegmenter()
|
||||
chunk_buffer: deque[bytes] = deque(
|
||||
maxlen=_BUFFERED_CHUNKS_BEFORE_SPEECH,
|
||||
)
|
||||
speech_detected = await self._wait_for_speech(
|
||||
segmenter,
|
||||
chunk_buffer,
|
||||
)
|
||||
if not speech_detected:
|
||||
_LOGGER.debug("No speech detected")
|
||||
return
|
||||
|
||||
_LOGGER.debug("Starting pipeline")
|
||||
|
||||
async def stt_stream():
|
||||
try:
|
||||
async for chunk in self._segment_audio(
|
||||
segmenter,
|
||||
chunk_buffer,
|
||||
):
|
||||
yield chunk
|
||||
except asyncio.TimeoutError:
|
||||
# Expected after caller hangs up
|
||||
_LOGGER.debug("Audio timeout")
|
||||
self._session_id = None
|
||||
self.disconnect()
|
||||
finally:
|
||||
self._clear_audio_queue()
|
||||
|
||||
# Run pipeline with a timeout
|
||||
async with async_timeout.timeout(self.pipeline_timeout):
|
||||
await async_pipeline_from_audio_stream(
|
||||
|
@ -155,17 +187,48 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
|
|||
except asyncio.TimeoutError:
|
||||
# Expected after caller hangs up
|
||||
_LOGGER.debug("Pipeline timeout")
|
||||
|
||||
if self.transport is not None:
|
||||
self.transport.close()
|
||||
self.transport = None
|
||||
self._session_id = None
|
||||
self.disconnect()
|
||||
finally:
|
||||
# Allow pipeline to run again
|
||||
self._pipeline_task = None
|
||||
|
||||
async def _segment_audio(self) -> AsyncIterable[bytes]:
|
||||
segmenter = VoiceCommandSegmenter()
|
||||
chunk_buffer: deque[bytes] = deque(maxlen=_BUFFERED_CHUNKS_BEFORE_SPEECH)
|
||||
async def _wait_for_speech(
|
||||
self,
|
||||
segmenter: VoiceCommandSegmenter,
|
||||
chunk_buffer: MutableSequence[bytes],
|
||||
):
|
||||
"""Buffer audio chunks until speech is detected.
|
||||
|
||||
Returns True if speech was detected, False otherwise.
|
||||
"""
|
||||
# Timeout if no audio comes in for a while.
|
||||
# This means the caller hung up.
|
||||
async with async_timeout.timeout(self.audio_timeout):
|
||||
chunk = await self._audio_queue.get()
|
||||
|
||||
while chunk:
|
||||
segmenter.process(chunk)
|
||||
if segmenter.in_command:
|
||||
return True
|
||||
|
||||
# Buffer until command starts
|
||||
chunk_buffer.append(chunk)
|
||||
|
||||
async with async_timeout.timeout(self.audio_timeout):
|
||||
chunk = await self._audio_queue.get()
|
||||
|
||||
return False
|
||||
|
||||
async def _segment_audio(
|
||||
self,
|
||||
segmenter: VoiceCommandSegmenter,
|
||||
chunk_buffer: Sequence[bytes],
|
||||
) -> AsyncIterable[bytes]:
|
||||
"""Yield audio chunks until voice command has finished."""
|
||||
# Buffered chunks first
|
||||
for buffered_chunk in chunk_buffer:
|
||||
yield buffered_chunk
|
||||
|
||||
# Timeout if no audio comes in for a while.
|
||||
# This means the caller hung up.
|
||||
|
@ -177,18 +240,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
|
|||
# Voice command is finished
|
||||
break
|
||||
|
||||
if segmenter.in_command:
|
||||
if chunk_buffer:
|
||||
# Release audio in buffer first
|
||||
for buffered_chunk in chunk_buffer:
|
||||
yield buffered_chunk
|
||||
|
||||
chunk_buffer.clear()
|
||||
|
||||
yield chunk
|
||||
else:
|
||||
# Buffer until command starts
|
||||
chunk_buffer.append(chunk)
|
||||
yield chunk
|
||||
|
||||
async with async_timeout.timeout(self.audio_timeout):
|
||||
chunk = await self._audio_queue.get()
|
||||
|
@ -225,4 +277,74 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
|
|||
_LOGGER.debug("Sending %s byte(s) of audio", len(audio_bytes))
|
||||
|
||||
# Assume TTS audio is 16Khz 16-bit mono
|
||||
await self.send_audio(audio_bytes, rate=16000, width=2, channels=1)
|
||||
await self.hass.async_add_executor_job(
|
||||
partial(self.send_audio, audio_bytes, rate=16000, width=2, channels=1)
|
||||
)
|
||||
|
||||
async def _play_listening_tone(self) -> None:
|
||||
"""Play a tone to indicate that Home Assistant is listening."""
|
||||
if self._tone_bytes is None:
|
||||
# Do I/O in executor
|
||||
self._tone_bytes = await self.hass.async_add_executor_job(
|
||||
self._load_tone,
|
||||
)
|
||||
|
||||
await self.hass.async_add_executor_job(
|
||||
partial(
|
||||
self.send_audio,
|
||||
self._tone_bytes,
|
||||
rate=16000,
|
||||
width=2,
|
||||
channels=1,
|
||||
silence_before=_TONE_DELAY,
|
||||
)
|
||||
)
|
||||
|
||||
def _load_tone(self) -> bytes:
|
||||
"""Load raw tone audio (16Khz, 16-bit mono)."""
|
||||
return (Path(__file__).parent / "tone.raw").read_bytes()
|
||||
|
||||
|
||||
class NotConfiguredRtpDatagramProtocol(RtpDatagramProtocol):
|
||||
"""Plays audio on a loop to inform the user to configure the phone in Home Assistant."""
|
||||
|
||||
def __init__(self, hass: HomeAssistant) -> None:
|
||||
"""Set up RTP server."""
|
||||
super().__init__(rate=16000, width=2, channels=1)
|
||||
self.hass = hass
|
||||
self._audio_task: asyncio.Task | None = None
|
||||
self._audio_bytes: bytes | None = None
|
||||
|
||||
def on_chunk(self, audio_bytes: bytes) -> None:
|
||||
"""Handle raw audio chunk."""
|
||||
if self.transport is None:
|
||||
return
|
||||
|
||||
if self._audio_bytes is None:
|
||||
# 16Khz, 16-bit mono audio message
|
||||
self._audio_bytes = (
|
||||
Path(__file__).parent / "not_configured.raw"
|
||||
).read_bytes()
|
||||
|
||||
if self._audio_task is None:
|
||||
self._audio_task = self.hass.async_create_background_task(
|
||||
self._play_message(),
|
||||
"voip_not_connected",
|
||||
)
|
||||
|
||||
async def _play_message(self) -> None:
|
||||
await self.hass.async_add_executor_job(
|
||||
partial(
|
||||
self.send_audio,
|
||||
self._audio_bytes,
|
||||
16000,
|
||||
2,
|
||||
1,
|
||||
silence_before=_MESSAGE_DELAY,
|
||||
)
|
||||
)
|
||||
|
||||
await asyncio.sleep(_LOOP_DELAY)
|
||||
|
||||
# Allow message to play again
|
||||
self._audio_task = None
|
||||
|
|
|
@ -2591,7 +2591,7 @@ venstarcolortouch==0.19
|
|||
vilfo-api-client==0.3.2
|
||||
|
||||
# homeassistant.components.voip
|
||||
voip-utils==0.0.2
|
||||
voip-utils==0.0.5
|
||||
|
||||
# homeassistant.components.volkszaehler
|
||||
volkszaehler==0.4.0
|
||||
|
|
|
@ -1867,7 +1867,7 @@ venstarcolortouch==0.19
|
|||
vilfo-api-client==0.3.2
|
||||
|
||||
# homeassistant.components.voip
|
||||
voip-utils==0.0.2
|
||||
voip-utils==0.0.5
|
||||
|
||||
# homeassistant.components.volvooncall
|
||||
volvooncall==0.10.2
|
||||
|
|
|
@ -35,7 +35,6 @@ async def test_pipeline(
|
|||
async for _chunk in stt_stream:
|
||||
# Stream will end when VAD detects end of "speech"
|
||||
assert _chunk != bad_chunk
|
||||
pass
|
||||
|
||||
# Test empty data
|
||||
event_callback(
|
||||
|
@ -84,14 +83,17 @@ async def test_pipeline(
|
|||
new=async_get_media_source_audio,
|
||||
):
|
||||
rtp_protocol = voip.voip.PipelineRtpDatagramProtocol(
|
||||
hass, hass.config.language, voip_device
|
||||
hass,
|
||||
hass.config.language,
|
||||
voip_device,
|
||||
listening_tone_enabled=False,
|
||||
)
|
||||
rtp_protocol.transport = Mock()
|
||||
|
||||
# Ensure audio queue is cleared before pipeline starts
|
||||
rtp_protocol._audio_queue.put_nowait(bad_chunk)
|
||||
|
||||
async def send_audio(*args, **kwargs):
|
||||
def send_audio(*args, **kwargs):
|
||||
# Test finished successfully
|
||||
done.set()
|
||||
|
||||
|
@ -123,9 +125,16 @@ async def test_pipeline_timeout(hass: HomeAssistant, voip_device: VoIPDevice) ->
|
|||
with patch(
|
||||
"homeassistant.components.voip.voip.async_pipeline_from_audio_stream",
|
||||
new=async_pipeline_from_audio_stream,
|
||||
), patch(
|
||||
"homeassistant.components.voip.voip.PipelineRtpDatagramProtocol._wait_for_speech",
|
||||
return_value=True,
|
||||
):
|
||||
rtp_protocol = voip.voip.PipelineRtpDatagramProtocol(
|
||||
hass, hass.config.language, voip_device, pipeline_timeout=0.001
|
||||
hass,
|
||||
hass.config.language,
|
||||
voip_device,
|
||||
pipeline_timeout=0.001,
|
||||
listening_tone_enabled=False,
|
||||
)
|
||||
transport = Mock(spec=["close"])
|
||||
rtp_protocol.connection_made(transport)
|
||||
|
@ -158,7 +167,11 @@ async def test_stt_stream_timeout(hass: HomeAssistant, voip_device: VoIPDevice)
|
|||
new=async_pipeline_from_audio_stream,
|
||||
):
|
||||
rtp_protocol = voip.voip.PipelineRtpDatagramProtocol(
|
||||
hass, hass.config.language, voip_device, audio_timeout=0.001
|
||||
hass,
|
||||
hass.config.language,
|
||||
voip_device,
|
||||
audio_timeout=0.001,
|
||||
listening_tone_enabled=False,
|
||||
)
|
||||
transport = Mock(spec=["close"])
|
||||
rtp_protocol.connection_made(transport)
|
||||
|
|
Loading…
Reference in New Issue