diff --git a/homeassistant/components/wyoming/__init__.py b/homeassistant/components/wyoming/__init__.py index 8676365212a..33064d21097 100644 --- a/homeassistant/components/wyoming/__init__.py +++ b/homeassistant/components/wyoming/__init__.py @@ -7,11 +7,16 @@ from homeassistant.config_entries import ConfigEntry from homeassistant.core import HomeAssistant from homeassistant.exceptions import ConfigEntryNotReady -from .const import DOMAIN +from .const import ATTR_SPEAKER, DOMAIN from .data import WyomingService _LOGGER = logging.getLogger(__name__) +__all__ = [ + "ATTR_SPEAKER", + "DOMAIN", +] + async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool: """Load Wyoming.""" diff --git a/homeassistant/components/wyoming/const.py b/homeassistant/components/wyoming/const.py index 26443cc11eb..fd73a6bd047 100644 --- a/homeassistant/components/wyoming/const.py +++ b/homeassistant/components/wyoming/const.py @@ -5,3 +5,6 @@ DOMAIN = "wyoming" SAMPLE_RATE = 16000 SAMPLE_WIDTH = 2 SAMPLE_CHANNELS = 1 + +# For multi-speaker voices, this is the name of the selected speaker. +ATTR_SPEAKER = "speaker" diff --git a/homeassistant/components/wyoming/manifest.json b/homeassistant/components/wyoming/manifest.json index 9ad8092bb8c..7fbf3542e13 100644 --- a/homeassistant/components/wyoming/manifest.json +++ b/homeassistant/components/wyoming/manifest.json @@ -5,5 +5,5 @@ "config_flow": true, "documentation": "https://www.home-assistant.io/integrations/wyoming", "iot_class": "local_push", - "requirements": ["wyoming==0.0.1"] + "requirements": ["wyoming==1.0.0"] } diff --git a/homeassistant/components/wyoming/tts.py b/homeassistant/components/wyoming/tts.py index 0fc7bf5e6c4..6510fd8c761 100644 --- a/homeassistant/components/wyoming/tts.py +++ b/homeassistant/components/wyoming/tts.py @@ -6,14 +6,14 @@ import wave from wyoming.audio import AudioChunk, AudioChunkConverter, AudioStop from wyoming.client import AsyncTcpClient -from wyoming.tts import Synthesize +from wyoming.tts import Synthesize, SynthesizeVoice from homeassistant.components import tts from homeassistant.config_entries import ConfigEntry from homeassistant.core import HomeAssistant, callback from homeassistant.helpers.entity_platform import AddEntitiesCallback -from .const import DOMAIN +from .const import ATTR_SPEAKER, DOMAIN from .data import WyomingService from .error import WyomingError @@ -57,10 +57,16 @@ class WyomingTtsProvider(tts.TextToSpeechEntity): self._voices[language].append( tts.Voice( voice_id=voice.name, - name=voice.name, + name=voice.description or voice.name, ) ) + # Sort voices by name + for language in self._voices: + self._voices[language] = sorted( + self._voices[language], key=lambda v: v.name + ) + self._supported_languages: list[str] = list(voice_languages) self._attr_name = self._tts_service.name @@ -82,7 +88,7 @@ class WyomingTtsProvider(tts.TextToSpeechEntity): @property def supported_options(self): """Return list of supported options like voice, emotion.""" - return [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE] + return [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE, ATTR_SPEAKER] @property def default_options(self): @@ -95,10 +101,18 @@ class WyomingTtsProvider(tts.TextToSpeechEntity): return self._voices.get(language) async def async_get_tts_audio(self, message, language, options): - """Load TTS from UNIX socket.""" + """Load TTS from TCP socket.""" + voice_name: str | None = options.get(tts.ATTR_VOICE) + voice_speaker: str | None = options.get(ATTR_SPEAKER) + try: async with AsyncTcpClient(self.service.host, self.service.port) as client: - await client.write_event(Synthesize(message).event()) + voice: SynthesizeVoice | None = None + if voice_name is not None: + voice = SynthesizeVoice(name=voice_name, speaker=voice_speaker) + + synthesize = Synthesize(text=message, voice=voice) + await client.write_event(synthesize.event()) with io.BytesIO() as wav_io: wav_writer: wave.Wave_write | None = None diff --git a/requirements_all.txt b/requirements_all.txt index 7497521708a..74c409bc8c1 100644 --- a/requirements_all.txt +++ b/requirements_all.txt @@ -2681,7 +2681,7 @@ wled==0.16.0 wolf-smartset==0.1.11 # homeassistant.components.wyoming -wyoming==0.0.1 +wyoming==1.0.0 # homeassistant.components.xbox xbox-webapi==2.0.11 diff --git a/requirements_test_all.txt b/requirements_test_all.txt index 681c17952e4..24f5987227f 100644 --- a/requirements_test_all.txt +++ b/requirements_test_all.txt @@ -1963,7 +1963,7 @@ wled==0.16.0 wolf-smartset==0.1.11 # homeassistant.components.wyoming -wyoming==0.0.1 +wyoming==1.0.0 # homeassistant.components.xbox xbox-webapi==2.0.11 diff --git a/tests/components/wyoming/__init__.py b/tests/components/wyoming/__init__.py index d48b908f26b..3d12d41ce5e 100644 --- a/tests/components/wyoming/__init__.py +++ b/tests/components/wyoming/__init__.py @@ -1,16 +1,26 @@ """Tests for the Wyoming integration.""" -from wyoming.info import AsrModel, AsrProgram, Attribution, Info, TtsProgram, TtsVoice +from wyoming.info import ( + AsrModel, + AsrProgram, + Attribution, + Info, + TtsProgram, + TtsVoice, + TtsVoiceSpeaker, +) TEST_ATTR = Attribution(name="Test", url="http://www.test.com") STT_INFO = Info( asr=[ AsrProgram( name="Test ASR", + description="Test ASR", installed=True, attribution=TEST_ATTR, models=[ AsrModel( name="Test Model", + description="Test Model", installed=True, attribution=TEST_ATTR, languages=["en-US"], @@ -23,14 +33,17 @@ TTS_INFO = Info( tts=[ TtsProgram( name="Test TTS", + description="Test TTS", installed=True, attribution=TEST_ATTR, voices=[ TtsVoice( name="Test Voice", + description="Test Voice", installed=True, attribution=TEST_ATTR, languages=["en-US"], + speakers=[TtsVoiceSpeaker(name="Test Speaker")], ) ], ) diff --git a/tests/components/wyoming/snapshots/test_tts.ambr b/tests/components/wyoming/snapshots/test_tts.ambr index eb0b33c3276..1cb5a6cb874 100644 --- a/tests/components/wyoming/snapshots/test_tts.ambr +++ b/tests/components/wyoming/snapshots/test_tts.ambr @@ -21,3 +21,18 @@ }), ]) # --- +# name: test_voice_speaker + list([ + dict({ + 'data': dict({ + 'text': 'Hello world', + 'voice': dict({ + 'name': 'voice1', + 'speaker': 'speaker1', + }), + }), + 'payload': None, + 'type': 'synthesize', + }), + ]) +# --- diff --git a/tests/components/wyoming/test_tts.py b/tests/components/wyoming/test_tts.py index 8767660ca08..51a684bc4fd 100644 --- a/tests/components/wyoming/test_tts.py +++ b/tests/components/wyoming/test_tts.py @@ -8,7 +8,7 @@ import wave import pytest from wyoming.audio import AudioChunk, AudioStop -from homeassistant.components import tts +from homeassistant.components import tts, wyoming from homeassistant.core import HomeAssistant from homeassistant.exceptions import HomeAssistantError from homeassistant.helpers.entity_component import DATA_INSTANCES @@ -31,7 +31,11 @@ async def test_support(hass: HomeAssistant, init_wyoming_tts) -> None: assert entity is not None assert entity.supported_languages == ["en-US"] - assert entity.supported_options == [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE] + assert entity.supported_options == [ + tts.ATTR_AUDIO_OUTPUT, + tts.ATTR_VOICE, + wyoming.ATTR_SPEAKER, + ] voices = entity.async_get_supported_voices("en-US") assert len(voices) == 1 assert voices[0].name == "Test Voice" @@ -137,3 +141,28 @@ async def test_get_tts_audio_audio_oserror( hass, "Hello world", "tts.test_tts", hass.config.language ), ) + + +async def test_voice_speaker(hass: HomeAssistant, init_wyoming_tts, snapshot) -> None: + """Test using a different voice and speaker.""" + audio = bytes(100) + audio_events = [ + AudioChunk(audio=audio, rate=16000, width=2, channels=1).event(), + AudioStop().event(), + ] + + with patch( + "homeassistant.components.wyoming.tts.AsyncTcpClient", + MockAsyncTcpClient(audio_events), + ) as mock_client: + await tts.async_get_media_source_audio( + hass, + tts.generate_media_source_id( + hass, + "Hello world", + "tts.test_tts", + "en-US", + options={tts.ATTR_VOICE: "voice1", wyoming.ATTR_SPEAKER: "speaker1"}, + ), + ) + assert mock_client.written == snapshot