Wyoming Piper 1.1 (#96490)

* Add voice/speaker options to Piper TTS
* Use description if available
* Fix tests
* Clean up if
2023-07-14 07:56:27 -05:00 · 2023-07-14 07:56:27 -05:00 · afdded58ee
parent 614f3c6a15
commit afdded58ee
9 changed files with 92 additions and 13 deletions
--- a/homeassistant/components/wyoming/__init__.py
+++ b/homeassistant/components/wyoming/__init__.py
@@ -7,11 +7,16 @@ from homeassistant.config_entries import ConfigEntry
 from homeassistant.core import HomeAssistant
 from homeassistant.exceptions import ConfigEntryNotReady

-from .const import DOMAIN
+from .const import ATTR_SPEAKER, DOMAIN
 from .data import WyomingService

 _LOGGER = logging.getLogger(__name__)

+__all__ = [
+    "ATTR_SPEAKER",
+    "DOMAIN",
+]
+

 async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
    """Load Wyoming."""
--- a/homeassistant/components/wyoming/const.py
+++ b/homeassistant/components/wyoming/const.py
@@ -5,3 +5,6 @@ DOMAIN = "wyoming"
 SAMPLE_RATE = 16000
 SAMPLE_WIDTH = 2
 SAMPLE_CHANNELS = 1
+
+# For multi-speaker voices, this is the name of the selected speaker.
+ATTR_SPEAKER = "speaker"
--- a/homeassistant/components/wyoming/manifest.json
+++ b/homeassistant/components/wyoming/manifest.json
@@ -5,5 +5,5 @@
  "config_flow": true,
  "documentation": "https://www.home-assistant.io/integrations/wyoming",
  "iot_class": "local_push",
-  "requirements": ["wyoming==0.0.1"]
+  "requirements": ["wyoming==1.0.0"]
 }
--- a/homeassistant/components/wyoming/tts.py
+++ b/homeassistant/components/wyoming/tts.py
@@ -6,14 +6,14 @@ import wave

 from wyoming.audio import AudioChunk, AudioChunkConverter, AudioStop
 from wyoming.client import AsyncTcpClient
-from wyoming.tts import Synthesize
+from wyoming.tts import Synthesize, SynthesizeVoice

 from homeassistant.components import tts
 from homeassistant.config_entries import ConfigEntry
 from homeassistant.core import HomeAssistant, callback
 from homeassistant.helpers.entity_platform import AddEntitiesCallback

-from .const import DOMAIN
+from .const import ATTR_SPEAKER, DOMAIN
 from .data import WyomingService
 from .error import WyomingError

@@ -57,10 +57,16 @@ class WyomingTtsProvider(tts.TextToSpeechEntity):
                self._voices[language].append(
                    tts.Voice(
                        voice_id=voice.name,
-                        name=voice.name,
+                        name=voice.description or voice.name,
                    )
                )

+        # Sort voices by name
+        for language in self._voices:
+            self._voices[language] = sorted(
+                self._voices[language], key=lambda v: v.name
+            )
+
        self._supported_languages: list[str] = list(voice_languages)

        self._attr_name = self._tts_service.name
@@ -82,7 +88,7 @@ class WyomingTtsProvider(tts.TextToSpeechEntity):
    @property
    def supported_options(self):
        """Return list of supported options like voice, emotion."""
-        return [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE]
+        return [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE, ATTR_SPEAKER]

    @property
    def default_options(self):
@@ -95,10 +101,18 @@ class WyomingTtsProvider(tts.TextToSpeechEntity):
        return self._voices.get(language)

    async def async_get_tts_audio(self, message, language, options):
-        """Load TTS from UNIX socket."""
+        """Load TTS from TCP socket."""
+        voice_name: str | None = options.get(tts.ATTR_VOICE)
+        voice_speaker: str | None = options.get(ATTR_SPEAKER)
+
        try:
            async with AsyncTcpClient(self.service.host, self.service.port) as client:
-                await client.write_event(Synthesize(message).event())
+                voice: SynthesizeVoice | None = None
+                if voice_name is not None:
+                    voice = SynthesizeVoice(name=voice_name, speaker=voice_speaker)
+
+                synthesize = Synthesize(text=message, voice=voice)
+                await client.write_event(synthesize.event())

                with io.BytesIO() as wav_io:
                    wav_writer: wave.Wave_write | None = None
--- a/requirements_all.txt
+++ b/requirements_all.txt
@@ -2681,7 +2681,7 @@ wled==0.16.0
 wolf-smartset==0.1.11

 # homeassistant.components.wyoming
-wyoming==0.0.1
+wyoming==1.0.0

 # homeassistant.components.xbox
 xbox-webapi==2.0.11
--- a/requirements_test_all.txt
+++ b/requirements_test_all.txt
@@ -1963,7 +1963,7 @@ wled==0.16.0
 wolf-smartset==0.1.11

 # homeassistant.components.wyoming
-wyoming==0.0.1
+wyoming==1.0.0

 # homeassistant.components.xbox
 xbox-webapi==2.0.11
--- a/tests/components/wyoming/__init__.py
+++ b/tests/components/wyoming/__init__.py
@@ -1,16 +1,26 @@
 """Tests for the Wyoming integration."""
-from wyoming.info import AsrModel, AsrProgram, Attribution, Info, TtsProgram, TtsVoice
+from wyoming.info import (
+    AsrModel,
+    AsrProgram,
+    Attribution,
+    Info,
+    TtsProgram,
+    TtsVoice,
+    TtsVoiceSpeaker,
+)

 TEST_ATTR = Attribution(name="Test", url="http://www.test.com")
 STT_INFO = Info(
    asr=[
        AsrProgram(
            name="Test ASR",
+            description="Test ASR",
            installed=True,
            attribution=TEST_ATTR,
            models=[
                AsrModel(
                    name="Test Model",
+                    description="Test Model",
                    installed=True,
                    attribution=TEST_ATTR,
                    languages=["en-US"],
@@ -23,14 +33,17 @@ TTS_INFO = Info(
    tts=[
        TtsProgram(
            name="Test TTS",
+            description="Test TTS",
            installed=True,
            attribution=TEST_ATTR,
            voices=[
                TtsVoice(
                    name="Test Voice",
+                    description="Test Voice",
                    installed=True,
                    attribution=TEST_ATTR,
                    languages=["en-US"],
+                    speakers=[TtsVoiceSpeaker(name="Test Speaker")],
                )
            ],
        )
--- a/tests/components/wyoming/snapshots/test_tts.ambr
+++ b/tests/components/wyoming/snapshots/test_tts.ambr
@@ -21,3 +21,18 @@
    }),
  ])
 # ---
+# name: test_voice_speaker
+  list([
+    dict({
+      'data': dict({
+        'text': 'Hello world',
+        'voice': dict({
+          'name': 'voice1',
+          'speaker': 'speaker1',
+        }),
+      }),
+      'payload': None,
+      'type': 'synthesize',
+    }),
+  ])
+# ---
--- a/tests/components/wyoming/test_tts.py
+++ b/tests/components/wyoming/test_tts.py
@@ -8,7 +8,7 @@ import wave
 import pytest
 from wyoming.audio import AudioChunk, AudioStop

-from homeassistant.components import tts
+from homeassistant.components import tts, wyoming
 from homeassistant.core import HomeAssistant
 from homeassistant.exceptions import HomeAssistantError
 from homeassistant.helpers.entity_component import DATA_INSTANCES
@@ -31,7 +31,11 @@ async def test_support(hass: HomeAssistant, init_wyoming_tts) -> None:
    assert entity is not None

    assert entity.supported_languages == ["en-US"]
-    assert entity.supported_options == [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE]
+    assert entity.supported_options == [
+        tts.ATTR_AUDIO_OUTPUT,
+        tts.ATTR_VOICE,
+        wyoming.ATTR_SPEAKER,
+    ]
    voices = entity.async_get_supported_voices("en-US")
    assert len(voices) == 1
    assert voices[0].name == "Test Voice"
@@ -137,3 +141,28 @@ async def test_get_tts_audio_audio_oserror(
                hass, "Hello world", "tts.test_tts", hass.config.language
            ),
        )
+
+
+async def test_voice_speaker(hass: HomeAssistant, init_wyoming_tts, snapshot) -> None:
+    """Test using a different voice and speaker."""
+    audio = bytes(100)
+    audio_events = [
+        AudioChunk(audio=audio, rate=16000, width=2, channels=1).event(),
+        AudioStop().event(),
+    ]
+
+    with patch(
+        "homeassistant.components.wyoming.tts.AsyncTcpClient",
+        MockAsyncTcpClient(audio_events),
+    ) as mock_client:
+        await tts.async_get_media_source_audio(
+            hass,
+            tts.generate_media_source_id(
+                hass,
+                "Hello world",
+                "tts.test_tts",
+                "en-US",
+                options={tts.ATTR_VOICE: "voice1", wyoming.ATTR_SPEAKER: "speaker1"},
+            ),
+        )
+        assert mock_client.written == snapshot