Wyoming Piper 1.1 (#96490)

* Add voice/speaker options to Piper TTS

* Use description if available

* Fix tests

* Clean up if
pull/96545/head
Michael Hansen 2023-07-14 07:56:27 -05:00 committed by GitHub
parent 614f3c6a15
commit afdded58ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 92 additions and 13 deletions

View File

@ -7,11 +7,16 @@ from homeassistant.config_entries import ConfigEntry
from homeassistant.core import HomeAssistant
from homeassistant.exceptions import ConfigEntryNotReady
from .const import DOMAIN
from .const import ATTR_SPEAKER, DOMAIN
from .data import WyomingService
_LOGGER = logging.getLogger(__name__)
__all__ = [
"ATTR_SPEAKER",
"DOMAIN",
]
async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
"""Load Wyoming."""

View File

@ -5,3 +5,6 @@ DOMAIN = "wyoming"
SAMPLE_RATE = 16000
SAMPLE_WIDTH = 2
SAMPLE_CHANNELS = 1
# For multi-speaker voices, this is the name of the selected speaker.
ATTR_SPEAKER = "speaker"

View File

@ -5,5 +5,5 @@
"config_flow": true,
"documentation": "https://www.home-assistant.io/integrations/wyoming",
"iot_class": "local_push",
"requirements": ["wyoming==0.0.1"]
"requirements": ["wyoming==1.0.0"]
}

View File

@ -6,14 +6,14 @@ import wave
from wyoming.audio import AudioChunk, AudioChunkConverter, AudioStop
from wyoming.client import AsyncTcpClient
from wyoming.tts import Synthesize
from wyoming.tts import Synthesize, SynthesizeVoice
from homeassistant.components import tts
from homeassistant.config_entries import ConfigEntry
from homeassistant.core import HomeAssistant, callback
from homeassistant.helpers.entity_platform import AddEntitiesCallback
from .const import DOMAIN
from .const import ATTR_SPEAKER, DOMAIN
from .data import WyomingService
from .error import WyomingError
@ -57,10 +57,16 @@ class WyomingTtsProvider(tts.TextToSpeechEntity):
self._voices[language].append(
tts.Voice(
voice_id=voice.name,
name=voice.name,
name=voice.description or voice.name,
)
)
# Sort voices by name
for language in self._voices:
self._voices[language] = sorted(
self._voices[language], key=lambda v: v.name
)
self._supported_languages: list[str] = list(voice_languages)
self._attr_name = self._tts_service.name
@ -82,7 +88,7 @@ class WyomingTtsProvider(tts.TextToSpeechEntity):
@property
def supported_options(self):
"""Return list of supported options like voice, emotion."""
return [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE]
return [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE, ATTR_SPEAKER]
@property
def default_options(self):
@ -95,10 +101,18 @@ class WyomingTtsProvider(tts.TextToSpeechEntity):
return self._voices.get(language)
async def async_get_tts_audio(self, message, language, options):
"""Load TTS from UNIX socket."""
"""Load TTS from TCP socket."""
voice_name: str | None = options.get(tts.ATTR_VOICE)
voice_speaker: str | None = options.get(ATTR_SPEAKER)
try:
async with AsyncTcpClient(self.service.host, self.service.port) as client:
await client.write_event(Synthesize(message).event())
voice: SynthesizeVoice | None = None
if voice_name is not None:
voice = SynthesizeVoice(name=voice_name, speaker=voice_speaker)
synthesize = Synthesize(text=message, voice=voice)
await client.write_event(synthesize.event())
with io.BytesIO() as wav_io:
wav_writer: wave.Wave_write | None = None

View File

@ -2681,7 +2681,7 @@ wled==0.16.0
wolf-smartset==0.1.11
# homeassistant.components.wyoming
wyoming==0.0.1
wyoming==1.0.0
# homeassistant.components.xbox
xbox-webapi==2.0.11

View File

@ -1963,7 +1963,7 @@ wled==0.16.0
wolf-smartset==0.1.11
# homeassistant.components.wyoming
wyoming==0.0.1
wyoming==1.0.0
# homeassistant.components.xbox
xbox-webapi==2.0.11

View File

@ -1,16 +1,26 @@
"""Tests for the Wyoming integration."""
from wyoming.info import AsrModel, AsrProgram, Attribution, Info, TtsProgram, TtsVoice
from wyoming.info import (
AsrModel,
AsrProgram,
Attribution,
Info,
TtsProgram,
TtsVoice,
TtsVoiceSpeaker,
)
TEST_ATTR = Attribution(name="Test", url="http://www.test.com")
STT_INFO = Info(
asr=[
AsrProgram(
name="Test ASR",
description="Test ASR",
installed=True,
attribution=TEST_ATTR,
models=[
AsrModel(
name="Test Model",
description="Test Model",
installed=True,
attribution=TEST_ATTR,
languages=["en-US"],
@ -23,14 +33,17 @@ TTS_INFO = Info(
tts=[
TtsProgram(
name="Test TTS",
description="Test TTS",
installed=True,
attribution=TEST_ATTR,
voices=[
TtsVoice(
name="Test Voice",
description="Test Voice",
installed=True,
attribution=TEST_ATTR,
languages=["en-US"],
speakers=[TtsVoiceSpeaker(name="Test Speaker")],
)
],
)

View File

@ -21,3 +21,18 @@
}),
])
# ---
# name: test_voice_speaker
list([
dict({
'data': dict({
'text': 'Hello world',
'voice': dict({
'name': 'voice1',
'speaker': 'speaker1',
}),
}),
'payload': None,
'type': 'synthesize',
}),
])
# ---

View File

@ -8,7 +8,7 @@ import wave
import pytest
from wyoming.audio import AudioChunk, AudioStop
from homeassistant.components import tts
from homeassistant.components import tts, wyoming
from homeassistant.core import HomeAssistant
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers.entity_component import DATA_INSTANCES
@ -31,7 +31,11 @@ async def test_support(hass: HomeAssistant, init_wyoming_tts) -> None:
assert entity is not None
assert entity.supported_languages == ["en-US"]
assert entity.supported_options == [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE]
assert entity.supported_options == [
tts.ATTR_AUDIO_OUTPUT,
tts.ATTR_VOICE,
wyoming.ATTR_SPEAKER,
]
voices = entity.async_get_supported_voices("en-US")
assert len(voices) == 1
assert voices[0].name == "Test Voice"
@ -137,3 +141,28 @@ async def test_get_tts_audio_audio_oserror(
hass, "Hello world", "tts.test_tts", hass.config.language
),
)
async def test_voice_speaker(hass: HomeAssistant, init_wyoming_tts, snapshot) -> None:
    """Request TTS audio with explicit voice and speaker options.

    The events written through the patched TCP client are compared against
    the stored snapshot.
    """
    # bytes(100) is 100 zero bytes; it is the audio payload of the single
    # chunk event handed to the mock client, followed by the stop event.
    chunk_event = AudioChunk(audio=bytes(100), rate=16000, width=2, channels=1).event()
    stop_event = AudioStop().event()

    with patch(
        "homeassistant.components.wyoming.tts.AsyncTcpClient",
        MockAsyncTcpClient([chunk_event, stop_event]),
    ) as mock_client:
        requested_options = {
            tts.ATTR_VOICE: "voice1",
            wyoming.ATTR_SPEAKER: "speaker1",
        }
        media_source_id = tts.generate_media_source_id(
            hass,
            "Hello world",
            "tts.test_tts",
            "en-US",
            options=requested_options,
        )
        await tts.async_get_media_source_audio(hass, media_source_id)
        assert mock_client.written == snapshot