239 lines
6.5 KiB
Python
239 lines
6.5 KiB
Python
"""Support for the Amazon Polly text to speech service."""
|
|
import logging
|
|
|
|
import voluptuous as vol
|
|
|
|
from homeassistant.components.tts import PLATFORM_SCHEMA, Provider
|
|
import homeassistant.helpers.config_validation as cv
|
|
|
|
_LOGGER = logging.getLogger(__name__)

# Configuration keys for the AWS connection. The region and the two key
# entries are passed straight to boto3.client() as keyword arguments by
# get_engine().
CONF_REGION = "region_name"
CONF_ACCESS_KEY_ID = "aws_access_key_id"
CONF_SECRET_ACCESS_KEY = "aws_secret_access_key"
CONF_PROFILE_NAME = "profile_name"

# Group name shared by the credential markers in PLATFORM_SCHEMA.
ATTR_CREDENTIALS = "credentials"

# Region used when the configuration does not name one.
DEFAULT_REGION = "us-east-1"

# Region names accepted for CONF_REGION.
SUPPORTED_REGIONS = [
    # United States
    "us-east-1",
    "us-east-2",
    "us-west-1",
    "us-west-2",
    # Canada
    "ca-central-1",
    # Europe
    "eu-west-1",
    "eu-central-1",
    "eu-west-2",
    "eu-west-3",
    # Asia Pacific
    "ap-southeast-1",
    "ap-southeast-2",
    "ap-northeast-2",
    "ap-northeast-1",
    "ap-south-1",
    # South America
    "sa-east-1",
]
|
|
|
|
# Configuration keys for the speech synthesis options.
CONF_VOICE = "voice"
CONF_OUTPUT_FORMAT = "output_format"
CONF_SAMPLE_RATE = "sample_rate"
CONF_TEXT_TYPE = "text_type"

# Voice ids accepted for CONF_VOICE, grouped by language.
# "Aditi" is listed under both "English, Indian" and "Hindi".
SUPPORTED_VOICES = [
    # Chinese
    "Zhiyu",
    # Danish
    "Mads",
    "Naja",
    # Dutch
    "Ruben",
    "Lotte",
    # English Australian
    "Russell",
    "Nicole",
    # English
    "Brian",
    "Amy",
    "Emma",
    # English, Indian
    "Aditi",
    "Raveena",
    # English
    "Joey",
    "Justin",
    "Matthew",
    "Ivy",
    "Joanna",
    "Kendra",
    "Kimberly",
    "Salli",
    # English Welsh
    "Geraint",
    # French
    "Mathieu",
    "Celine",
    "Lea",
    # French Canadian
    "Chantal",
    # German
    "Hans",
    "Marlene",
    "Vicki",
    # Hindi
    "Aditi",
    # Icelandic
    "Karl",
    "Dora",
    # Italian
    "Giorgio",
    "Carla",
    "Bianca",
    # Japanese
    "Takumi",
    "Mizuki",
    # Korean
    "Seoyeon",
    # Norwegian
    "Liv",
    # Polish
    "Jacek",
    "Jan",
    "Ewa",
    "Maja",
    # Portuguese, Brazilian
    "Ricardo",
    "Vitoria",
    # Portuguese, European
    "Cristiano",
    "Ines",
    # Romanian
    "Carmen",
    # Russian
    "Maxim",
    "Tatyana",
    # Spanish European
    "Enrique",
    "Conchita",
    "Lucia",
    # Spanish Mexican
    "Mia",
    # Spanish US
    "Miguel",
    "Penelope",
    # Swedish
    "Astrid",
    # Turkish
    "Filiz",
    # Welsh
    "Gwyneth",
]
|
|
|
|
# Values accepted for CONF_OUTPUT_FORMAT.
SUPPORTED_OUTPUT_FORMATS = ["mp3", "ogg_vorbis", "pcm"]

# Values accepted for CONF_SAMPLE_RATE by the schema; rates are strings.
SUPPORTED_SAMPLE_RATES = ["8000", "16000", "22050"]

# Sample rates allowed per output format; get_engine() rejects any
# combination not listed here.
SUPPORTED_SAMPLE_RATES_MAP = {
    "mp3": ["8000", "16000", "22050"],
    "ogg_vorbis": ["8000", "16000", "22050"],
    "pcm": ["8000", "16000"],
}

# Values accepted for CONF_TEXT_TYPE.
SUPPORTED_TEXT_TYPES = ["text", "ssml"]

# Maps the "ContentType" of a synthesize_speech response to the file
# extension returned by get_tts_audio().
CONTENT_TYPE_EXTENSIONS = {
    "audio/mpeg": "mp3",
    "audio/ogg": "ogg",
    "audio/pcm": "pcm",
}

DEFAULT_VOICE = "Joanna"
DEFAULT_OUTPUT_FORMAT = "mp3"
DEFAULT_TEXT_TYPE = "text"

# Sample rate used for each output format when none is configured.
DEFAULT_SAMPLE_RATES = {
    "mp3": "22050",
    "ogg_vorbis": "22050",
    "pcm": "16000",
}
|
|
|
|
# Extend the base TTS platform schema with the Amazon Polly options.
PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
    {
        # AWS connection settings.
        vol.Optional(CONF_REGION, default=DEFAULT_REGION): vol.In(SUPPORTED_REGIONS),
        # NOTE(review): the two key entries are Inclusive and the profile is
        # Exclusive, all under the same group name. Presumably the intent is
        # "key pair OR profile" -- confirm that voluptuous really treats an
        # Inclusive group and an Exclusive group of the same name as related.
        vol.Inclusive(CONF_ACCESS_KEY_ID, ATTR_CREDENTIALS): cv.string,
        vol.Inclusive(CONF_SECRET_ACCESS_KEY, ATTR_CREDENTIALS): cv.string,
        vol.Exclusive(CONF_PROFILE_NAME, ATTR_CREDENTIALS): cv.string,
        # Speech synthesis settings.
        vol.Optional(CONF_VOICE, default=DEFAULT_VOICE): vol.In(SUPPORTED_VOICES),
        vol.Optional(CONF_OUTPUT_FORMAT, default=DEFAULT_OUTPUT_FORMAT): vol.In(
            SUPPORTED_OUTPUT_FORMATS
        ),
        # No schema default here: get_engine() picks one per output format.
        vol.Optional(CONF_SAMPLE_RATE): vol.All(
            cv.string, vol.In(SUPPORTED_SAMPLE_RATES)
        ),
        vol.Optional(CONF_TEXT_TYPE, default=DEFAULT_TEXT_TYPE): vol.In(
            SUPPORTED_TEXT_TYPES
        ),
    }
)
|
|
|
|
|
|
def get_engine(hass, config):
    """Set up Amazon Polly speech component.

    Checks the sample rate against the configured output format, creates a
    boto3 Polly client and queries it for the available voices.

    Args:
        hass: Home Assistant instance; not used by this function.
        config: Platform configuration mapping. It is modified in place: the
            resolved sample rate is stored under CONF_SAMPLE_RATE, and the
            region and access-key entries are removed before the mapping is
            handed to the provider.

    Returns:
        An AmazonPollyProvider, or None when the sample rate is not valid for
        the configured output format.
    """
    output_format = config.get(CONF_OUTPUT_FORMAT)
    sample_rate = config.get(CONF_SAMPLE_RATE, DEFAULT_SAMPLE_RATES[output_format])
    if sample_rate not in SUPPORTED_SAMPLE_RATES_MAP.get(output_format):
        _LOGGER.error(
            "%s is not a valid sample rate for %s", sample_rate, output_format
        )
        return None

    # Persist the resolved value so the provider can index it directly.
    config[CONF_SAMPLE_RATE] = sample_rate

    import boto3

    profile = config.get(CONF_PROFILE_NAME)
    if profile is not None:
        boto3.setup_default_session(profile_name=profile)

    # Move the connection settings out of ``config`` and into the client
    # keyword arguments. The access-key entries are optional in the schema,
    # so use pop() with a default: a plain ``del`` raised KeyError whenever
    # only a profile (or no explicit credentials at all) was configured.
    aws_config = {
        key: config.pop(key, None)
        for key in (CONF_REGION, CONF_ACCESS_KEY_ID, CONF_SECRET_ACCESS_KEY)
    }

    polly_client = boto3.client("polly", **aws_config)

    # Tolerate a response without a "Voices" entry.
    voices = polly_client.describe_voices().get("Voices") or []

    # Voice description keyed by voice id.
    all_voices = {voice.get("Id"): voice for voice in voices}

    # Unique language codes, in the order the service returned them.
    supported_languages = list(
        dict.fromkeys(voice.get("LanguageCode") for voice in voices)
    )

    return AmazonPollyProvider(polly_client, config, supported_languages, all_voices)
|
|
|
|
|
|
class AmazonPollyProvider(Provider):
    """Amazon Polly speech api provider."""

    def __init__(self, polly_client, config, supported_languages, all_voices):
        """Initialize Amazon Polly provider for TTS.

        Args:
            polly_client: Client object exposing ``synthesize_speech``.
            config: Platform configuration; the output format, sample rate,
                text type and default voice are read from it.
            supported_languages: List of language codes to report.
            all_voices: Mapping of voice id to a voice description mapping
                that carries a "LanguageCode" entry.
        """
        self.client = polly_client
        self.config = config
        self.supported_langs = supported_languages
        self.all_voices = all_voices
        self.default_voice = self.config.get(CONF_VOICE)
        self.name = "Amazon Polly"

    @property
    def supported_languages(self):
        """Return a list of supported languages."""
        return self.supported_langs

    @property
    def default_language(self):
        """Return the language code of the default voice."""
        return self.all_voices.get(self.default_voice).get("LanguageCode")

    @property
    def default_options(self):
        """Return a dict with the default options."""
        return {CONF_VOICE: self.default_voice}

    @property
    def supported_options(self):
        """Return a list of supported options."""
        return [CONF_VOICE]

    def get_tts_audio(self, message, language=None, options=None):
        """Request TTS file from Polly.

        Args:
            message: Text (or SSML, depending on the configured text type)
                to synthesize.
            language: Language code requested by the caller; it must equal
                the language code of the selected voice.
            options: Optional mapping; only CONF_VOICE is read from it.

        Returns:
            A ``(extension, audio_bytes)`` tuple, or ``(None, None)`` when the
            voice is unknown or does not match ``language``.
        """
        # ``options`` defaults to None, so fall back to an empty mapping
        # instead of failing on ``None.get``.
        options = options or {}
        voice_id = options.get(CONF_VOICE, self.default_voice)

        voice_in_dict = self.all_voices.get(voice_id)
        if voice_in_dict is None:
            # An unknown voice id previously surfaced as an AttributeError.
            _LOGGER.error("%s is not a known voice", voice_id)
            return None, None

        if language != voice_in_dict.get("LanguageCode"):
            _LOGGER.error("%s does not support the %s language", voice_id, language)
            return None, None

        resp = self.client.synthesize_speech(
            OutputFormat=self.config[CONF_OUTPUT_FORMAT],
            SampleRate=self.config[CONF_SAMPLE_RATE],
            Text=message,
            TextType=self.config[CONF_TEXT_TYPE],
            VoiceId=voice_id,
        )

        return (
            CONTENT_TYPE_EXTENSIONS[resp.get("ContentType")],
            resp.get("AudioStream").read(),
        )
|