87 lines
2.8 KiB
Python
87 lines
2.8 KiB
Python
"""ElevenLabs speech module"""
|
|
import os
|
|
|
|
import requests
|
|
from playsound import playsound
|
|
|
|
from autogpt.config import Config
|
|
from autogpt.speech.base import VoiceBase
|
|
|
|
PLACEHOLDERS = {"your-voice-id"}
|
|
|
|
|
|
class ElevenLabsSpeech(VoiceBase):
|
|
"""ElevenLabs speech class"""
|
|
|
|
def _setup(self) -> None:
|
|
"""Setup the voices, API key, etc.
|
|
|
|
Returns:
|
|
None: None
|
|
"""
|
|
|
|
cfg = Config()
|
|
default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
|
|
voice_options = {
|
|
"Rachel": "21m00Tcm4TlvDq8ikWAM",
|
|
"Domi": "AZnzlk1XvdvUeBnXmlld",
|
|
"Bella": "EXAVITQu4vr4xnSDxMaL",
|
|
"Antoni": "ErXwobaYiN019PkySvjV",
|
|
"Elli": "MF3mGyEYCl7XYWbV9V6O",
|
|
"Josh": "TxGEqnHWrfWFTfGW9XjX",
|
|
"Arnold": "VR6AewLTigWG4xSOukaG",
|
|
"Adam": "pNInz6obpgDQGcFmaJgB",
|
|
"Sam": "yoZ06aMxZJJ28mfd3POQ",
|
|
}
|
|
self._headers = {
|
|
"Content-Type": "application/json",
|
|
"xi-api-key": cfg.elevenlabs_api_key,
|
|
}
|
|
self._voices = default_voices.copy()
|
|
if cfg.elevenlabs_voice_1_id in voice_options:
|
|
cfg.elevenlabs_voice_1_id = voice_options[cfg.elevenlabs_voice_1_id]
|
|
if cfg.elevenlabs_voice_2_id in voice_options:
|
|
cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id]
|
|
self._use_custom_voice(cfg.elevenlabs_voice_1_id, 0)
|
|
self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1)
|
|
|
|
def _use_custom_voice(self, voice, voice_index) -> None:
|
|
"""Use a custom voice if provided and not a placeholder
|
|
|
|
Args:
|
|
voice (str): The voice ID
|
|
voice_index (int): The voice index
|
|
|
|
Returns:
|
|
None: None
|
|
"""
|
|
# Placeholder values that should be treated as empty
|
|
if voice and voice not in PLACEHOLDERS:
|
|
self._voices[voice_index] = voice
|
|
|
|
def _speech(self, text: str, voice_index: int = 0) -> bool:
|
|
"""Speak text using elevenlabs.io's API
|
|
|
|
Args:
|
|
text (str): The text to speak
|
|
voice_index (int, optional): The voice to use. Defaults to 0.
|
|
|
|
Returns:
|
|
bool: True if the request was successful, False otherwise
|
|
"""
|
|
tts_url = (
|
|
f"https://api.elevenlabs.io/v1/text-to-speech/{self._voices[voice_index]}"
|
|
)
|
|
response = requests.post(tts_url, headers=self._headers, json={"text": text})
|
|
|
|
if response.status_code == 200:
|
|
with open("speech.mpeg", "wb") as f:
|
|
f.write(response.content)
|
|
playsound("speech.mpeg", True)
|
|
os.remove("speech.mpeg")
|
|
return True
|
|
else:
|
|
print("Request failed with status code:", response.status_code)
|
|
print("Response content:", response.content)
|
|
return False
|