Added new env variable and speech function for alternative TTS voice

pull/754/head
meta-fx 2023-04-10 20:00:43 -05:00
parent e62a3be294
commit 0cf790b633
3 changed files with 35 additions and 5 deletions

View File

@ -13,3 +13,4 @@ OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure
IMAGE_PROVIDER=dalle
HUGGINGFACE_API_TOKEN=
USE_MAC_OS_TTS=False
USE_BRIAN_TTS=False

View File

@ -57,7 +57,10 @@ class Config(metaclass=Singleton):
self.use_mac_os_tts = False
self.use_mac_os_tts = os.getenv("USE_MAC_OS_TTS")
self.use_brian_tts = False
self.use_brian_tts = os.getenv("USE_BRIAN_TTS")
self.google_api_key = os.getenv("GOOGLE_API_KEY")
self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
@ -69,11 +72,13 @@ class Config(metaclass=Singleton):
# User agent headers to use when browsing web
# Some websites might just completely deny request with an error code if no user agent was found.
self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
self.user_agent_header = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
self.redis_host = os.getenv("REDIS_HOST", "localhost")
self.redis_port = os.getenv("REDIS_PORT", "6379")
self.redis_password = os.getenv("REDIS_PASSWORD", "")
self.wipe_redis_on_start = os.getenv("WIPE_REDIS_ON_START", "True") == 'True'
self.wipe_redis_on_start = os.getenv(
"WIPE_REDIS_ON_START", "True") == 'True'
self.memory_index = os.getenv("MEMORY_INDEX", 'auto-gpt')
# Note that indexes must be created on db 0 in redis, this is not configureable.

View File

@ -1,9 +1,9 @@
import gtts
import os
from playsound import playsound
import requests
from config import Config
cfg = Config()
import gtts
# TODO: Nicer names for these ids
@ -14,6 +14,7 @@ tts_headers = {
"xi-api-key": cfg.elevenlabs_api_key
}
def eleven_labs_speech(text, voice_index=0):
"""Speak text using elevenlabs.io's API"""
tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format(
@ -33,23 +34,46 @@ def eleven_labs_speech(text, voice_index=0):
print("Response content:", response.content)
return False
def brian_speech(text):
"""Speak text using Brian with the streamelements API"""
tts_url = f"https://api.streamelements.com/kappa/v2/speech?voice=Brian&text={text}"
response = requests.get(tts_url)
if response.status_code == 200:
with open("speech.mp3", "wb") as f:
f.write(response.content)
playsound("speech.mp3")
os.remove("speech.mp3")
return True
else:
print("Request failed with status code:", response.status_code)
print("Response content:", response.content)
return False
def gtts_speech(text):
tts = gtts.gTTS(text)
tts.save("speech.mp3")
playsound("speech.mp3")
os.remove("speech.mp3")
def macos_tts_speech(text):
os.system(f'say "{text}"')
def say_text(text, voice_index=0):
if not cfg.elevenlabs_api_key:
if cfg.use_mac_os_tts == 'True':
macos_tts_speech(text)
elif cfg.use_brian_tts == 'True':
success = brian_speech(text)
if not success:
gtts_speech(text)
else:
gtts_speech(text)
else:
success = eleven_labs_speech(text, voice_index)
if not success:
gtts_speech(text)