Added a default (free) library for text-to-speech

Adds the gTTS (Google Text-to-Speech) Python library as a fallback for text-to-speech conversion in the speak.py file. The changes were made to ensure that users can still convert text to speech even if the ElevenLabs API key is not set or if the API encounters an error. Additionally, the requirements.txt file has been updated to include the new gTTS dependency.
2023-04-03 22:38:01 -07:00 · 2023-04-03 22:38:01 -07:00 · 68e4af8685
parent 4839a0d89b
commit 68e4af8685
2 changed files with 21 additions and 3 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -7,6 +7,7 @@ pyyaml==6.0
 readability-lxml==0.8.1
 requests
 tiktoken==0.3.3
+gTTS==2.3.1
 docker
 googlesearch-python
 google-api-python-client #(https://developers.google.com/custom-search/v1/overview) 
--- a/scripts/speak.py
+++ b/scripts/speak.py
@ -3,6 +3,8 @@ from playsound import playsound
 import requests
 from config import Config
 cfg = Config()
+import gtts
+

 # TODO: Nicer names for these ids
 voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
@ -12,10 +14,9 @@ tts_headers = {
    "xi-api-key": cfg.elevenlabs_api_key
 }

-def say_text(text, voice_index=0):
+def eleven_labs_speech(text, voice_index=0):
    tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format(
        voice_id=voices[voice_index])
-
    formatted_message = {"text": text}
    response = requests.post(
        tts_url, headers=tts_headers, json=formatted_message)
@ -24,8 +25,24 @@ def say_text(text, voice_index=0):
        with open("speech.mpeg", "wb") as f:
            f.write(response.content)
        playsound("speech.mpeg")
-        # Delete audio file
        os.remove("speech.mpeg")
+        return True
    else:
        print("Request failed with status code:", response.status_code)
        print("Response content:", response.content)
+        return False
+
+def gtts_speech(text):
+    """Speak `text` aloud using the gTTS library.
+
+    The synthesized audio is saved to "speech.mp3" in the current working
+    directory, played with playsound, and then deleted.
+    """
+    tts = gtts.gTTS(text)
+    tts.save("speech.mp3")
+    try:
+        playsound("speech.mp3")
+    finally:
+        # Remove the temporary audio file even when playback raises, so a
+        # failed playback does not leave speech.mp3 behind.
+        os.remove("speech.mp3")
+
+def say_text(text, voice_index=0):
+    """Speak `text` aloud, preferring ElevenLabs and falling back to gTTS.
+
+    gTTS is used when no ElevenLabs API key is configured, or when
+    eleven_labs_speech reports failure by returning False.
+
+    `voice_index` selects an entry of the module-level `voices` list and is
+    only used on the ElevenLabs path.
+    """
+    if not cfg.elevenlabs_api_key:
+        gtts_speech(text)
+        return
+    # Forward both arguments: `text` is a required parameter, so calling
+    # eleven_labs_speech() with no arguments raises TypeError.
+    if not eleven_labs_speech(text, voice_index):
+        gtts_speech(text)
+