Merge branch 'master' into master

2023-04-15 21:29:14 -05:00 · 2023-04-15 21:29:14 -05:00 · 16553be539
parent 6e9cc463b3 4daa083fd3
commit 16553be539
7 changed files with 77 additions and 0 deletions
--- a/.env.template
+++ b/.env.template
@ -93,6 +93,13 @@ IMAGE_PROVIDER=dalle
 # HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token)
 HUGGINGFACE_API_TOKEN=your-huggingface-api-token

+################################################################################
+### AUDIO TO TEXT PROVIDER
+################################################################################
+
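+### HUGGINGFACE
+# HUGGINGFACE_AUDIO_TO_TEXT_MODEL - HuggingFace model used to convert audio to text (Example: facebook/wav2vec2-base-960h)
+HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h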
+
 ################################################################################
 ### GIT Provider for repository actions
 ################################################################################
--- a/README.md
+++ b/README.md
@ -196,6 +196,19 @@ Use this to use TTS _(Text-to-Speech)_ for Auto-GPT
 python -m autogpt --speak
 ```

+### ElevenLabs voice names and IDs (you can use either the name or the ID):
+
+- Rachel : 21m00Tcm4TlvDq8ikWAM
+- Domi : AZnzlk1XvdvUeBnXmlld
+- Bella : EXAVITQu4vr4xnSDxMaL
+- Antoni : ErXwobaYiN019PkySvjV
+- Elli : MF3mGyEYCl7XYWbV9V6O
+- Josh : TxGEqnHWrfWFTfGW9XjX
+- Arnold : VR6AewLTigWG4xSOukaG
+- Adam : pNInz6obpgDQGcFmaJgB
+- Sam : yoZ06aMxZJJ28mfd3POQ
+
+
 ## OpenAI API Keys Configuration

 Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys.
--- a/autogpt/app.py
+++ b/autogpt/app.py
@ -8,6 +8,7 @@ from autogpt.commands.improve_code import improve_code
 from autogpt.commands.write_tests import write_tests
 from autogpt.config import Config
 from autogpt.commands.image_gen import generate_image
+from autogpt.commands.audio_text import read_audio_from_file
 from autogpt.commands.web_requests import scrape_links, scrape_text
 from autogpt.commands.execute_code import execute_python_file, execute_shell
 from autogpt.commands.file_operations import (
@ -180,6 +181,8 @@ def execute_command(command_name: str, arguments):
                    " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' "
                    "in your config. Do not attempt to bypass the restriction."
                )
+        elif command_name == "read_audio_from_file":
+            return read_audio_from_file(arguments["file"])
        elif command_name == "generate_image":
            return generate_image(arguments["prompt"])
        elif command_name == "send_tweet":
--- a/autogpt/commands/audio_text.py
+++ b/autogpt/commands/audio_text.py
@ -0,0 +1,35 @@
+import requests
+import json
+
+from autogpt.config import Config
+from autogpt.commands.file_operations import safe_join
+
+# Project configuration object; read_audio() takes the Hugging Face API token
+# and the audio-to-text model name from it.
+cfg = Config()
+
+# Directory onto which read_audio_from_file() joins the requested audio path
+# (via safe_join).
+working_directory = "auto_gpt_workspace"
+
+
+def read_audio_from_file(audio_path):
+    """Read an audio file from the workspace and return its transcription.
+
+    Args:
+        audio_path: Path of the audio file; it is joined onto the workspace
+            directory with ``safe_join`` before being opened.
+
+    Returns:
+        Whatever ``read_audio`` returns for the bytes of the file.
+    """
+    resolved_path = safe_join(working_directory, audio_path)
+    with open(resolved_path, "rb") as handle:
+        raw_bytes = handle.read()
+    return read_audio(raw_bytes)
+
+
+def read_audio(audio):
+    """Transcribe audio bytes with the Hugging Face Inference API.
+
+    Args:
+        audio: Raw bytes of the audio to transcribe; sent unchanged as the
+            request body.
+
+    Returns:
+        The string ``"The audio says: "`` followed by the transcribed text.
+
+    Raises:
+        ValueError: If the API token or the model name is not configured, or
+            if the API response does not contain a ``text`` field.
+        requests.HTTPError: If the API answers with an error status code.
+    """
+    api_token = cfg.huggingface_api_token
+    # Validate the configuration before building the request so that a missing
+    # value never ends up as "Bearer None" or ".../models/None".
+    if not api_token:
+        raise ValueError("You need to set your Hugging Face API token in the config file.")
+
+    model = cfg.huggingface_audio_to_text_model
+    if not model:
+        raise ValueError(
+            "You need to set HUGGINGFACE_AUDIO_TO_TEXT_MODEL in the config file."
+        )
+
+    api_url = f"https://api-inference.huggingface.co/models/{model}"
+    headers = {"Authorization": f"Bearer {api_token}"}
+
+    response = requests.post(
+        api_url,
+        headers=headers,
+        data=audio,
+        # Without a timeout requests waits forever on an unresponsive server.
+        timeout=60,
+    )
+    # Surface HTTP failures explicitly instead of as a KeyError further down.
+    response.raise_for_status()
+
+    payload = json.loads(response.content.decode("utf-8"))
+    if not isinstance(payload, dict) or "text" not in payload:
+        raise ValueError(f"Unexpected response from Hugging Face API: {payload}")
+    return "The audio says: " + payload["text"]
--- a/autogpt/config/config.py
+++ b/autogpt/config/config.py
@ -72,6 +72,9 @@ class Config(metaclass=Singleton):

        self.image_provider = os.getenv("IMAGE_PROVIDER")
        self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN")
+        self.huggingface_audio_to_text_model = os.getenv(
+            "HUGGINGFACE_AUDIO_TO_TEXT_MODEL"
+        )

        # User agent headers to use when browsing web
        # Some websites might just completely deny request with an error code if
--- a/autogpt/prompt.py
+++ b/autogpt/prompt.py
@ -82,6 +82,7 @@ def get_prompt() -> str:
        ),
        ("Execute Python File", "execute_python_file", {"file": "<file>"}),
        ("Generate Image", "generate_image", {"prompt": "<prompt>"}),
+        ("Convert Audio to text", "read_audio_from_file", {"file": "<file>"}),
        ("Send Tweet", "send_tweet", {"text": "<text>"}),

    ]
--- a/autogpt/speech/eleven_labs.py
+++ b/autogpt/speech/eleven_labs.py
@ -22,11 +22,26 @@ class ElevenLabsSpeech(VoiceBase):

        cfg = Config()
        default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
+        voice_options = {
+            "Rachel": "21m00Tcm4TlvDq8ikWAM",
+            "Domi": "AZnzlk1XvdvUeBnXmlld",
+            "Bella": "EXAVITQu4vr4xnSDxMaL",
+            "Antoni": "ErXwobaYiN019PkySvjV",
+            "Elli": "MF3mGyEYCl7XYWbV9V6O",
+            "Josh": "TxGEqnHWrfWFTfGW9XjX",
+            "Arnold": "VR6AewLTigWG4xSOukaG",
+            "Adam": "pNInz6obpgDQGcFmaJgB",
+            "Sam": "yoZ06aMxZJJ28mfd3POQ",
+        }
        self._headers = {
            "Content-Type": "application/json",
            "xi-api-key": cfg.elevenlabs_api_key,
        }
        self._voices = default_voices.copy()
+        if cfg.elevenlabs_voice_1_id in voice_options:
+            cfg.elevenlabs_voice_1_id = voice_options[cfg.elevenlabs_voice_1_id]
+        if cfg.elevenlabs_voice_2_id in voice_options:
+            cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id]
        self._use_custom_voice(cfg.elevenlabs_voice_1_id, 0)
        self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1)