Merge branch 'master' into master

pull/1679/head
Slowly-Grokking 2023-04-15 21:29:14 -05:00 committed by GitHub
commit 16553be539
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 77 additions and 0 deletions

View File

@ -93,6 +93,13 @@ IMAGE_PROVIDER=dalle
# HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token)
HUGGINGFACE_API_TOKEN=your-huggingface-api-token
################################################################################
### AUDIO TO TEXT PROVIDER
################################################################################
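### HUGGINGFACE
# HUGGINGFACE_AUDIO_TO_TEXT_MODEL - HuggingFace model used to convert audio to text (Example: facebook/wav2vec2-base-960h)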
HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h
################################################################################
### GIT Provider for repository actions
################################################################################

View File

@ -196,6 +196,19 @@ Use this to use TTS _(Text-to-Speech)_ for Auto-GPT
python -m autogpt --speak
```
### ElevenLabs voice names and IDs (you can use either the name or the ID):
- Rachel : 21m00Tcm4TlvDq8ikWAM
- Domi : AZnzlk1XvdvUeBnXmlld
- Bella : EXAVITQu4vr4xnSDxMaL
- Antoni : ErXwobaYiN019PkySvjV
- Elli : MF3mGyEYCl7XYWbV9V6O
- Josh : TxGEqnHWrfWFTfGW9XjX
- Arnold : VR6AewLTigWG4xSOukaG
- Adam : pNInz6obpgDQGcFmaJgB
- Sam : yoZ06aMxZJJ28mfd3POQ
## OpenAI API Keys Configuration
Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys.

View File

@ -8,6 +8,7 @@ from autogpt.commands.improve_code import improve_code
from autogpt.commands.write_tests import write_tests
from autogpt.config import Config
from autogpt.commands.image_gen import generate_image
from autogpt.commands.audio_text import read_audio_from_file
from autogpt.commands.web_requests import scrape_links, scrape_text
from autogpt.commands.execute_code import execute_python_file, execute_shell
from autogpt.commands.file_operations import (
@ -180,6 +181,8 @@ def execute_command(command_name: str, arguments):
" shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' "
"in your config. Do not attempt to bypass the restriction."
)
elif command_name == "read_audio_from_file":
return read_audio_from_file(arguments["file"])
elif command_name == "generate_image":
return generate_image(arguments["prompt"])
elif command_name == "send_tweet":

View File

@ -0,0 +1,35 @@
import requests
import json
from autogpt.config import Config
from autogpt.commands.file_operations import safe_join
# Config is declared with a Singleton metaclass; this handle supplies the
# Hugging Face token and model name read by read_audio below.
cfg = Config()
# Base directory that read_audio_from_file joins caller-supplied paths onto
# (via safe_join) before opening them.
working_directory = "auto_gpt_workspace"
def read_audio_from_file(audio_path):
    """Transcribe an audio file located under the working directory.

    Args:
        audio_path: Path of the audio file; it is combined with
            ``working_directory`` through ``safe_join`` before being opened.

    Returns:
        The value produced by ``read_audio`` for the file's raw bytes.
    """
    resolved_path = safe_join(working_directory, audio_path)
    # Read everything up front so the file is closed before transcription.
    with open(resolved_path, "rb") as handle:
        raw_bytes = handle.read()
    return read_audio(raw_bytes)
def read_audio(audio):
    """Transcribe raw audio bytes via the Hugging Face Inference API.

    Args:
        audio: Raw bytes of the audio file; sent unchanged as the request body.

    Returns:
        The string ``"The audio says: "`` followed by the transcription text.

    Raises:
        ValueError: If the API token or the model name is not configured, or
            if the response is not a JSON object containing a ``"text"`` field
            (for example when the service answers with an error payload).
    """
    api_token = cfg.huggingface_api_token
    # Validate before building the header: a missing token is None and a blank
    # one is "", and either would otherwise be sent as a bogus bearer token.
    if not api_token:
        raise ValueError(
            "You need to set your Hugging Face API token in the config file."
        )

    model = cfg.huggingface_audio_to_text_model
    # Without this check an unset model would be interpolated as ".../models/None".
    if not model:
        raise ValueError(
            "You need to set HUGGINGFACE_AUDIO_TO_TEXT_MODEL in the config file."
        )

    api_url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {api_token}"}

    response = requests.post(
        api_url,
        headers=headers,
        data=audio,
    )

    try:
        payload = json.loads(response.content.decode("utf-8"))
    except ValueError as err:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        raise ValueError(
            "Hugging Face returned a non-JSON response "
            f"(HTTP {response.status_code})."
        ) from err

    # Surface the service's own message instead of an opaque KeyError: 'text'.
    # NOTE(review): failures appear to be reported under an "error" key;
    # confirm against the Inference API documentation.
    if not isinstance(payload, dict) or "text" not in payload:
        detail = payload.get("error", payload) if isinstance(payload, dict) else payload
        raise ValueError(
            "Hugging Face audio-to-text request failed "
            f"(HTTP {response.status_code}): {detail}"
        )

    return "The audio says: " + payload["text"]

View File

@ -72,6 +72,9 @@ class Config(metaclass=Singleton):
self.image_provider = os.getenv("IMAGE_PROVIDER")
self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN")
self.huggingface_audio_to_text_model = os.getenv(
"HUGGINGFACE_AUDIO_TO_TEXT_MODEL"
)
# User agent headers to use when browsing web
# Some websites might just completely deny request with an error code if

View File

@ -82,6 +82,7 @@ def get_prompt() -> str:
),
("Execute Python File", "execute_python_file", {"file": "<file>"}),
("Generate Image", "generate_image", {"prompt": "<prompt>"}),
("Convert Audio to text", "read_audio_from_file", {"file": "<file>"}),
("Send Tweet", "send_tweet", {"text": "<text>"}),
]

View File

@ -22,11 +22,26 @@ class ElevenLabsSpeech(VoiceBase):
cfg = Config()
default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
voice_options = {
"Rachel": "21m00Tcm4TlvDq8ikWAM",
"Domi": "AZnzlk1XvdvUeBnXmlld",
"Bella": "EXAVITQu4vr4xnSDxMaL",
"Antoni": "ErXwobaYiN019PkySvjV",
"Elli": "MF3mGyEYCl7XYWbV9V6O",
"Josh": "TxGEqnHWrfWFTfGW9XjX",
"Arnold": "VR6AewLTigWG4xSOukaG",
"Adam": "pNInz6obpgDQGcFmaJgB",
"Sam": "yoZ06aMxZJJ28mfd3POQ",
}
self._headers = {
"Content-Type": "application/json",
"xi-api-key": cfg.elevenlabs_api_key,
}
self._voices = default_voices.copy()
if cfg.elevenlabs_voice_1_id in voice_options:
cfg.elevenlabs_voice_1_id = voice_options[cfg.elevenlabs_voice_1_id]
if cfg.elevenlabs_voice_2_id in voice_options:
cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id]
self._use_custom_voice(cfg.elevenlabs_voice_1_id, 0)
self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1)