Transcribing audio
parent
ff5b8f1490
commit
9696fc622c
|
@ -93,6 +93,13 @@ IMAGE_PROVIDER=dalle
|
||||||
# HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token)
|
# HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token)
|
||||||
HUGGINGFACE_API_TOKEN=your-huggingface-api-token
|
HUGGINGFACE_API_TOKEN=your-huggingface-api-token
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
### AUDIO TO TEXT PROVIDER
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
### HUGGINGFACE
|
||||||
|
HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
### GIT Provider for repository actions
|
### GIT Provider for repository actions
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
|
@ -8,6 +8,7 @@ from autogpt.commands.improve_code import improve_code
|
||||||
from autogpt.commands.write_tests import write_tests
|
from autogpt.commands.write_tests import write_tests
|
||||||
from autogpt.config import Config
|
from autogpt.config import Config
|
||||||
from autogpt.commands.image_gen import generate_image
|
from autogpt.commands.image_gen import generate_image
|
||||||
|
from autogpt.commands.audio_text import read_audio_from_file
|
||||||
from autogpt.commands.web_requests import scrape_links, scrape_text
|
from autogpt.commands.web_requests import scrape_links, scrape_text
|
||||||
from autogpt.commands.execute_code import execute_python_file, execute_shell
|
from autogpt.commands.execute_code import execute_python_file, execute_shell
|
||||||
from autogpt.commands.file_operations import (
|
from autogpt.commands.file_operations import (
|
||||||
|
@ -179,6 +180,8 @@ def execute_command(command_name: str, arguments):
|
||||||
" shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' "
|
" shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' "
|
||||||
"in your config. Do not attempt to bypass the restriction."
|
"in your config. Do not attempt to bypass the restriction."
|
||||||
)
|
)
|
||||||
|
elif command_name == "read_audio_from_file":
|
||||||
|
return read_audio_from_file(arguments["file"])
|
||||||
elif command_name == "generate_image":
|
elif command_name == "generate_image":
|
||||||
return generate_image(arguments["prompt"])
|
return generate_image(arguments["prompt"])
|
||||||
elif command_name == "do_nothing":
|
elif command_name == "do_nothing":
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
from autogpt.config import Config
|
||||||
|
from autogpt.commands.file_operations import safe_join
|
||||||
|
|
||||||
|
cfg = Config()
|
||||||
|
|
||||||
|
working_directory = "auto_gpt_workspace"
|
||||||
|
|
||||||
|
|
||||||
|
def read_audio_from_file(audio_path):
    """Load an audio file from the workspace and return its transcription.

    Args:
        audio_path: Path of the audio file, joined onto the module-level
            ``working_directory`` with ``safe_join``.

    Returns:
        Whatever ``read_audio`` returns for the file's raw bytes.
    """
    resolved_path = safe_join(working_directory, audio_path)

    # Binary mode: the bytes are forwarded untouched to the transcriber.
    with open(resolved_path, "rb") as handle:
        raw_bytes = handle.read()

    return read_audio(raw_bytes)
|
||||||
|
|
||||||
|
|
||||||
|
def read_audio(audio):
    """Transcribe raw audio bytes through the Hugging Face Inference API.

    The model name is read from ``cfg.huggingface_audio_to_text_model`` and
    the request is authenticated with ``cfg.huggingface_api_token``.

    Args:
        audio: Raw contents of the audio file; sent as the POST body.

    Returns:
        The string ``"The audio says: "`` followed by the transcription.

    Raises:
        ValueError: If no API token is configured, if the response body is
            not valid JSON, or if the decoded response has no ``text``
            field (for example when the API returns an error payload).
    """
    api_token = cfg.huggingface_api_token
    # Validate before building the request: an unset *or empty* token would
    # otherwise be sent as a meaningless "Bearer " header.
    if not api_token:
        raise ValueError("You need to set your Hugging Face API token in the config file.")

    model = cfg.huggingface_audio_to_text_model
    api_url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {api_token}"}

    response = requests.post(
        api_url,
        headers=headers,
        data=audio,
    )

    # json.JSONDecodeError is a ValueError subclass, so a non-JSON body is
    # reported with the same exception type as the checks in this function.
    payload = json.loads(response.content.decode("utf-8"))

    # Error responses carry no "text" key; report what the API actually said
    # instead of failing with a bare KeyError.
    if not isinstance(payload, dict) or "text" not in payload:
        raise ValueError(
            "Hugging Face audio-to-text request failed "
            f"(HTTP {response.status_code}): {payload}"
        )

    return "The audio says: " + payload["text"]
|
|
@ -72,6 +72,9 @@ class Config(metaclass=Singleton):
|
||||||
|
|
||||||
self.image_provider = os.getenv("IMAGE_PROVIDER")
|
self.image_provider = os.getenv("IMAGE_PROVIDER")
|
||||||
self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN")
|
self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN")
|
||||||
|
self.huggingface_audio_to_text_model = os.getenv(
|
||||||
|
"HUGGINGFACE_AUDIO_TO_TEXT_MODEL"
|
||||||
|
)
|
||||||
|
|
||||||
# User agent headers to use when browsing web
|
# User agent headers to use when browsing web
|
||||||
# Some websites might just completely deny request with an error code if
|
# Some websites might just completely deny request with an error code if
|
||||||
|
|
|
@ -82,6 +82,7 @@ def get_prompt() -> str:
|
||||||
),
|
),
|
||||||
("Execute Python File", "execute_python_file", {"file": "<file>"}),
|
("Execute Python File", "execute_python_file", {"file": "<file>"}),
|
||||||
("Generate Image", "generate_image", {"prompt": "<prompt>"}),
|
("Generate Image", "generate_image", {"prompt": "<prompt>"}),
|
||||||
|
# The argument key must be "file": execute_command reads arguments["file"] for this command.
("Convert Audio to text", "read_audio_from_file", {"file": "<file>"}),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Only add shell command to the prompt if the AI is allowed to execute it
|
# Only add shell command to the prompt if the AI is allowed to execute it
|
||||||
|
|
Loading…
Reference in New Issue