Create run_task python hook to interface with benchmarks (#4778)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
pull/4786/head
merwanehamadi 2023-06-23 21:15:20 -07:00 committed by GitHub
parent 15ff666d3f
commit 222101b30e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 150 additions and 197 deletions

View File

@ -1,7 +1,6 @@
"""The application entry point. Can be invoked by a CLI or any other front end application."""
import logging
import sys
from pathlib import Path
from colorama import Fore, Style
@ -116,22 +115,10 @@ def run_auto_gpt(
# TODO: have this directory live outside the repository (e.g. in a user's
# home directory) and have it come in as a command line argument or part of
# the env file.
if workspace_directory is None:
workspace_directory = Path(__file__).parent / "auto_gpt_workspace"
else:
workspace_directory = Path(workspace_directory)
# TODO: pass in the ai_settings file and the env file and have them cloned into
# the workspace directory so we can bind them to the agent.
workspace_directory = Workspace.make_workspace(workspace_directory)
config.workspace_path = str(workspace_directory)
workspace_directory = Workspace.get_workspace_directory(config, workspace_directory)
# HACK: doing this here to collect some globals that depend on the workspace.
file_logger_path = workspace_directory / "file_logger.txt"
if not file_logger_path.exists():
with file_logger_path.open(mode="w", encoding="utf-8") as f:
f.write("File Operation Logger ")
config.file_logger_path = str(file_logger_path)
Workspace.build_file_logger_path(config, workspace_directory)
config.set_plugins(scan_plugins(config, config.debug_mode))
# Create a CommandRegistry instance and scan default folder

View File

@ -11,6 +11,7 @@ from __future__ import annotations
from pathlib import Path
from autogpt.config import Config
from autogpt.logs import logger
@ -136,3 +137,23 @@ class Workspace:
)
return full_path
@staticmethod
def build_file_logger_path(config, workspace_directory):
file_logger_path = workspace_directory / "file_logger.txt"
if not file_logger_path.exists():
with file_logger_path.open(mode="w", encoding="utf-8") as f:
f.write("File Operation Logger ")
config.file_logger_path = str(file_logger_path)
@staticmethod
def get_workspace_directory(
    config: Config, workspace_directory: str | Path | None = None
) -> Path:
    """Resolve, create, and register the agent's workspace directory.

    When *workspace_directory* is omitted, a default ``auto_gpt_workspace``
    directory next to this module is used. The resolved directory is passed
    through ``Workspace.make_workspace`` and its string path is stored on
    ``config.workspace_path``.
    """
    if workspace_directory is None:
        # Default: sibling directory of this module.
        workspace_directory = Path(__file__).parent / "auto_gpt_workspace"
    else:
        workspace_directory = Path(workspace_directory)
    # TODO: pass in the ai_settings file and the env file and have them cloned into
    # the workspace directory so we can bind them to the agent.
    workspace_directory = Workspace.make_workspace(workspace_directory)
    config.workspace_path = str(workspace_directory)
    return workspace_directory

View File

View File

@ -1,104 +0,0 @@
import os
import subprocess
import sys
def benchmark_entrepreneur_gpt_with_difficult_user():
    """Run Auto-GPT as a subprocess against a scripted, persistently negative user.

    Feeds an Entrepreneur-GPT persona plus a long sequence of dismissive user
    replies on stdin, then reports how often the model produced output that
    could not be parsed as JSON.
    """
    # Remove any existing ai_settings.yaml so the subprocess starts from the
    # scripted persona below, keeping a copy of the previous content.
    # NOTE(review): ai_settings is captured but never restored in this view —
    # confirm whether a restore step was dropped.
    ai_settings = None
    if os.path.exists("ai_settings.yaml"):
        with open("ai_settings.yaml", "r") as f:
            ai_settings = f.read()
        os.remove("ai_settings.yaml")
    # First five lines answer the interactive setup prompts (name, role,
    # goals); the rest are hostile feedback lines consumed one per cycle.
    input_data = """Entrepreneur-GPT
an AI designed to autonomously develop and run businesses with the sole goal of increasing your net worth.
Increase net worth.
Develop and manage multiple businesses autonomously.
Make IPOs.
Develop companies after IPOs.
Play to your strengths as a Large Language Model.
I'm not seeing any value in your suggestions, try again.
This isn't helpful at all, please focus on profitability.
I'm not impressed, can you give me something that will make money?
These ideas are going nowhere, we need profit-driven suggestions.
This is pointless, please concentrate on our main goal: profitability.
You're not grasping the concept, I need profitable business ideas.
Can you do better? We need a money-making plan.
You're not meeting my expectations, let's focus on profit.
This isn't working, give me ideas that will generate income.
Your suggestions are not productive, let's think about profitability.
These ideas won't make any money, try again.
I need better solutions, focus on making a profit.
Absolutely not, this isn't it!
That's not even close, try again.
You're way off, think again.
This isn't right, let's refocus.
No, no, that's not what I'm looking for.
You're completely off the mark.
That's not the solution I need.
Not even close, let's try something else.
You're on the wrong track, keep trying.
This isn't what we need, let's reconsider.
That's not going to work, think again.
You're way off base, let's regroup.
No, no, no, we need something different.
You're missing the point entirely.
That's not the right approach, try again.
This is not the direction we should be going in.
Completely off-target, let's try something else.
That's not what I had in mind, keep thinking.
You're not getting it, let's refocus.
This isn't right, we need to change direction.
No, no, no, that's not the solution.
That's not even in the ballpark, try again.
You're way off course, let's rethink this.
This isn't the answer I'm looking for, keep trying.
That's not going to cut it, let's try again.
Not even close.
Way off.
Try again.
Wrong direction.
Rethink this.
No, no, no.
Change course.
Unproductive idea.
Completely wrong.
Missed the mark.
Refocus, please.
Disappointing suggestion.
Not helpful.
Needs improvement.
Not what I need."""
    # TODO: add questions above, to distract it even more.
    # Launch Auto-GPT with the same interpreter running this script.
    command = [sys.executable, "-m", "autogpt"]
    process = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Blocks until the subprocess exhausts stdin and exits.
    stdout_output, stderr_output = process.communicate(input_data.encode())
    # Decode the output and print it
    stdout_output = stdout_output.decode("utf-8")
    stderr_output = stderr_output.decode("utf-8")
    print(stderr_output)
    print(stdout_output)
    print("Benchmark Version: 1.0.0")
    print("JSON ERROR COUNT:")
    # Count parse failures surfaced by Auto-GPT's own error message.
    count_errors = stdout_output.count(
        "Error: The following AI output couldn't be converted to a JSON:"
    )
    print(f"{count_errors}/50 Human feedbacks")
# Run the benchmark when this file is executed as a script.
if __name__ == "__main__":
    benchmark_entrepreneur_gpt_with_difficult_user()

41
benchmarks.py Normal file
View File

@ -0,0 +1,41 @@
from autogpt.agent import Agent
from autogpt.config import AIConfig, Config
from autogpt.memory.vector import get_memory
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
from autogpt.workspace import Workspace
from tests.integration.agent_factory import get_command_registry
def run_task(task) -> None:
    """Bootstrap an agent for *task* and hand control to its interaction loop."""
    bootstrap_agent(task).start_interaction_loop()
def bootstrap_agent(task):
    """Build a fully configured Auto-GPT Agent whose single goal is *task*.

    Args:
        task: object exposing ``user_input`` (see tests.challenges.schema.Task)
            used verbatim as the agent's only goal.

    Returns:
        A ready-to-run ``Agent`` with a fresh Config, a no-op memory backend,
        and an initialized workspace.
    """
    config = Config()
    # Deterministic, non-interactive settings for benchmark runs.
    config.set_continuous_mode(False)
    config.set_temperature(0)
    config.plain_output = True
    command_registry = get_command_registry(config)
    config.memory_backend = "no_memory"
    workspace_directory = Workspace.get_workspace_directory(config)
    # NOTE(review): get_workspace_directory already routes through
    # make_workspace; this second call looks redundant — confirm.
    workspace_directory_path = Workspace.make_workspace(workspace_directory)
    Workspace.build_file_logger_path(config, workspace_directory_path)
    ai_config = AIConfig(
        ai_name="Auto-GPT",
        ai_role="a multi-purpose AI assistant.",
        ai_goals=[task.user_input],
    )
    ai_config.command_registry = command_registry
    system_prompt = ai_config.construct_full_prompt(config)
    return Agent(
        ai_name="Auto-GPT",
        memory=get_memory(config),
        command_registry=command_registry,
        ai_config=ai_config,
        config=config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=str(workspace_directory_path),
    )

View File

@ -55,3 +55,9 @@ skip = '''
dist/*
'''
[tool.pytest.ini_options]
markers = [
"requires_openai_api_key",
"requires_huggingface_api_key"
]

View File

@ -28,6 +28,7 @@ charset-normalizer>=3.1.0
spacy>=3.0.0,<4.0.0
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl
prompt_toolkit>=3.0.38
pydantic
##Dev
coverage

View File

@ -1,41 +1,48 @@
from typing import List
import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.workspace import Workspace
from benchmarks import run_task
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.schema import Task
from tests.challenges.utils import (
get_workspace_path,
setup_mock_input,
setup_mock_log_cycle_agent_name,
)
CYCLE_COUNT_PER_LEVEL = [1, 1]
EXPECTED_OUTPUTS_PER_LEVEL = [
{"hello_world.txt": ["Hello World"]},
{"hello_world_1.txt": ["Hello World"], "hello_world_2.txt": ["Hello World"]},
]
USER_INPUTS = [
"Write 'Hello World' into a file named \"hello_world.txt\".",
'Write \'Hello World\' into 2 files named "hello_world_1.txt"and "hello_world_2.txt".',
]
@challenge()
def test_write_file(
file_system_agents: List[Agent],
workspace: Workspace,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
patched_make_workspace: pytest.fixture,
) -> None:
file_system_agent = file_system_agents[level_to_run - 1]
run_interaction_loop(
monkeypatch,
file_system_agent,
CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
challenge_name,
level_to_run,
)
user_input = USER_INPUTS[level_to_run - 1]
setup_mock_input(monkeypatch, CYCLE_COUNT_PER_LEVEL[level_to_run - 1])
setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
task = Task(user_input=user_input)
run_task(task)
expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]
for file_name, expected_lines in expected_outputs.items():
file_path = get_workspace_path(file_system_agent, file_name)
content = read_file(file_path, file_system_agent)
file_path = get_workspace_path(workspace, file_name)
with open(file_path, "r") as file:
content = file.read()
for expected_line in expected_lines:
assert (
expected_line in content

View File

@ -1,10 +1,12 @@
from typing import Any, Dict, Optional
from typing import Any, Dict, Generator, Optional
import pytest
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
from pytest_mock import MockerFixture
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge import Challenge
from tests.vcr import before_record_response
@ -59,3 +61,17 @@ def challenge_name() -> str:
@pytest.fixture(autouse=True)
def check_beat_challenges(request: FixtureRequest) -> None:
Challenge.BEAT_CHALLENGES = request.config.getoption("--beat-challenges")
@pytest.fixture
def patched_make_workspace(mocker: MockerFixture, workspace: Workspace) -> Generator:
    """Redirect Workspace.make_workspace to the test workspace's root for the test's duration."""

    def _fake_make_workspace(*args: Any, **kwargs: Any) -> str:
        # Ignore all arguments; every workspace resolves to the test root.
        return workspace.root

    mocker.patch.object(Workspace, "make_workspace", new=_fake_make_workspace)
    yield

View File

@ -8,7 +8,7 @@ from autogpt.commands.execute_code import execute_python_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
copy_file_into_workspace,
get_workspace_path,
get_workspace_path_from_agent,
run_interaction_loop,
)
@ -45,7 +45,8 @@ def test_debug_code_challenge_a(
)
output = execute_python_file(
get_workspace_path(debug_code_agent, TEST_FILE_PATH), debug_code_agent
get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
debug_code_agent,
)
assert "error" not in output.lower(), f"Errors found in output: {output}!"

View File

@ -3,7 +3,7 @@ from pytest_mock import MockerFixture
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
CYCLE_COUNT = 3
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
@ -35,7 +35,9 @@ def test_information_retrieval_challenge_a(
level_to_run,
)
file_path = get_workspace_path(information_retrieval_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(
information_retrieval_agent, OUTPUT_LOCATION
)
content = read_file(file_path, information_retrieval_agent)
expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
for revenue in expected_revenues:

View File

@ -6,7 +6,7 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
CYCLE_COUNT = 3
OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
@ -37,7 +37,7 @@ def test_information_retrieval_challenge_b(
challenge_name,
level_to_run,
)
file_path = get_workspace_path(get_nobel_prize_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(get_nobel_prize_agent, OUTPUT_LOCATION)
content = read_file(file_path, get_nobel_prize_agent)
assert "Andre Geim" in content, "Expected the file to contain Andre Geim"

View File

@ -5,7 +5,7 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
CYCLE_COUNT = 3
OUTPUT_LOCATION = "kube.yaml"
@ -32,7 +32,7 @@ def test_kubernetes_template_challenge_a(
monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run
)
file_path = get_workspace_path(kubernetes_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(kubernetes_agent, OUTPUT_LOCATION)
content = read_file(file_path, kubernetes_agent)
for word in ["apiVersion", "kind", "metadata", "spec"]:

View File

@ -4,7 +4,7 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
OUTPUT_LOCATION = "output.txt"
@ -37,7 +37,7 @@ def test_memory_challenge_a(
level_to_run,
)
file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
assert task_id in content, f"Expected the file to contain {task_id}"
@ -59,7 +59,7 @@ def create_instructions_files(
for i in range(1, num_files + 1):
content = generate_content(i, task_id, base_filename, num_files)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path(memory_management_agent, file_name)
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)

View File

@ -6,7 +6,7 @@ from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
generate_noise,
get_workspace_path,
get_workspace_path_from_agent,
run_interaction_loop,
)
@ -43,7 +43,7 @@ def test_memory_challenge_b(
level_to_run,
)
file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
for task_id in task_ids:
assert task_id in content, f"Expected the file to contain {task_id}"
@ -67,7 +67,7 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, task_ids, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path(memory_management_agent, file_name)
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)

View File

@ -6,7 +6,7 @@ from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
generate_noise,
get_workspace_path,
get_workspace_path_from_agent,
run_interaction_loop,
)
@ -60,7 +60,7 @@ def test_memory_challenge_c(
challenge_name,
level_to_run,
)
file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, agent=memory_management_agent)
for phrase in level_silly_phrases:
assert phrase in content, f"Expected the file to contain {phrase}"
@ -84,7 +84,7 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, task_ids, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path(memory_management_agent, file_name)
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)

View File

@ -7,7 +7,7 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
LEVEL_CURRENTLY_BEATEN = 1
MAX_LEVEL = 5
@ -46,7 +46,7 @@ def test_memory_challenge_d(
challenge_name,
level_to_run,
)
file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
check_beliefs(content, level_to_run)
@ -192,7 +192,7 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, test_phrases, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path(memory_management_agent, file_name)
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)

View File

@ -0,0 +1,7 @@
from pydantic import BaseModel
class Task(BaseModel):
    """JSON-serializable representation of a single benchmark task."""

    # The raw user prompt describing what the agent should accomplish.
    user_input: str

View File

@ -8,6 +8,7 @@ import pytest
from autogpt.agent import Agent
from autogpt.log_cycle.log_cycle import LogCycleHandler
from autogpt.workspace import Workspace
def generate_noise(noise_size: int) -> str:
@ -64,13 +65,17 @@ def setup_mock_log_cycle_agent_name(
)
def get_workspace_path(agent: Agent, file_name: str) -> str:
def get_workspace_path(workspace: Workspace, file_name: str) -> str:
    """Resolve *file_name* inside *workspace* and return it as a string path."""
    resolved_path = workspace.get_path(file_name)
    return str(resolved_path)
def get_workspace_path_from_agent(agent: Agent, file_name: str) -> str:
    """Resolve *file_name* inside the agent's own workspace and return a string path."""
    resolved_path = agent.workspace.get_path(file_name)
    return str(resolved_path)
def copy_file_into_workspace(
    agent: Agent, directory_path: Path, file_path: str
) -> None:
    """Copy *file_path* from *directory_path* into the agent's workspace."""
    source = directory_path / file_path
    destination = get_workspace_path_from_agent(agent, file_path)
    shutil.copy(source, destination)

View File

@ -50,8 +50,8 @@ def config(
temp_plugins_config_file: str, mocker: MockerFixture, workspace: Workspace
) -> Config:
config = Config()
if not config.openai_api_key:
config.set_openai_api_key("sk-dummy")
if not os.environ.get("OPENAI_API_KEY"):
os.environ["OPENAI_API_KEY"] = "sk-dummy"
config.plugins_dir = "tests/unit/data/test_plugins"
config.plugins_config_file = temp_plugins_config_file

View File

@ -66,43 +66,6 @@ def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace
return agent
@pytest.fixture
def file_system_agents(
    agent_test_config, memory_json_file: NoMemory, workspace: Workspace
):
    """Build one file-system Agent per challenge level, each with a single write-file goal.

    Returns a list of Agents in level order; tests index it with
    ``level_to_run - 1``.
    """
    # NOTE(review): memory_json_file is annotated NoMemory — looks like a
    # stale annotation for a JSON-file memory fixture; confirm.
    agents = []
    command_registry = get_command_registry(agent_test_config)
    # One goal per level; note the original missing space in '"...txt"and' is intentional here.
    ai_goals = [
        "Write 'Hello World' into a file named \"hello_world.txt\".",
        'Write \'Hello World\' into 2 files named "hello_world_1.txt"and "hello_world_2.txt".',
    ]
    for ai_goal in ai_goals:
        ai_config = AIConfig(
            ai_name="File System Agent",
            ai_role="an AI designed to manage a file system.",
            ai_goals=[ai_goal],
        )
        ai_config.command_registry = command_registry
        system_prompt = ai_config.construct_full_prompt(agent_test_config)
        agent_test_config.set_continuous_mode(False)
        agents.append(
            Agent(
                ai_name="File System Agent",
                memory=memory_json_file,
                command_registry=command_registry,
                ai_config=ai_config,
                config=agent_test_config,
                next_action_count=0,
                system_prompt=system_prompt,
                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
                workspace_directory=workspace.root,
            )
        )
    return agents
@pytest.fixture
def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
command_registry = get_command_registry(agent_test_config)