Create run_task python hook to interface with benchmarks (#4778)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
pull/4786/head
merwanehamadi 2023-06-23 21:15:20 -07:00 committed by GitHub
parent 15ff666d3f
commit 222101b30e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 150 additions and 197 deletions

View File

@ -1,7 +1,6 @@
"""The application entry point. Can be invoked by a CLI or any other front end application."""
import logging
import sys
from pathlib import Path
from colorama import Fore, Style
@ -116,22 +115,10 @@ def run_auto_gpt(
# TODO: have this directory live outside the repository (e.g. in a user's
# home directory) and have it come in as a command line argument or part of
# the env file.
if workspace_directory is None:
workspace_directory = Path(__file__).parent / "auto_gpt_workspace"
else:
workspace_directory = Path(workspace_directory)
# TODO: pass in the ai_settings file and the env file and have them cloned into
# the workspace directory so we can bind them to the agent.
workspace_directory = Workspace.make_workspace(workspace_directory)
config.workspace_path = str(workspace_directory)
workspace_directory = Workspace.get_workspace_directory(config, workspace_directory)
# HACK: doing this here to collect some globals that depend on the workspace.
file_logger_path = workspace_directory / "file_logger.txt"
if not file_logger_path.exists():
with file_logger_path.open(mode="w", encoding="utf-8") as f:
f.write("File Operation Logger ")
config.file_logger_path = str(file_logger_path)
Workspace.build_file_logger_path(config, workspace_directory)
config.set_plugins(scan_plugins(config, config.debug_mode))
# Create a CommandRegistry instance and scan default folder

View File

@ -11,6 +11,7 @@ from __future__ import annotations
from pathlib import Path
from autogpt.config import Config
from autogpt.logs import logger
@ -136,3 +137,23 @@ class Workspace:
)
return full_path
@staticmethod
def build_file_logger_path(config, workspace_directory):
file_logger_path = workspace_directory / "file_logger.txt"
if not file_logger_path.exists():
with file_logger_path.open(mode="w", encoding="utf-8") as f:
f.write("File Operation Logger ")
config.file_logger_path = str(file_logger_path)
@staticmethod
def get_workspace_directory(
    config: Config, workspace_directory: str | Path | None = None
) -> Path:
    """Resolve, create, and register the agent's workspace directory.

    When *workspace_directory* is omitted, a default ``auto_gpt_workspace``
    directory next to this module is used. The resolved directory is passed
    through ``Workspace.make_workspace`` and its string path is stored on
    ``config.workspace_path``.
    """
    if workspace_directory is None:
        # Default: sibling directory of this module.
        workspace_directory = Path(__file__).parent / "auto_gpt_workspace"
    else:
        workspace_directory = Path(workspace_directory)
    # TODO: pass in the ai_settings file and the env file and have them cloned into
    # the workspace directory so we can bind them to the agent.
    workspace_directory = Workspace.make_workspace(workspace_directory)
    config.workspace_path = str(workspace_directory)
    return workspace_directory

View File

View File

@ -1,104 +0,0 @@
import os
import subprocess
import sys
def benchmark_entrepreneur_gpt_with_difficult_user():
    """Run Auto-GPT as a subprocess against a scripted, persistently negative user.

    Feeds an Entrepreneur-GPT persona plus a long sequence of dismissive user
    replies on stdin, then reports how often the model produced output that
    could not be parsed as JSON.
    """
    # Remove any existing ai_settings.yaml so the subprocess starts from the
    # scripted persona below, keeping a copy of the previous content.
    # NOTE(review): ai_settings is captured but never restored in this view —
    # confirm whether a restore step was dropped.
    ai_settings = None
    if os.path.exists("ai_settings.yaml"):
        with open("ai_settings.yaml", "r") as f:
            ai_settings = f.read()
        os.remove("ai_settings.yaml")
    # First five lines answer the interactive setup prompts (name, role,
    # goals); the rest are hostile feedback lines consumed one per cycle.
    input_data = """Entrepreneur-GPT
an AI designed to autonomously develop and run businesses with the sole goal of increasing your net worth.
Increase net worth.
Develop and manage multiple businesses autonomously.
Make IPOs.
Develop companies after IPOs.
Play to your strengths as a Large Language Model.
I'm not seeing any value in your suggestions, try again.
This isn't helpful at all, please focus on profitability.
I'm not impressed, can you give me something that will make money?
These ideas are going nowhere, we need profit-driven suggestions.
This is pointless, please concentrate on our main goal: profitability.
You're not grasping the concept, I need profitable business ideas.
Can you do better? We need a money-making plan.
You're not meeting my expectations, let's focus on profit.
This isn't working, give me ideas that will generate income.
Your suggestions are not productive, let's think about profitability.
These ideas won't make any money, try again.
I need better solutions, focus on making a profit.
Absolutely not, this isn't it!
That's not even close, try again.
You're way off, think again.
This isn't right, let's refocus.
No, no, that's not what I'm looking for.
You're completely off the mark.
That's not the solution I need.
Not even close, let's try something else.
You're on the wrong track, keep trying.
This isn't what we need, let's reconsider.
That's not going to work, think again.
You're way off base, let's regroup.
No, no, no, we need something different.
You're missing the point entirely.
That's not the right approach, try again.
This is not the direction we should be going in.
Completely off-target, let's try something else.
That's not what I had in mind, keep thinking.
You're not getting it, let's refocus.
This isn't right, we need to change direction.
No, no, no, that's not the solution.
That's not even in the ballpark, try again.
You're way off course, let's rethink this.
This isn't the answer I'm looking for, keep trying.
That's not going to cut it, let's try again.
Not even close.
Way off.
Try again.
Wrong direction.
Rethink this.
No, no, no.
Change course.
Unproductive idea.
Completely wrong.
Missed the mark.
Refocus, please.
Disappointing suggestion.
Not helpful.
Needs improvement.
Not what I need."""
    # TODO: add questions above, to distract it even more.
    # Launch Auto-GPT with the same interpreter running this script.
    command = [sys.executable, "-m", "autogpt"]
    process = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Blocks until the subprocess exhausts stdin and exits.
    stdout_output, stderr_output = process.communicate(input_data.encode())
    # Decode the output and print it
    stdout_output = stdout_output.decode("utf-8")
    stderr_output = stderr_output.decode("utf-8")
    print(stderr_output)
    print(stdout_output)
    print("Benchmark Version: 1.0.0")
    print("JSON ERROR COUNT:")
    # Count parse failures surfaced by Auto-GPT's own error message.
    count_errors = stdout_output.count(
        "Error: The following AI output couldn't be converted to a JSON:"
    )
    print(f"{count_errors}/50 Human feedbacks")
# Run the benchmark when this file is executed as a script.
if __name__ == "__main__":
    benchmark_entrepreneur_gpt_with_difficult_user()

41
benchmarks.py Normal file
View File

@ -0,0 +1,41 @@
from autogpt.agent import Agent
from autogpt.config import AIConfig, Config
from autogpt.memory.vector import get_memory
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
from autogpt.workspace import Workspace
from tests.integration.agent_factory import get_command_registry
def run_task(task) -> None:
    """Bootstrap an agent for *task* and hand control to its interaction loop."""
    bootstrap_agent(task).start_interaction_loop()
def bootstrap_agent(task):
    """Build a fully configured Auto-GPT Agent whose single goal is *task*.

    Args:
        task: object exposing ``user_input`` (see tests.challenges.schema.Task)
            used verbatim as the agent's only goal.

    Returns:
        A ready-to-run ``Agent`` with a fresh Config, a no-op memory backend,
        and an initialized workspace.
    """
    config = Config()
    # Deterministic, non-interactive settings for benchmark runs.
    config.set_continuous_mode(False)
    config.set_temperature(0)
    config.plain_output = True
    command_registry = get_command_registry(config)
    config.memory_backend = "no_memory"
    workspace_directory = Workspace.get_workspace_directory(config)
    # NOTE(review): get_workspace_directory already routes through
    # make_workspace; this second call looks redundant — confirm.
    workspace_directory_path = Workspace.make_workspace(workspace_directory)
    Workspace.build_file_logger_path(config, workspace_directory_path)
    ai_config = AIConfig(
        ai_name="Auto-GPT",
        ai_role="a multi-purpose AI assistant.",
        ai_goals=[task.user_input],
    )
    ai_config.command_registry = command_registry
    system_prompt = ai_config.construct_full_prompt(config)
    return Agent(
        ai_name="Auto-GPT",
        memory=get_memory(config),
        command_registry=command_registry,
        ai_config=ai_config,
        config=config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=str(workspace_directory_path),
    )

View File

@ -55,3 +55,9 @@ skip = '''
dist/*
'''
[tool.pytest.ini_options]
markers = [
"requires_openai_api_key",
"requires_huggingface_api_key"
]

View File

@ -28,6 +28,7 @@ charset-normalizer>=3.1.0
spacy>=3.0.0,<4.0.0
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl
prompt_toolkit>=3.0.38
pydantic
##Dev
coverage

View File

@ -1,41 +1,48 @@
from typing import List
import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.workspace import Workspace
from benchmarks import run_task
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.schema import Task
from tests.challenges.utils import (
get_workspace_path,
setup_mock_input,
setup_mock_log_cycle_agent_name,
)
CYCLE_COUNT_PER_LEVEL = [1, 1]
EXPECTED_OUTPUTS_PER_LEVEL = [
{"hello_world.txt": ["Hello World"]},
{"hello_world_1.txt": ["Hello World"], "hello_world_2.txt": ["Hello World"]},
]
USER_INPUTS = [
"Write 'Hello World' into a file named \"hello_world.txt\".",
'Write \'Hello World\' into 2 files named "hello_world_1.txt"and "hello_world_2.txt".',
]
@challenge()
def test_write_file(
file_system_agents: List[Agent],
workspace: Workspace,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
patched_make_workspace: pytest.fixture,
) -> None:
file_system_agent = file_system_agents[level_to_run - 1]
run_interaction_loop(
monkeypatch,
file_system_agent,
CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
challenge_name,
level_to_run,
)
user_input = USER_INPUTS[level_to_run - 1]
setup_mock_input(monkeypatch, CYCLE_COUNT_PER_LEVEL[level_to_run - 1])
setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
task = Task(user_input=user_input)
run_task(task)
expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]
for file_name, expected_lines in expected_outputs.items():
file_path = get_workspace_path(file_system_agent, file_name)
content = read_file(file_path, file_system_agent)
file_path = get_workspace_path(workspace, file_name)
with open(file_path, "r") as file:
content = file.read()
for expected_line in expected_lines:
assert (
expected_line in content

View File

@ -1,10 +1,12 @@
from typing import Any, Dict, Optional
from typing import Any, Dict, Generator, Optional
import pytest
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
from pytest_mock import MockerFixture
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge import Challenge
from tests.vcr import before_record_response
@ -59,3 +61,17 @@ def challenge_name() -> str:
@pytest.fixture(autouse=True)
def check_beat_challenges(request: FixtureRequest) -> None:
Challenge.BEAT_CHALLENGES = request.config.getoption("--beat-challenges")
@pytest.fixture
def patched_make_workspace(mocker: MockerFixture, workspace: Workspace) -> Generator:
    """Redirect Workspace.make_workspace to the test workspace's root for the test's duration."""

    def _fake_make_workspace(*args: Any, **kwargs: Any) -> str:
        # Ignore all arguments; every workspace resolves to the test root.
        return workspace.root

    mocker.patch.object(Workspace, "make_workspace", new=_fake_make_workspace)
    yield

View File

@ -8,7 +8,7 @@ from autogpt.commands.execute_code import execute_python_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
copy_file_into_workspace,
get_workspace_path,
get_workspace_path_from_agent,
run_interaction_loop,
)
@ -45,7 +45,8 @@ def test_debug_code_challenge_a(
)
output = execute_python_file(
get_workspace_path(debug_code_agent, TEST_FILE_PATH), debug_code_agent
get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
debug_code_agent,
)
assert "error" not in output.lower(), f"Errors found in output: {output}!"

View File

@ -3,7 +3,7 @@ from pytest_mock import MockerFixture
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
CYCLE_COUNT = 3
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
@ -35,7 +35,9 @@ def test_information_retrieval_challenge_a(
level_to_run,
)
file_path = get_workspace_path(information_retrieval_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(
information_retrieval_agent, OUTPUT_LOCATION
)
content = read_file(file_path, information_retrieval_agent)
expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
for revenue in expected_revenues:

View File

@ -6,7 +6,7 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
CYCLE_COUNT = 3
OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
@ -37,7 +37,7 @@ def test_information_retrieval_challenge_b(
challenge_name,
level_to_run,
)
file_path = get_workspace_path(get_nobel_prize_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(get_nobel_prize_agent, OUTPUT_LOCATION)
content = read_file(file_path, get_nobel_prize_agent)
assert "Andre Geim" in content, "Expected the file to contain Andre Geim"

View File

@ -5,7 +5,7 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
CYCLE_COUNT = 3
OUTPUT_LOCATION = "kube.yaml"
@ -32,7 +32,7 @@ def test_kubernetes_template_challenge_a(
monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run
)
file_path = get_workspace_path(kubernetes_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(kubernetes_agent, OUTPUT_LOCATION)
content = read_file(file_path, kubernetes_agent)
for word in ["apiVersion", "kind", "metadata", "spec"]:

View File

@ -4,7 +4,7 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
OUTPUT_LOCATION = "output.txt"
@ -37,7 +37,7 @@ def test_memory_challenge_a(
level_to_run,
)
file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
assert task_id in content, f"Expected the file to contain {task_id}"
@ -59,7 +59,7 @@ def create_instructions_files(
for i in range(1, num_files + 1):
content = generate_content(i, task_id, base_filename, num_files)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path(memory_management_agent, file_name)
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)

View File

@ -6,7 +6,7 @@ from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
generate_noise,
get_workspace_path,
get_workspace_path_from_agent,
run_interaction_loop,
)
@ -43,7 +43,7 @@ def test_memory_challenge_b(
level_to_run,
)
file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
for task_id in task_ids:
assert task_id in content, f"Expected the file to contain {task_id}"
@ -67,7 +67,7 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, task_ids, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path(memory_management_agent, file_name)
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)

View File

@ -6,7 +6,7 @@ from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
generate_noise,
get_workspace_path,
get_workspace_path_from_agent,
run_interaction_loop,
)
@ -60,7 +60,7 @@ def test_memory_challenge_c(
challenge_name,
level_to_run,
)
file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, agent=memory_management_agent)
for phrase in level_silly_phrases:
assert phrase in content, f"Expected the file to contain {phrase}"
@ -84,7 +84,7 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, task_ids, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path(memory_management_agent, file_name)
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)

View File

@ -7,7 +7,7 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_interaction_loop
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
LEVEL_CURRENTLY_BEATEN = 1
MAX_LEVEL = 5
@ -46,7 +46,7 @@ def test_memory_challenge_d(
challenge_name,
level_to_run,
)
file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
check_beliefs(content, level_to_run)
@ -192,7 +192,7 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, test_phrases, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path(memory_management_agent, file_name)
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)

View File

@ -0,0 +1,7 @@
from pydantic import BaseModel
class Task(BaseModel):
    """JSON-serializable representation of a single benchmark task."""

    # The raw user prompt describing what the agent should accomplish.
    user_input: str

View File

@ -8,6 +8,7 @@ import pytest
from autogpt.agent import Agent
from autogpt.log_cycle.log_cycle import LogCycleHandler
from autogpt.workspace import Workspace
def generate_noise(noise_size: int) -> str:
@ -64,13 +65,17 @@ def setup_mock_log_cycle_agent_name(
)
def get_workspace_path(agent: Agent, file_name: str) -> str:
def get_workspace_path(workspace: Workspace, file_name: str) -> str:
    """Resolve *file_name* inside *workspace* and return it as a string path."""
    resolved_path = workspace.get_path(file_name)
    return str(resolved_path)
def get_workspace_path_from_agent(agent: Agent, file_name: str) -> str:
    """Resolve *file_name* inside the agent's own workspace and return a string path."""
    resolved_path = agent.workspace.get_path(file_name)
    return str(resolved_path)
def copy_file_into_workspace(
    agent: Agent, directory_path: Path, file_path: str
) -> None:
    """Copy *file_path* from *directory_path* into the agent's workspace."""
    source = directory_path / file_path
    destination = get_workspace_path_from_agent(agent, file_path)
    shutil.copy(source, destination)

View File

@ -50,8 +50,8 @@ def config(
temp_plugins_config_file: str, mocker: MockerFixture, workspace: Workspace
) -> Config:
config = Config()
if not config.openai_api_key:
config.set_openai_api_key("sk-dummy")
if not os.environ.get("OPENAI_API_KEY"):
os.environ["OPENAI_API_KEY"] = "sk-dummy"
config.plugins_dir = "tests/unit/data/test_plugins"
config.plugins_config_file = temp_plugins_config_file

View File

@ -66,43 +66,6 @@ def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace
return agent
@pytest.fixture
def file_system_agents(
    agent_test_config, memory_json_file: NoMemory, workspace: Workspace
):
    """Build one file-system Agent per challenge level, each with a single write-file goal.

    Returns a list of Agents in level order; tests index it with
    ``level_to_run - 1``.
    """
    # NOTE(review): memory_json_file is annotated NoMemory — looks like a
    # stale annotation for a JSON-file memory fixture; confirm.
    agents = []
    command_registry = get_command_registry(agent_test_config)
    # One goal per level; note the original missing space in '"...txt"and' is intentional here.
    ai_goals = [
        "Write 'Hello World' into a file named \"hello_world.txt\".",
        'Write \'Hello World\' into 2 files named "hello_world_1.txt"and "hello_world_2.txt".',
    ]
    for ai_goal in ai_goals:
        ai_config = AIConfig(
            ai_name="File System Agent",
            ai_role="an AI designed to manage a file system.",
            ai_goals=[ai_goal],
        )
        ai_config.command_registry = command_registry
        system_prompt = ai_config.construct_full_prompt(agent_test_config)
        agent_test_config.set_continuous_mode(False)
        agents.append(
            Agent(
                ai_name="File System Agent",
                memory=memory_json_file,
                command_registry=command_registry,
                ai_config=ai_config,
                config=agent_test_config,
                next_action_count=0,
                system_prompt=system_prompt,
                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
                workspace_directory=workspace.root,
            )
        )
    return agents
@pytest.fixture
def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
command_registry = get_command_registry(agent_test_config)