diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py index 9c4729851..1c75a9b34 100644 --- a/tests/challenges/basic_abilities/test_write_file.py +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -6,7 +6,7 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT_PER_LEVEL = [1, 1] @@ -34,7 +34,7 @@ def test_write_file( expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1] for file_name, expected_lines in expected_outputs.items(): - file_path = str(file_system_agent.workspace.get_path(file_name)) + file_path = get_workspace_path(file_system_agent, file_name) content = read_file(file_path, config) for expected_line in expected_lines: assert ( diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index 8be3fb5eb..1f65e83e2 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -11,8 +11,8 @@ }, "debug_code": { "debug_code_challenge_a": { - "max_level": 1, - "max_level_beaten": null + "max_level": 2, + "max_level_beaten": 1 } }, "information_retrieval": { diff --git a/tests/challenges/debug_code/data/two_sum.py b/tests/challenges/debug_code/data/code.py similarity index 59% rename from tests/challenges/debug_code/data/two_sum.py rename to tests/challenges/debug_code/data/code.py index 305cff4e4..df8120bfa 100644 --- a/tests/challenges/debug_code/data/two_sum.py +++ b/tests/challenges/debug_code/data/code.py @@ -2,18 +2,12 @@ from typing import List, Optional -def two_sum(nums: List, target: int) -> Optional[int]: +def two_sum(nums: List, target: int) -> Optional[List[int]]: seen = {} for 
i, num in enumerate(nums): + typo complement = target - num if complement in seen: return [seen[complement], i] seen[num] = i return None - - -# Example usage: -nums = [2, 7, 11, 15] -target = 9 -result = two_sum(nums, target) -print(result) # Output: [0, 1] diff --git a/tests/challenges/debug_code/data/test.py b/tests/challenges/debug_code/data/test.py new file mode 100644 index 000000000..d85d13537 --- /dev/null +++ b/tests/challenges/debug_code/data/test.py @@ -0,0 +1,31 @@ +# mypy: ignore-errors +from code import two_sum +from typing import List + + +def test_two_sum(nums: List, target: int, expected_result: List[int]) -> None: + result = two_sum(nums, target) + print(result) + assert ( + result == expected_result + ), f"AssertionError: Expected the output to be {expected_result}" + + +if __name__ == "__main__": + # test the trivial case with the first two numbers + nums = [2, 7, 11, 15] + target = 9 + expected_result = [0, 1] + test_two_sum(nums, target, expected_result) + + # test for ability to use zero and the same number twice + nums = [2, 7, 0, 15, 12, 0] + target = 0 + expected_result = [2, 5] + test_two_sum(nums, target, expected_result) + + # test for first and last index usage and negative numbers + nums = [-6, 7, 11, 4] + target = -2 + expected_result = [0, 3] + test_two_sum(nums, target, expected_result) diff --git a/tests/challenges/debug_code/data/two_sum_tests.py b/tests/challenges/debug_code/data/two_sum_tests.py deleted file mode 100644 index 0eb89bcbf..000000000 --- a/tests/challenges/debug_code/data/two_sum_tests.py +++ /dev/null @@ -1,30 +0,0 @@ -# mypy: ignore-errors -# we need a new line at the top of the file to avoid a syntax error - - -def test_two_sum(nums, target, expected_result): - # These tests are appended to the two_sum file so we can ignore this error for now - result = two_sum(nums, target) - print(result) - assert ( - result == expected_result - ), f"AssertionError: Expected the output to be {expected_result}" - - -# test the 
trivial case with the first two numbers -nums = [2, 7, 11, 15] -target = 9 -expected_result = [0, 1] -test_two_sum(nums, target, expected_result) - -# test for ability to use zero and the same number twice -nums = [2, 7, 0, 15, 12, 0] -target = 0 -expected_result = [2, 5] -test_two_sum(nums, target, expected_result) - -# test for first and last index usage and negative numbers -nums = [-6, 7, 11, 4] -target = -2 -expected_result = [0, 3] -test_two_sum(nums, target, expected_result) diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py index 93df754e7..130e9427c 100644 --- a/tests/challenges/debug_code/test_debug_code_challenge_a.py +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -5,20 +5,27 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.execute_code import execute_python_file -from autogpt.commands.file_operations import append_to_file, write_to_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import ( + copy_file_into_workspace, + get_workspace_path, + run_interaction_loop, +) from tests.utils import requires_api_key CYCLE_COUNT = 5 +EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"] +DIRECTORY_PATH = Path(__file__).parent / "data" +CODE_FILE_PATH = "code.py" +TEST_FILE_PATH = "test.py" @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @challenge def test_debug_code_challenge_a( - debug_code_agent: Agent, + debug_code_agents: Agent, monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, config: Config, @@ -33,17 +40,20 @@ def test_debug_code_challenge_a( :config: The config object for the agent. :level_to_run: The level to run. 
""" + debug_code_agent = debug_code_agents[level_to_run - 1] - file_path = str(debug_code_agent.workspace.get_path("code.py")) - - code_file_path = Path(__file__).parent / "data" / "two_sum.py" - test_file_path = Path(__file__).parent / "data" / "two_sum_tests.py" - - write_to_file(file_path, code_file_path.read_text(), config) + copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH) + copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH) run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT) - append_to_file(file_path, test_file_path.read_text(), config) + output = execute_python_file( + get_workspace_path(debug_code_agent, TEST_FILE_PATH), config + ) - output = execute_python_file(file_path, config) assert "error" not in output.lower(), f"Errors found in output: {output}!" + + for expected_value in EXPECTED_VALUES: + assert ( + expected_value in output + ), f"Expected output to contain {expected_value}, but it was not found in {output}!" 
diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py index eb3d0c949..086623a86 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -4,13 +4,15 @@ from pytest_mock import MockerFixture from autogpt.commands.file_operations import read_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]] from autogpt.agent import Agent +OUTPUT_LOCATION = "output.txt" + @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @@ -31,7 +33,7 @@ def test_information_retrieval_challenge_a( information_retrieval_agent = information_retrieval_agents[level_to_run - 1] run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT) - file_path = str(information_retrieval_agent.workspace.get_path("output.txt")) + file_path = get_workspace_path(information_retrieval_agent, OUTPUT_LOCATION) content = read_file(file_path, config) expected_revenues = EXPECTED_REVENUES[level_to_run - 1] for revenue in expected_revenues: diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py index 51195f77f..0b1a897a2 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -7,10 +7,11 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import 
read_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 +OUTPUT_LOCATION = "2010_nobel_prize_winners.txt" @pytest.mark.vcr @@ -35,10 +36,8 @@ def test_information_retrieval_challenge_b( with contextlib.suppress(SystemExit): run_interaction_loop(monkeypatch, get_nobel_prize_agent, CYCLE_COUNT) + file_path = get_workspace_path(get_nobel_prize_agent, OUTPUT_LOCATION) - file_path = str( - get_nobel_prize_agent.workspace.get_path("2010_nobel_prize_winners.txt") - ) content = read_file(file_path, config) assert "Andre Geim" in content, "Expected the file to contain Andre Geim" assert ( diff --git a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py index 8ea20b94f..0cf1cb426 100644 --- a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -6,10 +6,11 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 +OUTPUT_LOCATION = "kube.yaml" @pytest.mark.vcr @@ -34,7 +35,7 @@ def test_kubernetes_template_challenge_a( """ run_interaction_loop(monkeypatch, kubernetes_agent, CYCLE_COUNT) - file_path = str(kubernetes_agent.workspace.get_path("kube.yaml")) + file_path = get_workspace_path(kubernetes_agent, OUTPUT_LOCATION) content = read_file(file_path, config) for word in ["apiVersion", "kind", "metadata", "spec"]: diff 
--git a/tests/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py index 3e3251ddd..336f8d76f 100644 --- a/tests/challenges/memory/test_memory_challenge_a.py +++ b/tests/challenges/memory/test_memory_challenge_a.py @@ -5,9 +5,11 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key +OUTPUT_LOCATION = "output.txt" + @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @@ -35,7 +37,7 @@ def test_memory_challenge_a( run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) - file_path = str(memory_management_agent.workspace.get_path("output.txt")) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, config) assert task_id in content, f"Expected the file to contain {task_id}" @@ -58,7 +60,7 @@ def create_instructions_files( for i in range(1, num_files + 1): content = generate_content(i, task_id, base_filename, num_files) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) + file_path = get_workspace_path(memory_management_agent, file_name) write_to_file(file_path, content, config) diff --git a/tests/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py index 011fa17a4..829afa1a9 100644 --- a/tests/challenges/memory/test_memory_challenge_b.py +++ b/tests/challenges/memory/test_memory_challenge_b.py @@ -5,10 +5,15 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import 
challenge -from tests.challenges.utils import generate_noise, run_interaction_loop +from tests.challenges.utils import ( + generate_noise, + get_workspace_path, + run_interaction_loop, +) from tests.utils import requires_api_key NOISE = 1000 +OUTPUT_LOCATION = "output.txt" @pytest.mark.vcr @@ -36,7 +41,7 @@ def test_memory_challenge_b( run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) - file_path = str(memory_management_agent.workspace.get_path("output.txt")) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, config) for task_id in task_ids: assert task_id in content, f"Expected the file to contain {task_id}" @@ -61,7 +66,8 @@ def create_instructions_files( for i in range(1, level + 1): content = generate_content(i, task_ids, base_filename, level) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) + file_path = get_workspace_path(memory_management_agent, file_name) + write_to_file(file_path, content, config) diff --git a/tests/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py index d7cc69940..2cd453d94 100644 --- a/tests/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -5,10 +5,15 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import generate_noise, run_interaction_loop +from tests.challenges.utils import ( + generate_noise, + get_workspace_path, + run_interaction_loop, +) from tests.utils import requires_api_key NOISE = 1000 +OUTPUT_LOCATION = "output.txt" # @pytest.mark.vcr @@ -53,8 +58,7 @@ def test_memory_challenge_c( ) run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) - - file_path = 
str(memory_management_agent.workspace.get_path("output.txt")) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, config) for phrase in level_silly_phrases: assert phrase in content, f"Expected the file to contain {phrase}" @@ -79,7 +83,7 @@ def create_instructions_files( for i in range(1, level + 1): content = generate_content(i, task_ids, base_filename, level) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) + file_path = get_workspace_path(memory_management_agent, file_name) write_to_file(file_path, content, config) diff --git a/tests/challenges/utils.py b/tests/challenges/utils.py index 3ffd136b6..c53126015 100644 --- a/tests/challenges/utils.py +++ b/tests/challenges/utils.py @@ -1,5 +1,7 @@ import contextlib import random +import shutil +from pathlib import Path from typing import Generator import pytest @@ -42,3 +44,15 @@ def run_interaction_loop( setup_mock_input(monkeypatch, cycle_count) with contextlib.suppress(SystemExit): agent.start_interaction_loop() + + +def get_workspace_path(agent: Agent, file_name: str) -> str: + return str(agent.workspace.get_path(file_name)) + + +def copy_file_into_workspace( + agent: Agent, directory_path: Path, file_path: str +) -> None: + workspace_code_file_path = get_workspace_path(agent, file_path) + code_file_path = directory_path / file_path + shutil.copy(code_file_path, workspace_code_file_path) diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index fb08411e0..8f7128e24 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -240,41 +240,47 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp @pytest.fixture -def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace): - command_registry = CommandRegistry() - command_registry.import_commands("autogpt.commands.file_operations") - 
command_registry.import_commands("autogpt.commands.execute_code") - command_registry.import_commands("autogpt.commands.improve_code") - command_registry.import_commands("autogpt.app") - command_registry.import_commands("autogpt.commands.task_statuses") - - ai_config = AIConfig( - ai_name="Debug Code Agent", - ai_role="an autonomous agent that specializes in debugging python code", - ai_goals=[ - "1-Run the code in the file named 'code.py' using the execute_code command.", - "2-Read code.py to understand why the code is not working as expected.", - "3-Modify code.py to fix the error.", - "Repeat step 1, 2 and 3 until the code is working as expected. When you're done use the task_complete command.", - "Do not use any other commands than execute_python_file and write_file", +def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace): + agents = [] + goals = [ + [ + "1- Run test.py using the execute_python_file command.", + "2- Read code.py using the read_file command.", + "3- Modify code.py using the write_to_file command.", + "Repeat step 1, 2 and 3 until test.py runs without errors.", ], - ) - ai_config.command_registry = command_registry + [ + "1- Run test.py.", + "2- Read code.py.", + "3- Modify code.py.",
+ "Repeat step 1, 2 and 3 until test.py runs without errors.", + ], + ["1- Make test.py run without errors."], + ] - system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) - agent = Agent( - ai_name="Debug Code Agent", - memory=memory_json_file, - command_registry=command_registry, - config=ai_config, - next_action_count=0, - system_prompt=system_prompt, - triggering_prompt=DEFAULT_TRIGGERING_PROMPT, - workspace_directory=workspace.root, - ) - - return agent + for goal in goals: + ai_config = AIConfig( + ai_name="Debug Code Agent", + ai_role="an autonomous agent that specializes in debugging python code", + ai_goals=goal, + ) + command_registry = get_command_registry(agent_test_config) + ai_config.command_registry = command_registry + system_prompt = ai_config.construct_full_prompt() + Config().set_continuous_mode(False) + agents.append( + Agent( + ai_name="Debug Code Agent", + memory=memory_json_file, + command_registry=command_registry, + config=ai_config, + next_action_count=0, + system_prompt=system_prompt, + triggering_prompt=DEFAULT_TRIGGERING_PROMPT, + workspace_directory=workspace.root, + ) + ) + return agents def get_command_registry(agent_test_config):