Integrate one challenge to auto gpt (#44)

pull/5155/head
merwanehamadi 2023-07-02 07:38:30 -07:00 committed by GitHub
parent 0f33416b0e
commit 2062844fa6
12 changed files with 165 additions and 75 deletions

.github/workflows/autogpt.yml (new file, 62 lines)

@@ -0,0 +1,62 @@
name: Auto-GPT Regression Test
on:
workflow_dispatch:
jobs:
regression-tests:
permissions:
pull-requests: write
contents: write
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
matrix:
python-version: ["3.10"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
submodules: true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- id: get_date
name: Get date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python -
- name: Set up Poetry cache
uses: actions/cache@v2
with:
path: |
~/.cache/pypoetry
.venv
key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }}
- name: Set up venv and install Python dependencies
run: |
python -m venv venv
source venv/bin/activate
poetry install
- name: Build project
run: |
source venv/bin/activate
poetry build
cd agent/Auto-GPT
pip install -r requirements.txt
pip install ../../dist/agbenchmark-0.1.0-py3-none-any.whl
agbenchmark start --reg
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
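
The final step builds the local agbenchmark wheel, installs it inside the Auto-GPT submodule, and runs only the recorded regression tests via the new --reg flag. For local debugging, roughly the same invocation can be reproduced with click's test runner; the sketch below assumes agbenchmark is installed (for example from the built wheel) and is run from the agent directory, mirroring the workflow's `cd agent/Auto-GPT`.

# Sketch: reproduce the CI step `agbenchmark start --reg` from Python, assuming
# agbenchmark is installed and the current working directory is the agent repo.
from click.testing import CliRunner

from agbenchmark.start_benchmark import cli

runner = CliRunner()
result = runner.invoke(cli, ["start", "--reg"])  # add "--mock" to exercise the mock path
print(result.output)
print("exit code:", result.exit_code)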

.gitignore (4 changed lines)

@@ -157,4 +157,6 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
.DS_Store

.gitmodules (new file, 4 lines)

@@ -0,0 +1,4 @@
[submodule "Auto-GPT"]
path = agent/Auto-GPT
url = https://github.com/Significant-Gravitas/Auto-GPT.git
branch = benchmark-integration
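
The CI checkout pulls this in automatically via `submodules: true`; when working locally the pinned agent has to be fetched explicitly. A small, hypothetical helper using plain git commands, nothing agbenchmark-specific:

# Hypothetical local helper: fetch the agent/Auto-GPT submodule pinned by
# .gitmodules, mirroring what `submodules: true` does in the checkout action.
import subprocess

def init_agent_submodule() -> None:
    subprocess.run(
        ["git", "submodule", "update", "--init", "agent/Auto-GPT"],
        check=True,
    )

if __name__ == "__main__":
    init_agent_submodule()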

agbenchmark/agent_interface.py

@@ -1,9 +1,10 @@
import os
import importlib
import time
from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe
from agbenchmark.mocks.MockManager import MockManager
import os
import sys
import subprocess
import time
from dotenv import load_dotenv
load_dotenv()
@@ -26,45 +27,44 @@ def run_agent(task, mock_func, config):
timeout = config["cutoff"]
print(f"Running Python function '{config['func_path']}' with timeout {timeout}")
parent_conn, child_conn = Pipe()
# Get the current working directory
cwd = os.getcwd()
# Add current directory to Python's import path
sys.path.append(cwd)
# Import the specific agent dynamically
module_name = config["func_path"].replace("/", ".").rstrip(".py")
module = importlib.import_module(module_name)
run_specific_agent = getattr(module, "run_specific_agent")
process = Process(target=run_specific_agent, args=(task, child_conn))
process.start()
command = [sys.executable, "benchmarks.py", str(task)]
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, cwd=cwd)
start_time = time.time()
timeout = config["cutoff"]
while True:
if (
parent_conn.poll()
): # Check if there's a new message from the child process
response, cycle_count = parent_conn.recv()
print(f"Cycle {cycle_count}: {response}")
output = process.stdout.readline()
print(output.strip())
if cycle_count >= config["cutoff"]:
print(
f"Cycle count has reached the limit of {config['cutoff']}. Terminating."
)
child_conn.send("terminate")
break
if time.time() - start_time > timeout:
print(
"The Python function has exceeded the time limit and was terminated."
)
child_conn.send(
"terminate"
) # Send a termination signal to the child process
break
if not process.is_alive():
# Check if process has ended
if process.poll() is not None:
print("The Python function has finished running.")
break
process.join()
# Check if process has exceeded timeout
if time.time() - start_time > timeout:
print("The Python function has exceeded the time limit and was terminated.")
process.terminate()
break
# Optional: sleep for a while
time.sleep(0.1)
# Wait for process to terminate, then get return code
process.wait()
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
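
run_agent no longer imports the agent in-process; it now shells out to `python benchmarks.py "<task>"` in the current working directory and relays the child's stdout until it finishes or hits the cutoff. The agent repository is therefore expected to ship a small entrypoint along these lines (a sketch; only the CLI contract is implied by this commit, the body is hypothetical):

# Sketch of the agent-side benchmarks.py entrypoint that run_agent invokes as a
# subprocess. The contract implied above: take the task as the first CLI
# argument and print progress to stdout. The function body is hypothetical.
import sys

def run_specific_agent(task: str) -> None:
    # Replace with the real agent invocation (e.g. starting Auto-GPT with
    # `task` as its objective); anything printed here is echoed by run_agent.
    print(f"Running agent on task: {task}")

if __name__ == "__main__":
    run_specific_agent(sys.argv[1])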

agbenchmark/config.json

@@ -1,5 +1,5 @@
{
"workspace": "C:\\Users\\silen\\miniagi",
"func_path": "agent/benchmarks.py",
"workspace": "autogpt/workspace/auto_gpt_workspace",
"func_path": "benchmarks.py",
"cutoff": 60
}

agbenchmark/conftest.py

@@ -1,15 +1,18 @@
import json
import os
from pathlib import Path
import pytest
import shutil
from agbenchmark.tests.regression.RegressionManager import RegressionManager
from agbenchmark.start_benchmark import CONFIG_PATH, REGRESSION_TESTS_PATH
@pytest.fixture(scope="module")
def config(request):
config_file = os.path.abspath("agbenchmark/config.json")
print(f"Config file: {config_file}")
with open(config_file, "r") as f:
print(f"Config file: {CONFIG_PATH}")
with open(CONFIG_PATH, "r") as f:
config = json.load(f)
if request.config.getoption("--mock"):
@@ -36,10 +39,7 @@ def workspace(config):
def pytest_addoption(parser):
parser.addoption("--mock", action="store_true", default=False)
regression_json = "agbenchmark/tests/regression/regression_tests.json"
regression_manager = RegressionManager(regression_json)
regression_manager = RegressionManager(REGRESSION_TESTS_PATH)
# this is to get the challenge_data from every test
@@ -53,13 +53,16 @@ def pytest_runtest_makereport(item, call):
challenge_data = item.funcargs.get("challenge_data", None)
difficulty = challenge_data.info.difficulty if challenge_data else "unknown"
dependencies = challenge_data.dependencies if challenge_data else []
parts = item.nodeid.split("::")[0].split("/")
agbenchmark_index = parts.index("agbenchmark")
file_path = "/".join(parts[agbenchmark_index:])
test_details = {
"difficulty": difficulty,
"dependencies": dependencies,
"test": item.nodeid,
"test": file_path,
}
print("pytest_runtest_makereport", test_details)
if call.excinfo is None:
regression_manager.add_test(item.nodeid.split("::")[1], test_details)
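
The report hook now records a path relative to the agbenchmark package instead of the raw pytest nodeid, so entries in regression_tests.json stay valid no matter where the suite is launched from. An illustration of the transformation (the nodeid value is an example, not output from a real run):

# Illustration of the nodeid -> file_path conversion performed above.
nodeid = "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method"

parts = nodeid.split("::")[0].split("/")
file_path = "/".join(parts[parts.index("agbenchmark"):])

print(file_path)              # agbenchmark/tests/basic_abilities/write_file/write_file_test.py
print(nodeid.split("::")[1])  # TestWriteFile -- the key passed to regression_manager.add_test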

agbenchmark/start_benchmark.py

@@ -7,6 +7,13 @@ from dotenv import load_dotenv, set_key
load_dotenv()
CURRENT_DIRECTORY = Path(__file__).resolve().parent
new_path = CURRENT_DIRECTORY / "config.json"
CONFIG_PATH = str(new_path.resolve())
REGRESSION_TESTS_PATH = str(Path(os.getcwd()) / "regression_tests.json")
@click.group()
def cli():
@@ -15,16 +22,12 @@ def cli():
@cli.command()
@click.option("--category", default=None, help="Specific category to run")
@click.option("--noreg", is_flag=True, help="Skip regression tests")
@click.option("--reg", is_flag=True, help="Runs only regression tests")
@click.option("--mock", is_flag=True, help="Run with mock")
def start(category, noreg, mock):
def start(category, reg, mock):
"""Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
config_file = "agbenchmark/config.json"
config_dir = os.path.abspath(config_file)
# Check if configuration file exists and is not empty
if not os.path.exists(config_dir) or os.stat(config_dir).st_size == 0:
if not os.path.exists(CONFIG_PATH) or os.stat(CONFIG_PATH).st_size == 0:
config = {}
config["workspace"] = click.prompt(
@@ -42,11 +45,11 @@ def start(category, noreg, mock):
default="60",
)
with open(config_dir, "w") as f:
with open(CONFIG_PATH, "w") as f:
json.dump(config, f)
else:
# If the configuration file exists and is not empty, load it
with open(config_dir, "r") as f:
with open(CONFIG_PATH, "r") as f:
config = json.load(f)
set_key(".env", "MOCK_TEST", "True" if mock else "False")
@@ -58,11 +61,9 @@ def start(category, noreg, mock):
if not os.path.exists(workspace_path):
os.makedirs(workspace_path, exist_ok=True)
regression_path = os.path.abspath(
"agbenchmark/tests/regression/regression_tests.json"
)
if not os.path.exists(regression_path):
with open(regression_path, "a"):
if not os.path.exists(REGRESSION_TESTS_PATH):
with open(REGRESSION_TESTS_PATH, "a"):
pass
print("Current configuration:")
@@ -70,31 +71,40 @@ def start(category, noreg, mock):
print(f"{key}: {value}")
print("Starting benchmark tests...", category)
pytest_args = ["agbenchmark", "-vs"]
tests_to_run = []
pytest_args = ["-vs"]
if category:
pytest_args.extend(
["-m", category]
) # run categorys that are of a specific marker
if noreg:
pytest_args.extend(
["-k", "not regression"]
) # run categorys that are of a specific marker but don't include regression categorys
print(f"Running {'non-regression' + category if noreg else category} categorys")
)
else:
if noreg:
print("Running all non-regression categorys")
pytest_args.extend(
["-k", "not regression"]
) # run categorys that are not regression categorys
if reg:
print("Running all regression tests")
tests_to_run = get_regression_tests()
else:
print("Running all categorys") # run all categorys
print("Running all categories")
if mock:
pytest_args.append("--mock")
# Run pytest with the constructed arguments
if not tests_to_run:
tests_to_run = [str(CURRENT_DIRECTORY)]
pytest_args.extend(tests_to_run)
pytest.main(pytest_args)
def get_regression_tests():
if not Path(REGRESSION_TESTS_PATH).exists():
with open(REGRESSION_TESTS_PATH, 'w') as file:
json.dump({}, file)
with open(REGRESSION_TESTS_PATH, 'r') as file:
data = json.load(file)
regression_tests = [str(CURRENT_DIRECTORY / ".." / value['test']) for key, value in data.items()]
return regression_tests
if __name__ == "__main__":
start()
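
The paths handed to pytest in regression mode are built by get_regression_tests() from regression_tests.json, resolved relative to the installed package directory. A sketch of that resolution for the single entry this commit adds (the install location is illustrative, not a real path):

# Sketch of how get_regression_tests() resolves regression_tests.json entries
# into pytest targets. The JSON mirrors the file added by this commit; the
# CURRENT_DIRECTORY value is an illustrative install location.
from pathlib import Path

CURRENT_DIRECTORY = Path("/site-packages/agbenchmark")
data = {
    "TestWriteFile": {
        "difficulty": "basic",
        "dependencies": [],
        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py",
    }
}

regression_tests = [str(CURRENT_DIRECTORY / ".." / value["test"]) for value in data.values()]
print(regression_tests)
# ['/site-packages/agbenchmark/../agbenchmark/tests/basic_abilities/write_file/write_file_test.py']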

agbenchmark/tests/basic_abilities/write_file/write_file_test.py

@@ -1,3 +1,5 @@
from pathlib import Path
import pytest
from agbenchmark.tests.basic_abilities.BasicChallenge import BasicChallenge
import os
@@ -9,10 +11,11 @@ class TestWriteFile(BasicChallenge):
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "w_file_data.json")
@pytest.mark.depends(on=[], name="basic_write_file")
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
workspace = Path(os.getcwd()) / config['workspace']
files_contents = self.open_files(workspace, self.data.ground.files)
scores = []
for file_content in files_contents:

agent/Auto-GPT (new submodule)

@@ -0,0 +1 @@
Subproject commit c29ec925fd9e24f219ef0f2884b08908cd66239b

@@ -1 +0,0 @@
Subproject commit d2add8f18caf96934a2d193583720cfc9b89451b

regression_tests.json (new file, 7 lines)

@@ -0,0 +1,7 @@
{
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
}
}