Integrate one challenge to auto gpt (#44)

pull/5155/head
merwanehamadi 2023-07-02 07:38:30 -07:00 committed by GitHub
parent 0f33416b0e
commit 2062844fa6
12 changed files with 165 additions and 75 deletions

.github/workflows/autogpt.yml (new file, 62 lines)

@@ -0,0 +1,62 @@
name: Auto-GPT Regression Test
on:
workflow_dispatch:
jobs:
regression-tests:
permissions:
pull-requests: write
contents: write
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
matrix:
python-version: ["3.10"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
submodules: true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- id: get_date
name: Get date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python -
- name: Set up Poetry cache
uses: actions/cache@v2
with:
path: |
~/.cache/pypoetry
.venv
key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }}
- name: Set up venv and install Python dependencies
run: |
python -m venv venv
source venv/bin/activate
poetry install
- name: Build project
run: |
source venv/bin/activate
poetry build
cd agent/Auto-GPT
pip install -r requirements.txt
pip install ../../dist/agbenchmark-0.1.0-py3-none-any.whl
agbenchmark start --reg
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
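
The final step builds the local agbenchmark wheel, installs it inside the Auto-GPT submodule, and runs only the recorded regression tests via the new --reg flag. For local debugging, roughly the same invocation can be reproduced with click's test runner; the sketch below assumes agbenchmark is installed (for example from the built wheel) and is run from the agent directory, mirroring the workflow's `cd agent/Auto-GPT`.

# Sketch: reproduce the CI step `agbenchmark start --reg` from Python, assuming
# agbenchmark is installed and the current working directory is the agent repo.
from click.testing import CliRunner

from agbenchmark.start_benchmark import cli

runner = CliRunner()
result = runner.invoke(cli, ["start", "--reg"])  # add "--mock" to exercise the mock path
print(result.output)
print("exit code:", result.exit_code)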

.gitignore (4 changed lines)

@@ -157,4 +157,6 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
.DS_Store

.gitmodules (new file, 4 lines)

@@ -0,0 +1,4 @@
[submodule "Auto-GPT"]
path = agent/Auto-GPT
url = https://github.com/Significant-Gravitas/Auto-GPT.git
branch = benchmark-integration
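
The CI checkout pulls this in automatically via `submodules: true`; when working locally the pinned agent has to be fetched explicitly. A small, hypothetical helper using plain git commands, nothing agbenchmark-specific:

# Hypothetical local helper: fetch the agent/Auto-GPT submodule pinned by
# .gitmodules, mirroring what `submodules: true` does in the checkout action.
import subprocess

def init_agent_submodule() -> None:
    subprocess.run(
        ["git", "submodule", "update", "--init", "agent/Auto-GPT"],
        check=True,
    )

if __name__ == "__main__":
    init_agent_submodule()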

agbenchmark/agent_interface.py

@@ -1,9 +1,10 @@
import os
import importlib
import time
from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe
from agbenchmark.mocks.MockManager import MockManager
import os
import sys
import subprocess
import time
from dotenv import load_dotenv
load_dotenv()
@@ -26,45 +27,44 @@ def run_agent(task, mock_func, config):
timeout = config["cutoff"]
print(f"Running Python function '{config['func_path']}' with timeout {timeout}")
parent_conn, child_conn = Pipe()
# Get the current working directory
cwd = os.getcwd()
# Add current directory to Python's import path
sys.path.append(cwd)
# Import the specific agent dynamically
module_name = config["func_path"].replace("/", ".").rstrip(".py")
module = importlib.import_module(module_name)
run_specific_agent = getattr(module, "run_specific_agent")
process = Process(target=run_specific_agent, args=(task, child_conn))
process.start()
command = [sys.executable, "benchmarks.py", str(task)]
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, cwd=cwd)
start_time = time.time()
timeout = config["cutoff"]
while True:
if (
parent_conn.poll()
): # Check if there's a new message from the child process
response, cycle_count = parent_conn.recv()
print(f"Cycle {cycle_count}: {response}")
output = process.stdout.readline()
print(output.strip())
if cycle_count >= config["cutoff"]:
print(
f"Cycle count has reached the limit of {config['cutoff']}. Terminating."
)
child_conn.send("terminate")
break
if time.time() - start_time > timeout:
print(
"The Python function has exceeded the time limit and was terminated."
)
child_conn.send(
"terminate"
) # Send a termination signal to the child process
break
if not process.is_alive():
# Check if process has ended
if process.poll() is not None:
print("The Python function has finished running.")
break
process.join()
# Check if process has exceeded timeout
if time.time() - start_time > timeout:
print("The Python function has exceeded the time limit and was terminated.")
process.terminate()
break
# Optional: sleep for a while
time.sleep(0.1)
# Wait for process to terminate, then get return code
process.wait()
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
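
run_agent no longer imports the agent in-process; it now shells out to `python benchmarks.py "<task>"` in the current working directory and relays the child's stdout until it finishes or hits the cutoff. The agent repository is therefore expected to ship a small entrypoint along these lines (a sketch; only the CLI contract is implied by this commit, the body is hypothetical):

# Sketch of the agent-side benchmarks.py entrypoint that run_agent invokes as a
# subprocess. The contract implied above: take the task as the first CLI
# argument and print progress to stdout. The function body is hypothetical.
import sys

def run_specific_agent(task: str) -> None:
    # Replace with the real agent invocation (e.g. starting Auto-GPT with
    # `task` as its objective); anything printed here is echoed by run_agent.
    print(f"Running agent on task: {task}")

if __name__ == "__main__":
    run_specific_agent(sys.argv[1])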

agbenchmark/config.json

@@ -1,5 +1,5 @@
{
"workspace": "C:\\Users\\silen\\miniagi",
"func_path": "agent/benchmarks.py",
"workspace": "autogpt/workspace/auto_gpt_workspace",
"func_path": "benchmarks.py",
"cutoff": 60
}

agbenchmark/conftest.py

@@ -1,15 +1,18 @@
import json
import os
from pathlib import Path
import pytest
import shutil
from agbenchmark.tests.regression.RegressionManager import RegressionManager
from agbenchmark.start_benchmark import CONFIG_PATH, REGRESSION_TESTS_PATH
@pytest.fixture(scope="module")
def config(request):
config_file = os.path.abspath("agbenchmark/config.json")
print(f"Config file: {config_file}")
with open(config_file, "r") as f:
print(f"Config file: {CONFIG_PATH}")
with open(CONFIG_PATH, "r") as f:
config = json.load(f)
if request.config.getoption("--mock"):
@@ -36,10 +39,7 @@ def workspace(config):
def pytest_addoption(parser):
parser.addoption("--mock", action="store_true", default=False)
regression_json = "agbenchmark/tests/regression/regression_tests.json"
regression_manager = RegressionManager(regression_json)
regression_manager = RegressionManager(REGRESSION_TESTS_PATH)
# this is to get the challenge_data from every test
@@ -53,13 +53,16 @@ def pytest_runtest_makereport(item, call):
challenge_data = item.funcargs.get("challenge_data", None)
difficulty = challenge_data.info.difficulty if challenge_data else "unknown"
dependencies = challenge_data.dependencies if challenge_data else []
parts = item.nodeid.split("::")[0].split("/")
agbenchmark_index = parts.index("agbenchmark")
file_path = "/".join(parts[agbenchmark_index:])
test_details = {
"difficulty": difficulty,
"dependencies": dependencies,
"test": item.nodeid,
"test": file_path,
}
print("pytest_runtest_makereport", test_details)
if call.excinfo is None:
regression_manager.add_test(item.nodeid.split("::")[1], test_details)
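
The report hook now records a path relative to the agbenchmark package instead of the raw pytest nodeid, so entries in regression_tests.json stay valid no matter where the suite is launched from. An illustration of the transformation (the nodeid value is an example, not output from a real run):

# Illustration of the nodeid -> file_path conversion performed above.
nodeid = "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method"

parts = nodeid.split("::")[0].split("/")
file_path = "/".join(parts[parts.index("agbenchmark"):])

print(file_path)              # agbenchmark/tests/basic_abilities/write_file/write_file_test.py
print(nodeid.split("::")[1])  # TestWriteFile -- the key passed to regression_manager.add_test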

agbenchmark/start_benchmark.py

@@ -7,6 +7,13 @@ from dotenv import load_dotenv, set_key
load_dotenv()
CURRENT_DIRECTORY = Path(__file__).resolve().parent
new_path = CURRENT_DIRECTORY / "config.json"
CONFIG_PATH = str(new_path.resolve())
REGRESSION_TESTS_PATH = str(Path(os.getcwd()) / "regression_tests.json")
@click.group()
def cli():
@@ -15,16 +22,12 @@ def cli():
@cli.command()
@click.option("--category", default=None, help="Specific category to run")
@click.option("--noreg", is_flag=True, help="Skip regression tests")
@click.option("--reg", is_flag=True, help="Runs only regression tests")
@click.option("--mock", is_flag=True, help="Run with mock")
def start(category, noreg, mock):
def start(category, reg, mock):
"""Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
config_file = "agbenchmark/config.json"
config_dir = os.path.abspath(config_file)
# Check if configuration file exists and is not empty
if not os.path.exists(config_dir) or os.stat(config_dir).st_size == 0:
if not os.path.exists(CONFIG_PATH) or os.stat(CONFIG_PATH).st_size == 0:
config = {}
config["workspace"] = click.prompt(
@@ -42,11 +45,11 @@ def start(category, noreg, mock):
default="60",
)
with open(config_dir, "w") as f:
with open(CONFIG_PATH, "w") as f:
json.dump(config, f)
else:
# If the configuration file exists and is not empty, load it
with open(config_dir, "r") as f:
with open(CONFIG_PATH, "r") as f:
config = json.load(f)
set_key(".env", "MOCK_TEST", "True" if mock else "False")
@@ -58,11 +61,9 @@ def start(category, noreg, mock):
if not os.path.exists(workspace_path):
os.makedirs(workspace_path, exist_ok=True)
regression_path = os.path.abspath(
"agbenchmark/tests/regression/regression_tests.json"
)
if not os.path.exists(regression_path):
with open(regression_path, "a"):
if not os.path.exists(REGRESSION_TESTS_PATH):
with open(REGRESSION_TESTS_PATH, "a"):
pass
print("Current configuration:")
@@ -70,31 +71,40 @@ def start(category, noreg, mock):
print(f"{key}: {value}")
print("Starting benchmark tests...", category)
pytest_args = ["agbenchmark", "-vs"]
tests_to_run = []
pytest_args = ["-vs"]
if category:
pytest_args.extend(
["-m", category]
) # run categorys that are of a specific marker
if noreg:
pytest_args.extend(
["-k", "not regression"]
) # run categorys that are of a specific marker but don't include regression categorys
print(f"Running {'non-regression' + category if noreg else category} categorys")
)
else:
if noreg:
print("Running all non-regression categorys")
pytest_args.extend(
["-k", "not regression"]
) # run categorys that are not regression categorys
if reg:
print("Running all regression tests")
tests_to_run = get_regression_tests()
else:
print("Running all categorys") # run all categorys
print("Running all categories")
if mock:
pytest_args.append("--mock")
# Run pytest with the constructed arguments
if not tests_to_run:
tests_to_run = [str(CURRENT_DIRECTORY)]
pytest_args.extend(tests_to_run)
pytest.main(pytest_args)
def get_regression_tests():
if not Path(REGRESSION_TESTS_PATH).exists():
with open(REGRESSION_TESTS_PATH, 'w') as file:
json.dump({}, file)
with open(REGRESSION_TESTS_PATH, 'r') as file:
data = json.load(file)
regression_tests = [str(CURRENT_DIRECTORY / ".." / value['test']) for key, value in data.items()]
return regression_tests
if __name__ == "__main__":
start()
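
The paths handed to pytest in regression mode are built by get_regression_tests() from regression_tests.json, resolved relative to the installed package directory. A sketch of that resolution for the single entry this commit adds (the install location is illustrative, not a real path):

# Sketch of how get_regression_tests() resolves regression_tests.json entries
# into pytest targets. The JSON mirrors the file added by this commit; the
# CURRENT_DIRECTORY value is an illustrative install location.
from pathlib import Path

CURRENT_DIRECTORY = Path("/site-packages/agbenchmark")
data = {
    "TestWriteFile": {
        "difficulty": "basic",
        "dependencies": [],
        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py",
    }
}

regression_tests = [str(CURRENT_DIRECTORY / ".." / value["test"]) for value in data.values()]
print(regression_tests)
# ['/site-packages/agbenchmark/../agbenchmark/tests/basic_abilities/write_file/write_file_test.py']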

agbenchmark/tests/basic_abilities/write_file/write_file_test.py

@@ -1,3 +1,5 @@
from pathlib import Path
import pytest
from agbenchmark.tests.basic_abilities.BasicChallenge import BasicChallenge
import os
@@ -9,10 +11,11 @@ class TestWriteFile(BasicChallenge):
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "w_file_data.json")
@pytest.mark.depends(on=[], name="basic_write_file")
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
workspace = Path(os.getcwd()) / config['workspace']
files_contents = self.open_files(workspace, self.data.ground.files)
scores = []
for file_content in files_contents:

agent/Auto-GPT (new submodule)

@@ -0,0 +1 @@
Subproject commit c29ec925fd9e24f219ef0f2884b08908cd66239b

@@ -1 +0,0 @@
Subproject commit d2add8f18caf96934a2d193583720cfc9b89451b

regression_tests.json (new file, 7 lines)

@@ -0,0 +1,7 @@
{
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
}
}