# AutoGPT/agbenchmark/start_benchmark.py

import json
import os
import sys
from pathlib import Path
from typing import Any

import click
import pytest
from dotenv import load_dotenv

# Load environment variables before importing agbenchmark modules, in case
# they read configuration from the environment at import time.
load_dotenv()

from agbenchmark.utils import calculate_info_test_path

CURRENT_DIRECTORY = Path(__file__).resolve().parent
benchmarks_folder_path = Path(os.getcwd()) / "agbenchmark"
CONFIG_PATH = str(benchmarks_folder_path / "config.json")
REGRESSION_TESTS_PATH = str(benchmarks_folder_path / "regression_tests.json")
INFO_TESTS_PATH = calculate_info_test_path(benchmarks_folder_path)
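
# Illustrative layout of the ./agbenchmark folder these paths point into (the
# info-tests filename is computed by calculate_info_test_path, so the name
# shown here is only an assumed example):
#
#   agbenchmark/
#       config.json            <- workspace / entry_path / cutoff settings
#       regression_tests.json  <- record of previously passing tests
#       info_tests.json        <- per-run info report (assumed filename)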


@click.group()
def cli() -> None:
    pass


@cli.command()
@click.option("--category", default=None, help="Specific category to run")
@click.option("--test", default=None, help="Specific test to run")
@click.option("--maintain", is_flag=True, help="Runs only regression tests")
@click.option("--improve", is_flag=True, help="Run only non-regression tests")
@click.option("--mock", is_flag=True, help="Run with mock")
def start(category: str, test: str, maintain: bool, improve: bool, mock: bool) -> int:
"""Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
# Check if configuration file exists and is not empty
if maintain and improve:
print(
"Error: You can't use both --maintain and --improve at the same time. Please choose one."
)
return 1
if test and (category or maintain or improve):
print(
"Error: If you're running a specific test make sure no other options are selected. Please just pass the --test."
)
return 1
if not benchmarks_folder_path.exists():
benchmarks_folder_path.mkdir(exist_ok=True)
    # Check whether the configuration file exists and is not empty; guard the
    # os.stat call so a missing file doesn't raise FileNotFoundError.
    config_exists = os.path.exists(CONFIG_PATH)
    print(CONFIG_PATH, config_exists, os.stat(CONFIG_PATH).st_size if config_exists else 0)

    if not config_exists or os.stat(CONFIG_PATH).st_size == 0:
        config = {}
        config["workspace"] = click.prompt(
            "Please enter a new workspace path",
            default=os.path.join(Path.home(), "workspace"),
        )
        config["entry_path"] = click.prompt(
            "Please enter the path to your run_specific_agent function implementation within the benchmarks folder",
            default="benchmarks.py",
        )
        config["cutoff"] = click.prompt(
            "Please enter a hard cutoff runtime for your agent per test",
            default="60",
        )

        with open(CONFIG_PATH, "w") as f:
            json.dump(config, f)
    else:
        # The configuration file exists and is not empty, so load it
        with open(CONFIG_PATH, "r") as f:
            config = json.load(f)
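
    # An example of the resulting config.json, with illustrative values:
    #     {"workspace": "/home/user/workspace", "entry_path": "benchmarks.py", "cutoff": "60"}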
os.environ["MOCK_TEST"] = "True" if mock else "False"
if not os.path.exists(REGRESSION_TESTS_PATH):
with open(REGRESSION_TESTS_PATH, "w"):
pass
if not os.path.exists(INFO_TESTS_PATH):
with open(INFO_TESTS_PATH, "w"):
pass
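
    # Illustrative (assumed) shape of a regression_tests.json entry, keyed by
    # test name; the real schema is defined by whatever writes this file:
    #     {"TestWriteFile": {"difficulty": "basic", "dependencies": []}}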
print("Current configuration:")
for key, value in config.items():
print(f"{key}: {value}")
pytest_args = ["-vs"]
if test:
print("Running specific test:", test)
pytest_args.extend(["-k", test, "--test"])
else:
if category:
pytest_args.extend(["-m", category])
print("Running tests of category:", category)
else:
print("Running all categories")
if maintain:
print("Running only regression tests")
pytest_args.append("--maintain")
elif improve:
print("Running only non-regression tests")
pytest_args.append("--improve")
if mock:
pytest_args.append("--mock")
# when used as a library, the pytest directory to execute is in the CURRENT_DIRECTORY
pytest_args.append(str(CURRENT_DIRECTORY))
return sys.exit(pytest.main(pytest_args))
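
# Example invocations (assuming this click group is exposed as an
# `agbenchmark` console script; the exact entry-point name is an assumption):
#
#   agbenchmark start --category retrieval --mock
#   agbenchmark start --test TestWriteFile
#
# which resolve to pytest calls roughly like:
#
#   pytest -vs -m retrieval --mock <CURRENT_DIRECTORY>
#   pytest -vs -k TestWriteFile --test <CURRENT_DIRECTORY>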


def get_regression_data() -> Any:
    with open(REGRESSION_TESTS_PATH, "r") as file:
        data = json.load(file)

    return data
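
# Hypothetical usage from another module:
#
#   from agbenchmark.start_benchmark import get_regression_data
#   previously_passing = set(get_regression_data())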


if __name__ == "__main__":
    start()