Add automatic regression markers (#38)

parent e5974ca3ea
commit 15c5469bb1
@@ -24,7 +24,7 @@ Share your progress :)

 1. `pip install auto-gpt-benchmarks`
 2. Add boilerplate code to start webserver to your agent (run loop and stop condition)
-3. `agbenchmark start --challenge challenge_category` remove challenge flag to run all tests. specify config of hostname, port, and workspace directory
+3. `agbenchmark start --category challenge_category` remove category flag to run all tests. specify config of hostname, port, and workspace directory
 4. We call the server to run the agent for each test
 5. Show pass rate of tests, logs, and any other metrics
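Step 2 is the piece agent authors supply themselves, and this PR leaves it unspecified. Below is a minimal sketch of such a webserver, assuming a Flask app with a hypothetical `/run` endpoint; the endpoint name, payload shape, and `run_agent` helper are illustrative, not part of this commit:

```python
# Hypothetical agent-side boilerplate (endpoint name, payload shape, and
# run_agent are assumptions, not part of this commit).
from flask import Flask, jsonify, request

app = Flask(__name__)


def run_agent(task: str) -> str:
    """Placeholder for the agent's run loop; returns once a stop condition is met."""
    return f"completed: {task}"


@app.route("/run", methods=["POST"])
def run():
    task = request.json.get("task", "")
    result = run_agent(task)  # run loop and stop condition live here
    return jsonify({"result": result}), 200


if __name__ == "__main__":
    # hostname and port should match the values in agbenchmark/config.json
    app.run(host="localhost", port=8080)
```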
@@ -3,6 +3,7 @@ import os

 import pytest
 import shutil
 from agbenchmark.mocks.tests.retrieval_manual import mock_retrieval
+from agbenchmark.tests.regression.RegressionManager import RegressionManager
 import requests
@@ -41,3 +42,35 @@ def server_response(request, config):
     #     response.status_code == 200
     # ), f"Request failed with status code {response.status_code}"
     mock_retrieval(task, config["workspace"])
+
+
+regression_txt = "agbenchmark/tests/regression/regression_tests.txt"
+
+regression_manager = RegressionManager(regression_txt)
+
+
+def pytest_runtest_makereport(item, call):
+    """Called for each test report. Generated for each stage
+    of a test run (setup, call, teardown)."""
+    if call.when == "call":
+        if (
+            call.excinfo is None
+        ):  # if no error in the call stage, add it as a regression test
+            regression_manager.add_test(item.nodeid)
+        else:  # otherwise, :(
+            regression_manager.remove_test(item.nodeid)
+
+
+def pytest_collection_modifyitems(items):
+    """Called once all test items are collected. Used
+    to add regression marker to collected test items."""
+    for item in items:
+        print("pytest_collection_modifyitems", item.nodeid)
+        if item.nodeid + "\n" in regression_manager.tests:
+            print(regression_manager.tests)
+            item.add_marker(pytest.mark.regression)
+
+
+def pytest_sessionfinish():
+    """Called at the end of the session to save regression tests"""
+    regression_manager.save()
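For context: these hooks persist pytest node ids in regression_tests.txt, one per line, and `pytest_collection_modifyitems` re-applies the `regression` marker to those tests on the next collection. A minimal sketch of the file round trip, assuming the file already exists (`load()` opens it unconditionally) and using an illustrative node id:

```python
from agbenchmark.tests.regression.RegressionManager import RegressionManager

manager = RegressionManager("agbenchmark/tests/regression/regression_tests.txt")

# A test that passed its call stage is recorded by node id (id below is illustrative)
manager.add_test("agbenchmark/tests/test_retrieval.py::test_retrieval")

# A test that later fails is dropped, so the file only lists currently-green tests
manager.remove_test("agbenchmark/tests/test_retrieval.py::test_retrieval")

manager.save()  # writes the remaining ids back, one per line
```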
@@ -10,38 +10,58 @@ def cli():

 @cli.command()
-@click.option("--challenge", default=None, help="Specific challenge to run")
-def start(challenge):
-    """Start the benchmark tests. If a challenge flag is provided, run the challenges with that mark."""
-    with open("agbenchmark/config.json", "r") as f:
-        config = json.load(f)
+@click.option("--category", default=None, help="Specific category to run")
+@click.option("--noreg", is_flag=True, help="Skip regression tests")
+def start(category, noreg):
+    """Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
+    config_file = "agbenchmark/config.json"
+
+    # Check if the configuration file exists and is not empty
+    if not os.path.exists(config_file) or os.stat(config_file).st_size == 0:
+        config = {}
+
+        config["hostname"] = click.prompt(
+            "\nPlease enter a new hostname", default="localhost"
+        )
+        config["port"] = click.prompt("Please enter a new port", default=8080)
+        config["workspace"] = click.prompt(
+            "Please enter a new workspace path", default="/path/to/workspace"
+        )
+
+        with open(config_file, "w") as f:
+            json.dump(config, f)
+    else:
+        # If the configuration file exists and is not empty, load it
+        with open(config_file, "r") as f:
+            config = json.load(f)
+
+        print("Current configuration:")
+        for key, value in config.items():
+            print(f"{key}: {value}")
+
+        update_config = click.confirm(
+            "\nDo you want to update these parameters?", default=False
+        )
+        if update_config:
+            config["hostname"] = click.prompt(
+                "\nPlease enter a new hostname", default=config["hostname"]
+            )
+            config["port"] = click.prompt("Please enter a new port", default=config["port"])
+            config["workspace"] = click.prompt(
+                "Please enter a new workspace path", default=config["workspace"]
+            )
+
+            with open("agbenchmark/config.json", "w") as f:
+                json.dump(config, f)
+
-    print("Starting benchmark tests...", challenge)
-    if challenge:
-        print(f"Running {challenge} challenges")
-        pytest.main(["agbenchmark", "-m", challenge, "-vs"])
-    else:
-        print("Running all challenges")
-        pytest.main(["agbenchmark", "-vs"])
+    print("Starting benchmark tests...", category)
+    pytest_args = ["agbenchmark", "-vs"]
+    if category:
+        pytest_args.extend(
+            ["-m", category]
+        )  # run categories that carry the given marker
+        if noreg:
+            pytest_args.extend(
+                ["-k", "not regression"]
+            )  # run the marked categories but leave out regression tests
+        print(f"Running {'non-regression ' + category if noreg else category} categories")
+    else:
+        if noreg:
+            print("Running all non-regression categories")
+            pytest_args.extend(
+                ["-k", "not regression"]
+            )  # run everything except regression tests
+        else:
+            print("Running all categories")
+
+    # Run pytest with the constructed arguments
+    pytest.main(pytest_args)


 if __name__ == "__main__":
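Taken together, the flag handling above reduces to four pytest invocations; the equivalents below use a hypothetical "retrieval" category for illustration:

```python
import pytest

# agbenchmark start
pytest.main(["agbenchmark", "-vs"])
# agbenchmark start --category retrieval
pytest.main(["agbenchmark", "-vs", "-m", "retrieval"])
# agbenchmark start --noreg
pytest.main(["agbenchmark", "-vs", "-k", "not regression"])
# agbenchmark start --category retrieval --noreg
pytest.main(["agbenchmark", "-vs", "-m", "retrieval", "-k", "not regression"])
```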
@@ -0,0 +1,22 @@
+class RegressionManager:
+    """Abstracts interaction with the regression tests file"""
+
+    def __init__(self, filename: str):
+        self.filename = filename
+        self.load()
+
+    def load(self) -> None:
+        with open(self.filename, "r") as f:
+            self.tests = f.readlines()
+
+    def save(self) -> None:
+        with open(self.filename, "w") as f:
+            f.writelines(self.tests)
+
+    def add_test(self, test_id) -> None:
+        if f"{test_id}\n" not in self.tests:
+            self.tests.append(f"{test_id}\n")
+
+    def remove_test(self, test_id) -> None:
+        if f"{test_id}\n" in self.tests:
+            self.tests.remove(f"{test_id}\n")
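One caveat: `load()` opens the file unconditionally, so importing the conftest raises FileNotFoundError if regression_tests.txt does not exist yet. A more forgiving variant, offered as a suggestion rather than part of this PR, might look like:

```python
import os


class RegressionManager:
    """As above, but load() tolerates a missing regression tests file."""

    def __init__(self, filename: str):
        self.filename = filename
        self.load()

    def load(self) -> None:
        # Fall back to an empty list instead of raising FileNotFoundError
        if os.path.exists(self.filename):
            with open(self.filename, "r") as f:
                self.tests = f.readlines()
        else:
            self.tests = []
```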
@@ -28,6 +28,7 @@ testpaths = [
 ]
 markers = [
-    "retrieval"
+    "retrieval",
+    "regression"
 ]

 [tool.poetry.scripts]
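Registering the new marker in pyproject.toml matters because pytest emits PytestUnknownMarkWarning for unregistered marks (and rejects them under --strict-markers), even when the mark is added dynamically as in pytest_collection_modifyitems above. Registration can be checked with pytest's own marker listing:

```python
import pytest

# Prints all registered markers; "retrieval" and "regression" should be listed
pytest.main(["--markers"])
```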