Add automatic regression markers (#38)

pull/5155/head
Silen Naihin 2023-06-22 08:18:22 -04:00 committed by GitHub
parent e5974ca3ea
commit 15c5469bb1
6 changed files with 103 additions and 27 deletions

View File

@@ -24,7 +24,7 @@ Share your progress :)
1. `pip install auto-gpt-benchmarks`
2. Add boilerplate code to start webserver to your agent (run loop and stop condition)
-3. `agbenchmark start --challenge challenge_category` remove challenge flag to run all tests. specify config of hostname, port, and workspace directory
+3. `agbenchmark start --category challenge_category` (remove the category flag to run all tests). Specify hostname, port, and workspace directory in the config
4. We call the server to run the agent for each test
5. Show pass rate of tests, logs, and any other metrics
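
For example, `agbenchmark start --category retrieval --noreg` would run only the challenges marked `retrieval` while skipping saved regression tests (flag names taken from the CLI changes in this commit).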

View File

@@ -3,6 +3,7 @@ import os
import pytest
import shutil
from agbenchmark.mocks.tests.retrieval_manual import mock_retrieval
+from agbenchmark.tests.regression.RegressionManager import RegressionManager
import requests
@@ -41,3 +42,35 @@ def server_response(request, config):
    #     response.status_code == 200
    # ), f"Request failed with status code {response.status_code}"
    mock_retrieval(task, config["workspace"])
+
+regression_txt = "agbenchmark/tests/regression/regression_tests.txt"
+
+regression_manager = RegressionManager(regression_txt)
+
+
+def pytest_runtest_makereport(item, call):
+    """Called for each test report. Generated for each stage
+    of a test run (setup, call, teardown)."""
+    if call.when == "call":
+        if (
+            call.excinfo is None
+        ):  # if no error in the call stage, add it as a regression test
+            regression_manager.add_test(item.nodeid)
+        else:  # otherwise, remove it from the regression tests
+            regression_manager.remove_test(item.nodeid)
+
+
+def pytest_collection_modifyitems(items):
+    """Called once all test items are collected. Used
+    to add regression marker to collected test items."""
+    for item in items:
+        print("pytest_collection_modifyitems", item.nodeid)
+        if item.nodeid + "\n" in regression_manager.tests:
+            print(regression_manager.tests)
+            item.add_marker(pytest.mark.regression)
+
+
+def pytest_sessionfinish():
+    """Called at the end of the session to save regression tests"""
+    regression_manager.save()
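
Taken together, these hooks mean a challenge that passes once is marked `regression` on every later collection. A minimal sketch of the effect with a hypothetical test (the test name and body below are illustrative, not part of this commit):

```python
import pytest

# Once regression_tests.txt contains this test's node id,
# pytest_collection_modifyitems marks it as if it were written:
@pytest.mark.regression
def test_example_challenge():  # hypothetical name
    assert True  # placeholder; real challenges exercise the agent server

# Selection then works through standard pytest expressions:
#   pytest agbenchmark -m regression        -> only previously passing tests
#   pytest agbenchmark -k "not regression"  -> everything except those tests
```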

View File

@@ -10,38 +10,58 @@ def cli():
@cli.command()
@click.option("--challenge", default=None, help="Specific challenge to run")
def start(challenge):
"""Start the benchmark tests. If a challenge flag is is provided, run the challenges with that mark."""
with open("agbenchmark/config.json", "r") as f:
config = json.load(f)
@click.option("--category", default=None, help="Specific category to run")
@click.option("--noreg", is_flag=True, help="Skip regression tests")
def start(category, noreg):
"""Start the benchmark tests. If a category flag is is provided, run the categories with that mark."""
"""Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
config_file = "agbenchmark/config.json"
# Check if configuration file exists and is not empty
if not os.path.exists(config_file) or os.stat(config_file).st_size == 0:
config = {}
config["hostname"] = click.prompt(
"\nPlease enter a new hostname", default="localhost"
)
config["port"] = click.prompt("Please enter a new port", default=8080)
config["workspace"] = click.prompt(
"Please enter a new workspace path", default="/path/to/workspace"
)
with open(config_file, "w") as f:
json.dump(config, f)
else:
# If the configuration file exists and is not empty, load it
with open(config_file, "r") as f:
config = json.load(f)
print("Current configuration:")
for key, value in config.items():
print(f"{key}: {value}")
update_config = click.confirm(
"\nDo you want to update these parameters?", default=False
)
if update_config:
config["hostname"] = click.prompt(
"\nPlease enter a new hostname", default=config["hostname"]
)
config["port"] = click.prompt("Please enter a new port", default=config["port"])
config["workspace"] = click.prompt(
"Please enter a new workspace path", default=config["workspace"]
)
with open("agbenchmark/config.json", "w") as f:
json.dump(config, f)
print("Starting benchmark tests...", challenge)
if challenge:
print(f"Running {challenge} challenges")
pytest.main(["agbenchmark", "-m", challenge, "-vs"])
print("Starting benchmark tests...", category)
pytest_args = ["agbenchmark", "-vs"]
if category:
pytest_args.extend(
["-m", category]
) # run categorys that are of a specific marker
if noreg:
pytest_args.extend(
["-k", "not regression"]
) # run categorys that are of a specific marker but don't include regression categorys
print(f"Running {'non-regression' + category if noreg else category} categorys")
else:
print("Running all challenges")
pytest.main(["agbenchmark", "-vs"])
if noreg:
print("Running all non-regression categorys")
pytest_args.extend(
["-k", "not regression"]
) # run categorys that are not regression categorys
else:
print("Running all categorys") # run all categorys
# Run pytest with the constructed arguments
pytest.main(pytest_args)
if __name__ == "__main__":
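
For reference, a sketch of the pytest invocation the rewritten `start` command builds for each flag combination (derived from the branches above; `retrieval` is just an example category):

```python
# agbenchmark start                    -> pytest.main(["agbenchmark", "-vs"])
# agbenchmark start --category retrieval
#     -> pytest.main(["agbenchmark", "-vs", "-m", "retrieval"])
# agbenchmark start --noreg
#     -> pytest.main(["agbenchmark", "-vs", "-k", "not regression"])
# agbenchmark start --category retrieval --noreg
#     -> pytest.main(["agbenchmark", "-vs", "-m", "retrieval", "-k", "not regression"])
```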

View File

@@ -0,0 +1,22 @@
+class RegressionManager:
+    """Abstracts interaction with the regression tests file"""
+
+    def __init__(self, filename: str):
+        self.filename = filename
+        self.load()
+
+    def load(self) -> None:
+        with open(self.filename, "r") as f:
+            self.tests = f.readlines()
+
+    def save(self) -> None:
+        with open(self.filename, "w") as f:
+            f.writelines(self.tests)
+
+    def add_test(self, test_id) -> None:
+        if f"{test_id}\n" not in self.tests:
+            self.tests.append(f"{test_id}\n")
+
+    def remove_test(self, test_id) -> None:
+        if f"{test_id}\n" in self.tests:
+            self.tests.remove(f"{test_id}\n")

View File

@@ -28,6 +28,7 @@ testpaths = [
]
markers = [
    "retrieval",
+    "regression"
]
[tool.poetry.scripts]
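
Registering `regression` next to `retrieval` makes the new marker show up in `pytest --markers` and helps avoid pytest's unknown-marker warnings once tests start carrying it.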