Add automatic regression markers (#38)

parent e5974ca3ea
commit 15c5469bb1
@@ -24,7 +24,7 @@ Share your progress :)

 1. `pip install auto-gpt-benchmarks`
 2. Add boilerplate code to start webserver to your agent (run loop and stop condition)
-3. `agbenchmark start --challenge challenge_category` remove challenge flag to run all tests. specify config of hostname, port, and workspace directory
+3. `agbenchmark start --category challenge_category` remove category flag to run all tests. specify config of hostname, port, and workspace directory
 4. We call the server to run the agent for each test
 5. Show pass rate of tests, logs, and any other metrics
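Step 2 is the piece agent authors supply themselves, and this PR leaves it unspecified. Below is a minimal sketch of such a webserver, assuming a Flask app with a hypothetical `/run` endpoint; the endpoint name, payload shape, and `run_agent` helper are illustrative, not part of this commit:

```python
# Hypothetical agent-side boilerplate (endpoint name, payload shape, and
# run_agent are assumptions, not part of this commit).
from flask import Flask, jsonify, request

app = Flask(__name__)


def run_agent(task: str) -> str:
    """Placeholder for the agent's run loop; returns once a stop condition is met."""
    return f"completed: {task}"


@app.route("/run", methods=["POST"])
def run():
    task = request.json.get("task", "")
    result = run_agent(task)  # run loop and stop condition live here
    return jsonify({"result": result}), 200


if __name__ == "__main__":
    # hostname and port should match the values in agbenchmark/config.json
    app.run(host="localhost", port=8080)
```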
@@ -3,6 +3,7 @@ import os

 import pytest
 import shutil
 from agbenchmark.mocks.tests.retrieval_manual import mock_retrieval
+from agbenchmark.tests.regression.RegressionManager import RegressionManager
 import requests
@@ -41,3 +42,35 @@ def server_response(request, config):
     #     response.status_code == 200
     # ), f"Request failed with status code {response.status_code}"
     mock_retrieval(task, config["workspace"])
+
+
+regression_txt = "agbenchmark/tests/regression/regression_tests.txt"
+
+regression_manager = RegressionManager(regression_txt)
+
+
+def pytest_runtest_makereport(item, call):
+    """Called for each test report. Generated for each stage
+    of a test run (setup, call, teardown)."""
+    if call.when == "call":
+        if (
+            call.excinfo is None
+        ):  # if no error in the call stage, add it as a regression test
+            regression_manager.add_test(item.nodeid)
+        else:  # otherwise, :(
+            regression_manager.remove_test(item.nodeid)
+
+
+def pytest_collection_modifyitems(items):
+    """Called once all test items are collected. Used
+    to add regression marker to collected test items."""
+    for item in items:
+        print("pytest_collection_modifyitems", item.nodeid)
+        if item.nodeid + "\n" in regression_manager.tests:
+            print(regression_manager.tests)
+            item.add_marker(pytest.mark.regression)
+
+
+def pytest_sessionfinish():
+    """Called at the end of the session to save regression tests"""
+    regression_manager.save()
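For context: these hooks persist pytest node ids in regression_tests.txt, one per line, and `pytest_collection_modifyitems` re-applies the `regression` marker to those tests on the next collection. A minimal sketch of the file round trip, assuming the file already exists (`load()` opens it unconditionally) and using an illustrative node id:

```python
from agbenchmark.tests.regression.RegressionManager import RegressionManager

manager = RegressionManager("agbenchmark/tests/regression/regression_tests.txt")

# A test that passed its call stage is recorded by node id (id below is illustrative)
manager.add_test("agbenchmark/tests/test_retrieval.py::test_retrieval")

# A test that later fails is dropped, so the file only lists currently-green tests
manager.remove_test("agbenchmark/tests/test_retrieval.py::test_retrieval")

manager.save()  # writes the remaining ids back, one per line
```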
@@ -10,38 +10,58 @@ def cli():

 @cli.command()
-@click.option("--challenge", default=None, help="Specific challenge to run")
-def start(challenge):
-    """Start the benchmark tests. If a challenge flag is provided, run the challenges with that mark."""
-    with open("agbenchmark/config.json", "r") as f:
-        config = json.load(f)
+@click.option("--category", default=None, help="Specific category to run")
+@click.option("--noreg", is_flag=True, help="Skip regression tests")
+def start(category, noreg):
+    """Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
+    config_file = "agbenchmark/config.json"
+
+    # Check if the configuration file exists and is not empty
+    if not os.path.exists(config_file) or os.stat(config_file).st_size == 0:
+        config = {}
+
+        config["hostname"] = click.prompt(
+            "\nPlease enter a new hostname", default="localhost"
+        )
+        config["port"] = click.prompt("Please enter a new port", default=8080)
+        config["workspace"] = click.prompt(
+            "Please enter a new workspace path", default="/path/to/workspace"
+        )
+
+        with open(config_file, "w") as f:
+            json.dump(config, f)
+    else:
+        # If the configuration file exists and is not empty, load it
+        with open(config_file, "r") as f:
+            config = json.load(f)
+
+        print("Current configuration:")
+        for key, value in config.items():
+            print(f"{key}: {value}")
+
+        update_config = click.confirm(
+            "\nDo you want to update these parameters?", default=False
+        )
+        if update_config:
+            config["hostname"] = click.prompt(
+                "\nPlease enter a new hostname", default=config["hostname"]
+            )
+            config["port"] = click.prompt("Please enter a new port", default=config["port"])
+            config["workspace"] = click.prompt(
+                "Please enter a new workspace path", default=config["workspace"]
+            )
+
+            with open("agbenchmark/config.json", "w") as f:
+                json.dump(config, f)
+
-    print("Starting benchmark tests...", challenge)
-    if challenge:
-        print(f"Running {challenge} challenges")
-        pytest.main(["agbenchmark", "-m", challenge, "-vs"])
-    else:
-        print("Running all challenges")
-        pytest.main(["agbenchmark", "-vs"])
+    print("Starting benchmark tests...", category)
+    pytest_args = ["agbenchmark", "-vs"]
+    if category:
+        pytest_args.extend(
+            ["-m", category]
+        )  # run categories that carry the given marker
+        if noreg:
+            pytest_args.extend(
+                ["-k", "not regression"]
+            )  # run the marked categories but leave out regression tests
+        print(f"Running {'non-regression ' + category if noreg else category} categories")
+    else:
+        if noreg:
+            print("Running all non-regression categories")
+            pytest_args.extend(
+                ["-k", "not regression"]
+            )  # run everything except regression tests
+        else:
+            print("Running all categories")
+
+    # Run pytest with the constructed arguments
+    pytest.main(pytest_args)


 if __name__ == "__main__":
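Taken together, the flag handling above reduces to four pytest invocations; the equivalents below use a hypothetical "retrieval" category for illustration:

```python
import pytest

# agbenchmark start
pytest.main(["agbenchmark", "-vs"])
# agbenchmark start --category retrieval
pytest.main(["agbenchmark", "-vs", "-m", "retrieval"])
# agbenchmark start --noreg
pytest.main(["agbenchmark", "-vs", "-k", "not regression"])
# agbenchmark start --category retrieval --noreg
pytest.main(["agbenchmark", "-vs", "-m", "retrieval", "-k", "not regression"])
```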
@@ -0,0 +1,22 @@
+class RegressionManager:
+    """Abstracts interaction with the regression tests file"""
+
+    def __init__(self, filename: str):
+        self.filename = filename
+        self.load()
+
+    def load(self) -> None:
+        with open(self.filename, "r") as f:
+            self.tests = f.readlines()
+
+    def save(self) -> None:
+        with open(self.filename, "w") as f:
+            f.writelines(self.tests)
+
+    def add_test(self, test_id) -> None:
+        if f"{test_id}\n" not in self.tests:
+            self.tests.append(f"{test_id}\n")
+
+    def remove_test(self, test_id) -> None:
+        if f"{test_id}\n" in self.tests:
+            self.tests.remove(f"{test_id}\n")
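One caveat: `load()` opens the file unconditionally, so importing the conftest raises FileNotFoundError if regression_tests.txt does not exist yet. A more forgiving variant, offered as a suggestion rather than part of this PR, might look like:

```python
import os


class RegressionManager:
    """As above, but load() tolerates a missing regression tests file."""

    def __init__(self, filename: str):
        self.filename = filename
        self.load()

    def load(self) -> None:
        # Fall back to an empty list instead of raising FileNotFoundError
        if os.path.exists(self.filename):
            with open(self.filename, "r") as f:
                self.tests = f.readlines()
        else:
            self.tests = []
```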
@@ -28,6 +28,7 @@ testpaths = [
 ]
 markers = [
-    "retrieval"
+    "retrieval",
+    "regression"
 ]

 [tool.poetry.scripts]
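Registering the new marker in pyproject.toml matters because pytest emits PytestUnknownMarkWarning for unregistered marks (and rejects them under --strict-markers), even when the mark is added dynamically as in pytest_collection_modifyitems above. Registration can be checked with pytest's own marker listing:

```python
import pytest

# Prints all registered markers; "retrieval" and "regression" should be listed
pytest.main(["--markers"])
```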