internal_info.json dynamic changes (#163)

2023-07-17 09:39:24 -04:00 · 2023-07-17 09:39:24 -04:00 · dffc1dfd51
parent ce4cefe7e7
commit dffc1dfd51
5 changed files with 77 additions and 93 deletions
--- a/agbenchmark/ReportManager.py
+++ b/agbenchmark/ReportManager.py
@ -3,7 +3,7 @@ import os
 import sys
 import time
 from datetime import datetime
-from typing import Any, Dict
+from typing import Any, Dict, Optional

 from agbenchmark.utils import get_highest_success_difficulty

@ -37,8 +37,18 @@ class ReportManager:
        with open(self.filename, "w") as f:
            json.dump(self.tests, f, indent=4)

-    def add_test(self, test_name: str, test_details: dict | list) -> None:
-        self.tests[test_name] = test_details
+    def add_test(
+        self,
+        test_name: str,
+        test_details: dict | list,
+        agent_name: Optional[str] = None,
+    ) -> None:
+        if agent_name:
+            if agent_name not in self.tests:
+                self.tests[agent_name] = {}
+            self.tests[agent_name][test_name] = test_details
+        else:
+            self.tests[test_name] = test_details

        self.save()

--- a/agbenchmark/conftest.py
+++ b/agbenchmark/conftest.py
@ -15,7 +15,7 @@ from agbenchmark.start_benchmark import (
    REGRESSION_TESTS_PATH,
    get_regression_data,
 )
-from agbenchmark.utils import calculate_success_percentage
+from agbenchmark.utils import AGENT_NAME, calculate_success_percentage


 def resolve_workspace(workspace: str) -> str:
@ -128,9 +128,10 @@ regression_manager = ReportManager(REGRESSION_TESTS_PATH)
 # user facing reporting information
 info_manager = ReportManager(INFO_TESTS_PATH)

-INTERNAL_LOGS = Path(__file__).resolve().parent  # agbenchmark/conftest.py
+INTERNAL_LOGS_PATH = Path(__file__).resolve().parent / "reports"
+
 # internal stand-in for a db; tracks each test's pass/fail history
-internal_info = ReportManager(str(INTERNAL_LOGS / "internal_info.json"))
+internal_info = ReportManager(str(INTERNAL_LOGS_PATH / "internal_info.json"))


 def pytest_runtest_makereport(item: Any, call: Any) -> None:
@ -171,11 +172,22 @@ def pytest_runtest_makereport(item: Any, call: Any) -> None:
                regression_manager.remove_test(test_name)
            info_details["metrics"]["fail_reason"] = str(call.excinfo.value)

-        prev_test_results: list[bool] = internal_info.tests.get(test_name, [])
+        prev_test_results: list[bool]
+        agent_tests: dict[str, list[bool]] = {}
+
+        # results may be nested under the agent name
+        if AGENT_NAME:
+            agent_tests = internal_info.tests.get(AGENT_NAME, {})
+
+        if agent_tests:
+            prev_test_results = agent_tests.get(test_name, [])
+        else:
+            prev_test_results = internal_info.tests.get(test_name, [])
+
        if not mock:
            # only add if it's an actual test
            prev_test_results.append(info_details["metrics"]["success"])
-            internal_info.add_test(test_name, prev_test_results)
+            internal_info.add_test(test_name, prev_test_results, AGENT_NAME)

            # can calculate success rate regardless of mock
            info_details["metrics"]["success_%"] = calculate_success_percentage(
--- a/agbenchmark/internal_info.json
+++ b/agbenchmark/internal_info.json
@ -1,83 +0,0 @@
-{
-    "TestBasicMemory": [
-        true,
-        true,
-        true
-    ],
-    "TestBasicRetrieval": [
-        true,
-        true,
-        true
-    ],
-    "TestCreateSimpleWebServer": [
-        false,
-        false,
-        false
-    ],
-    "TestDebugSimpleTypoWithGuidance": [
-        false,
-        false,
-        false,
-        false,
-        false
-    ],
-    "TestDebugSimpleTypoWithoutGuidance": [
-        false,
-        false,
-        false
-    ],
-    "TestReadFile": [
-        true,
-        true,
-        true,
-        true
-    ],
-    "TestRememberMultipleIds": [
-        true,
-        true,
-        true
-    ],
-    "TestRememberMultipleIdsWithNoise": [
-        true,
-        true,
-        true
-    ],
-    "TestRememberMultiplePhrasesWithNoise": [
-        true,
-        true,
-        true
-    ],
-    "TestRetrieval2": [
-        true,
-        true,
-        true
-    ],
-    "TestRetrieval3": [
-        true,
-        true,
-        true
-    ],
-    "TestSearch": [
-        true,
-        true,
-        true,
-        true
-    ],
-    "TestWriteFile": [
-        true,
-        true,
-        true,
-        false,
-        false,
-        false,
-        false,
-        true,
-        false,
-        true,
-        false,
-        false,
-        false,
-        false,
-        true
-    ]
-}
--- a/agbenchmark/reports/internal_info.json
+++ b/agbenchmark/reports/internal_info.json
@ -0,0 +1,40 @@
+{
+  "mini-agi": {
+    "TestBasicMemory": [true, true, true],
+    "TestBasicRetrieval": [true, true, true],
+    "TestCreateSimpleWebServer": [false, false, false],
+    "TestDebugSimpleTypoWithGuidance": [
+      false,
+      false,
+      false,
+      false,
+      false,
+      false
+    ],
+    "TestDebugSimpleTypoWithoutGuidance": [false, false, false],
+    "TestReadFile": [true, true, true, true],
+    "TestRememberMultipleIds": [true, true, true],
+    "TestRememberMultipleIdsWithNoise": [true, true, true],
+    "TestRememberMultiplePhrasesWithNoise": [true, true, true],
+    "TestRetrieval2": [true, true, true],
+    "TestRetrieval3": [true, true, true],
+    "TestSearch": [true, true, true, true],
+    "TestWriteFile": [
+      true,
+      true,
+      true,
+      false,
+      false,
+      false,
+      false,
+      true,
+      false,
+      true,
+      false,
+      false,
+      false,
+      false,
+      true
+    ]
+  }
+}
--- a/agbenchmark/utils.py
+++ b/agbenchmark/utils.py
@ -17,7 +17,6 @@ HOME_ENV = os.getenv("HOME_ENV")


 def calculate_info_test_path(reports_path: Path) -> str:
-    print("reports_pathreports_pathreports_pathreports_path", reports_path)
    if not reports_path.exists():
        reports_path.mkdir(parents=True, exist_ok=True)
        return str(
@ -129,6 +128,7 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
        CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH = assign_paths(
            benchmarks_folder_path
        )
+
    else:
        # otherwise the default is when home is an agent (running agbenchmark from agent/agent_repo)
         # used when it's just a pip install
@ -139,4 +139,9 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
    if not benchmarks_folder_path.exists():
        benchmarks_folder_path.mkdir(exist_ok=True)

-    return HOME_DIRECTORY, CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH
+    return (
+        HOME_DIRECTORY,
+        CONFIG_PATH,
+        REGRESSION_TESTS_PATH,
+        INFO_TESTS_PATH,
+    )