internal_info.json dynamic changes (#163)

2023-07-17 09:39:24 -04:00 · 2023-07-17 09:39:24 -04:00 · dffc1dfd51
parent ce4cefe7e7
commit dffc1dfd51
5 changed files with 77 additions and 93 deletions
--- a/agbenchmark/ReportManager.py
+++ b/agbenchmark/ReportManager.py
@@ -3,7 +3,7 @@ import os
 import sys
 import time
 from datetime import datetime
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 from agbenchmark.utils import get_highest_success_difficulty
@@ -37,8 +37,18 @@ class ReportManager:
        with open(self.filename, "w") as f:
            json.dump(self.tests, f, indent=4)
-    def add_test(self, test_name: str, test_details: dict | list) -> None:
+    def add_test(
-        self.tests[test_name] = test_details
+        self,
        test_name: str,
        test_details: dict | list,
        agent_name: Optional[str] = None,
    ) -> None:
        if agent_name:
            if agent_name not in self.tests:
                self.tests[agent_name] = {}
            self.tests[agent_name][test_name] = test_details
        else:
            self.tests[test_name] = test_details
        self.save()
--- a/agbenchmark/conftest.py
+++ b/agbenchmark/conftest.py
@@ -15,7 +15,7 @@ from agbenchmark.start_benchmark import (
    REGRESSION_TESTS_PATH,
    get_regression_data,
 )
-from agbenchmark.utils import calculate_success_percentage
+from agbenchmark.utils import AGENT_NAME, calculate_success_percentage
 def resolve_workspace(workspace: str) -> str:
@@ -128,9 +128,10 @@ regression_manager = ReportManager(REGRESSION_TESTS_PATH)
 # user facing reporting information
 info_manager = ReportManager(INFO_TESTS_PATH)
-INTERNAL_LOGS = Path(__file__).resolve().parent  # agbenchmark/conftest.py
+INTERNAL_LOGS_PATH = Path(__file__).resolve().parent / "reports"
 # internal db step in replacement track pass/fail rate
-internal_info = ReportManager(str(INTERNAL_LOGS / "internal_info.json"))
+internal_info = ReportManager(str(INTERNAL_LOGS_PATH / "internal_info.json"))
 def pytest_runtest_makereport(item: Any, call: Any) -> None:
@@ -171,11 +172,22 @@ def pytest_runtest_makereport(item: Any, call: Any) -> None:
                regression_manager.remove_test(test_name)
            info_details["metrics"]["fail_reason"] = str(call.excinfo.value)
-        prev_test_results: list[bool] = internal_info.tests.get(test_name, [])
+        prev_test_results: list[bool]
        agent_tests: dict[str, list[bool]] = {}
        # if the structure is nested inside of the agent name
        if AGENT_NAME:
            agent_tests = internal_info.tests.get(AGENT_NAME, {})
        if agent_tests:
            prev_test_results = agent_tests.get(test_name, [])
        else:
            prev_test_results = internal_info.tests.get(test_name, [])
        if not mock:
            # only add if it's an actual test
            prev_test_results.append(info_details["metrics"]["success"])
-            internal_info.add_test(test_name, prev_test_results)
+            internal_info.add_test(test_name, prev_test_results, AGENT_NAME)
            # can calculate success rate regardless of mock
            info_details["metrics"]["success_%"] = calculate_success_percentage(
--- a/agbenchmark/internal_info.json
+++ b/agbenchmark/internal_info.json
@@ -1,83 +0,0 @@
 {
    "TestBasicMemory": [
        true,
        true,
        true
    ],
    "TestBasicRetrieval": [
        true,
        true,
        true
    ],
    "TestCreateSimpleWebServer": [
        false,
        false,
        false
    ],
    "TestDebugSimpleTypoWithGuidance": [
        false,
        false,
        false,
        false,
        false
    ],
    "TestDebugSimpleTypoWithoutGuidance": [
        false,
        false,
        false
    ],
    "TestReadFile": [
        true,
        true,
        true,
        true
    ],
    "TestRememberMultipleIds": [
        true,
        true,
        true
    ],
    "TestRememberMultipleIdsWithNoise": [
        true,
        true,
        true
    ],
    "TestRememberMultiplePhrasesWithNoise": [
        true,
        true,
        true
    ],
    "TestRetrieval2": [
        true,
        true,
        true
    ],
    "TestRetrieval3": [
        true,
        true,
        true
    ],
    "TestSearch": [
        true,
        true,
        true,
        true
    ],
    "TestWriteFile": [
        true,
        true,
        true,
        false,
        false,
        false,
        false,
        true,
        false,
        true,
        false,
        false,
        false,
        false,
        true
    ]
 }
--- a/agbenchmark/reports/internal_info.json
+++ b/agbenchmark/reports/internal_info.json
@@ -0,0 +1,40 @@
 {
  "mini-agi": {
    "TestBasicMemory": [true, true, true],
    "TestBasicRetrieval": [true, true, true],
    "TestCreateSimpleWebServer": [false, false, false],
    "TestDebugSimpleTypoWithGuidance": [
      false,
      false,
      false,
      false,
      false,
      false
    ],
    "TestDebugSimpleTypoWithoutGuidance": [false, false, false],
    "TestReadFile": [true, true, true, true],
    "TestRememberMultipleIds": [true, true, true],
    "TestRememberMultipleIdsWithNoise": [true, true, true],
    "TestRememberMultiplePhrasesWithNoise": [true, true, true],
    "TestRetrieval2": [true, true, true],
    "TestRetrieval3": [true, true, true],
    "TestSearch": [true, true, true, true],
    "TestWriteFile": [
      true,
      true,
      true,
      false,
      false,
      false,
      false,
      true,
      false,
      true,
      false,
      false,
      false,
      false,
      true
    ]
  }
 }
--- a/agbenchmark/utils.py
+++ b/agbenchmark/utils.py
@@ -17,7 +17,6 @@ HOME_ENV = os.getenv("HOME_ENV")
 def calculate_info_test_path(reports_path: Path) -> str:
    print("reports_pathreports_pathreports_pathreports_path", reports_path)
    if not reports_path.exists():
        reports_path.mkdir(parents=True, exist_ok=True)
        return str(
@@ -129,6 +128,7 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
        CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH = assign_paths(
            benchmarks_folder_path
        )
    else:
        # otherwise the default is when home is an agent (running agbenchmark from agent/agent_repo)
        # used when its just a pip install
@@ -139,4 +139,9 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
    if not benchmarks_folder_path.exists():
        benchmarks_folder_path.mkdir(exist_ok=True)
-    return HOME_DIRECTORY, CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH
+    return (
        HOME_DIRECTORY,
        CONFIG_PATH,
        REGRESSION_TESTS_PATH,
        INFO_TESTS_PATH,
    )