internal_info.json dynamic changes (#163)

pull/5155/head
Silen Naihin 2023-07-17 09:39:24 -04:00 committed by GitHub
parent ce4cefe7e7
commit dffc1dfd51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 77 additions and 93 deletions

View File

@ -3,7 +3,7 @@ import os
import sys import sys
import time import time
from datetime import datetime from datetime import datetime
from typing import Any, Dict from typing import Any, Dict, Optional
from agbenchmark.utils import get_highest_success_difficulty from agbenchmark.utils import get_highest_success_difficulty
@ -37,8 +37,18 @@ class ReportManager:
with open(self.filename, "w") as f: with open(self.filename, "w") as f:
json.dump(self.tests, f, indent=4) json.dump(self.tests, f, indent=4)
def add_test(self, test_name: str, test_details: dict | list) -> None: def add_test(
self.tests[test_name] = test_details self,
test_name: str,
test_details: dict | list,
agent_name: Optional[str] = None,
) -> None:
if agent_name:
if agent_name not in self.tests:
self.tests[agent_name] = {}
self.tests[agent_name][test_name] = test_details
else:
self.tests[test_name] = test_details
self.save() self.save()

View File

@ -15,7 +15,7 @@ from agbenchmark.start_benchmark import (
REGRESSION_TESTS_PATH, REGRESSION_TESTS_PATH,
get_regression_data, get_regression_data,
) )
from agbenchmark.utils import calculate_success_percentage from agbenchmark.utils import AGENT_NAME, calculate_success_percentage
def resolve_workspace(workspace: str) -> str: def resolve_workspace(workspace: str) -> str:
@ -128,9 +128,10 @@ regression_manager = ReportManager(REGRESSION_TESTS_PATH)
# user facing reporting information # user facing reporting information
info_manager = ReportManager(INFO_TESTS_PATH) info_manager = ReportManager(INFO_TESTS_PATH)
INTERNAL_LOGS = Path(__file__).resolve().parent # agbenchmark/conftest.py INTERNAL_LOGS_PATH = Path(__file__).resolve().parent / "reports"
# internal db step in replacement track pass/fail rate # internal db step in replacement track pass/fail rate
internal_info = ReportManager(str(INTERNAL_LOGS / "internal_info.json")) internal_info = ReportManager(str(INTERNAL_LOGS_PATH / "internal_info.json"))
def pytest_runtest_makereport(item: Any, call: Any) -> None: def pytest_runtest_makereport(item: Any, call: Any) -> None:
@ -171,11 +172,22 @@ def pytest_runtest_makereport(item: Any, call: Any) -> None:
regression_manager.remove_test(test_name) regression_manager.remove_test(test_name)
info_details["metrics"]["fail_reason"] = str(call.excinfo.value) info_details["metrics"]["fail_reason"] = str(call.excinfo.value)
prev_test_results: list[bool] = internal_info.tests.get(test_name, []) prev_test_results: list[bool]
agent_tests: dict[str, list[bool]] = {}
# if the structure is nested inside of the agent name
if AGENT_NAME:
agent_tests = internal_info.tests.get(AGENT_NAME, {})
if agent_tests:
prev_test_results = agent_tests.get(test_name, [])
else:
prev_test_results = internal_info.tests.get(test_name, [])
if not mock: if not mock:
# only add if it's an actual test # only add if it's an actual test
prev_test_results.append(info_details["metrics"]["success"]) prev_test_results.append(info_details["metrics"]["success"])
internal_info.add_test(test_name, prev_test_results) internal_info.add_test(test_name, prev_test_results, AGENT_NAME)
# can calculate success rate regardless of mock # can calculate success rate regardless of mock
info_details["metrics"]["success_%"] = calculate_success_percentage( info_details["metrics"]["success_%"] = calculate_success_percentage(

View File

@ -1,83 +0,0 @@
{
"TestBasicMemory": [
true,
true,
true
],
"TestBasicRetrieval": [
true,
true,
true
],
"TestCreateSimpleWebServer": [
false,
false,
false
],
"TestDebugSimpleTypoWithGuidance": [
false,
false,
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [
false,
false,
false
],
"TestReadFile": [
true,
true,
true,
true
],
"TestRememberMultipleIds": [
true,
true,
true
],
"TestRememberMultipleIdsWithNoise": [
true,
true,
true
],
"TestRememberMultiplePhrasesWithNoise": [
true,
true,
true
],
"TestRetrieval2": [
true,
true,
true
],
"TestRetrieval3": [
true,
true,
true
],
"TestSearch": [
true,
true,
true,
true
],
"TestWriteFile": [
true,
true,
true,
false,
false,
false,
false,
true,
false,
true,
false,
false,
false,
false,
true
]
}

View File

@ -0,0 +1,40 @@
{
"mini-agi": {
"TestBasicMemory": [true, true, true],
"TestBasicRetrieval": [true, true, true],
"TestCreateSimpleWebServer": [false, false, false],
"TestDebugSimpleTypoWithGuidance": [
false,
false,
false,
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [false, false, false],
"TestReadFile": [true, true, true, true],
"TestRememberMultipleIds": [true, true, true],
"TestRememberMultipleIdsWithNoise": [true, true, true],
"TestRememberMultiplePhrasesWithNoise": [true, true, true],
"TestRetrieval2": [true, true, true],
"TestRetrieval3": [true, true, true],
"TestSearch": [true, true, true, true],
"TestWriteFile": [
true,
true,
true,
false,
false,
false,
false,
true,
false,
true,
false,
false,
false,
false,
true
]
}
}

View File

@ -17,7 +17,6 @@ HOME_ENV = os.getenv("HOME_ENV")
def calculate_info_test_path(reports_path: Path) -> str: def calculate_info_test_path(reports_path: Path) -> str:
print("reports_pathreports_pathreports_pathreports_path", reports_path)
if not reports_path.exists(): if not reports_path.exists():
reports_path.mkdir(parents=True, exist_ok=True) reports_path.mkdir(parents=True, exist_ok=True)
return str( return str(
@ -129,6 +128,7 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH = assign_paths( CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH = assign_paths(
benchmarks_folder_path benchmarks_folder_path
) )
else: else:
# otherwise the default is when home is an agent (running agbenchmark from agent/agent_repo) # otherwise the default is when home is an agent (running agbenchmark from agent/agent_repo)
# used when its just a pip install # used when its just a pip install
@ -139,4 +139,9 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
if not benchmarks_folder_path.exists(): if not benchmarks_folder_path.exists():
benchmarks_folder_path.mkdir(exist_ok=True) benchmarks_folder_path.mkdir(exist_ok=True)
return HOME_DIRECTORY, CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH return (
HOME_DIRECTORY,
CONFIG_PATH,
REGRESSION_TESTS_PATH,
INFO_TESTS_PATH,
)