internal_info.json dynamic changes (#163)

pull/5155/head
Silen Naihin 2023-07-17 09:39:24 -04:00 committed by GitHub
parent ce4cefe7e7
commit dffc1dfd51
5 changed files with 77 additions and 93 deletions

View File

@@ -3,7 +3,7 @@ import os
import sys
import time
from datetime import datetime
-from typing import Any, Dict
+from typing import Any, Dict, Optional
from agbenchmark.utils import get_highest_success_difficulty
@@ -37,8 +37,18 @@ class ReportManager:
        with open(self.filename, "w") as f:
            json.dump(self.tests, f, indent=4)

-    def add_test(self, test_name: str, test_details: dict | list) -> None:
-        self.tests[test_name] = test_details
+    def add_test(
+        self,
+        test_name: str,
+        test_details: dict | list,
+        agent_name: Optional[str] = None,
+    ) -> None:
+        if agent_name:
+            if agent_name not in self.tests:
+                self.tests[agent_name] = {}
+            self.tests[agent_name][test_name] = test_details
+        else:
+            self.tests[test_name] = test_details

        self.save()
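For orientation (not part of the diff): a minimal, self-contained sketch of the branching the new add_test performs, using a plain dict in place of a ReportManager instance; the sample test names and the "mini-agi" agent value are illustrative only.

    from typing import Optional

    tests: dict = {}

    def add_test(
        test_name: str, test_details: dict | list, agent_name: Optional[str] = None
    ) -> None:
        # Nest results under the agent name when one is given,
        # otherwise fall back to the old flat {test_name: details} layout.
        if agent_name:
            tests.setdefault(agent_name, {})[test_name] = test_details
        else:
            tests[test_name] = test_details

    add_test("TestWriteFile", [True, False], agent_name="mini-agi")
    add_test("TestReadFile", [True])
    # tests == {"mini-agi": {"TestWriteFile": [True, False]}, "TestReadFile": [True]}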

View File

@@ -15,7 +15,7 @@ from agbenchmark.start_benchmark import (
    REGRESSION_TESTS_PATH,
    get_regression_data,
)
-from agbenchmark.utils import calculate_success_percentage
+from agbenchmark.utils import AGENT_NAME, calculate_success_percentage
def resolve_workspace(workspace: str) -> str:
@@ -128,9 +128,10 @@ regression_manager = ReportManager(REGRESSION_TESTS_PATH)
# user facing reporting information
info_manager = ReportManager(INFO_TESTS_PATH)
-INTERNAL_LOGS = Path(__file__).resolve().parent  # agbenchmark/conftest.py
+INTERNAL_LOGS_PATH = Path(__file__).resolve().parent / "reports"
# internal db step in replacement track pass/fail rate
-internal_info = ReportManager(str(INTERNAL_LOGS / "internal_info.json"))
+internal_info = ReportManager(str(INTERNAL_LOGS_PATH / "internal_info.json"))
def pytest_runtest_makereport(item: Any, call: Any) -> None:
@@ -171,11 +172,22 @@ def pytest_runtest_makereport(item: Any, call: Any) -> None:
            regression_manager.remove_test(test_name)
            info_details["metrics"]["fail_reason"] = str(call.excinfo.value)

-        prev_test_results: list[bool] = internal_info.tests.get(test_name, [])
+        prev_test_results: list[bool]
+        agent_tests: dict[str, list[bool]] = {}
+        # if the structure is nested inside of the agent name
+        if AGENT_NAME:
+            agent_tests = internal_info.tests.get(AGENT_NAME, {})
+        if agent_tests:
+            prev_test_results = agent_tests.get(test_name, [])
+        else:
+            prev_test_results = internal_info.tests.get(test_name, [])

        if not mock:
            # only add if it's an actual test
            prev_test_results.append(info_details["metrics"]["success"])
-            internal_info.add_test(test_name, prev_test_results)
+            internal_info.add_test(test_name, prev_test_results, AGENT_NAME)

        # can calculate success rate regardless of mock
        info_details["metrics"]["success_%"] = calculate_success_percentage(

View File

@@ -1,83 +0,0 @@
{
"TestBasicMemory": [
true,
true,
true
],
"TestBasicRetrieval": [
true,
true,
true
],
"TestCreateSimpleWebServer": [
false,
false,
false
],
"TestDebugSimpleTypoWithGuidance": [
false,
false,
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [
false,
false,
false
],
"TestReadFile": [
true,
true,
true,
true
],
"TestRememberMultipleIds": [
true,
true,
true
],
"TestRememberMultipleIdsWithNoise": [
true,
true,
true
],
"TestRememberMultiplePhrasesWithNoise": [
true,
true,
true
],
"TestRetrieval2": [
true,
true,
true
],
"TestRetrieval3": [
true,
true,
true
],
"TestSearch": [
true,
true,
true,
true
],
"TestWriteFile": [
true,
true,
true,
false,
false,
false,
false,
true,
false,
true,
false,
false,
false,
false,
true
]
}

View File

@@ -0,0 +1,40 @@
{
"mini-agi": {
"TestBasicMemory": [true, true, true],
"TestBasicRetrieval": [true, true, true],
"TestCreateSimpleWebServer": [false, false, false],
"TestDebugSimpleTypoWithGuidance": [
false,
false,
false,
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [false, false, false],
"TestReadFile": [true, true, true, true],
"TestRememberMultipleIds": [true, true, true],
"TestRememberMultipleIdsWithNoise": [true, true, true],
"TestRememberMultiplePhrasesWithNoise": [true, true, true],
"TestRetrieval2": [true, true, true],
"TestRetrieval3": [true, true, true],
"TestSearch": [true, true, true, true],
"TestWriteFile": [
true,
true,
true,
false,
false,
false,
false,
true,
false,
true,
false,
false,
false,
false,
true
]
}
}
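This per-agent history is what feeds the success_% metric in the hook above. A rough, self-contained sketch of how a percentage could be derived from the nested layout; this is not the actual calculate_success_percentage from agbenchmark.utils (its implementation is outside this diff), and the data below is only an excerpt of the file:

    data = {
        "mini-agi": {
            "TestReadFile": [True, True, True, True],
            "TestCreateSimpleWebServer": [False, False, False],
        }
    }

    def rough_success_percentage(history: list[bool]) -> float:
        # Share of recorded runs that passed, as a percentage.
        return round(100 * sum(history) / len(history), 2) if history else 0.0

    for agent, tests in data.items():
        for test_name, history in tests.items():
            print(f"{agent} / {test_name}: {rough_success_percentage(history)}%")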

View File

@@ -17,7 +17,6 @@ HOME_ENV = os.getenv("HOME_ENV")
def calculate_info_test_path(reports_path: Path) -> str:
-    print("reports_pathreports_pathreports_pathreports_path", reports_path)
    if not reports_path.exists():
        reports_path.mkdir(parents=True, exist_ok=True)
    return str(
@@ -129,6 +128,7 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
        CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH = assign_paths(
            benchmarks_folder_path
        )
    else:
        # otherwise the default is when home is an agent (running agbenchmark from agent/agent_repo)
        # used when its just a pip install
@@ -139,4 +139,9 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
    if not benchmarks_folder_path.exists():
        benchmarks_folder_path.mkdir(exist_ok=True)

-    return HOME_DIRECTORY, CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH
+    return (
+        HOME_DIRECTORY,
+        CONFIG_PATH,
+        REGRESSION_TESTS_PATH,
+        INFO_TESTS_PATH,
+    )
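The reformatted return is still consumed as a straight four-way unpack; a sketch of the call site (the surrounding module code is not part of this diff, so the exact form is an assumption):

    (
        HOME_DIRECTORY,
        CONFIG_PATH,
        REGRESSION_TESTS_PATH,
        INFO_TESTS_PATH,
    ) = calculate_dynamic_paths()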