fixed multiple report folder bug
parent
d44a4f591d
commit
9eb01d85a3
|
@ -1,47 +0,0 @@
|
||||||
import json
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from .reports.ReportManager import ReportManager
|
|
||||||
from .utils.data_types import AgentBenchmarkConfig
|
|
||||||
|
|
||||||
BENCHMARK_START_TIME = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
|
||||||
|
|
||||||
|
|
||||||
def get_agent_benchmark_config() -> AgentBenchmarkConfig:
    """Load the benchmark configuration from the current working directory.

    Reads ``agbenchmark_config/config.json`` (relative to ``Path.cwd()``),
    passes its top-level keys to ``AgentBenchmarkConfig`` and stores the path
    the file was loaded from on the resulting object.

    Returns:
        The populated ``AgentBenchmarkConfig``.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON. A message is
            printed first and the original exception is re-raised.
    """
    agent_benchmark_config_path = str(
        Path.cwd() / "agbenchmark_config" / "config.json"
    )
    try:
        # JSON documents are UTF-8; do not depend on the platform default.
        with open(agent_benchmark_config_path, "r", encoding="utf-8") as f:
            agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
            agent_benchmark_config.agent_benchmark_config_path = (
                agent_benchmark_config_path
            )
            return agent_benchmark_config
    except json.JSONDecodeError:
        # Report the file that was actually opened; the previous message
        # named "benchmark_config.json", which is not the file read here.
        print(f"Error: {agent_benchmark_config_path} is not a valid JSON file.")
        raise
|
|
||||||
|
|
||||||
|
|
||||||
def get_report_managers() -> tuple[ReportManager, ReportManager, ReportManager]:
    """Create the three ``ReportManager`` instances for a benchmark run.

    Each manager is constructed with a path obtained from the loaded
    ``AgentBenchmarkConfig`` and with ``BENCHMARK_START_TIME``.

    Returns:
        A ``(regression, info, internal_info)`` tuple, in that order.
    """
    config = get_agent_benchmark_config()

    # Tests that consistently pass are considered regression tests.
    regression_manager = ReportManager(
        config.get_regression_reports_path(), BENCHMARK_START_TIME
    )

    # User-facing reporting information for this run.
    report_file = config.get_reports_path() / "report.json"
    info_manager = ReportManager(str(report_file), BENCHMARK_START_TIME)

    # Internal record; per the original note it tracks the pass/fail rate.
    internal_info_manager = ReportManager(
        config.get_success_rate_path(), BENCHMARK_START_TIME
    )

    return regression_manager, info_manager, internal_info_manager
|
|
||||||
|
|
||||||
|
|
||||||
(REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER) = get_report_managers()
|
|
|
@ -11,9 +11,59 @@ import pytest
|
||||||
import toml
|
import toml
|
||||||
from helicone.lock import HeliconeLockManager
|
from helicone.lock import HeliconeLockManager
|
||||||
|
|
||||||
from agbenchmark import BENCHMARK_START_TIME
|
|
||||||
from agbenchmark.utils.data_types import AgentBenchmarkConfig
|
from agbenchmark.utils.data_types import AgentBenchmarkConfig
|
||||||
|
|
||||||
|
from .reports.ReportManager import ReportManager
|
||||||
|
from .utils.data_types import AgentBenchmarkConfig
|
||||||
|
|
||||||
|
BENCHMARK_START_TIME_DT = datetime.now(timezone.utc)
|
||||||
|
BENCHMARK_START_TIME = BENCHMARK_START_TIME_DT.strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
||||||
|
|
||||||
|
|
||||||
|
def get_agent_benchmark_config() -> AgentBenchmarkConfig:
    """Load the benchmark configuration from the current working directory.

    Reads ``agbenchmark_config/config.json`` (relative to ``Path.cwd()``),
    passes its top-level keys to ``AgentBenchmarkConfig`` and stores the path
    the file was loaded from on the resulting object.

    Returns:
        The populated ``AgentBenchmarkConfig``.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON. A message is
            printed first and the original exception is re-raised.
    """
    agent_benchmark_config_path = str(
        Path.cwd() / "agbenchmark_config" / "config.json"
    )
    try:
        # JSON documents are UTF-8; do not depend on the platform default.
        with open(agent_benchmark_config_path, "r", encoding="utf-8") as f:
            agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
            agent_benchmark_config.agent_benchmark_config_path = (
                agent_benchmark_config_path
            )
            return agent_benchmark_config
    except json.JSONDecodeError:
        # Report the file that was actually opened; the previous message
        # named "benchmark_config.json", which is not the file read here.
        print(f"Error: {agent_benchmark_config_path} is not a valid JSON file.")
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def get_report_managers() -> tuple[ReportManager, ReportManager, ReportManager]:
    """Create the three ``ReportManager`` instances for a benchmark run.

    Every manager receives ``BENCHMARK_START_TIME_DT``; the same value is
    also passed to ``get_reports_path`` when locating ``report.json``.

    Returns:
        A ``(regression, info, internal_info)`` tuple, in that order.
    """
    config = get_agent_benchmark_config()

    # Tests that consistently pass are considered regression tests.
    regression_manager = ReportManager(
        config.get_regression_reports_path(), BENCHMARK_START_TIME_DT
    )

    # User-facing reporting information for this run.
    run_reports_dir = config.get_reports_path(
        benchmark_start_time=BENCHMARK_START_TIME_DT
    )
    info_manager = ReportManager(
        str(run_reports_dir / "report.json"), BENCHMARK_START_TIME_DT
    )

    # Internal record; per the original note it tracks the pass/fail rate.
    internal_info_manager = ReportManager(
        config.get_success_rate_path(), BENCHMARK_START_TIME_DT
    )

    return regression_manager, info_manager, internal_info_manager
|
||||||
|
|
||||||
|
|
||||||
|
(REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER) = get_report_managers()
|
||||||
|
|
||||||
|
|
||||||
if os.environ.get("HELICONE_API_KEY"):
|
if os.environ.get("HELICONE_API_KEY"):
|
||||||
HeliconeLockManager.write_custom_property(
|
HeliconeLockManager.write_custom_property(
|
||||||
"benchmark_start_time", BENCHMARK_START_TIME
|
"benchmark_start_time", BENCHMARK_START_TIME
|
||||||
|
|
|
@ -71,7 +71,9 @@ class ReportManager:
|
||||||
"completion_time": datetime.now(timezone.utc).strftime(
|
"completion_time": datetime.now(timezone.utc).strftime(
|
||||||
"%Y-%m-%dT%H:%M:%S+00:00"
|
"%Y-%m-%dT%H:%M:%S+00:00"
|
||||||
),
|
),
|
||||||
"benchmark_start_time": self.benchmark_start_time,
|
"benchmark_start_time": self.benchmark_start_time.strftime(
|
||||||
|
"%Y-%m-%dT%H:%M:%S+00:00"
|
||||||
|
),
|
||||||
"metrics": {
|
"metrics": {
|
||||||
"run_time": str(round(time.time() - self.start_time, 2)) + " seconds",
|
"run_time": str(round(time.time() - self.start_time, 2)) + " seconds",
|
||||||
"highest_difficulty": get_highest_success_difficulty(self.tests),
|
"highest_difficulty": get_highest_success_difficulty(self.tests),
|
||||||
|
@ -89,7 +91,7 @@ class ReportManager:
|
||||||
|
|
||||||
save_single_radar_chart(
|
save_single_radar_chart(
|
||||||
agent_categories,
|
agent_categories,
|
||||||
config.get_reports_path() / "radar_chart.png",
|
config.get_reports_path(self.benchmark_start_time) / "radar_chart.png",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.save()
|
self.save()
|
||||||
|
|
|
@ -4,7 +4,7 @@ import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict
|
from typing import Any, Dict
|
||||||
|
|
||||||
from agbenchmark import (
|
from agbenchmark.__main__ import (
|
||||||
INFO_MANAGER,
|
INFO_MANAGER,
|
||||||
INTERNAL_INFO_MANAGER,
|
INTERNAL_INFO_MANAGER,
|
||||||
REGRESSION_MANAGER,
|
REGRESSION_MANAGER,
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import datetime
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
@ -37,7 +38,9 @@ DIFFICULTY_MAP = {
|
||||||
STRING_DIFFICULTY_MAP = {e.value: DIFFICULTY_MAP[e] for e in DifficultyLevel}
|
STRING_DIFFICULTY_MAP = {e.value: DIFFICULTY_MAP[e] for e in DifficultyLevel}
|
||||||
|
|
||||||
|
|
||||||
def calculate_info_test_path(base_path: Path) -> Path:
|
def calculate_info_test_path(
|
||||||
|
base_path: Path, benchmark_start_time: datetime.datetime
|
||||||
|
) -> Path:
|
||||||
"""
|
"""
|
||||||
Calculates the path to the directory where the test report will be saved.
|
Calculates the path to the directory where the test report will be saved.
|
||||||
"""
|
"""
|
||||||
|
@ -45,7 +48,7 @@ def calculate_info_test_path(base_path: Path) -> Path:
|
||||||
base_path.mkdir(parents=True, exist_ok=True)
|
base_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Get current UTC date-time stamp
|
# Get current UTC date-time stamp
|
||||||
date_stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
|
date_stamp = benchmark_start_time.strftime("%Y%m%dT%H%M%S")
|
||||||
|
|
||||||
# Default run name
|
# Default run name
|
||||||
run_name = "full_run"
|
run_name = "full_run"
|
||||||
|
@ -102,8 +105,10 @@ class AgentBenchmarkConfig(BaseModel):
|
||||||
# ).resolve()
|
# ).resolve()
|
||||||
return Path.cwd() / "agbenchmark_config" / "reports"
|
return Path.cwd() / "agbenchmark_config" / "reports"
|
||||||
|
|
||||||
def get_reports_path(self, benchmark_start_time: datetime) -> Path:
    """Return the report directory for the run started at the given time.

    Delegates to ``calculate_info_test_path`` with this config's reports
    location and the supplied start time.

    Args:
        benchmark_start_time: Start time of the benchmark run, forwarded
            unchanged to ``calculate_info_test_path``.

    Returns:
        The path produced by ``calculate_info_test_path``.
    """
    # The annotation is ``datetime`` rather than ``datetime.datetime``: this
    # module runs ``from datetime import datetime`` after ``import datetime``,
    # so the name refers to the class and ``datetime.datetime`` would raise
    # AttributeError when the ``def`` statement is evaluated.
    return calculate_info_test_path(
        self.get_reports_location(), benchmark_start_time
    )
|
||||||
|
|
||||||
def get_regression_reports_path(self) -> Path:
|
def get_regression_reports_path(self) -> Path:
|
||||||
return self.get_reports_location() / "regression_tests.json"
|
return self.get_reports_location() / "regression_tests.json"
|
||||||
|
|
|
@ -4,7 +4,7 @@ from typing import Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from agbenchmark import BENCHMARK_START_TIME
|
from agbenchmark.__main__ import BENCHMARK_START_TIME
|
||||||
from agbenchmark.agent_interface import HELICONE_GRAPHQL_LOGS
|
from agbenchmark.agent_interface import HELICONE_GRAPHQL_LOGS
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# radar charts, logs, helper functions for tests, anything else relevant.
|
# radar charts, logs, helper functions for tests, anything else relevant.
|
||||||
|
import datetime
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -109,103 +110,6 @@ def get_highest_success_difficulty(
|
||||||
return "No successful tests"
|
return "No successful tests"
|
||||||
|
|
||||||
|
|
||||||
def assign_paths(folder_path: Path) -> tuple[str, str, str, str, str]:
    """Derive the config, report and challenge paths under ``folder_path``.

    Creates ``folder_path / "reports"`` if it does not exist. When the
    module-level ``REPORT_LOCATION`` is truthy, the regression, success-rate
    and per-run report paths are placed under ``Path.cwd() / REPORT_LOCATION``
    instead.

    Args:
        folder_path: Directory holding ``config.json`` and, optionally,
            ``challenges`` and ``reports`` sub-directories.

    Returns:
        ``(config, regression_tests, reports, success_rate, challenges)``.
        NOTE(review): the ``reports`` element is whatever
        ``calculate_info_test_path`` returns and is not converted with
        ``str`` here - confirm against the declared return type.
    """
    config_path = str(folder_path / "config.json")

    # Prefer a challenges directory the user added locally; otherwise fall
    # back to the one located two levels above this file.
    challenges_path = str(folder_path / "challenges")
    if not os.path.exists(challenges_path):
        challenges_path = str(Path(__file__).parent.parent / "challenges")

    reports_dir = folder_path / "reports"
    if not os.path.exists(reports_dir):
        os.makedirs(reports_dir)

    # Override coming from the CI.
    if REPORT_LOCATION:
        reports_dir = Path.cwd() / REPORT_LOCATION

    run_reports_path = calculate_info_test_path(reports_dir)
    regression_tests_path = str(reports_dir / "regression_tests.json")
    success_rate_path = str(reports_dir / "success_rate.json")

    return (
        config_path,
        regression_tests_path,
        run_reports_path,
        success_rate_path,
        challenges_path,
    )
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_dynamic_paths() -> tuple[Path, str, str, str, str, str]:
    """Work out the home directory and benchmark paths, creating them if absent.

    The home directory starts as the current working directory. It moves up
    one level when the path contains ``Auto-GPT-Benchmarks/backend``. It moves
    into ``agent/<AGENT_NAME>`` when the module-level ``AGENT_NAME`` is truthy
    and the path does not contain ``Auto-GPT-Benchmarks/agent``. The
    ``agbenchmark`` folder, its ``reports`` folder and empty regression,
    success-rate and ``report.json`` files are created when missing.

    Returns:
        ``(home_directory, config, regression_tests, reports, success_rate,
        challenges)`` with the last five taken from ``assign_paths``.
    """
    home_directory = Path(os.getcwd())

    # Accounting for backend calls.
    if os.path.join("Auto-GPT-Benchmarks", "backend") in str(home_directory):
        home_directory = home_directory.parent

    benchmarks_dir = home_directory / "agbenchmark"

    # If the agent name is defined but the run is not from the agent repo,
    # home is the agent repo (used when developing a benchmark and an agent
    # together). Otherwise home is assumed to already be an agent, which is
    # the case for a plain pip install.
    if AGENT_NAME and os.path.join("Auto-GPT-Benchmarks", "agent") not in str(
        home_directory
    ):
        home_directory = home_directory / "agent" / AGENT_NAME
        benchmarks_dir = home_directory / "agbenchmark"

    (
        config_path,
        regression_tests_path,
        reports_path,
        success_rate_path,
        challenges_path,
    ) = assign_paths(benchmarks_dir)

    if not benchmarks_dir.exists():
        benchmarks_dir.mkdir(exist_ok=True)

    if not os.path.exists(benchmarks_dir / "reports"):
        os.makedirs(benchmarks_dir / "reports")

    # Create each expected file as an empty file when it is missing.
    for required_file in (
        regression_tests_path,
        success_rate_path,
        Path(reports_path) / "report.json",
    ):
        if not os.path.exists(required_file):
            with open(required_file, "w"):
                pass

    return (
        home_directory,
        config_path,
        regression_tests_path,
        reports_path,
        success_rate_path,
        challenges_path,
    )
|
|
||||||
|
|
||||||
|
|
||||||
# def get_git_commit_sha(directory: Path) -> Optional[str]:
|
# def get_git_commit_sha(directory: Path) -> Optional[str]:
|
||||||
# try:
|
# try:
|
||||||
# repo = git.Repo(directory)
|
# repo = git.Repo(directory)
|
||||||
|
|
Loading…
Reference in New Issue