# radio charts, logs, helper functions for tests, anything else relevant.
import glob
import math
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Optional

from dotenv import load_dotenv

# Load environment variables before importing modules that may read them.
load_dotenv()

from agbenchmark.challenges.data_types import DIFFICULTY_MAP, DifficultyLevel

AGENT_NAME = os.getenv("AGENT_NAME")
HOME_ENV = os.getenv("HOME_ENV")
report_location = os.getenv("REPORT_LOCATION", None)


def calculate_info_test_path(reports_path: Path) -> str:
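    """Build the file path for a new info-tests report JSON file.

    Without a test-selection flag, reports are numbered sequentially, e.g.
    "file3_07-21-14-30.json". With a flag such as --test, --suite,
    --category, --maintain, or --improve, the selected test's name is used
    instead, prefixed with a number that continues the numbering found in
    the existing report files.
    """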
    if report_location:
        reports_path = Path(os.getcwd()) / report_location

    command = sys.argv

    if not reports_path.exists():
        reports_path.mkdir(parents=True, exist_ok=True)

    json_files = glob.glob(str(reports_path / "*.json"))

    # Default naming scheme
    file_count = len(json_files)
    run_name = f"file{file_count + 1}_{datetime.now().strftime('%m-%d-%H-%M')}.json"

    test_index = None
    test_arg = None
    if "--test" in command:
        test_index = command.index("--test")
    elif "--suite" in command:
        test_index = command.index("--suite")
    elif "--category" in command:
        test_index = command.index("--category")
    elif "--maintain" in command:
        test_index = command.index("--maintain")
        test_arg = "maintain"
    elif "--improve" in command:
        test_index = command.index("--improve")
        test_arg = "improve"

    # If a test-selection flag was found in the command
    if test_index is not None:
        if not test_arg:
            test_arg = command[test_index + 1]  # Argument after the flag

        # Find the highest prefix number among all files so it can be incremented
        all_prefix_numbers = []
        # Count related files to assign the correct sub-number
        related_files = []
        prefix_number = 0.0

        # Get all files whose name includes the argument after the flag
        for file in json_files:
            file_name = Path(file).name.rsplit(".", 1)[0]
            file_parts = file_name.split("_")
            try:
                if "file" in file_parts[0]:
                    # Default files are named file{num}
                    number = float(file_parts[0][4:])
                else:
                    number = float(file_parts[0])
            except ValueError:
                number = file_count + 1
            test_name = "_".join(file_parts[1:])
            all_prefix_numbers.append(math.floor(number))
            if test_arg == test_name:
                prefix_number = number
                related_files.append(test_name)

        related_file_count = len(related_files)

        # Determine the prefix based on the existing files
        if related_file_count == 0:
            max_prefix = max(all_prefix_numbers, default=0)
            run_name = f"{max_prefix + 1}_{test_arg}.json"
        else:
            print(f"Found {related_file_count} files with '{test_arg}' in the name")
            # Take the whole number from before the "_" and append the count
            # of related files after a "."
            prefix = math.floor(prefix_number)
            run_name = f"{prefix}.{related_file_count}_{test_arg}.json"

    new_file_path = reports_path / run_name
    return str(new_file_path)


def replace_backslash(value: Any) -> Any:
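    r"""Normalize Windows-style path separators to forward slashes.

    Strings are rewritten, lists and dicts are walked recursively, and any
    other value is returned unchanged.

    >>> replace_backslash("agbenchmark\\challenges\\interface")
    'agbenchmark/challenges/interface'
    """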
    if isinstance(value, str):
        # Replace one or more backslashes with a single forward slash
        return re.sub(r"\\+", "/", value)
    elif isinstance(value, list):
        return [replace_backslash(i) for i in value]
    elif isinstance(value, dict):
        return {k: replace_backslash(v) for k, v in value.items()}
    else:
        return value


def calculate_success_percentage(results: list[bool]) -> float:
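    """Return the success rate over the most recent (up to 10) results, as a
    percentage rounded to two decimal places.

    >>> calculate_success_percentage([True, True, False])
    66.67
    """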
    # Take the last 10 results, or all of them if there are fewer than 10
    last_results = results[-10:] if len(results) > 10 else results
    success_count = last_results.count(True)
    total_count = len(last_results)
    if total_count == 0:
        return 0.0
    success_percentage = (success_count / total_count) * 100  # as a percentage
    return round(success_percentage, 2)


def get_test_path(json_file: str | Path) -> str:
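    """Return the portion of a report path starting at the "agbenchmark"
    directory, with backslashes normalized to forward slashes.

    Raises ValueError if "agbenchmark" is not a component of the path.
    """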
    if isinstance(json_file, str):
        json_file = Path(json_file)

    # Find the index of "agbenchmark" in the path parts
    try:
        agbenchmark_index = json_file.parts.index("agbenchmark")
    except ValueError:
        raise ValueError("Invalid challenge location.")

    # Create the path from "agbenchmark" onwards
    challenge_location = Path(*json_file.parts[agbenchmark_index:])

    formatted_location = replace_backslash(str(challenge_location))
    if isinstance(formatted_location, str):
        return formatted_location
    else:
        return str(challenge_location)


def get_highest_success_difficulty(
    data: dict, just_string: Optional[bool] = None
) -> str:
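    """Return the hardest difficulty at which any test in the report data
    succeeded, formatted as "name: level" (or just the name when just_string
    is truthy). Returns "No successful tests" if nothing succeeded.
    """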
    highest_difficulty = None
    highest_difficulty_level = 0

    for test_name, test_data in data.items():
        if test_data.get("tests", None):
            # Entries that contain sub-tests carry a precomputed
            # highest_difficulty; take it directly.
            highest_difficulty_str = test_data["metrics"]["highest_difficulty"]
            try:
                highest_difficulty = DifficultyLevel[highest_difficulty_str]
                highest_difficulty_level = DIFFICULTY_MAP[highest_difficulty]
            except KeyError:
                print(
                    f"Unexpected difficulty level '{highest_difficulty_str}' "
                    f"in test '{test_name}'"
                )
                continue
        else:
            if test_data["metrics"]["success"]:
                difficulty_str = test_data["metrics"]["difficulty"]

                try:
                    difficulty_enum = DifficultyLevel[difficulty_str.lower()]
                    difficulty_level = DIFFICULTY_MAP[difficulty_enum]

                    if difficulty_level > highest_difficulty_level:
                        highest_difficulty = difficulty_enum
                        highest_difficulty_level = difficulty_level
                except KeyError:
                    print(
                        f"Unexpected difficulty level '{difficulty_str}' "
                        f"in test '{test_name}'"
                    )
                    continue

    if highest_difficulty is not None:
        highest_difficulty_str = highest_difficulty.name  # convert enum to string
    else:
        highest_difficulty_str = ""

    if highest_difficulty_level and not just_string:
        return f"{highest_difficulty_str}: {highest_difficulty_level}"
    elif highest_difficulty_str:
        return highest_difficulty_str
    return "No successful tests"


def assign_paths(folder_path: Path) -> tuple[str, str, str]:
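    """Return the (config, regression-tests, info-tests) report paths for the
    given benchmarks folder. In CI (HOME_ENV == "ci") with an agent name set,
    info-test reports go to a per-agent reports directory instead.
    """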
    CONFIG_PATH = str(folder_path / "config.json")
    REGRESSION_TESTS_PATH = str(folder_path / "regression_tests.json")

    if HOME_ENV == "ci" and AGENT_NAME:
        INFO_TESTS_PATH = calculate_info_test_path(
            Path(os.getcwd()) / "agbenchmark" / "reports" / AGENT_NAME
        )
    else:
        INFO_TESTS_PATH = calculate_info_test_path(folder_path / "reports")

    return CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH


def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
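    """Resolve the home directory and the config, regression-tests, and
    info-tests report paths for the current run, creating the agbenchmark
    folder if it does not exist yet.
    """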
    # The default home is the directory you're running from
    HOME_DIRECTORY = Path(os.getcwd())

    if AGENT_NAME and os.path.join("Auto-GPT-Benchmarks", "agent") not in str(
        HOME_DIRECTORY
    ):
        # If an agent name is defined but the run is not from inside the
        # benchmark repo's agent folder, home is that agent's folder (used
        # when developing a benchmark and an agent side by side).
        HOME_DIRECTORY = Path(os.getcwd()) / "agent" / AGENT_NAME
    # Otherwise home stays the current directory, i.e. the agent repo itself
    # (running agbenchmark from agent/agent_repo after a plain pip install).

    benchmarks_folder_path = HOME_DIRECTORY / "agbenchmark"
    CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH = assign_paths(
        benchmarks_folder_path
    )

    if not benchmarks_folder_path.exists():
        benchmarks_folder_path.mkdir(exist_ok=True)

    return (
        HOME_DIRECTORY,
        CONFIG_PATH,
        REGRESSION_TESTS_PATH,
        INFO_TESTS_PATH,
    )
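# Example (hypothetical caller, not part of this module): resolve the paths
# for a run before writing any reports.
# home, config_path, regression_path, info_path = calculate_dynamic_paths()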