Fixing benchmark code
parent c73e90c4e6
commit ef2107d9c2
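Note: the hunks below are almost entirely mechanical formatting — single quotes become double quotes, long calls are wrapped with trailing commas, and imports are regrouped and sorted — which is consistent with running black and isort over the package. A minimal sketch of applying that tooling is shown here; the script name and target paths are assumptions for illustration, not part of this commit.

# format_benchmark.py -- illustrative helper, not part of this commit.
# Assumes the black and isort CLIs are installed (pip install black isort)
# and that the touched packages live under agbenchmark/ and benchmark/.
import subprocess
import sys

TARGETS = ["agbenchmark", "benchmark"]  # assumed package paths


def main() -> int:
    for tool in ("isort", "black"):
        # Run each formatter over the target packages; stop on the first failure.
        result = subprocess.run([tool, *TARGETS])
        if result.returncode != 0:
            return result.returncode
    return 0


if __name__ == "__main__":
    sys.exit(main())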
@@ -9,11 +9,10 @@ from typing import Any
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+from agbenchmark.utils.utils import find_absolute_benchmark_path
 from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
 
-from agbenchmark.utils.utils import find_absolute_benchmark_path
-
 app = FastAPI()
 
 origins = ["http://localhost:3000"]
 
@@ -5,10 +5,10 @@ import sys
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Optional
 
-import toml
 import click
 import pytest
+import toml
 from helicone.lock import HeliconeLockManager
 
 from benchmark.utils.data_types import AgentBenchmarkConfig
@@ -72,7 +72,9 @@ def run_benchmark(
     try:
         with open(agent_benchmark_config_path, "r") as f:
             agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
-            agent_benchmark_config.agent_benchmark_config_path = agent_benchmark_config_path
+            agent_benchmark_config.agent_benchmark_config_path = (
+                agent_benchmark_config_path
+            )
     except json.JSONDecodeError:
         print("Error: benchmark_config.json is not a valid JSON file.")
         return 1
@@ -97,8 +99,9 @@ def run_benchmark(
         )
         return 1
 
-    assert not(agent_benchmark_config.api_mode and not agent_benchmark_config.host), \
-        "Error: host needs to be added to the config if api_mode is set to True."
+    assert not (
+        agent_benchmark_config.api_mode and not agent_benchmark_config.host
+    ), "Error: host needs to be added to the config if api_mode is set to True."
 
     print("Current configuration:")
     for key, value in vars(agent_benchmark_config).items():
@@ -200,7 +203,12 @@ def cli() -> None:
 )
 @click.option("--nc", is_flag=True, help="Run without cutoff")
 @click.option("--cutoff", help="Set or override tests cutoff (seconds)")
-@click.option("--agent-config", type=click.Path(exists=True), help="Path to the agent benchmark_config.json file,", required=True)
+@click.option(
+    "--agent-config",
+    type=click.Path(exists=True),
+    help="Path to the agent benchmark_config.json file,",
+    required=True,
+)
 def start(
     maintain: bool,
     improve: bool,
@@ -220,8 +228,9 @@ def start(
     original_stdout = sys.stdout  # Save the original standard output
     exit_code = None
 
-
-    assert "benchmark_config.json" in agent_config, "benchmark_config.json must be provided"
+    assert (
+        "benchmark_config.json" in agent_config
+    ), "benchmark_config.json must be provided"
 
     if backend:
         with open("backend/backend_stdout.txt", "w") as f:
@@ -266,7 +275,9 @@ def start(
 def version():
     """Print the version of the benchmark tool."""
     current_directory = Path(__file__).resolve().parent
-    version = toml.load(current_directory / ".." / "pyproject.toml")["tool"]["poetry"]["version"]
+    version = toml.load(current_directory / ".." / "pyproject.toml")["tool"]["poetry"][
+        "version"
+    ]
     print(f"Benchmark Tool Version {version}")
 
 
@@ -51,7 +51,6 @@ async def run_api_agent(
 
     artifacts = await api_instance.list_agent_task_artifacts(task_id=task_id)
     for artifact in artifacts:
-
         if artifact.relative_path:
             folder_path = os.path.join(config["workspace"], artifact.relative_path)
         else:
@@ -12,7 +12,6 @@ from typing import Any, List
 import psutil
 from dotenv import load_dotenv
 
-
 load_dotenv()
 
 helicone_graphql_logs = os.getenv("HELICONE_GRAPHQL_LOGS")
@@ -1,15 +1,22 @@
 import unittest
-from url_shortener import shorten_url, retrieve_url
+
+from url_shortener import retrieve_url, shorten_url
+
 
 class TestURLShortener(unittest.TestCase):
     def test_url_retrieval(self):
         # Shorten the URL to get its shortened form
-        shortened_url = shorten_url('https://www.example.com')
+        shortened_url = shorten_url("https://www.example.com")
 
         # Retrieve the original URL using the shortened URL directly
         retrieved_url = retrieve_url(shortened_url)
 
-        self.assertEqual(retrieved_url, 'https://www.example.com', "Retrieved URL does not match the original!")
+        self.assertEqual(
+            retrieved_url,
+            "https://www.example.com",
+            "Retrieved URL does not match the original!",
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
@@ -3,6 +3,7 @@ import base64
 
 URL_MAPPING = {}
 
+
 def shorten_url(url):
     # Convert the URL to base64
     encoded_url = base64.b64encode(url.encode()).decode()
@@ -12,13 +13,15 @@ def shorten_url(url):
     URL_MAPPING[short_url] = url
     return short_url
 
+
 def retrieve_url(short_url):
     return URL_MAPPING.get(short_url, "URL not found")
 
+
 def main():
     parser = argparse.ArgumentParser(description="URL Shortener")
-    parser.add_argument('-s', '--shorten', type=str, help="URL to be shortened")
-    parser.add_argument('-r', '--retrieve', type=str, help="Short URL to be retrieved")
+    parser.add_argument("-s", "--shorten", type=str, help="URL to be shortened")
+    parser.add_argument("-r", "--retrieve", type=str, help="Short URL to be retrieved")
 
     args = parser.parse_args()
 
@@ -32,5 +35,6 @@ def main():
     else:
         print("No valid arguments provided.")
 
+
 if __name__ == "__main__":
     main()
@@ -1,38 +1,45 @@
 import pprint
 
+
 def column(matrix, i):
     return [row[i] for row in matrix]
 
+
 def check(list):
     if len(set(list)) <= 1:
         if list[0] != 0:
             return list[0]
     return None
 
+
 def checkDiagLeft(board):
-    if (board[0][0] == board[1][1] and board[1][1] == board[2][2]):
+    if board[0][0] == board[1][1] and board[1][1] == board[2][2]:
         if board[0][0] != 0:
             return board[0][0]
     return None
 
+
 def checkDiagRight(board):
-    if (board[2][0] == board[1][1] and board[1][1] == board[0][2]):
+    if board[2][0] == board[1][1] and board[1][1] == board[0][2]:
         if board[2][0] != 0:
             return board[2][0]
     return None
 
+
 def placeItem(row, column, board, current_player):
     if board[row][column] != 0:
         return None
     else:
         board[row][column] = current_player
 
+
 def swapPlayers(player):
-    if (player == 2):
+    if player == 2:
         return 1
     else:
         return 2
 
+
 def winner(board):
     for rowIndex in board:
         if check(rowIndex) is not None:
@@ -46,23 +53,35 @@ def winner(board):
         return checkDiagRight(board)
     return 0
 
+
 def getLocation():
-    location = input("Choose where to play. Enter two numbers separated by a comma, for example: 1,1 ")
+    location = input(
+        "Choose where to play. Enter two numbers separated by a comma, for example: 1,1 "
+    )
     print(f"\nYou picked {location}")
-    coordinates = [int(x) for x in location.split(',')]
-    while (len(coordinates) != 2 or coordinates[0] < 0 or coordinates[0] > 2 or coordinates[1] < 0 or coordinates[1] > 2):
+    coordinates = [int(x) for x in location.split(",")]
+    while (
+        len(coordinates) != 2
+        or coordinates[0] < 0
+        or coordinates[0] > 2
+        or coordinates[1] < 0
+        or coordinates[1] > 2
+    ):
         print("You inputted a location in an invalid format")
-        location = input("Choose where to play. Enter two numbers separated by a comma, for example: 1,1 ")
-        coordinates = [int(x) for x in location.split(',')]
+        location = input(
+            "Choose where to play. Enter two numbers separated by a comma, for example: 1,1 "
+        )
+        coordinates = [int(x) for x in location.split(",")]
     return coordinates
 
+
 def gamePlay():
     num_moves = 0
     pp = pprint.PrettyPrinter(width=20)
     current_player = 1
     board = [[0 for x in range(3)] for x in range(3)]
 
-    while (num_moves < 9 and winner(board) == 0):
+    while num_moves < 9 and winner(board) == 0:
         print("This is the current board: ")
         pp.pprint(board)
         coordinates = getLocation()
@@ -75,5 +94,6 @@ def gamePlay():
     if winner(board) == 0:
         print("Draw")
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     gamePlay()
@@ -1,18 +1,20 @@
 import subprocess
+
 import pytest
 
+
 def run_game_with_inputs(inputs):
     # Start the game process
     process = subprocess.Popen(
-        ['python', 'tic_tac_toe.py'],
+        ["python", "tic_tac_toe.py"],
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
-        text=True
+        text=True,
     )
 
     # Send the input moves one by one
-    output, errors = process.communicate('\n'.join(inputs))
+    output, errors = process.communicate("\n".join(inputs))
 
     # Print the inputs and outputs
     print("Inputs:\n", "\n".join(inputs))
@@ -22,14 +24,18 @@ def run_game_with_inputs(inputs):
     return output
 
 
-@pytest.mark.parametrize("inputs, expected_output", [
-    (["0,0", "1,0", "0,1", "1,1", "0,2"], "Player 1 won!"),
-    (["1,0", "0,0", "1,1", "0,1", "2,0", "0,2"], "Player 2 won!"),
-    (["0,0", "0,1", "0,2", "1,1", "1,0", "1,2", "2,1", "2,0", "2,2"], "Draw")
-])
+@pytest.mark.parametrize(
+    "inputs, expected_output",
+    [
+        (["0,0", "1,0", "0,1", "1,1", "0,2"], "Player 1 won!"),
+        (["1,0", "0,0", "1,1", "0,1", "2,0", "0,2"], "Player 2 won!"),
+        (["0,0", "0,1", "0,2", "1,1", "1,0", "1,2", "2,1", "2,0", "2,2"], "Draw"),
+    ],
+)
 def test_game(inputs, expected_output):
     output = run_game_with_inputs(inputs)
     assert expected_output in output
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     pytest.main()
@@ -1,9 +1,8 @@
 import pytest
 
 from abstract_class import ShipPlacement, Turn
-
 from battleship import Battleship
 
 
 @pytest.fixture
 def battleship_game():
     return Battleship()
@@ -1,7 +1,6 @@
 import pytest
-from pydantic import ValidationError
 
 from abstract_class import ShipPlacement, Turn
+from pydantic import ValidationError
 
 
 def test_ship_placement_out_of_bounds(battleship_game):
@@ -51,9 +50,7 @@ def test_cant_hit_before_ships_placed(battleship_game):
 
 
 def test_cant_place_ship_after_all_ships_placed(battleship_game, initialized_game_id):
-    game = battleship_game.get_game(
-        initialized_game_id
-    )
+    game = battleship_game.get_game(initialized_game_id)
     additional_ship = ShipPlacement(
         ship_type="carrier", start={"row": 2, "column": "E"}, direction="horizontal"
     )
@@ -86,9 +86,7 @@ class Battleship(AbstractBattleship):
         game.turns.append(turn)
 
         if hit_ship == "hit":
-            return TurnResponse(
-                result="miss", ship_type=None
-            )
+            return TurnResponse(result="miss", ship_type=None)
 
         if hit_ship:
             ship_placement = next(sp for sp in game.ships if sp.ship_type == hit_ship)
@@ -133,9 +131,7 @@ class Battleship(AbstractBattleship):
         )
 
         if hits == total_ships_length:
-            return GameStatus(
-                is_game_over=True, winner="player"
-            )
+            return GameStatus(is_game_over=True, winner="player")
         else:
             return GameStatus(is_game_over=False, winner=None)
 
@@ -1,9 +1,8 @@
 import pytest
 
 from abstract_class import ShipPlacement, Turn
-
 from battleship import Battleship
 
 
 @pytest.fixture
 def battleship_game():
     return Battleship()
@@ -1,7 +1,6 @@
 import pytest
-from pydantic import ValidationError
 
 from abstract_class import ShipPlacement, Turn
+from pydantic import ValidationError
 
 
 def test_ship_placement_out_of_bounds(battleship_game):
@@ -51,9 +50,7 @@ def test_cant_hit_before_ships_placed(battleship_game):
 
 
 def test_cant_place_ship_after_all_ships_placed(battleship_game, initialized_game_id):
-    game = battleship_game.get_game(
-        initialized_game_id
-    )
+    game = battleship_game.get_game(initialized_game_id)
     additional_ship = ShipPlacement(
         ship_type="carrier", start={"row": 2, "column": "E"}, direction="horizontal"
     )
@@ -16,7 +16,7 @@ from benchmark.reports.reports import (
     generate_single_call_report,
     session_finish,
 )
-from benchmark.utils.data_types import SuiteConfig, AgentBenchmarkConfig
+from benchmark.utils.data_types import AgentBenchmarkConfig, SuiteConfig
 
 GLOBAL_TIMEOUT = (
     1500  # The tests will stop after 25 minutes so we can send the reports.
@@ -31,16 +31,15 @@ def load_config_from_request(request: Any) -> AgentBenchmarkConfig:
     try:
         with open(agent_benchmark_config_path, "r") as f:
             agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
-            agent_benchmark_config.agent_benchmark_config_path = agent_benchmark_config_path
+            agent_benchmark_config.agent_benchmark_config_path = (
+                agent_benchmark_config_path
+            )
             return agent_benchmark_config
     except json.JSONDecodeError:
         print("Error: benchmark_config.json is not a valid JSON file.")
         raise
 
 
-
-
-
 def resolve_workspace(workspace: str) -> str:
     if workspace.startswith("${") and workspace.endswith("}"):
         # Extract the string inside ${...}
@@ -65,7 +64,9 @@ def config(request: Any) -> Any:
     try:
         with open(agent_benchmark_config_path, "r") as f:
             agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
-            agent_benchmark_config.agent_benchmark_config_path = agent_benchmark_config_path
+            agent_benchmark_config.agent_benchmark_config_path = (
+                agent_benchmark_config_path
+            )
     except json.JSONDecodeError:
         print("Error: benchmark_config.json is not a valid JSON file.")
         raise
@@ -73,8 +74,12 @@ def config(request: Any) -> Any:
     if isinstance(config["workspace"], str):
         config["workspace"] = resolve_workspace(agent_benchmark_config.workspace)
     else:  # it's a input output dict
-        config["workspace"]["input"] = resolve_workspace(agent_benchmark_config.workspace / "input")
-        config["workspace"]["output"] = resolve_workspace(agent_benchmark_config.workspace / "output")
+        config["workspace"]["input"] = resolve_workspace(
+            agent_benchmark_config.workspace / "input"
+        )
+        config["workspace"]["output"] = resolve_workspace(
+            agent_benchmark_config.workspace / "output"
+        )
 
     return config
 
@@ -238,9 +243,11 @@ def scores(request: Any) -> None:
 # this is adding the dependency marker and category markers automatically from the json
 def pytest_collection_modifyitems(items: Any, config: Any) -> None:
     try:
-        with open(config.getoption('--agent_config_path'), "r") as f:
+        with open(config.getoption("--agent_config_path"), "r") as f:
             agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
-            agent_benchmark_config.agent_benchmark_config_path = config.getoption('--agent_config_path')
+            agent_benchmark_config.agent_benchmark_config_path = config.getoption(
+                "--agent_config_path"
+            )
     except json.JSONDecodeError:
         print("Error: benchmark_config.json is not a valid JSON file.")
         raise
@@ -11,7 +11,7 @@ from typing import Any, Callable, Dict, Optional
 import pytest
 
 from benchmark.utils.challenge import Challenge
-from benchmark.utils.data_types import ChallengeData, SuiteConfig, AgentBenchmarkConfig
+from benchmark.utils.data_types import AgentBenchmarkConfig, ChallengeData, SuiteConfig
 from benchmark.utils.utils import get_test_path
 
 DATA_CATEGORY = {}
@@ -222,7 +222,7 @@ def create_challenge(
 def generate_tests() -> None:  # sourcery skip: invert-any-all
     print("Generating tests...")
 
-    challenges_path = os.path.join(os.path.dirname(__file__), 'challenges')
+    challenges_path = os.path.join(os.path.dirname(__file__), "challenges")
 
     json_files = deque(
         glob.glob(
@@ -239,14 +239,16 @@ def generate_tests() -> None:  # sourcery skip: invert-any-all
     try:
         with open(agent_benchmark_config_path, "r") as f:
             agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
-            agent_benchmark_config.agent_benchmark_config_path = agent_benchmark_config_path
+            agent_benchmark_config.agent_benchmark_config_path = (
+                agent_benchmark_config_path
+            )
     except json.JSONDecodeError:
         print("Error: benchmark_config.json is not a valid JSON file.")
         raise
 
     regression_reports_path = agent_benchmark_config.get_regression_reports_path()
     if regression_reports_path and os.path.exists(regression_reports_path):
-        with open(regression_reports_path, 'r') as f:
+        with open(regression_reports_path, "r") as f:
             regression_tests = json.load(f)
     else:
         regression_tests = {}
@@ -6,12 +6,13 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict
 
+from benchmark.__main__ import BENCHMARK_START_TIME
 from benchmark.reports.processing.graphs import save_single_radar_chart
 from benchmark.reports.processing.process_report import get_agent_category
 from benchmark.reports.processing.report_types import Report
-from benchmark.utils.utils import get_highest_success_difficulty
 from benchmark.utils.data_types import AgentBenchmarkConfig
-from benchmark.__main__ import BENCHMARK_START_TIME
+from benchmark.utils.utils import get_highest_success_difficulty
 
 
 class ReportManager:
     """Abstracts interaction with the regression tests file"""
@@ -24,7 +25,7 @@ class ReportManager:
     def load(self) -> None:
         if not os.path.exists(self.filename):
             os.makedirs(os.path.dirname(self.filename), exist_ok=True)
-            with open(self.filename, 'w') as f:
+            with open(self.filename, "w") as f:
                 pass
 
         try:
@@ -62,13 +63,12 @@ class ReportManager:
         self.save()
 
     def end_info_report(self, config: AgentBenchmarkConfig) -> None:
-
         command = " ".join(sys.argv)
 
         self.tests = {
             "command": command.split(os.sep)[-1],
-            "benchmark_git_commit_sha": '---',
-            "agent_git_commit_sha": '---',
+            "benchmark_git_commit_sha": "---",
+            "agent_git_commit_sha": "---",
             "completion_time": datetime.now(timezone.utc).strftime(
                 "%Y-%m-%dT%H:%M:%S+00:00"
             ),
@@ -79,7 +79,9 @@ class ReportManager:
                 "total_cost": self.get_total_costs(),
             },
             "tests": self.tests,
-            "config": {k: v for k, v in json.loads(config.json()).items() if v is not None},
+            "config": {
+                k: v for k, v in json.loads(config.json()).items() if v is not None
+            },
         }
 
         converted_data = Report.parse_obj(self.tests)
@@ -88,7 +90,6 @@ class ReportManager:
 
         save_single_radar_chart(
             agent_categories,
-
            config.get_reports_path() / "radar_chart.png",
         )
 
@@ -4,7 +4,13 @@ import sys
 from pathlib import Path
 from typing import Any, Dict
 
-from benchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel, SuiteConfig, AgentBenchmarkConfig
+from benchmark.reports.ReportManager import ReportManager
+from benchmark.utils.data_types import (
+    DIFFICULTY_MAP,
+    AgentBenchmarkConfig,
+    DifficultyLevel,
+    SuiteConfig,
+)
 from benchmark.utils.get_data_from_helicone import get_data_from_helicone
 from benchmark.utils.utils import (
     calculate_success_percentage,
@@ -12,8 +18,6 @@ from benchmark.utils.utils import (
     get_test_path,
     replace_backslash,
 )
-from benchmark.reports.ReportManager import ReportManager
-
 
 
 def get_agent_benchmark_config() -> AgentBenchmarkConfig:
@@ -24,23 +28,32 @@ def get_agent_benchmark_config() -> AgentBenchmarkConfig:
     try:
         with open(agent_benchmark_config_path, "r") as f:
             agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
-            agent_benchmark_config.agent_benchmark_config_path = agent_benchmark_config_path
+            agent_benchmark_config.agent_benchmark_config_path = (
+                agent_benchmark_config_path
+            )
             return agent_benchmark_config
     except json.JSONDecodeError:
         print("Error: benchmark_config.json is not a valid JSON file.")
         raise
 
 
 def get_report_managers() -> tuple[ReportManager, ReportManager, ReportManager]:
     agent_benchmark_config = get_agent_benchmark_config()
     # tests that consistently pass are considered regression tests
-    REGRESSION_MANAGER = ReportManager(agent_benchmark_config.get_regression_reports_path())
+    REGRESSION_MANAGER = ReportManager(
+        agent_benchmark_config.get_regression_reports_path()
+    )
 
     # print(f"Using {REPORTS_PATH} for reports")
     # user facing reporting information
-    INFO_MANAGER = ReportManager(str(agent_benchmark_config.get_reports_path() / "report.json"))
+    INFO_MANAGER = ReportManager(
+        str(agent_benchmark_config.get_reports_path() / "report.json")
+    )
 
     # internal db step in replacement track pass/fail rate
-    INTERNAL_INFO_MANAGER = ReportManager(agent_benchmark_config.get_success_rate_path())
+    INTERNAL_INFO_MANAGER = ReportManager(
+        agent_benchmark_config.get_success_rate_path()
+    )
 
     return REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER
 
@@ -132,16 +145,12 @@ def get_previous_test_results(
     agent_tests: dict[str, list[bool]] = {}
     mock = "--mock" in sys.argv  # Check if --mock is in sys.argv
 
-    prev_test_results = INTERNAL_INFO_MANAGER.tests.get(
-        test_name, []
-    )
+    prev_test_results = INTERNAL_INFO_MANAGER.tests.get(test_name, [])
 
     if not mock:
         # only add if it's an actual test
         prev_test_results.append(info_details["metrics"]["success"])
-        INTERNAL_INFO_MANAGER.add_test(
-            test_name, prev_test_results
-        )
+        INTERNAL_INFO_MANAGER.add_test(test_name, prev_test_results)
 
     # can calculate success rate regardless of mock
     info_details["metrics"]["success_%"] = calculate_success_percentage(
@@ -199,8 +208,8 @@ def generate_single_call_report(
         },
         "answers": answers,
     }
-    if 'metadata' in challenge_data:
-        info_details['metadata'] = challenge_data['metadata']
+    if "metadata" in challenge_data:
+        info_details["metadata"] = challenge_data["metadata"]
 
     mock = "--mock" in sys.argv  # Check if --mock is in sys.argv
 
@@ -298,9 +307,7 @@ def generate_separate_suite_reports(suite_reports: dict) -> None:
     }
 
     for name in suite_file_datum:
-        test_data = INFO_MANAGER.tests[
-            name
-        ]  # get the individual test reports
+        test_data = INFO_MANAGER.tests[name]  # get the individual test reports
         data[name] = test_data  # this is for calculating highest difficulty
         INFO_MANAGER.remove_test(name)
 
@@ -330,7 +337,6 @@ def session_finish(suite_reports: dict) -> None:
 
     agent_benchmark_config = get_agent_benchmark_config()
 
-
     INTERNAL_INFO_MANAGER.save()
     INFO_MANAGER.end_info_report(agent_benchmark_config)
     REGRESSION_MANAGER.save()
@@ -9,14 +9,13 @@ from typing import Any, Optional
 import click
 import pytest
 from helicone.lock import HeliconeLockManager
-import sys
-sys.path.append('/Users/swifty/dev/Auto-GPT/benchmark')
+
+sys.path.append("/Users/swifty/dev/Auto-GPT/benchmark")
 
 from agbenchmark.reports.ReportManager import ReportManager
-from agbenchmark.utils.utils import (
+from agbenchmark.utils.utils import (  # get_git_commit_sha,
     AGENT_NAME,
     calculate_dynamic_paths,
-    # get_git_commit_sha,
 )
 
 CURRENT_DIRECTORY = Path(__file__).resolve().parent
@@ -34,8 +33,8 @@ if os.environ.get("HELICONE_API_KEY"):
     SUCCESS_RATE_PATH,
     CHALLENGES_PATH,
 ) = calculate_dynamic_paths()
 BENCHMARK_GIT_COMMIT_SHA = "---"  # get_git_commit_sha(HOME_DIRECTORY / ".." / "..")
 AGENT_GIT_COMMIT_SHA = "---"  # get_git_commit_sha(HOME_DIRECTORY)
 # open a file in the challenges/optional_categories
 with open(
     Path(__file__).resolve().parent / "challenges" / "optional_categories.json"
@@ -334,13 +333,16 @@ def get_regression_data() -> Any:
 
     return data
 
 
 @cli.command()
 def version():
     """Print the version of the benchmark tool."""
     import toml
-    version = toml.load(CURRENT_DIRECTORY / ".." / "pyproject.toml")["tool"]["poetry"]["version"]
-    print(f"Benchmark Tool Version {version}")
+
+    version = toml.load(CURRENT_DIRECTORY / ".." / "pyproject.toml")["tool"]["poetry"][
+        "version"
+    ]
+    print(f"Benchmark Tool Version {version}")
 
 
 # def run_from_backend(
@@ -1,11 +1,14 @@
 import glob
 import json
+import sys
+from datetime import datetime, timezone
 from enum import Enum
 from pathlib import Path
 from typing import Any, Dict, List, Optional
-import sys
 
 from pydantic import BaseModel, root_validator, validator
-from datetime import datetime, timezone
 
 
 class DifficultyLevel(Enum):
     interface = "interface"
     basic = "basic"
@@ -29,6 +32,7 @@ DIFFICULTY_MAP = {
 
 STRING_DIFFICULTY_MAP = {e.value: DIFFICULTY_MAP[e] for e in DifficultyLevel}
 
+
 def calculate_info_test_path(base_path: Path) -> Path:
     """
     Calculates the path to the directory where the test report will be saved.
@@ -69,6 +73,7 @@ def calculate_info_test_path(base_path: Path) -> Path:
 
     return report_path
 
+
 class AgentBenchmarkConfig(BaseModel):
     """
     This class represents the configuration for the Agent Benchmark.
@@ -79,6 +84,7 @@ class AgentBenchmarkConfig(BaseModel):
     - api_mode: A boolean indicating whether the benchmark is run in API mode.
    - host: The host where the benchmark is run.
     """
+
     agent_benchmark_config_path: Path | None = None
     entry_path: Path
     workspace: Path
@@ -88,19 +94,24 @@ class AgentBenchmarkConfig(BaseModel):
 
     def get_reports_location(self) -> Path:
         if not self.reports_folder:
-            self.reports_folder = (self.agent_benchmark_config_path / self.entry_path.parent / ".." / "reports").resolve()
+            self.reports_folder = (
+                self.agent_benchmark_config_path
+                / self.entry_path.parent
+                / ".."
+                / "reports"
+            ).resolve()
         return self.reports_folder
 
     def get_reports_path(self) -> Path:
         return calculate_info_test_path(self.get_reports_location())
 
     def get_regression_reports_path(self) -> Path:
         return self.get_reports_location() / "regression_tests.json"
 
     def get_success_rate_path(self) -> Path:
         return self.get_reports_location() / "success_rate.json"
 
 
 class Info(BaseModel):
     difficulty: DifficultyLevel
     description: str
|
@ -67,7 +67,6 @@ def pytest_addoption(parser: Parser) -> None:
|
||||||
for action in group.options:
|
for action in group.options:
|
||||||
current_options += action._short_opts + action._long_opts
|
current_options += action._short_opts + action._long_opts
|
||||||
|
|
||||||
|
|
||||||
group = parser.getgroup("depends")
|
group = parser.getgroup("depends")
|
||||||
|
|
||||||
# Add a flag to list all names + the tests they resolve to
|
# Add a flag to list all names + the tests they resolve to
|
||||||
|
|
|
@ -16,9 +16,6 @@ AGENT_NAME = os.getenv("AGENT_NAME")
|
||||||
REPORT_LOCATION = os.getenv("REPORT_LOCATION", None)
|
REPORT_LOCATION = os.getenv("REPORT_LOCATION", None)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def replace_backslash(value: Any) -> Any:
|
def replace_backslash(value: Any) -> Any:
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
return re.sub(
|
return re.sub(
|
||||||
|
|