diff --git a/agbenchmark/challenges b/agbenchmark/challenges
index 59d44a6f4..8153f05f7 160000
--- a/agbenchmark/challenges
+++ b/agbenchmark/challenges
@@ -1 +1 @@
-Subproject commit 59d44a6f4f3cd778d9034857f7277428269b2028
+Subproject commit 8153f05f758d86ddf01fe7d8cb4fb7c53ddc4d72
diff --git a/agbenchmark/conftest.py b/agbenchmark/conftest.py
index ba14aa27a..ba62b6982 100644
--- a/agbenchmark/conftest.py
+++ b/agbenchmark/conftest.py
@@ -5,6 +5,7 @@ import sys
 import time
 from pathlib import Path  # noqa
 from typing import Any, Dict, Generator
+from helicone.lock import HeliconeLockManager
 
 import pytest
 
@@ -132,6 +133,10 @@ suite_reports: dict[str, list] = {}
 
 def pytest_runtest_makereport(item: Any, call: Any) -> None:
     challenge_data = item.funcargs.get("challenge_data", None)
+    if challenge_data:
+        # dummy dependency setup tests carry no challenge_data, so only tag real ones
+        HeliconeLockManager.write_custom_property("challenge", challenge_data["name"])
+
     if not challenge_data:
         # this will only happen for dummy dependency setup tests
         return
diff --git a/agbenchmark/reports/ReportManager.py b/agbenchmark/reports/ReportManager.py
index a5c99b6a0..08c6cbd82 100644
--- a/agbenchmark/reports/ReportManager.py
+++ b/agbenchmark/reports/ReportManager.py
@@ -5,6 +5,7 @@ import time
 from datetime import datetime
 from typing import Any, Dict, Optional
 
+from agbenchmark.start_benchmark import BENCHMARK_START_TIME
 from agbenchmark.utils.utils import get_highest_success_difficulty
 
 
@@ -62,6 +63,7 @@ class ReportManager:
         self.tests = {
             "command": command.split(os.sep)[-1],
             "completion_time": datetime.now().strftime("%Y-%m-%d-%H:%M"),
+            "benchmark_start_time": BENCHMARK_START_TIME,
             "metrics": {
                 "run_time": str(round(time.time() - self.start_time, 2)) + " seconds",
                 "highest_difficulty": get_highest_success_difficulty(self.tests),
diff --git a/agbenchmark/start_benchmark.py b/agbenchmark/start_benchmark.py
index 8a757a9ef..0ca906be0 100644
--- a/agbenchmark/start_benchmark.py
+++ b/agbenchmark/start_benchmark.py
@@ -1,16 +1,22 @@
 import json
 import os
 import sys
+from datetime import datetime
 from pathlib import Path
 from typing import Any
+from helicone.lock import HeliconeLockManager
+from agbenchmark.utils.utils import AGENT_NAME, calculate_dynamic_paths
 
 import click
 import pytest
 
-from agbenchmark.utils.utils import calculate_dynamic_paths
 
 CURRENT_DIRECTORY = Path(__file__).resolve().parent
+BENCHMARK_START_TIME = datetime.now().strftime("%Y-%m-%d-%H:%M")
+HeliconeLockManager.write_custom_property("benchmark_start_time", BENCHMARK_START_TIME)
+if AGENT_NAME:
+    HeliconeLockManager.write_custom_property("agent_name", AGENT_NAME)
 
 (
     HOME_DIRECTORY,
     CONFIG_PATH,
diff --git a/agent/Auto-GPT b/agent/Auto-GPT
index 1d9a7c6d5..dcb632b6b 160000
--- a/agent/Auto-GPT
+++ b/agent/Auto-GPT
@@ -1 +1 @@
-Subproject commit 1d9a7c6d5792378e1fb34c8591c7dc6eb1dd1a1d
+Subproject commit dcb632b6b643001f01e6169645f58dc60b1be708
diff --git a/poetry.lock b/poetry.lock
index f91684580..7fbc61d37 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -514,6 +514,21 @@ files = [
 google-auth = ">=1.12.0"
 google-auth-oauthlib = ">=0.4.1"
 
+[[package]]
+name = "helicone"
+version = "1.0.6"
+description = "A Python wrapper for the OpenAI API that logs all requests to Helicone."
+optional = false
+python-versions = ">=3.8.1"
+files = [
+    {file = "helicone-1.0.6-py3-none-any.whl", hash = "sha256:a735096a3d616399210a2cf951360b45418fbea6af5f21fb61b6a1b99c4d75da"},
+    {file = "helicone-1.0.6.tar.gz", hash = "sha256:25f9459a9b427726d3373cccc64776034b6db5fe39b3f5c48efdbad9a37fffb4"},
+]
+
+[package.dependencies]
+lockfile = ">=0.12.2,<0.13.0"
+openai = ">=0.27.0,<0.28.0"
+
 [[package]]
 name = "httplib2"
 version = "0.22.0"
@@ -567,6 +582,17 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"
 plugins = ["setuptools"]
 requirements-deprecated-finder = ["pip-api", "pipreqs"]
 
+[[package]]
+name = "lockfile"
+version = "0.12.2"
+description = "Platform-independent file locking module"
+optional = false
+python-versions = "*"
+files = [
+    {file = "lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa"},
+    {file = "lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799"},
+]
+
 [[package]]
 name = "mccabe"
 version = "0.6.1"
@@ -1439,4 +1465,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "69391ed283544bc6ce50a65f5c8fb0aa4e2ced538159f5ad0464ba255882b45c"
+content-hash = "54bf8b431e1c64c947e36d48ebef5463bf84cf0c909ce17b639f9ef1228389d5"
diff --git a/pyproject.toml b/pyproject.toml
index bc9564b16..783f7f261 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ click = "^8.1.3"
 types-requests = "^2.31.0.1"
 pexpect = "^4.8.0"
 psutil = "^5.9.5"
+helicone = "^1.0.6"
 
 [tool.poetry.group.dev.dependencies]
 flake8 = "^3.9.2"
diff --git a/send_to_googledrive.py b/send_to_googledrive.py
index 0e2cebe5a..715bc56b8 100644
--- a/send_to_googledrive.py
+++ b/send_to_googledrive.py
@@ -56,6 +56,7 @@ for sub_dir in os.listdir(base_dir):
                 "Agent": sub_dir,
                 "Command": data.get("command", ""),
                 "Completion Time": data.get("completion_time", ""),
+                "Benchmark Start Time": data.get("benchmark_start_time", ""),
                 "Total Run Time": data.get("metrics", {}).get("run_time", ""),
                 "Highest Difficulty": data.get("metrics", {}).get(
                     "highest_difficulty", ""