diff --git a/agbenchmark/challenges b/agbenchmark/challenges
index 59d44a6f4..8153f05f7 160000
--- a/agbenchmark/challenges
+++ b/agbenchmark/challenges
@@ -1 +1 @@
-Subproject commit 59d44a6f4f3cd778d9034857f7277428269b2028
+Subproject commit 8153f05f758d86ddf01fe7d8cb4fb7c53ddc4d72
diff --git a/agbenchmark/conftest.py b/agbenchmark/conftest.py
index ba14aa27a..ba62b6982 100644
--- a/agbenchmark/conftest.py
+++ b/agbenchmark/conftest.py
@@ -5,6 +5,7 @@ import sys
 import time
 from pathlib import Path  # noqa
 from typing import Any, Dict, Generator
+from helicone.lock import HeliconeLockManager
 
 import pytest
 
@@ -132,6 +133,9 @@ suite_reports: dict[str, list] = {}
 
 def pytest_runtest_makereport(item: Any, call: Any) -> None:
     challenge_data = item.funcargs.get("challenge_data", None)
+    # Dummy dependency setup tests have no challenge_data; never subscript None.
+    if challenge_data:
+        HeliconeLockManager.write_custom_property("challenge", challenge_data["name"])
     if not challenge_data:
         # this will only happen for dummy dependency setup tests
         return
diff --git a/agbenchmark/reports/ReportManager.py b/agbenchmark/reports/ReportManager.py
index a5c99b6a0..08c6cbd82 100644
--- a/agbenchmark/reports/ReportManager.py
+++ b/agbenchmark/reports/ReportManager.py
@@ -5,6 +5,7 @@ import time
 from datetime import datetime
 from typing import Any, Dict, Optional
 
+from agbenchmark.start_benchmark import BENCHMARK_START_TIME
 from agbenchmark.utils.utils import get_highest_success_difficulty
 
 
@@ -62,6 +63,7 @@ class ReportManager:
         self.tests = {
             "command": command.split(os.sep)[-1],
             "completion_time": datetime.now().strftime("%Y-%m-%d-%H:%M"),
+            "benchmark_start_time": BENCHMARK_START_TIME,
             "metrics": {
                 "run_time": str(round(time.time() - self.start_time, 2)) + " seconds",
                 "highest_difficulty": get_highest_success_difficulty(self.tests),
diff --git a/agbenchmark/start_benchmark.py b/agbenchmark/start_benchmark.py
index 8a757a9ef..0ca906be0 100644
--- a/agbenchmark/start_benchmark.py
+++ b/agbenchmark/start_benchmark.py
@@ -1,16 +1,22 @@
 import json
 import os
 import sys
+from datetime import datetime
 from pathlib import Path
 from typing import Any
+from helicone.lock import HeliconeLockManager
+from agbenchmark.utils.utils import AGENT_NAME, calculate_dynamic_paths
 
 import click
 import pytest
 
-from agbenchmark.utils.utils import calculate_dynamic_paths
 
 CURRENT_DIRECTORY = Path(__file__).resolve().parent
+BENCHMARK_START_TIME = datetime.now().strftime("%Y-%m-%d-%H:%M")
 
+HeliconeLockManager.write_custom_property("benchmark_start_time", BENCHMARK_START_TIME)
+if AGENT_NAME:
+    HeliconeLockManager.write_custom_property("agent_name", AGENT_NAME)
 (
     HOME_DIRECTORY,
     CONFIG_PATH,
diff --git a/agent/Auto-GPT b/agent/Auto-GPT
index 1d9a7c6d5..dcb632b6b 160000
--- a/agent/Auto-GPT
+++ b/agent/Auto-GPT
@@ -1 +1 @@
-Subproject commit 1d9a7c6d5792378e1fb34c8591c7dc6eb1dd1a1d
+Subproject commit dcb632b6b643001f01e6169645f58dc60b1be708
diff --git a/poetry.lock b/poetry.lock
index f91684580..7fbc61d37 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -514,6 +514,21 @@ files = [
 google-auth = ">=1.12.0"
 google-auth-oauthlib = ">=0.4.1"
 
+[[package]]
+name = "helicone"
+version = "1.0.6"
+description = "A Python wrapper for the OpenAI API that logs all requests to Helicone."
+optional = false
+python-versions = ">=3.8.1"
+files = [
+    {file = "helicone-1.0.6-py3-none-any.whl", hash = "sha256:a735096a3d616399210a2cf951360b45418fbea6af5f21fb61b6a1b99c4d75da"},
+    {file = "helicone-1.0.6.tar.gz", hash = "sha256:25f9459a9b427726d3373cccc64776034b6db5fe39b3f5c48efdbad9a37fffb4"},
+]
+
+[package.dependencies]
+lockfile = ">=0.12.2,<0.13.0"
+openai = ">=0.27.0,<0.28.0"
+
 [[package]]
 name = "httplib2"
 version = "0.22.0"
@@ -567,6 +582,17 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"
 plugins = ["setuptools"]
 requirements-deprecated-finder = ["pip-api", "pipreqs"]
 
+[[package]]
+name = "lockfile"
+version = "0.12.2"
+description = "Platform-independent file locking module"
+optional = false
+python-versions = "*"
+files = [
+    {file = "lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa"},
+    {file = "lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799"},
+]
+
 [[package]]
 name = "mccabe"
 version = "0.6.1"
@@ -1439,4 +1465,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "69391ed283544bc6ce50a65f5c8fb0aa4e2ced538159f5ad0464ba255882b45c"
+content-hash = "54bf8b431e1c64c947e36d48ebef5463bf84cf0c909ce17b639f9ef1228389d5"
diff --git a/pyproject.toml b/pyproject.toml
index bc9564b16..783f7f261 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ click = "^8.1.3"
 types-requests = "^2.31.0.1"
 pexpect = "^4.8.0"
 psutil = "^5.9.5"
+helicone = "^1.0.6"
 
 [tool.poetry.group.dev.dependencies]
 flake8 = "^3.9.2"
diff --git a/send_to_googledrive.py b/send_to_googledrive.py
index 0e2cebe5a..715bc56b8 100644
--- a/send_to_googledrive.py
+++ b/send_to_googledrive.py
@@ -56,6 +56,7 @@ for sub_dir in os.listdir(base_dir):
                         "Agent": sub_dir,
                         "Command": data.get("command", ""),
                         "Completion Time": data.get("completion_time", ""),
+                        "Benchmark Start Time": data.get("benchmark_start_time", ""),
                         "Total Run Time": data.get("metrics", {}).get("run_time", ""),
                         "Highest Difficulty": data.get("metrics", {}).get(
                             "highest_difficulty", ""