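"""Match agbenchmark report data with Helicone request logs.

Loads every report.json produced by benchmark runs, fetches LLM request
records from the Helicone GraphQL API, joins the two sources on
(benchmark_start_time, agent, challenge), and saves the merged
DataFrame to df.pkl.
"""
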
import glob
import json
import os
from typing import Dict, List, Optional, Union

import pandas as pd
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
from pydantic import BaseModel, Field

# from agbenchmark.reports.processing.report_types import Report, SuiteTest


class Metrics(BaseModel):
    difficulty: str
    success: bool
    success_percent: float = Field(alias="success_%")
    run_time: Optional[str] = None
    fail_reason: Optional[str] = None
    attempted: Optional[bool] = None


class MetricsOverall(BaseModel):
    run_time: str
    highest_difficulty: str
    percentage: Optional[float] = None


class Test(BaseModel):
    data_path: str
    is_regression: bool
    answer: str
    description: str
    metrics: Metrics
    category: List[str]
    task: Optional[str] = None
    reached_cutoff: Optional[bool] = None


class SuiteTest(BaseModel):
    data_path: str
    metrics: MetricsOverall
    tests: Dict[str, Test]
    category: Optional[List[str]] = None
    task: Optional[str] = None
    reached_cutoff: Optional[bool] = None


class Report(BaseModel):
    command: str
    completion_time: str
    benchmark_start_time: str
    metrics: MetricsOverall
    tests: Dict[str, Union[Test, SuiteTest]]
    config: Dict[str, Union[str, Dict[str, str]]]


def get_reports():
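    """Collect every report.json under the reports directory into a
    DataFrame with one row per challenge attempt."""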
    # Initialize an empty list to store the report data
    report_data = []

    # Get the current working directory
    current_dir = os.getcwd()

    # If already inside the 'reports' directory, scan the current directory;
    # otherwise look for a 'reports' subdirectory
    if current_dir.endswith("reports"):
        reports_dir = "."
    else:
        reports_dir = "reports"

    # Iterate over all agent directories in the reports directory
    for agent_name in os.listdir(reports_dir):
        agent_dir = os.path.join(reports_dir, agent_name)

        # Check if the item is a directory (an agent directory)
        if os.path.isdir(agent_dir):
            # List every entry in the agent directory; glob("*") also
            # matches plain files, so directories are filtered below
            run_dirs = glob.glob(os.path.join(agent_dir, "*"))

            # Legacy layout: json files starting with 'file' directly in
            # the agent directory
            # old_report_files = glob.glob(os.path.join(agent_dir, "file*.json"))

            # For each run directory, append report.json to the path,
            # keeping only entries that are actually directories
            report_files = [
                os.path.join(run_dir, "report.json")
                for run_dir in run_dirs
                if os.path.isdir(run_dir)
            ]
            # old_report_files already contains full paths, so no need to join
            # report_files = report_files + old_report_files
            for report_file in report_files:
                # Check if the report.json file exists
                if os.path.isfile(report_file):
                    # Open the report.json file
                    with open(report_file, "r") as f:
                        # Load the JSON data from the file
                        json_data = json.load(f)
                        print(f"Processing {report_file}")
                        report = Report.model_validate(json_data)

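                        # Flatten each test result into one row of
                        # per-challenge metrics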
                        for test_name, test_data in report.tests.items():
                            test_json = {
                                "agent": agent_name.lower(),
                                "benchmark_start_time": report.benchmark_start_time,
                            }

                            if isinstance(test_data, SuiteTest):
                                if test_data.category:  # same task for the whole suite
                                    first_test = test_data.tests[
                                        list(test_data.tests.keys())[0]
                                    ]
                                    test_json["challenge"] = test_name
                                    test_json["attempted"] = first_test.metrics.attempted
                                    test_json["categories"] = ", ".join(test_data.category)
                                    test_json["task"] = test_data.task
                                    test_json["success"] = test_data.metrics.percentage
                                    test_json["difficulty"] = test_data.metrics.highest_difficulty
                                    test_json["success_%"] = test_data.metrics.percentage
                                    test_json["run_time"] = test_data.metrics.run_time
                                    test_json["is_regression"] = first_test.is_regression
                                else:  # separate tasks in 1 suite
                                    for (
                                        suite_test_name,
                                        suite_data,
                                    ) in test_data.tests.items():
                                        # Copy the shared fields so every
                                        # sub-test gets its own record
                                        suite_json = dict(test_json)
                                        suite_json["challenge"] = suite_test_name
                                        suite_json["attempted"] = suite_data.metrics.attempted
                                        suite_json["categories"] = ", ".join(
                                            suite_data.category
                                        )
                                        suite_json["task"] = suite_data.task
                                        suite_json["success"] = (
                                            100.0 if suite_data.metrics.success else 0
                                        )
                                        suite_json["difficulty"] = suite_data.metrics.difficulty
                                        suite_json["success_%"] = suite_data.metrics.success_percent
                                        suite_json["run_time"] = suite_data.metrics.run_time
                                        suite_json["is_regression"] = suite_data.is_regression
                                        report_data.append(suite_json)
                                    # Records were appended per sub-test above
                                    continue
                            else:
                                test_json["challenge"] = test_name
                                test_json["attempted"] = test_data.metrics.attempted
                                test_json["categories"] = ", ".join(test_data.category)
                                test_json["task"] = test_data.task
                                test_json["success"] = (
                                    100.0 if test_data.metrics.success else 0
                                )
                                test_json["difficulty"] = test_data.metrics.difficulty
                                test_json["success_%"] = test_data.metrics.success_percent
                                test_json["run_time"] = test_data.metrics.run_time
                                test_json["is_regression"] = test_data.is_regression

                            report_data.append(test_json)

    return pd.DataFrame(report_data)


def get_helicone_data():
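    """Page through the Helicone GraphQL API and return all LLM request
    records as a DataFrame."""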
    helicone_api_key = os.getenv("HELICONE_API_KEY")

    url = "https://www.helicone.ai/api/graphql"
    # Authenticate with the personal access key from HELICONE_API_KEY
    transport = AIOHTTPTransport(
        url=url, headers={"authorization": f"Bearer {helicone_api_key}"}
    )

    client = Client(transport=transport, fetch_schema_from_transport=True)

    SIZE = 250  # page size for each paginated request

    i = 0

    data = []
    print("Fetching data from Helicone")
    while True:
        query = gql(
            """
            query ExampleQuery($limit: Int, $offset: Int) {
                heliconeRequest(limit: $limit, offset: $offset) {
                    costUSD
                    prompt
                    properties {
                        name
                        value
                    }
                    requestBody
                    response
                    createdAt
                }
            }
            """
        )
        print(f"Fetching {i * SIZE} to {(i + 1) * SIZE} records")
        try:
            result = client.execute(
                query, variable_values={"limit": SIZE, "offset": i * SIZE}
            )
        except Exception as e:
            print(f"Error occurred: {e}")
            result = None

        i += 1

        if result:
            for item in result["heliconeRequest"]:
                properties = {
                    prop["name"]: prop["value"] for prop in item["properties"]
                }
                data.append(
                    {
                        "createdAt": item["createdAt"],
                        "agent": properties.get("agent"),
                        "costUSD": item["costUSD"],
                        "job_id": properties.get("job_id"),
                        "challenge": properties.get("challenge"),
                        "benchmark_start_time": properties.get("benchmark_start_time"),
                        "prompt": item["prompt"],
                        "response": item["response"],
                        "model": item["requestBody"].get("model"),
                        "request": item["requestBody"].get("messages"),
                    }
                )

        if not result or len(result["heliconeRequest"]) == 0:
            print("No more results")
            break

    df = pd.DataFrame(data)

    # Drop rows where agent is None
    df = df.dropna(subset=["agent"])

    # Convert the remaining agent names to lowercase
    df["agent"] = df["agent"].str.lower()

    return df


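# Use cached raw data when both pickle files exist; otherwise fetch and
# cache it. Delete the .pkl files to force a refresh.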
if os.path.exists("raw_reports.pkl") and os.path.exists("raw_helicone.pkl"):
    reports_df = pd.read_pickle("raw_reports.pkl")
    helicone_df = pd.read_pickle("raw_helicone.pkl")
else:
    reports_df = get_reports()
    reports_df.to_pickle("raw_reports.pkl")
    helicone_df = get_helicone_data()
    helicone_df.to_pickle("raw_helicone.pkl")


def try_formats(date_str):
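    """Parse a timestamp string against the known report formats.

    Returns a Timestamp, or None if no format matches.
    """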
    formats = ["%Y-%m-%d-%H:%M", "%Y-%m-%dT%H:%M:%S%z"]
    for fmt in formats:
        try:
            return pd.to_datetime(date_str, format=fmt)
        except ValueError:
            pass
    return None


helicone_df["benchmark_start_time"] = pd.to_datetime(
    helicone_df["benchmark_start_time"].apply(try_formats), utc=True
)
helicone_df = helicone_df.dropna(subset=["benchmark_start_time"])
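# Helicone createdAt timestamps are unix epoch milliseconds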
helicone_df["createdAt"] = pd.to_datetime(
    helicone_df["createdAt"], unit="ms", origin="unix"
)
reports_df["benchmark_start_time"] = pd.to_datetime(
    reports_df["benchmark_start_time"].apply(try_formats), utc=True
)
reports_df = reports_df.dropna(subset=["benchmark_start_time"])

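# Sanity-check that both time columns parsed to real datetimes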
assert pd.api.types.is_datetime64_any_dtype(
    helicone_df["benchmark_start_time"]
), "benchmark_start_time in helicone_df is not datetime"
assert pd.api.types.is_datetime64_any_dtype(
    reports_df["benchmark_start_time"]
), "benchmark_start_time in reports_df is not datetime"

reports_df["report_time"] = reports_df["benchmark_start_time"]

# Alternative: match each Helicone record to the most recent earlier report
# df = pd.merge_asof(
#     helicone_df.sort_values("benchmark_start_time"),
#     reports_df.sort_values("benchmark_start_time"),
#     left_on="benchmark_start_time",
#     right_on="benchmark_start_time",
#     by=["agent", "challenge"],
#     direction="backward",
# )

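# Join request records to benchmark results on the exact run, agent, and
# challenge; an inner join keeps only rows present in both sources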
df = pd.merge(
    helicone_df,
    reports_df,
    on=["benchmark_start_time", "agent", "challenge"],
    how="inner",
)

df.to_pickle("df.pkl")
print(df.info())
print("Data saved to df.pkl")
print("To load the data use: df = pd.read_pickle('df.pkl')")