small data changes
parent
a5a9142b20
commit
39efed59af
|
@ -4254,6 +4254,126 @@
|
|||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "13.4.13",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-13.4.13.tgz",
|
||||
"integrity": "sha512-ZptVhHjzUuivnXMNCJ6lER33HN7lC+rZ01z+PM10Ows21NHFYMvGhi5iXkGtBDk6VmtzsbqnAjnx4Oz5um0FjA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "13.4.13",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-13.4.13.tgz",
|
||||
"integrity": "sha512-t9nTiWCLApw8W4G1kqJyYP7y6/7lyal3PftmRturIxAIBlZss9wrtVN8nci50StDHmIlIDxfguYIEGVr9DbFTg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "13.4.13",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-13.4.13.tgz",
|
||||
"integrity": "sha512-xEHUqC8eqR5DHe8SOmMnDU1K3ggrJ28uIKltrQAwqFSSSmzjnN/XMocZkcVhuncuxYrpbri0iMQstRyRVdQVWg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "13.4.13",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-13.4.13.tgz",
|
||||
"integrity": "sha512-sNf3MnLAm8rquSSAoeD9nVcdaDeRYOeey4stOWOyWIgbBDtP+C93amSgH/LPTDoUV7gNiU6f+ghepTjTjRgIUQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "13.4.13",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-13.4.13.tgz",
|
||||
"integrity": "sha512-WhcRaJJSHyx9OWmKjjz+OWHumiPZWRqmM/09Bt7Up4UqUJFFhGExeztR4trtv3rflvULatu9IH/nTV8fUUgaMA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "13.4.13",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-13.4.13.tgz",
|
||||
"integrity": "sha512-+Y4LLhOWWZQIDKVwr2R17lq2KSN0F1c30QVgGIWfnjjHpH8nrIWHEndhqYU+iFuW8It78CiJjQKTw4f51HD7jA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "13.4.13",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-13.4.13.tgz",
|
||||
"integrity": "sha512-rWurdOR20uxjfqd1X9vDAgv0Jb26KjyL8akF9CBeFqX8rVaBAnW/Wf6A2gYEwyYY4Bai3T7p1kro6DFrsvBAAw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "13.4.13",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-13.4.13.tgz",
|
||||
"integrity": "sha512-E8bSPwRuY5ibJ3CzLQmJEt8qaWrPYuUTwnrwygPUEWoLzD5YRx9SD37oXRdU81TgGwDzCxpl7z5Nqlfk50xAog==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,81 +0,0 @@
|
|||
// This is your Prisma schema file,
|
||||
// learn more about it in the docs: https://pris.ly/d/prisma-schema
|
||||
|
||||
generator client {
|
||||
provider = "prisma-client-js"
|
||||
}
|
||||
|
||||
datasource db {
|
||||
provider = "sqlite"
|
||||
url = env("DATABASE_URL")
|
||||
}
|
||||
|
||||
model Metrics {
|
||||
id Int @id @default(autoincrement())
|
||||
difficulty String
|
||||
success Boolean
|
||||
successPercent Float
|
||||
runTime String?
|
||||
failReason String?
|
||||
Test Test[]
|
||||
}
|
||||
|
||||
model MetricsOverall {
|
||||
id Int @id @default(autoincrement())
|
||||
runTime String
|
||||
highestDifficulty String
|
||||
percentage Float?
|
||||
SuiteTest SuiteTest[]
|
||||
Report Report[]
|
||||
}
|
||||
|
||||
model Test {
|
||||
id Int @id @default(autoincrement())
|
||||
dataPath String
|
||||
isRegression Boolean
|
||||
answer String
|
||||
description String
|
||||
metricsId Int
|
||||
metrics Metrics @relation(fields: [metricsId], references: [id])
|
||||
categoryId Int?
|
||||
category Category? @relation(fields: [categoryId], references: [id])
|
||||
task String?
|
||||
reachedCutoff Boolean?
|
||||
}
|
||||
|
||||
model SuiteTest {
|
||||
id Int @id @default(autoincrement())
|
||||
dataPath String
|
||||
metricsOverallId Int
|
||||
metricsOverall MetricsOverall @relation(fields: [metricsOverallId], references: [id])
|
||||
categoryId Int?
|
||||
category Category? @relation(fields: [categoryId], references: [id])
|
||||
task String?
|
||||
reachedCutoff Boolean?
|
||||
}
|
||||
|
||||
model Category {
|
||||
id Int @id @default(autoincrement())
|
||||
name String @unique
|
||||
tests Test[]
|
||||
suiteTests SuiteTest[]
|
||||
}
|
||||
|
||||
model Report {
|
||||
id Int @id @default(autoincrement())
|
||||
command String
|
||||
completionTime String
|
||||
benchmarkStartTime String
|
||||
metricsOverallId Int
|
||||
metricsOverall MetricsOverall @relation(fields: [metricsOverallId], references: [id])
|
||||
configKey String
|
||||
configValue String
|
||||
agentId Int
|
||||
agent Agent @relation(fields: [agentId], references: [id])
|
||||
}
|
||||
|
||||
model Agent {
|
||||
id Int @id @default(autoincrement())
|
||||
name String @unique
|
||||
reports Report[]
|
||||
}
|
|
@ -7,7 +7,7 @@ export const env = createEnv({
|
|||
* isn't built with invalid env vars.
|
||||
*/
|
||||
server: {
|
||||
DATABASE_URL: z.string().url(),
|
||||
// DATABASE_URL: z.string().url(),
|
||||
NODE_ENV: z.enum(["development", "test", "production"]),
|
||||
},
|
||||
|
||||
|
@ -25,7 +25,7 @@ export const env = createEnv({
|
|||
* middlewares) or client-side so we need to destruct manually.
|
||||
*/
|
||||
runtimeEnv: {
|
||||
DATABASE_URL: process.env.DATABASE_URL,
|
||||
// DATABASE_URL: process.env.DATABASE_URL,
|
||||
NODE_ENV: process.env.NODE_ENV,
|
||||
// NEXT_PUBLIC_CLIENTVAR: process.env.NEXT_PUBLIC_CLIENTVAR,
|
||||
},
|
||||
|
|
|
@ -51,7 +51,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.dropna(subset=['benchmark_start_time', 'response', 'model'], inplace=True)"
|
||||
"df.dropna(subset=['benchmark_start_time', 'response', 'model', 'agent'], inplace=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -684,7 +684,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 121,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -708,8 +708,8 @@
|
|||
" return x\n",
|
||||
"\n",
|
||||
"challenge = \"TestRememberMultipleIds\"\n",
|
||||
"agent_array = ['beebot'] # df['agent'].unique()\n",
|
||||
"request_type = 'request' # 'request' or 'response'\n",
|
||||
"agent_array = df['agent'].unique()\n",
|
||||
"request_type = 'response' # 'request' or 'response'\n",
|
||||
"\n",
|
||||
"# Loop through unique agents\n",
|
||||
"for agent in agent_array:\n",
|
||||
|
@ -1494,7 +1494,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1533,7 +1533,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 112,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
|
@ -6,7 +6,55 @@ from gql.transport.aiohttp import AIOHTTPTransport
|
|||
from gql import gql, Client
|
||||
import os
|
||||
|
||||
from agbenchmark.reports.processing.report_types import Report, SuiteTest
|
||||
# from agbenchmark.reports.processing.report_types import Report, SuiteTest
|
||||
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class Metrics(BaseModel):
    """Per-test result metrics as stored in a report's JSON.

    ``difficulty``, ``success`` and ``success_percent`` are required; the
    remaining fields are optional and default to ``None`` when absent.
    """

    # Required fields.
    difficulty: str
    success: bool
    # Read from the "success_%" key of the input, which is not a valid
    # Python identifier and therefore needs an alias.
    success_percent: float = Field(..., alias="success_%")

    # Optional fields; missing keys are left as None.
    run_time: Optional[str] = None
    fail_reason: Optional[str] = None
    attempted: Optional[bool] = None
|
||||
|
||||
|
||||
class MetricsOverall(BaseModel):
    """Aggregate metrics attached to a whole report or to a test suite.

    ``run_time`` and ``highest_difficulty`` are required strings;
    ``percentage`` is optional and defaults to ``None``.
    """

    run_time: str
    highest_difficulty: str
    percentage: Optional[float] = None
|
||||
|
||||
|
||||
class Test(BaseModel):
    """A single test entry inside a report.

    Carries the test's own ``Metrics`` plus descriptive fields. ``task``
    and ``reached_cutoff`` are optional and default to ``None``.
    """

    # Required descriptive fields.
    data_path: str
    is_regression: bool
    answer: str
    description: str

    # Result metrics for this individual test.
    metrics: Metrics

    # A list of category names (required here, optional on SuiteTest).
    category: List[str]

    # Optional fields; missing keys are left as None.
    task: Optional[str] = None
    reached_cutoff: Optional[bool] = None
|
||||
|
||||
|
||||
class SuiteTest(BaseModel):
    """A suite entry inside a report: a named group of ``Test`` objects.

    Unlike ``Test``, a suite carries ``MetricsOverall`` and a ``tests``
    mapping; ``category``, ``task`` and ``reached_cutoff`` are optional and
    default to ``None``.
    """

    data_path: str

    # Aggregate metrics for the suite as a whole.
    metrics: MetricsOverall

    # Member tests, keyed by a string (presumably the test name — confirm
    # against the report files).
    tests: Dict[str, Test]

    # Optional fields; missing keys are left as None.
    category: Optional[List[str]] = None
    task: Optional[str] = None
    reached_cutoff: Optional[bool] = None
|
||||
|
||||
|
||||
class Report(BaseModel):
    """Top-level model for one ``report.json`` file.

    Holds the run's command line and timing strings, the overall metrics,
    the per-test results and the run configuration.
    """

    command: str
    completion_time: str
    benchmark_start_time: str

    # Aggregate metrics for the whole run.
    metrics: MetricsOverall

    # Each entry is either a single test or a suite of tests.
    tests: Dict[str, Union[Test, SuiteTest]]

    # Config values are either plain strings or one level of nested
    # string-to-string mapping. Spelled with typing.Union/Dict (rather than
    # ``str | dict[str, str]``) to match the rest of the file and so the
    # class body does not raise TypeError on interpreters older than 3.10,
    # where ``|`` between types is unsupported.
    config: Dict[str, Union[str, Dict[str, str]]]
|
||||
|
||||
|
||||
def get_reports():
|
||||
|
@ -31,13 +79,21 @@ def get_reports():
|
|||
# Check if the item is a directory (an agent directory)
|
||||
if os.path.isdir(agent_dir):
|
||||
# Construct the path to the report.json file
|
||||
# Use glob to find all run directories in the agent_dir
|
||||
# glob returns every entry in agent_dir, files as well as directories, so non-directories must be filtered out.
|
||||
run_dirs = glob.glob(os.path.join(agent_dir, "*"))
|
||||
|
||||
# Get all json files starting with 'file'
|
||||
# old_report_files = glob.glob(os.path.join(agent_dir, "file*.json"))
|
||||
|
||||
# For each run directory, add the report.json to the end
|
||||
# Only include the path if it's actually a directory
|
||||
report_files = [
|
||||
os.path.join(run_dir, "report.json") for run_dir in run_dirs
|
||||
os.path.join(run_dir, "report.json")
|
||||
for run_dir in run_dirs
|
||||
if os.path.isdir(run_dir)
|
||||
]
|
||||
# old_report_files already contains the full paths, so no need to join again
|
||||
# report_files = report_files + old_report_files
|
||||
for report_file in report_files:
|
||||
# Check if the report.json file exists
|
||||
if os.path.isfile(report_file):
|
||||
|
@ -45,6 +101,7 @@ def get_reports():
|
|||
with open(report_file, "r") as f:
|
||||
# Load the JSON data from the file
|
||||
json_data = json.load(f)
|
||||
print(f"Processing {report_file}")
|
||||
report = Report.parse_obj(json_data)
|
||||
|
||||
for test_name, test_data in report.tests.items():
|
||||
|
@ -265,7 +322,7 @@ df = pd.merge(
|
|||
helicone_df,
|
||||
reports_df,
|
||||
on=["benchmark_start_time", "agent", "challenge"],
|
||||
how="left",
|
||||
how="inner",
|
||||
)
|
||||
|
||||
df.to_pickle("df.pkl")
|
||||
|
|
Loading…
Reference in New Issue