small data changes

pull/5196/head
Silen Naihin 2023-09-11 18:20:03 -07:00
parent a5a9142b20
commit 39efed59af
5 changed files with 189 additions and 93 deletions

View File

@@ -4254,6 +4254,126 @@
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "13.4.13",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-13.4.13.tgz",
"integrity": "sha512-ZptVhHjzUuivnXMNCJ6lER33HN7lC+rZ01z+PM10Ows21NHFYMvGhi5iXkGtBDk6VmtzsbqnAjnx4Oz5um0FjA==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "13.4.13",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-13.4.13.tgz",
"integrity": "sha512-t9nTiWCLApw8W4G1kqJyYP7y6/7lyal3PftmRturIxAIBlZss9wrtVN8nci50StDHmIlIDxfguYIEGVr9DbFTg==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "13.4.13",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-13.4.13.tgz",
"integrity": "sha512-xEHUqC8eqR5DHe8SOmMnDU1K3ggrJ28uIKltrQAwqFSSSmzjnN/XMocZkcVhuncuxYrpbri0iMQstRyRVdQVWg==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "13.4.13",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-13.4.13.tgz",
"integrity": "sha512-sNf3MnLAm8rquSSAoeD9nVcdaDeRYOeey4stOWOyWIgbBDtP+C93amSgH/LPTDoUV7gNiU6f+ghepTjTjRgIUQ==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "13.4.13",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-13.4.13.tgz",
"integrity": "sha512-WhcRaJJSHyx9OWmKjjz+OWHumiPZWRqmM/09Bt7Up4UqUJFFhGExeztR4trtv3rflvULatu9IH/nTV8fUUgaMA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "13.4.13",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-13.4.13.tgz",
"integrity": "sha512-+Y4LLhOWWZQIDKVwr2R17lq2KSN0F1c30QVgGIWfnjjHpH8nrIWHEndhqYU+iFuW8It78CiJjQKTw4f51HD7jA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "13.4.13",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-13.4.13.tgz",
"integrity": "sha512-rWurdOR20uxjfqd1X9vDAgv0Jb26KjyL8akF9CBeFqX8rVaBAnW/Wf6A2gYEwyYY4Bai3T7p1kro6DFrsvBAAw==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@next/swc-win32-ia32-msvc": {
"version": "13.4.13",
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-13.4.13.tgz",
"integrity": "sha512-E8bSPwRuY5ibJ3CzLQmJEt8qaWrPYuUTwnrwygPUEWoLzD5YRx9SD37oXRdU81TgGwDzCxpl7z5Nqlfk50xAog==",
"cpu": [
"ia32"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
}
}
}

View File

@@ -1,81 +0,0 @@
// This is your Prisma schema file,
// learn more about it in the docs: https://pris.ly/d/prisma-schema

generator client {
  provider = "prisma-client-js"
}

datasource db {
  provider = "sqlite"
  url = env("DATABASE_URL")
}

model Metrics {
  id Int @id @default(autoincrement())
  difficulty String
  success Boolean
  successPercent Float
  runTime String?
  failReason String?
  Test Test[]
}

model MetricsOverall {
  id Int @id @default(autoincrement())
  runTime String
  highestDifficulty String
  percentage Float?
  SuiteTest SuiteTest[]
  Report Report[]
}

model Test {
  id Int @id @default(autoincrement())
  dataPath String
  isRegression Boolean
  answer String
  description String
  metricsId Int
  metrics Metrics @relation(fields: [metricsId], references: [id])
  categoryId Int?
  category Category? @relation(fields: [categoryId], references: [id])
  task String?
  reachedCutoff Boolean?
}

model SuiteTest {
  id Int @id @default(autoincrement())
  dataPath String
  metricsOverallId Int
  metricsOverall MetricsOverall @relation(fields: [metricsOverallId], references: [id])
  categoryId Int?
  category Category? @relation(fields: [categoryId], references: [id])
  task String?
  reachedCutoff Boolean?
}

model Category {
  id Int @id @default(autoincrement())
  name String @unique
  tests Test[]
  suiteTests SuiteTest[]
}

model Report {
  id Int @id @default(autoincrement())
  command String
  completionTime String
  benchmarkStartTime String
  metricsOverallId Int
  metricsOverall MetricsOverall @relation(fields: [metricsOverallId], references: [id])
  configKey String
  configValue String
  agentId Int
  agent Agent @relation(fields: [agentId], references: [id])
}

model Agent {
  id Int @id @default(autoincrement())
  name String @unique
  reports Report[]
}

View File

@@ -7,7 +7,7 @@ export const env = createEnv({
* isn't built with invalid env vars.
*/
server: {
DATABASE_URL: z.string().url(),
// DATABASE_URL: z.string().url(),
NODE_ENV: z.enum(["development", "test", "production"]),
},
@@ -25,7 +25,7 @@
* middlewares) or client-side so we need to destruct manually.
*/
runtimeEnv: {
DATABASE_URL: process.env.DATABASE_URL,
// DATABASE_URL: process.env.DATABASE_URL,
NODE_ENV: process.env.NODE_ENV,
// NEXT_PUBLIC_CLIENTVAR: process.env.NEXT_PUBLIC_CLIENTVAR,
},

View File

@@ -51,7 +51,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.dropna(subset=['benchmark_start_time', 'response', 'model'], inplace=True)"
"df.dropna(subset=['benchmark_start_time', 'response', 'model', 'agent'], inplace=True)"
]
},
{
@@ -684,7 +684,7 @@
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
@@ -708,8 +708,8 @@
" return x\n",
"\n",
"challenge = \"TestRememberMultipleIds\"\n",
"agent_array = ['beebot'] # df['agent'].unique()\n",
"request_type = 'request' # 'request' or 'response'\n",
"agent_array = df['agent'].unique()\n",
"request_type = 'response' # 'request' or 'response'\n",
"\n",
"# Loop through unique agents\n",
"for agent in agent_array:\n",
@@ -1494,7 +1494,7 @@
},
{
"cell_type": "code",
"execution_count": 109,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1533,7 +1533,7 @@
},
{
"cell_type": "code",
"execution_count": 112,
"execution_count": null,
"metadata": {},
"outputs": [
{

View File

@@ -6,7 +6,55 @@ from gql.transport.aiohttp import AIOHTTPTransport
from gql import gql, Client
import os
from agbenchmark.reports.processing.report_types import Report, SuiteTest
# from agbenchmark.reports.processing.report_types import Report, SuiteTest
from typing import Dict, List, Optional, Union
from pydantic import BaseModel, Field

class Metrics(BaseModel):
    difficulty: str
    success: bool
    success_percent: float = Field(..., alias="success_%")
    run_time: Optional[str] = None
    fail_reason: Optional[str] = None
    attempted: Optional[bool] = None


class MetricsOverall(BaseModel):
    run_time: str
    highest_difficulty: str
    percentage: Optional[float] = None


class Test(BaseModel):
    data_path: str
    is_regression: bool
    answer: str
    description: str
    metrics: Metrics
    category: List[str]
    task: Optional[str] = None
    reached_cutoff: Optional[bool] = None


class SuiteTest(BaseModel):
    data_path: str
    metrics: MetricsOverall
    tests: Dict[str, Test]
    category: Optional[List[str]] = None
    task: Optional[str] = None
    reached_cutoff: Optional[bool] = None


class Report(BaseModel):
    command: str
    completion_time: str
    benchmark_start_time: str
    metrics: MetricsOverall
    tests: Dict[str, Union[Test, SuiteTest]]
    config: Dict[str, str | dict[str, str]]


def get_reports():
@@ -31,13 +79,21 @@ def get_reports():
# Check if the item is a directory (an agent directory)
if os.path.isdir(agent_dir):
# Construct the path to the report.json file
# Use glob to find all run directories in the agent_dir
# Get all directories and files, but note that this will also include any file, not just directories.
run_dirs = glob.glob(os.path.join(agent_dir, "*"))
# Get all json files starting with 'file'
# old_report_files = glob.glob(os.path.join(agent_dir, "file*.json"))
# For each run directory, add the report.json to the end
# Only include the path if it's actually a directory
report_files = [
os.path.join(run_dir, "report.json") for run_dir in run_dirs
os.path.join(run_dir, "report.json")
for run_dir in run_dirs
if os.path.isdir(run_dir)
]
# old_report_files already contains the full paths, so no need to join again
# report_files = report_files + old_report_files
for report_file in report_files:
# Check if the report.json file exists
if os.path.isfile(report_file):
@@ -45,6 +101,7 @@ def get_reports():
with open(report_file, "r") as f:
# Load the JSON data from the file
json_data = json.load(f)
print(f"Processing {report_file}")
report = Report.parse_obj(json_data)
for test_name, test_data in report.tests.items():
@@ -265,7 +322,7 @@ df = pd.merge(
helicone_df,
reports_df,
on=["benchmark_start_time", "agent", "challenge"],
how="left",
how="inner",
)
df.to_pickle("df.pkl")