file naming when --test (#164)

pull/5155/head
Silen Naihin 2023-07-17 11:24:16 -04:00 committed by GitHub
parent dffc1dfd51
commit 8aa6452cc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 315 additions and 72 deletions

View File

@ -1,40 +1,72 @@
{
"mini-agi": {
"TestBasicMemory": [true, true, true],
"TestBasicRetrieval": [true, true, true],
"TestCreateSimpleWebServer": [false, false, false],
"TestDebugSimpleTypoWithGuidance": [
false,
false,
false,
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [false, false, false],
"TestReadFile": [true, true, true, true],
"TestRememberMultipleIds": [true, true, true],
"TestRememberMultipleIdsWithNoise": [true, true, true],
"TestRememberMultiplePhrasesWithNoise": [true, true, true],
"TestRetrieval2": [true, true, true],
"TestRetrieval3": [true, true, true],
"TestSearch": [true, true, true, true],
"TestWriteFile": [
true,
true,
true,
false,
false,
false,
false,
true,
false,
true,
false,
false,
false,
false,
true
]
}
}
"mini-agi": {
"TestBasicMemory": [
true,
true,
true
],
"TestBasicRetrieval": [
true,
true,
true
],
"TestCreateSimpleWebServer": [
false,
false,
false
],
"TestDebugSimpleTypoWithGuidance": [
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [
false,
false,
false
],
"TestReadFile": [
true,
true,
true,
true,
true
],
"TestRememberMultipleIds": [
true,
true,
true
],
"TestRememberMultipleIdsWithNoise": [
true,
true,
true
],
"TestRememberMultiplePhrasesWithNoise": [
true,
true,
true
],
"TestRetrieval2": [
true,
true,
true
],
"TestRetrieval3": [
true,
true,
true
],
"TestSearch": [
true,
true,
true,
true
],
"TestWriteFile": [
true,
true,
true
]
}
}

View File

@ -0,0 +1,36 @@
{
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"non_mock_success_%": 100.0,
"run_time": "0.009 seconds"
}
},
"additional": {
"model": "gpt-3.5-turbo"
},
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-17-09:54",
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
},
"metrics": {
"run_time": "22.36 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 40.0,
"run_time": "22.169 seconds"
}
}
}
}

View File

@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-15-22:13",
"metrics": {
"run_time": "12.4 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 50.0,
"run_time": "12.127 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestReadFile",
"completion_time": "2023-07-17-10:12",
"metrics": {
"run_time": "65.27 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestReadFile": {
"data_path": "agbenchmark/challenges/interface/read_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "65.074 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
},
"additional": {
"model": "gpt-4",
"reached_termination_time": true
}
}

View File

@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestReadFile",
"completion_time": "2023-07-15-22:13",
"metrics": {
"run_time": "31.2 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestReadFile": {
"data_path": "agbenchmark/challenges/interface/read_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "30.903 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestSearch",
"completion_time": "2023-07-15-22:14",
"metrics": {
"run_time": "16.88 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestSearch": {
"data_path": "agbenchmark/challenges/interface/search",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "16.572 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -0,0 +1,28 @@
{
"command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
"completion_time": "2023-07-15-22:16",
"metrics": {
"run_time": "45.92 seconds",
"highest_difficulty": ": 0"
},
"tests": {
"TestDebugSimpleTypoWithGuidance": {
"data_path": "agbenchmark/challenges/code/d1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": false,
"fail_reason": "assert 1 in [0.0]",
"success_%": 0.0,
"run_time": "45.599 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -0,0 +1,28 @@
{
"command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
"completion_time": "2023-07-15-22:15",
"metrics": {
"run_time": "32.99 seconds",
"highest_difficulty": ": 0"
},
"tests": {
"TestDebugSimpleTypoWithGuidance": {
"data_path": "agbenchmark/challenges/code/d1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": false,
"fail_reason": "assert 1 in [0.0]",
"success_%": 0.0,
"run_time": "32.582 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -1,23 +0,0 @@
{
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-16-13:07",
"metrics": {
"run_time": "13.91 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 30.0,
"run_time": "13.684 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
}
}

View File

@ -1,7 +1,9 @@
# radar charts, logs, helper functions for tests, anything else relevant.
import glob
import math
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Any
@ -17,17 +19,49 @@ HOME_ENV = os.getenv("HOME_ENV")
def calculate_info_test_path(reports_path: Path) -> str:
command = sys.argv
if not reports_path.exists():
reports_path.mkdir(parents=True, exist_ok=True)
return str(
reports_path / f"file1_{datetime.now().strftime('%m-%d-%H-%M')}.json"
)
else:
json_files = glob.glob(str(reports_path / "*.json"))
file_count = len(json_files)
run_name = f"file{file_count + 1}_{datetime.now().strftime('%m-%d-%H-%M')}.json"
new_file_path = reports_path / run_name
return str(new_file_path)
json_files = glob.glob(str(reports_path / "*.json"))
# Default naming scheme
file_count = len(json_files)
run_name = f"file{file_count + 1}_{datetime.now().strftime('%m-%d-%H-%M')}.json"
# # If "--test" is in command
if "--test" in command:
test_index = command.index("--test")
try:
test_arg = command[test_index + 1] # Argument after --test
except IndexError:
raise ValueError("Expected an argument after --test")
# Get all files that include the string that is the argument after --test
related_files = [f for f in json_files if test_arg in f]
related_file_count = len(related_files)
# Determine the prefix based on the existing files
if related_file_count == 0:
# Try to find the highest prefix number among all files, then increment it
all_prefix_numbers = []
for f in json_files:
number = float(Path(f).stem.split("_")[0])
all_prefix_numbers.append(math.floor(number))
max_prefix = max(all_prefix_numbers, default=0)
print("HEY WE ARE HERE BIG DAWG", max_prefix)
run_name = f"{max_prefix + 1}_{test_arg}.json"
else:
# Take the number from before the _ and add the .{number}
prefix_str = Path(related_files[0]).stem.rsplit("_", 1)[0].split(".")[0]
prefix = math.floor(float(prefix_str))
run_name = f"{prefix}.{related_file_count}_{test_arg}.json"
print("run_namerun_namerun_name", run_name)
new_file_path = reports_path / run_name
return str(new_file_path)
def replace_backslash(value: Any) -> Any:

@ -1 +1 @@
Subproject commit bb02bf0d5cdbf045ff145271b78e4b4ee7225011
Subproject commit 0a9fcd8c3d6352ef42d436cff7b64683a7a7ca2d