file naming when --test (#164)

pull/5155/head
Silen Naihin 2023-07-17 11:24:16 -04:00 committed by GitHub
parent dffc1dfd51
commit 8aa6452cc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 315 additions and 72 deletions

View File

@ -1,40 +1,72 @@
{ {
"mini-agi": { "mini-agi": {
"TestBasicMemory": [true, true, true], "TestBasicMemory": [
"TestBasicRetrieval": [true, true, true], true,
"TestCreateSimpleWebServer": [false, false, false], true,
"TestDebugSimpleTypoWithGuidance": [ true
false, ],
false, "TestBasicRetrieval": [
false, true,
false, true,
false, true
false ],
], "TestCreateSimpleWebServer": [
"TestDebugSimpleTypoWithoutGuidance": [false, false, false], false,
"TestReadFile": [true, true, true, true], false,
"TestRememberMultipleIds": [true, true, true], false
"TestRememberMultipleIdsWithNoise": [true, true, true], ],
"TestRememberMultiplePhrasesWithNoise": [true, true, true], "TestDebugSimpleTypoWithGuidance": [
"TestRetrieval2": [true, true, true], false,
"TestRetrieval3": [true, true, true], false,
"TestSearch": [true, true, true, true], false
"TestWriteFile": [ ],
true, "TestDebugSimpleTypoWithoutGuidance": [
true, false,
true, false,
false, false
false, ],
false, "TestReadFile": [
false, true,
true, true,
false, true,
true, true,
false, true
false, ],
false, "TestRememberMultipleIds": [
false, true,
true true,
] true
} ],
} "TestRememberMultipleIdsWithNoise": [
true,
true,
true
],
"TestRememberMultiplePhrasesWithNoise": [
true,
true,
true
],
"TestRetrieval2": [
true,
true,
true
],
"TestRetrieval3": [
true,
true,
true
],
"TestSearch": [
true,
true,
true,
true
],
"TestWriteFile": [
true,
true,
true
]
}
}

View File

@ -0,0 +1,36 @@
{
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"non_mock_success_%": 100.0,
"run_time": "0.009 seconds"
}
},
"additional": {
"model": "gpt-3.5-turbo"
},
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-17-09:54",
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
},
"metrics": {
"run_time": "22.36 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 40.0,
"run_time": "22.169 seconds"
}
}
}
}

View File

@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-15-22:13",
"metrics": {
"run_time": "12.4 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 50.0,
"run_time": "12.127 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestReadFile",
"completion_time": "2023-07-17-10:12",
"metrics": {
"run_time": "65.27 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestReadFile": {
"data_path": "agbenchmark/challenges/interface/read_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "65.074 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
},
"additional": {
"model": "gpt-4",
"reached_termination_time": true
}
}

View File

@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestReadFile",
"completion_time": "2023-07-15-22:13",
"metrics": {
"run_time": "31.2 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestReadFile": {
"data_path": "agbenchmark/challenges/interface/read_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "30.903 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestSearch",
"completion_time": "2023-07-15-22:14",
"metrics": {
"run_time": "16.88 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestSearch": {
"data_path": "agbenchmark/challenges/interface/search",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "16.572 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -0,0 +1,28 @@
{
"command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
"completion_time": "2023-07-15-22:16",
"metrics": {
"run_time": "45.92 seconds",
"highest_difficulty": ": 0"
},
"tests": {
"TestDebugSimpleTypoWithGuidance": {
"data_path": "agbenchmark/challenges/code/d1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": false,
"fail_reason": "assert 1 in [0.0]",
"success_%": 0.0,
"run_time": "45.599 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -0,0 +1,28 @@
{
"command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
"completion_time": "2023-07-15-22:15",
"metrics": {
"run_time": "32.99 seconds",
"highest_difficulty": ": 0"
},
"tests": {
"TestDebugSimpleTypoWithGuidance": {
"data_path": "agbenchmark/challenges/code/d1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": false,
"fail_reason": "assert 1 in [0.0]",
"success_%": 0.0,
"run_time": "32.582 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@ -1,23 +0,0 @@
{
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-16-13:07",
"metrics": {
"run_time": "13.91 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 30.0,
"run_time": "13.684 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
}
}

View File

@ -1,7 +1,9 @@
# radio charts, logs, helper functions for tests, anything else relevant. # radio charts, logs, helper functions for tests, anything else relevant.
import glob import glob
import math
import os import os
import re import re
import sys
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -17,17 +19,49 @@ HOME_ENV = os.getenv("HOME_ENV")
def calculate_info_test_path(reports_path: Path) -> str:
    """Return the path (as a string) of the next report file in *reports_path*.

    The directory is created if it does not exist yet. The file name depends
    on how the process was invoked (``sys.argv``):

    * Without ``--test`` the default scheme is used:
      ``file{N}_{MM-DD-HH-MM}.json`` where ``N`` is one more than the number
      of ``*.json`` files already in the directory.
    * With ``--test <name>`` reports are grouped per test:
      the first report of a test is ``{P}_{name}.json`` where ``P`` is one
      more than the highest numeric prefix already used in the directory;
      later reports of the same test reuse its prefix and append a run
      number: ``{P}.{run}_{name}.json``.

    Args:
        reports_path: Directory in which report files are stored.

    Returns:
        The full path of the new report file. The file itself is not created.

    Raises:
        ValueError: If ``--test`` is the last command-line argument.
    """
    command = sys.argv

    # exist_ok=True already covers the "directory is there" case.
    reports_path.mkdir(parents=True, exist_ok=True)
    json_files = glob.glob(str(reports_path / "*.json"))

    if "--test" not in command:
        # Default naming scheme.
        timestamp = datetime.now().strftime("%m-%d-%H-%M")
        return str(reports_path / f"file{len(json_files) + 1}_{timestamp}.json")

    try:
        test_arg = command[command.index("--test") + 1]  # Argument after --test
    except IndexError:
        raise ValueError("Expected an argument after --test") from None

    all_prefixes = []  # integer prefix of every "<number>_<name>.json" report
    related_prefixes = []  # the same, restricted to reports of this test
    for json_file in json_files:
        prefix_str, _, name = Path(json_file).stem.partition("_")
        try:
            prefix = math.floor(float(prefix_str))
        except (ValueError, OverflowError):
            # Not a numbered report (e.g. a default "file3_<timestamp>.json");
            # it must not break numbering of the per-test reports.
            continue
        all_prefixes.append(prefix)
        # Compare the name part exactly: a substring test on the whole path
        # would also match the directory name and tests whose name merely
        # starts with this one.
        if name == test_arg:
            related_prefixes.append(prefix)

    if not related_prefixes:
        # First report for this test: take the next free integer prefix.
        run_name = f"{max(all_prefixes, default=0) + 1}_{test_arg}.json"
    else:
        prefix = min(related_prefixes)
        existing_names = {Path(json_file).name for json_file in json_files}
        run_number = len(related_prefixes)
        run_name = f"{prefix}.{run_number}_{test_arg}.json"
        # If earlier reports were deleted the count alone may point at a name
        # that is still in use; bump it so no report is overwritten.
        while run_name in existing_names:
            run_number += 1
            run_name = f"{prefix}.{run_number}_{test_arg}.json"

    return str(reports_path / run_name)
def replace_backslash(value: Any) -> Any: def replace_backslash(value: Any) -> Any:

@ -1 +1 @@
Subproject commit bb02bf0d5cdbf045ff145271b78e4b4ee7225011 Subproject commit 0a9fcd8c3d6352ef42d436cff7b64683a7a7ca2d