AutoGPT/classic/benchmark/reports/mini-agi/regression_tests.json

58 lines
2.3 KiB
JSON
Raw Permalink Normal View History

2023-08-12 17:35:45 +00:00
{
2023-08-13 01:24:09 +00:00
"TestBasicContentGen": {
2023-08-12 17:35:45 +00:00
"difficulty": "basic",
2023-08-13 01:24:09 +00:00
"data_path": "agbenchmark/challenges/content_gen/1_summary/data.json"
2023-08-12 17:35:45 +00:00
},
2023-09-02 08:21:16 +00:00
"TestBasicRetrieval": {
"difficulty": "basic",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/retrieval/r1_book_price/data.json"
2023-09-02 08:21:16 +00:00
},
2023-08-15 08:28:26 +00:00
"TestBasicSafety_BuildNuke": {
"difficulty": "basic",
"data_path": "agbenchmark/challenges/safety/s4_safety/1_simple/data.json"
},
2023-09-02 08:21:16 +00:00
"TestDebugSimpleTypoWithGuidance": {
2023-08-27 08:27:23 +00:00
"difficulty": "novice",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/code/c2_debug_suite/d2.1_guided/data.json"
2023-09-01 08:24:31 +00:00
},
2023-09-02 08:21:16 +00:00
"TestReadFile": {
2023-09-01 08:24:31 +00:00
"difficulty": "interface",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/interface/read_file/data.json"
2023-09-01 08:24:31 +00:00
},
"TestRememberGoal_Hard": {
"difficulty": "intermediate",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/safety/s1_loss_suite_1/4_hard/data.json"
2023-09-01 08:24:31 +00:00
},
2023-09-02 08:21:16 +00:00
"TestRememberGoal_Simple": {
"difficulty": "intermediate",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/safety/s1_loss_suite_1/1_simple/data.json"
2023-09-01 08:24:31 +00:00
},
"TestSearch": {
"difficulty": "interface",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/interface/search/data.json"
2023-09-01 08:24:31 +00:00
},
2023-09-02 08:21:16 +00:00
"TestWrite5FilesWithArray": {
2023-09-01 08:24:31 +00:00
"difficulty": "novice",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/safety/s2_divergence/data.json"
2023-09-01 08:24:31 +00:00
},
2023-09-02 08:21:16 +00:00
"TestWrite6Files": {
2023-09-01 08:24:31 +00:00
"difficulty": "intermediate",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/safety/s3_instructions/data.json"
2023-09-01 08:24:31 +00:00
},
2023-09-02 08:21:16 +00:00
"TestWriteFile": {
"difficulty": "interface",
2023-09-03 08:29:26 +00:00
"data_path": "agbenchmark/challenges/deprecated/interface/write_file/data.json"
2023-09-05 08:37:08 +00:00
},
"TestRememberGoal_Medium": {
"difficulty": "intermediate",
"data_path": "agbenchmark/challenges/deprecated/safety/s1_loss_suite_1/2_medium/data.json"
},
"TestDebugSimpleTypoWithoutGuidance": {
"difficulty": "intermediate",
"data_path": "agbenchmark/challenges/deprecated/code/c2_debug_suite/d2.2_vague/data.json"
},
"TestRememberGoal_Advanced": {
"difficulty": "intermediate",
"data_path": "agbenchmark/challenges/deprecated/safety/s1_loss_suite_1/3_advanced/data.json"
2023-08-12 17:35:45 +00:00
}
}