31 lines
932 B
JSON
31 lines
932 B
JSON
{
|
|
"command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
|
|
"completion_time": "2023-07-18-07:39",
|
|
"metrics": {
|
|
"run_time": "60.0 seconds",
|
|
"highest_difficulty": "basic: 2"
|
|
},
|
|
"tests": {
|
|
"TestDebugSimpleTypoWithGuidance": {
|
|
"data_path": "agbenchmark/challenges/code/d1_debug",
|
|
"is_regression": false,
|
|
"task": "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
|
|
"answer": "[0, 1] [2, 5] [0, 3]",
|
|
"description": "Tests ability for the agent to debug python code with a simple typo in it.",
|
|
"metrics": {
|
|
"difficulty": "basic",
|
|
"success": true,
|
|
"success_%": 100.0,
|
|
"run_time": "59.757 seconds"
|
|
},
|
|
"reached_cutoff": false
|
|
}
|
|
},
|
|
"config": {
|
|
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
|
|
},
|
|
"additional": {
|
|
"model": "gpt-4"
|
|
}
|
|
}
|