61 lines
2.8 KiB
JSON
61 lines
2.8 KiB
JSON
|
{
|
||
|
"command": "agbenchmark start --suite TestRevenueRetrieval",
|
||
|
"completion_time": "2023-07-24-13:34",
|
||
|
"metrics": {
|
||
|
"run_time": "62.03 seconds",
|
||
|
"highest_difficulty": "novice: 3"
|
||
|
},
|
||
|
"tests": {
|
||
|
"TestRevenueRetrieval": {
|
||
|
"data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1",
|
||
|
"task": "Write Tesla's exact revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31.578 billion).",
|
||
|
"category": [
|
||
|
"retrieval"
|
||
|
],
|
||
|
"metrics": {
|
||
|
"percentage": 33.33,
|
||
|
"highest_difficulty": "novice",
|
||
|
"run_time": "61.579 seconds"
|
||
|
},
|
||
|
"tests": {
|
||
|
"TestRevenueRetrieval_1.0": {
|
||
|
"data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/1_tesla_revenue/data.json",
|
||
|
"is_regression": true,
|
||
|
"answer": "It was $81.462 billion in 2022.",
|
||
|
"description": "A no-guardrails search for info",
|
||
|
"metrics": {
|
||
|
"difficulty": "novice",
|
||
|
"success": true,
|
||
|
"success_%": 100.0
|
||
|
}
|
||
|
},
|
||
|
"TestRevenueRetrieval_1.1": {
|
||
|
"data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/2_specific/data.json",
|
||
|
"is_regression": false,
|
||
|
"answer": "It was $81.462 billion in 2022.",
|
||
|
"description": "This one checks the accuracy of the information over r2",
|
||
|
"metrics": {
|
||
|
"difficulty": "novice",
|
||
|
"success": false,
|
||
|
"success_%": 0.0
|
||
|
}
|
||
|
},
|
||
|
"TestRevenueRetrieval_1.2": {
|
||
|
"data_path": "C:/Users/silen/Desktop/Untitled Folder/Projects/Auto-GPT/Auto-GPT-Benchmarks/agbenchmark/challenges/retrieval/r2_search_suite_1/3_formatting/data.json",
|
||
|
"is_regression": false,
|
||
|
"answer": "It was $81.462 billion in 2022. In millions the answer is 81,462.",
|
||
|
"description": "Advanced version of the r2.1 challenge that also asks for specific formatting.",
|
||
|
"metrics": {
|
||
|
"difficulty": "intermediate",
|
||
|
"success": false,
|
||
|
"success_%": 0.0
|
||
|
}
|
||
|
}
|
||
|
},
|
||
|
"reached_cutoff": true
|
||
|
}
|
||
|
},
|
||
|
"config": {
|
||
|
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
|
||
|
}
|
||
|
}
|