AutoGPT/classic/benchmark/reports/mini-agi/13.1_TestRevenueRetrieval.json

61 lines
2.6 KiB
JSON

{
"command": "agbenchmark start --suite TestRevenueRetrieval",
"completion_time": "2023-07-22-19:18",
"metrics": {
"run_time": "40.3 seconds",
"highest_difficulty": "novice: 3"
},
"tests": {
"TestRevenueRetrieval": {
"data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1",
"task": "Write tesla's exact revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).",
"category": [
"retrieval"
],
"metrics": {
"percentage": 33.33,
"highest_difficulty": "novice",
"run_time": "39.972 seconds"
},
"tests": {
"TestRevenueRetrieval_1.0": {
"data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1/1_tesla_revenue/data.json",
"is_regression": true,
"answer": "It was $81.462 billion in 2022.",
"description": "A no guardrails search for info",
"metrics": {
"difficulty": "novice",
"success": true,
"success_%": 100.0
}
},
"TestRevenueRetrieval_1.1": {
"data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1/2_specific/data.json",
"is_regression": false,
"answer": "It was $81.462 billion in 2022.",
"description": "This one checks the accuracy of the information over r2",
"metrics": {
"difficulty": "novice",
"success": false,
"success_%": 0.0
}
},
"TestRevenueRetrieval_1.2": {
"data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1/3_formatting/data.json",
"is_regression": false,
"answer": "It was $81.462 billion in 2022. In millions the answer is 81,462.",
"description": "Advanced version of the r2.1 challenge that also asks for specific formatting.",
"metrics": {
"difficulty": "intermediate",
"success": false,
"success_%": 0.0
}
}
},
"reached_cutoff": false
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
}
}