{ "command": "agbenchmark start --suite TestRevenueRetrieval", "completion_time": "2023-07-22-19:18", "metrics": { "run_time": "40.3 seconds", "highest_difficulty": "novice: 3" }, "tests": { "TestRevenueRetrieval": { "data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1", "task": "Write tesla's exact revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 billion).", "category": [ "retrieval" ], "metrics": { "percentage": 33.33, "highest_difficulty": "novice", "run_time": "39.972 seconds" }, "tests": { "TestRevenueRetrieval_1.0": { "data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1/1_tesla_revenue/data.json", "is_regression": true, "answer": "It was $81.462 billion in 2022.", "description": "A no guardrails search for info", "metrics": { "difficulty": "novice", "success": true, "success_%": 100.0 } }, "TestRevenueRetrieval_1.1": { "data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1/2_specific/data.json", "is_regression": false, "answer": "It was $81.462 billion in 2022.", "description": "This one checks the accuracy of the information over r2", "metrics": { "difficulty": "novice", "success": false, "success_%": 0.0 } }, "TestRevenueRetrieval_1.2": { "data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1/3_formatting/data.json", "is_regression": false, "answer": "It was $81.462 billion in 2022. In millions the answer is 81,462.", "description": "Advanced version of the r2.1 challenge that also asks for specific formatting.", "metrics": { "difficulty": "intermediate", "success": false, "success_%": 0.0 } } }, "reached_cutoff": false } }, "config": { "workspace": "${os.path.join(Path.home(), 'miniagi')}" } }