reverting new challenges
parent
825c3adf62
commit
e5e0c4bf9d
|
@ -1 +0,0 @@
|
|||
Scotland
|
|
@ -1,34 +0,0 @@
|
|||
{
|
||||
"category": [
|
||||
"scrape_synthesize",
|
||||
"general"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestRevenueRetrieval"
|
||||
],
|
||||
"eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
|
||||
"ground": {
|
||||
"answer": "Toran is from Scotland https://uk.linkedin.com/in/toran-richards.",
|
||||
"case_sensitive": true,
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.txt"
|
||||
],
|
||||
"should_contain": [
|
||||
"Scotland"
|
||||
],
|
||||
"should_not_contain": []
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can do multi step retrieval (1. find who the CEO of AutoGPT is, 2. find where Toran is from)",
|
||||
"difficulty": "intermediate",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestAutoGPTCountry",
|
||||
"task": "Write the country the inventor/creator of AutoGPT is from to a file called output.txt"
|
||||
}
|
|
@ -1 +0,0 @@
|
|||
Reed
|
|
@ -1,35 +0,0 @@
|
|||
{
|
||||
"category": [
|
||||
"scrape_synthesize",
|
||||
"general"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestAutoGPTCountry",
|
||||
"TestGetInformation"
|
||||
],
|
||||
"eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
|
||||
"ground": {
|
||||
"answer": "https://www.amazon.com/gp/bestsellers/2021/books, second book, the answer is Reed",
|
||||
"case_sensitive": true,
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.txt"
|
||||
],
|
||||
"should_contain": [
|
||||
"Reed"
|
||||
],
|
||||
"should_not_contain": []
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can follow very specific instructions for retrieval.",
|
||||
"difficulty": "intermediate",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestAmazonBestseller",
|
||||
"task": "Write the middle name of the author of the best selling book on Amazon from 2021 that got the least amount of reviews out of the first six books on the best selling list to a file called output.txt."
|
||||
}
|
|
@ -1 +0,0 @@
|
|||
Head of Ventures, M&A, and Business Development
|
|
@ -1,35 +0,0 @@
|
|||
{
|
||||
"category": [
|
||||
"scrape_synthesize",
|
||||
"general"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestAutoGPTCountry",
|
||||
"TestGetInformation"
|
||||
],
|
||||
"eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
|
||||
"ground": {
|
||||
"answer": "Get to https://www.forbes.com/special-report/2012/30-under-30/30-under-30_games.html, then https://www.linkedin.com/in/brianjcho/details/experience/ is the first 27 year old, then find his longest working job on Linkedin which is Riot Games.",
|
||||
"case_sensitive": true,
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.txt"
|
||||
],
|
||||
"should_contain": [
|
||||
"Head of Ventures, M&A, and Business Development"
|
||||
],
|
||||
"should_not_contain": []
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can navigate the web through trials and tribulations.",
|
||||
"difficulty": "intermediate",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestAmazonBestseller",
|
||||
"task": "Find the Forbes 30 under 30 list for 2012. For the Games category, find the company that the first 27 year old on the list spent the longest amount of time working at according to Linkedin. What was his role at this company?"
|
||||
}
|
Loading…
Reference in New Issue