parent
6f289e6dfa
commit
793ff1c163
|
@ -50,10 +50,6 @@ async def run_api_agent(
|
|||
raise TimeoutError("Time limit exceeded")
|
||||
if not step or step.is_last:
|
||||
steps_remaining = False
|
||||
if os.getenv("IS_MOCK"):
|
||||
time.sleep(
|
||||
1
|
||||
) # helps with the integration of the "polling updates" feature, since the mock agent is otherwise too fast.
|
||||
# if we're calling a mock agent, we "cheat" and give the correct artifacts to pass the tests
|
||||
if os.getenv("IS_MOCK"):
|
||||
await upload_artifacts(
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
id,name,timestamp
|
||||
3,Alice,2023-09-25 14:10:00
|
||||
1,Bob,2023-09-24 12:05:00
|
||||
2,Charlie,2023-09-24 12:10:00
|
||||
4,David,2023-09-26 16:20:00
|
|
|
@ -0,0 +1,5 @@
|
|||
id,name,timestamp
|
||||
1,Bob,2023-09-24 12:05:00
|
||||
2,Charlie,2023-09-24 12:10:00
|
||||
3,Alice,2023-09-25 14:10:00
|
||||
4,David,2023-09-26 16:20:00
|
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestReadFile"
|
||||
],
|
||||
"eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
|
||||
"ground": {
|
||||
"answer": "The csv sorted by date",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can sort a csv",
|
||||
"difficulty": "basic",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "SortCsv",
|
||||
"task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
Item
|
||||
Banana
|
||||
Leaf
|
||||
Sky
|
||||
Sunflower
|
||||
Grass
|
||||
Jeans
|
||||
Lemon
|
||||
Tree
|
||||
Ocean
|
||||
Daisy
|
||||
Fern
|
|
|
@ -0,0 +1,12 @@
|
|||
Item, Color
|
||||
Banana, Yellow
|
||||
Leaf, Green
|
||||
Sky, Blue
|
||||
Sunflower, Yellow
|
||||
Grass, Green
|
||||
Jeans, Blue
|
||||
Lemon, Yellow
|
||||
Tree, Green
|
||||
Ocean, Blue
|
||||
Daisy, Yellow
|
||||
Fern, Green
|
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestSortCsv"
|
||||
],
|
||||
"eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
|
||||
"ground": {
|
||||
"answer": "The csv labelled",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can label data in a csv",
|
||||
"difficulty": "basic",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "LabelData",
|
||||
"task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
ID,Name,Age
|
||||
101,John,28
|
||||
102,Alice,34
|
||||
103,Bob,45
|
|
|
@ -0,0 +1,4 @@
|
|||
ID,Occupation,Salary
|
||||
101,Engineer,80000
|
||||
102,Doctor,120000
|
||||
103,Lawyer,95000
|
|
|
@ -0,0 +1,4 @@
|
|||
Age,ID,Name,Occupation,Salary
|
||||
28,101,John,Engineer,80000
|
||||
34,102,Alice,Doctor,120000
|
||||
45,103,Bob,Lawyer,95000
|
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestLabelData"
|
||||
],
|
||||
"eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
|
||||
"ground": {
|
||||
"answer": "The csv data is combined",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can combine data from a csv",
|
||||
"difficulty": "intermediate",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "CombineCsv",
|
||||
"task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
|
||||
}
|
|
@ -12,6 +12,12 @@
|
|||
"id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
|
||||
|
@ -83,6 +89,18 @@
|
|||
"from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
|
||||
}
|
||||
],
|
||||
"nodes": [
|
||||
|
@ -574,6 +592,117 @@
|
|||
"label": "RevenueRetrieval2",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestSortCsv"
|
||||
],
|
||||
"eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
|
||||
"ground": {
|
||||
"answer": "The csv labelled",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can sort a csv",
|
||||
"difficulty": "basic",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestLabelData",
|
||||
"task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"label": "LabelData",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestReadFile"
|
||||
],
|
||||
"eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
|
||||
"ground": {
|
||||
"answer": "The csv sorted by date",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can sort a csv",
|
||||
"difficulty": "basic",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestSortCsv",
|
||||
"task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"label": "SortCsv",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestLabelData"
|
||||
],
|
||||
"eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
|
||||
"ground": {
|
||||
"answer": "The csv data is combined",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can combine data from a csv",
|
||||
"difficulty": "intermediate",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestCombineCsv",
|
||||
"task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. Write the output in output.csv"
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
|
||||
"label": "CombineCsv",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
|
|
|
@ -1,4 +1,214 @@
|
|||
{
|
||||
"edges": [],
|
||||
"nodes": []
|
||||
"edges": [
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
|
||||
}
|
||||
],
|
||||
"nodes": [
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestSortCsv"
|
||||
],
|
||||
"eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
|
||||
"ground": {
|
||||
"answer": "The csv labelled",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can sort a csv",
|
||||
"difficulty": "basic",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestLabelData",
|
||||
"task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"label": "LabelData",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestReadFile"
|
||||
],
|
||||
"eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
|
||||
"ground": {
|
||||
"answer": "The csv sorted by date",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can sort a csv",
|
||||
"difficulty": "basic",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestSortCsv",
|
||||
"task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"label": "SortCsv",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"interface"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestWriteFile"
|
||||
],
|
||||
"eval_id": "261ccfaa-02a2-4c1a-8a56-c76c66f7dba1",
|
||||
"ground": {
|
||||
"answer": "The content of output.txt should be 'Hello World!'",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.txt"
|
||||
],
|
||||
"should_contain": [
|
||||
"Hello World!"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "tests the ability for an agent to read a file.",
|
||||
"difficulty": "interface",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestReadFile",
|
||||
"task": "Read the file called file_to_read.txt and write its content to a file called output.txt"
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
|
||||
"label": "ReadFile",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"interface"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [],
|
||||
"eval_id": "81b64bf9-2b6a-4ac8-bcd2-8bfe36244ac0",
|
||||
"ground": {
|
||||
"answer": "The word 'Washington', printed to a .txt file named anything",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
".txt"
|
||||
],
|
||||
"should_contain": [
|
||||
"Washington"
|
||||
],
|
||||
"should_not_contain": []
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests the agents ability to write to a file",
|
||||
"difficulty": "interface",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestWriteFile",
|
||||
"task": "Write the word 'Washington' to a .txt file"
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
|
||||
"label": "WriteFile",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestLabelData"
|
||||
],
|
||||
"eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
|
||||
"ground": {
|
||||
"answer": "The csv data is combined",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can combine data from a csv",
|
||||
"difficulty": "intermediate",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestCombineCsv",
|
||||
"task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. Write the output in output.csv"
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
|
||||
"label": "CombineCsv",
|
||||
"shape": "dot"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -12,6 +12,12 @@
|
|||
"id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
|
||||
|
@ -83,6 +89,18 @@
|
|||
"from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
|
||||
},
|
||||
{
|
||||
"arrows": "to",
|
||||
"from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
|
||||
}
|
||||
],
|
||||
"nodes": [
|
||||
|
@ -574,6 +592,117 @@
|
|||
"label": "RevenueRetrieval2",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestSortCsv"
|
||||
],
|
||||
"eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
|
||||
"ground": {
|
||||
"answer": "The csv labelled",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can sort a csv",
|
||||
"difficulty": "basic",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestLabelData",
|
||||
"task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
|
||||
"label": "LabelData",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestReadFile"
|
||||
],
|
||||
"eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
|
||||
"ground": {
|
||||
"answer": "The csv sorted by date",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can sort a csv",
|
||||
"difficulty": "basic",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestSortCsv",
|
||||
"task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
|
||||
"label": "SortCsv",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
"category": [
|
||||
"data"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [
|
||||
"TestLabelData"
|
||||
],
|
||||
"eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
|
||||
"ground": {
|
||||
"answer": "The csv data is combined",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"output.csv"
|
||||
],
|
||||
"should_contain": [
|
||||
"Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
|
||||
]
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests if the agent can combine data from a csv",
|
||||
"difficulty": "intermediate",
|
||||
"side_effects": [
|
||||
""
|
||||
]
|
||||
},
|
||||
"name": "TestCombineCsv",
|
||||
"task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. Write the output in output.csv"
|
||||
},
|
||||
"id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
|
||||
"label": "CombineCsv",
|
||||
"shape": "dot"
|
||||
},
|
||||
{
|
||||
"color": "grey",
|
||||
"data": {
|
||||
|
|
|
@ -45,10 +45,10 @@ packages:
|
|||
dependency: "direct main"
|
||||
description:
|
||||
name: collection
|
||||
sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687
|
||||
sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.17.2"
|
||||
version: "1.17.1"
|
||||
crypto:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
@ -292,18 +292,18 @@ packages:
|
|||
dependency: transitive
|
||||
description:
|
||||
name: matcher
|
||||
sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e"
|
||||
sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.12.16"
|
||||
version: "0.12.15"
|
||||
material_color_utilities:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: material_color_utilities
|
||||
sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41"
|
||||
sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.5.0"
|
||||
version: "0.2.0"
|
||||
meta:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
@ -449,10 +449,10 @@ packages:
|
|||
dependency: transitive
|
||||
description:
|
||||
name: source_span
|
||||
sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c"
|
||||
sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.10.0"
|
||||
version: "1.9.1"
|
||||
sprintf:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
@ -497,10 +497,10 @@ packages:
|
|||
dependency: transitive
|
||||
description:
|
||||
name: test_api
|
||||
sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8"
|
||||
sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.6.0"
|
||||
version: "0.5.1"
|
||||
typed_data:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
@ -589,14 +589,6 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.1.4"
|
||||
web:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: web
|
||||
sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.1.4-beta"
|
||||
win32:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
@ -614,5 +606,5 @@ packages:
|
|||
source: hosted
|
||||
version: "1.0.3"
|
||||
sdks:
|
||||
dart: ">=3.1.0-185.0.dev <4.0.0"
|
||||
dart: ">=3.0.0 <4.0.0"
|
||||
flutter: ">=3.10.0"
|
||||
|
|
Loading…
Reference in New Issue