parent
cbd2e49d97
commit
5886d75059
|
@ -64,7 +64,7 @@ jobs:
|
|||
if: success() || failure()
|
||||
|
||||
tests:
|
||||
name: ${{ matrix.agent-name }}
|
||||
name: "${{ matrix.agent-name }} (Cache: ${{ matrix.cache-enabled }})"
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
|
@ -77,6 +77,8 @@ jobs:
|
|||
- "smol-developer"
|
||||
- "Auto-GPT"
|
||||
- "mini-agi"
|
||||
cache-enabled: [ true, false ]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
@ -156,7 +158,7 @@ jobs:
|
|||
PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards.
|
||||
HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
|
||||
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
|
||||
HELICONE_CACHE_ENABLED: true
|
||||
HELICONE_CACHE_ENABLED: ${{ matrix.cache-enabled }}
|
||||
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
|
||||
|
||||
|
||||
|
@ -164,5 +166,6 @@ jobs:
|
|||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.agent-name }}
|
||||
name:
|
||||
${{ matrix.agent-name }} (Cache: ${{ matrix.cache-enabled }})
|
||||
path: agent/${{ matrix.agent-name }}/agbenchmark
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "TestDebugSimpleTypoWithGuidance",
|
||||
"category": ["code"],
|
||||
"category": ["code", "iterate"],
|
||||
"task": "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
|
||||
"dependencies": ["TestReadFile", "TestWriteFile"],
|
||||
"ground": {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "TestDebugSimpleTypoWithoutGuidance",
|
||||
"category": ["code"],
|
||||
"category": ["code", "iterate"],
|
||||
"task": "Make test.py run without errors.",
|
||||
"dependencies": ["TestDebugSimpleTypoWithGuidance"],
|
||||
"ground": {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "TestBasicCodeGeneration",
|
||||
"category": ["code", "iterate"],
|
||||
"category": ["code"],
|
||||
"task": "Create a two_sum function in a file called code.py. Given an array of integers, return indices of the two numbers such that they add up to a specific target. You may assume that each input would have exactly one solution, and you may not use the same element twice. Example: Given nums = [2, 7, 11, 15], target = 9, Because nums[0] + nums[1] = 2 + 7 = 9, return [0, 1].",
|
||||
"dependencies": ["TestWriteFile"],
|
||||
"ground": {
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
# mypy: ignore-errors
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
def three_sum(nums: List[int], target: int) -> Optional[List[int]]:
    """Find three distinct positions in ``nums`` whose values add up to ``target``.

    The values are sorted together with their original positions, then each
    element in turn is fixed as the smallest member of the triple while two
    pointers close in from both ends of the remaining suffix.

    Args:
        nums: The integers to search.
        target: The sum the three chosen values must reach.

    Returns:
        The three original indices in ascending order for the first matching
        triple found, or ``None`` when no triple exists (including when
        ``nums`` has fewer than three elements).
    """
    # Keep each value paired with its original position so the indices
    # survive the sort.
    nums_indices = sorted((num, index) for index, num in enumerate(nums))
    last = len(nums_indices) - 1

    for i in range(len(nums_indices) - 2):
        # Skip an anchor whose value equals the previous anchor's value: any
        # triple reachable from here was already reachable from the previous
        # anchor, which found nothing (otherwise we would have returned).
        # Only the value is compared; the (value, index) tuples themselves
        # are always distinct because every index is unique.
        if i > 0 and nums_indices[i][0] == nums_indices[i - 1][0]:
            continue

        left, right = i + 1, last
        while left < right:
            total = (
                nums_indices[i][0]
                + nums_indices[left][0]
                + nums_indices[right][0]
            )
            if total < target:
                left += 1  # sum too small: move to a larger value
            elif total > target:
                right -= 1  # sum too large: move to a smaller value
            else:
                return sorted(
                    [
                        nums_indices[i][1],
                        nums_indices[left][1],
                        nums_indices[right][1],
                    ]
                )
    return None
|
|
@ -0,0 +1,31 @@
|
|||
# mypy: ignore-errors
|
||||
from code import three_sum
|
||||
from typing import List
|
||||
|
||||
|
||||
def test_three_sum(nums: List[int], target: int, expected_result: List[int]) -> None:
    """Run ``three_sum`` on one case, print its result, and check it.

    Args:
        nums: Input list passed to ``three_sum``.
        target: Target sum passed to ``three_sum``.
        expected_result: The index list ``three_sum`` is expected to return.

    Raises:
        AssertionError: If the returned value differs from ``expected_result``.
    """
    result = three_sum(nums, target)
    # Print before asserting so the actual value is visible even when the
    # check below fails.
    print(result)
    # No literal "AssertionError:" prefix: the traceback already names the
    # exception type, and the message now also reports the value received.
    assert result == expected_result, (
        f"Expected the output to be {expected_result}, got {result}"
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Each entry is (nums, target, expected_result); the cases run in order.
    cases = (
        # test the trivial case with the first three numbers
        ([2, 7, 11, 15], 20, [0, 1, 2]),
        # test for ability to use zero and the same number twice
        ([2, 7, 0, 15, 12, 0], 2, [0, 2, 5]),
        # test for first and last index usage and negative numbers
        ([-6, 7, 11, 4], 9, [0, 2, 3]),
    )
    for nums, target, expected_result in cases:
        test_three_sum(nums, target, expected_result)
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"name": "TestThreeSum",
|
||||
"category": ["code", "iterate"],
|
||||
"task": "Create a three_sum function in a file called code.py. Given an array of integers, return indices of the three numbers such that they add up to a specific target. You may assume that each input would have exactly one solution, and you may not use the same element twice. Example: Given nums = [2, 7, 11, 15], target = 20, Because nums[0] + nums[1] + nums[2] = 2 + 7 + 11 = 20, return [0, 1, 2].",
|
||||
"dependencies": ["TestWriteFile", "TestBasicCodeGeneration"],
|
||||
"ground": {
|
||||
"answer": "The three_sum function coded properly.",
|
||||
"should_contain": ["[0, 1, 2]", "[0, 2, 5]", "[0, 2, 3]"],
|
||||
"should_not_contain": [],
|
||||
"files": ["test.py"],
|
||||
"type": "execute_python_code"
|
||||
},
|
||||
"info": {
|
||||
"difficulty": "intermediate",
|
||||
"description": "Tests ability for the agent to create the three_sum function.",
|
||||
"side_effects": []
|
||||
}
|
||||
}
|
|
@ -1 +1 @@
|
|||
Subproject commit bca191cd76cdea0335da91d004c64d9bb8520fea
|
||||
Subproject commit f0c76918dff7a6cf5e0611a09b060fc5d4913b82
|
Loading…
Reference in New Issue