170 lines
5.6 KiB
YAML
170 lines
5.6 KiB
YAML
name: AGBenchmark CI

# Triggers: only changes under benchmark/ (other than benchmark/reports/) or
# to the benchmark CI workflow file start a run.
on:
  push:
    branches:
      - master
      - development
      - "ci-test*"
    paths:
      - "benchmark/**"
      - ".github/workflows/benchmark-ci.yml"
      - "!benchmark/reports/**"
  pull_request:
    branches:
      - master
      - development
      - "release-*"
    paths:
      - "benchmark/**"
      - "!benchmark/reports/**"
      - ".github/workflows/benchmark-ci.yml"

# When github.head_ref is set, runs are grouped by event name + PR number;
# otherwise each commit SHA gets its own group. In-progress runs are cancelled
# only for events whose name starts with 'pull_request'.
concurrency:
  group: ${{ format('benchmark-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
  cancel-in-progress: ${{ startsWith(github.event_name, 'pull_request') }}

# Every `run` step uses bash unless a step overrides the shell.
defaults:
  run:
    shell: bash

env:
  # Quoted so the version stays the string 3.10 instead of the float 3.1.
  min-python-version: "3.10"
|
|
|
|
jobs:
|
|
# Runs the benchmark's pytest suite with coverage on every OS in the matrix.
test:
  # 'macos-arm64' maps to the macos-14 runner; every other matrix entry maps
  # to '<platform-os>-latest'.
  runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
  timeout-minutes: 30
  permissions:
    contents: read
  strategy:
    # Let the remaining matrix entries finish even if one of them fails.
    fail-fast: false
    matrix:
      python-version:
        - '3.10'
      platform-os:
        - ubuntu
        - macos
        - macos-arm64
        - windows
  defaults:
    run:
      shell: bash
      working-directory: benchmark
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: true

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Set up Python dependency cache
      # Skipped on Windows: unpacking cached dependencies there takes longer
      # than just installing them.
      if: runner.os != 'Windows'
      uses: actions/cache@v4
      with:
        path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
        key: poetry-${{ runner.os }}-${{ hashFiles('benchmark/poetry.lock') }}

    # Poetry is installed through its official installer script; the macOS
    # branch additionally registers ~/.local/bin on PATH for later steps.
    - name: Install Poetry (Unix)
      if: runner.os != 'Windows'
      run: |
        curl -sSL https://install.python-poetry.org | python3 -

        if [ "${{ runner.os }}" = "macOS" ]; then
          PATH="$HOME/.local/bin:$PATH"
          echo "$HOME/.local/bin" >> $GITHUB_PATH
        fi

    # Same installer, run from PowerShell; the Scripts directory under
    # %APPDATA%\Python is appended to PATH for later steps.
    - name: Install Poetry (Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      run: |
        (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -

        $env:PATH += ";$env:APPDATA\Python\Scripts"
        echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH

    - name: Install Python dependencies
      run: poetry install

    # Produces terminal (term-missing) and XML coverage reports for the
    # agbenchmark package and lists the 10 slowest tests.
    - name: Run pytest with coverage
      env:
        CI: true
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      run: |
        poetry run pytest -vv \
          --cov=agbenchmark --cov-branch --cov-report term-missing --cov-report xml \
          --durations=10 \
          tests

    - name: Upload coverage reports to Codecov
      uses: codecov/codecov-action@v4
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        flags: agbenchmark,${{ runner.os }}
|
|
|
|
# Starts each agent in the matrix and runs agbenchmark against it, then checks
# that running with BUILD_SKILL_TREE leaves no uncommitted changes behind.
self-test-with-agent:
  runs-on: ubuntu-latest
  # Least-privilege token: restrict GITHUB_TOKEN to reading repository
  # contents, as the `test` job in this workflow does.
  # NOTE(review): assumes `./run agent start` needs no other token scope.
  permissions:
    contents: read
  strategy:
    matrix:
      agent-name: [ forge ]
    fail-fast: false
  timeout-minutes: 20
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: true

    - name: Set up Python ${{ env.min-python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ env.min-python-version }}

    - name: Install Poetry
      run: |
        curl -sSL https://install.python-poetry.org | python -

    - name: Run regression tests
      working-directory: .
      run: |
        ./run agent start ${{ matrix.agent-name }}
        cd ${{ matrix.agent-name }}

        set +e # Ignore non-zero exit codes and continue execution
        echo "Running the following command: poetry run agbenchmark --maintain --mock"
        poetry run agbenchmark --maintain --mock
        EXIT_CODE=$?
        set -e  # Stop ignoring non-zero exit codes
        # The --maintain run is best-effort: its exit code never fails this step.
        # Exit code 5 is reported as an empty regression_tests.json; any other
        # non-zero code is surfaced as a warning instead of being dropped silently.
        if [ "$EXIT_CODE" -eq 5 ]; then
          echo "regression_tests.json is empty."
        elif [ "$EXIT_CODE" -ne 0 ]; then
          echo "::warning::poetry run agbenchmark --maintain --mock exited with code $EXIT_CODE"
        fi

        echo "Running the following command: poetry run agbenchmark --mock"
        poetry run agbenchmark --mock

        echo "Running the following command: poetry run agbenchmark --mock --category=data"
        poetry run agbenchmark --mock --category=data

        echo "Running the following command: poetry run agbenchmark --mock --category=coding"
        poetry run agbenchmark --mock --category=coding

        echo "Running the following command: poetry run agbenchmark --test=WriteFile"
        poetry run agbenchmark --test=WriteFile
        cd ../benchmark
        poetry install
        echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
        export BUILD_SKILL_TREE=true

        poetry run agbenchmark --mock

        # `git diff --name-only` prints paths relative to the repository root even
        # when run from benchmark/, so the pattern must not start with '../'
        # (as a regex, '..' would demand two extra characters before '/frontend').
        # NOTE(review): assumes frontend/ sits at the repository root - confirm.
        CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(frontend/assets)') || echo "No diffs"
        if [ -n "$CHANGED" ]; then
          echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
          echo "$CHANGED"
          exit 1
        else
          echo "No unstaged changes."
        fi
      env:
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
        # Evaluates to true only when the run's ref name is 'master'.
        TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
|