# 98 lines · 3.6 KiB · YAML
# Nightly benchmark workflow.
#
# Runs a fixed selection of agbenchmark challenges against each agent in the
# matrix, converts the resulting JSON report to Markdown (shown in the job
# summary), and commits the report to the dedicated reports branch.
name: AutoGPTs Nightly Benchmark

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  schedule:
    # Every day at 02:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 2 * * *'

jobs:
  benchmark:
    permissions:
      # Needed by the last step, which pushes to the reports branch.
      contents: write
    runs-on: ubuntu-latest
    strategy:
      matrix:
        agent-name: [ autogpt ]
      # Keep benchmarking the other matrix entries if one of them fails.
      fail-fast: false
    timeout-minutes: 120
    env:
      # Quoted so YAML does not read the version as the float 3.1.
      min-python-version: '3.10'
      # Branch that receives the generated benchmark reports.
      REPORTS_BRANCH: data/benchmark-reports
      # Per-agent report directory, relative to the repository root.
      REPORTS_FOLDER: ${{ format('benchmark/reports/{0}', matrix.agent-name) }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history, so the reports branch can be fetched and checked out.
          fetch-depth: 0
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.min-python-version }}

      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python -

      - name: Prepare reports folder
        run: mkdir -p ${{ env.REPORTS_FOLDER }}

      - run: poetry -C benchmark install

      - name: Benchmark ${{ matrix.agent-name }}
        run: |
          ./run agent start ${{ matrix.agent-name }}
          cd ${{ matrix.agent-name }}

          set +e # Do not quit on non-zero exit codes
          poetry run agbenchmark run -N 3 \
            --test=ReadFile \
            --test=BasicRetrieval --test=RevenueRetrieval2 \
            --test=CombineCsv --test=LabelCsv --test=AnswerQuestionCombineCsv \
            --test=UrlShortener --test=TicTacToe --test=Battleship \
            --test=WebArenaTask_0 --test=WebArenaTask_21 --test=WebArenaTask_124 \
            --test=WebArenaTask_134 --test=WebArenaTask_163
          # Capture the exit code immediately: every later command, including
          # the `[ ... ]` tests below, overwrites $?.
          exit_code=$?

          # Convert exit code 1 (some challenges failed) to exit code 0
          if [ "$exit_code" -eq 0 ] || [ "$exit_code" -eq 1 ]; then
            exit 0
          else
            exit "$exit_code"
          fi
        env:
          AGENT_NAME: ${{ matrix.agent-name }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
          # NOTE(review): the script above changes one directory level down
          # (`cd <agent-name>`), but this path climbs two levels -- confirm
          # which directory the benchmark resolves REPORTS_FOLDER against.
          REPORTS_FOLDER: ${{ format('../../{0}', env.REPORTS_FOLDER) }}  # account for changed workdir

          TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
          # Evaluates to 'true' only when the workflow runs on `master`.
          TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}

      - name: Push reports to data branch
        run: |
          # BODGE: Remove success_rate.json and regression_tests.json to avoid conflicts on checkout
          # `-f` keeps the step from failing when no top-level JSON file exists.
          rm -f ${{ env.REPORTS_FOLDER }}/*.json

          # Find folder with newest (untracked) report in it
          report_subfolder=$(find ${{ env.REPORTS_FOLDER }} -type f -name 'report.json' \
            | xargs -I {} dirname {} \
            | xargs -I {} git ls-files --others --exclude-standard {} \
            | xargs -I {} dirname {} \
            | sort -u)
          # Stop with a clear message instead of operating on "/report.json".
          if [ -z "$report_subfolder" ]; then
            echo "::error::No new (untracked) report.json found in ${{ env.REPORTS_FOLDER }}"
            exit 1
          fi
          json_report_file="$report_subfolder/report.json"

          # Convert JSON report to Markdown
          markdown_report_file="$report_subfolder/report.md"
          poetry -C benchmark run benchmark/reports/format.py "$json_report_file" > "$markdown_report_file"
          cat "$markdown_report_file" >> "$GITHUB_STEP_SUMMARY"

          git config --global user.name 'GitHub Actions'
          git config --global user.email 'github-actions@agpt.co'
          # Switch to the reports branch; create it as an orphan branch if the
          # fetch or the checkout fails.
          git fetch origin ${{ env.REPORTS_BRANCH }}:${{ env.REPORTS_BRANCH }} \
            && git checkout ${{ env.REPORTS_BRANCH }} \
            || git checkout --orphan ${{ env.REPORTS_BRANCH }}
          git reset --hard
          git add ${{ env.REPORTS_FOLDER }}
          git commit -m "Benchmark report for ${{ matrix.agent-name }} @ $(date +'%Y-%m-%d')" \
            && git push origin ${{ env.REPORTS_BRANCH }}